#!/bin/bash
#  A "delete duplicates" utility for Linux. 
#  Renames & deletes files in the current directory
#  that match (by name & size) a file at the given path.
#  A file extension override can be given as a command line option,
#  but naturally there is no size check in that case!
#
#  Copyright 2012 Arto Jääskeläinen   
#  temp001(at)pp.inet.fi, ajaaskel(at)forum.ubuntu-fi.org
#
#  This program is free software; you can redistribute it and/or 
#  modify it under the terms of the GNU General Public License as 
#  published by the Free Software Foundation; either version 2 of the
#  License, or (at your option) any later version.
# 
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  Revision history:
#  Ver 1.10 February 10, 2012 / A.J. support for override mode included
#  Ver 1.11 February 11, 2012 / A.J. command line filter improved
#  Ver 1.12 February 11, 2012 / A.J. truncated file detection fix
#  Ver 1.13 February 11, 2012 / A.J. white space problem fixed
#  Ver 1.14 February 11, 2012 / A.J. white space part2
#  Ver 1.15 2014-11-05  Bug fixes

THIS_VER=1.15    # <-- Remember to update on every release.
#
# Version control variable: GET_VERSION inspects only the first 14 bytes of
# each line of the installed copy and looks for the assignment there, so it
# must stay at the very start of its line.
# Do not add ANY characters in front of the line above.
#

GET_VERSION()
# Determines which version of the utility is installed in /usr/local/bin.
# Globals read:    installname   - file name of the installed utility
# Globals written: INSTALLED_VER - 0     when no installed copy exists
#                                  1.00  when a copy exists without a version line
#                                  x.yy  the number found on its THIS_VER= line
{
local installed=/usr/local/bin/"$installname"
if [ -e "$installed" ]; then  # Some version of the utility exists ?
    # Only the first 14 bytes of every line are inspected, then everything that
    # is not part of the number is stripped from the first matching line.
    # [[:alpha:]] replaces the former range a-Z, which sed rejects
    # ("Invalid range end") in the C locale and made a valid install look
    # like version 1.00.
    INSTALLED_VER=$(cut -b 1-14 "$installed" | grep -m 1 THIS_VER= | sed 's/[[:alpha:] _#<=-]*//g')
    if [ -z "$INSTALLED_VER" ]; then   #No version number found
        INSTALLED_VER=1.00
    fi
else
    INSTALLED_VER=0    #No ver file found
fi
}

CONV_IN() #Returns string looking like integer
# Turns a "major.minor" version string into digits that can be compared
# numerically, always using exactly two digits for the minor part.
# Entry:  ST  - version string, e.g. 1.15, 1.5 or 0
# Return: RET - e.g. 115, 150 or 000
{
local whole frac=""
whole="${ST%%[!0-9]*}"          #Leading digits = all before dot
case "$ST" in
    *.*) frac="${ST#*.}" ;;     #All after first dot; stays empty without a dot
esac
frac="${frac%%[!0-9]*}"         #Ignore anything that is not a digit
frac="${frac}00"                #Pad on the right ...
RET="${whole:-0}${frac:0:2}"    #... and force always two digits
}

INSTALL()
# Copies the running script to /usr/local/bin when it is newer than the
# installed copy, then re-reads the installed version to verify the result.
# Globals read:    THIS_VER, THIS_VER_IN, INSTALLED_VER_IN, installname
# Globals written: INSTALLED_VER, INSTALLED_VER_IN, ST, RET,
#                  installer_run ("yes" after a verified install)
{
# Nothing to do unless this script is newer than what is installed.
[ "$THIS_VER_IN" -gt "$INSTALLED_VER_IN" ] || return 0

sudo cp "$0" /usr/local/bin/"$installname"

# Read the version back from the installed file.
GET_VERSION
ST="$INSTALLED_VER"
CONV_IN
INSTALLED_VER_IN="$RET"

if [ "$THIS_VER_IN" != "$INSTALLED_VER_IN" ] ; then
    fgred
    printf '%s\n' 'Could not install/update '"$installname"' utility.'
    printf '%s\n' "this ver: $THIS_VER  installed ver: $INSTALLED_VER"
    fgdefault
    return
fi

printf '\n'
fgblue
printf '%s\n' 'Installed/updated '"$installname"' utility succesfully.'
fgdefault
installer_run="yes"
}

UNINSTALL()
# Removes the installed copy from /usr/local/bin and reports the outcome.
# Globals read: installname
# Returns 1 when there is nothing to uninstall.
{
local target=/usr/local/bin/"$installname"

if [ ! -e "$target" ]; then
    fgblue
    printf '%s\n' "Nothing to uninstall."
    fgdefault
    return 1
fi

sudo rm "$target" 2>/dev/null

# Success is judged by whether the file is really gone.
if [ -e "$target" ]; then
    fgred
    printf '%s\n' "Could not uninstall $installname  utility."
    fgdefault
else
    fgblue
    printf '%s\n' "Uninstall succesful."
    fgdefault
fi
}


fgred()
# Switches the terminal foreground to bold red (ANSI SGR 31;01).
{
printf '\033[31;01m'
}

fgblue()
# Switches the terminal foreground to bold blue (ANSI SGR 34;01).
{
printf '\033[34;01m'
}

fgbold()
# Switches the terminal to bold text (ANSI SGR 1).
{
printf '\033[1m'
}

fgdefault()
# Resets all text attributes set by fgred / fgblue / fgbold.
# Those helpers emit raw ANSI sequences regardless of $TERM, so the reset
# must not depend on terminfo alone: when tput is missing or cannot handle
# the terminal type, fall back to the plain ANSI reset instead of leaving
# the colour switched on and printing a tput error.
{
tput sgr0 2>/dev/null || printf '\033[0m'
}


HELP_SHORT()
# Prints the one-screen usage summary: title line followed by the two
# invocation forms in bold.
# Globals read: installname, THIS_VER
{
printf '\n%s\n\n\n' "$installname v$THIS_VER --Rename and delete duplicates"
fgbold
printf '%s\n\n%s\n' \
    "    $installname [ +ext1=ext2+ ]  path_to_filenames_to_compare" \
    "    $installname [-h | --help, -u | --uninstall]"
fgdefault
printf '\n'
}

HELP()
# Prints the full help text: title, usage forms (bold), description of the
# standard mode, an override example (bold) and notes on override mode.
# Globals read: installname, THIS_VER
{
# Title
printf '%s\n' \
    '' \
    "  $installname v$THIS_VER --Rename and delete duplicates" \
    ''

# Usage forms
fgbold
printf '%s\n' \
    '' \
    "        $installname [ +ext1=ext2+ ]  path_to_filenames_to_compare" \
    '' \
    "        $installname [-h | --help, -u | --uninstall]" \
    ''
fgdefault

# Standard mode description
printf '%s\n' \
    '  Reads file names from path given and looks for matching filenames' \
    '  in current directory. Renames matching files in current directory' \
    '  to .dup extension. Renamed files can be either deleted by pressing' \
    '  capital R or renamed back if needed.' \
    '  Compares and makes sure that also file sizes match and logs errors (a truncated' \
    '  file etc.) to <homedir>/delduplog/ directory. Files with incorrect size are left intact.' \
    '' \
    '  Optional file extension override can be specified using two plus signs, an example:'

# Override example
fgbold
printf '%s\n' \
    '' \
    "        $installname +mp3=ogg+  path_to_filenames_to_compare" \
    ''
fgdefault

# Override notes
printf '%s\n' \
    '  which means: Rename/delete only those mp3 files in current directory' \
    '  which are also stored as ogg at the given path.' \
    '  Be aware: Checking of matching file size is not possible if you use override !' \
    "  On first run $installname will install itself to /usr/local/bin." \
    ''
}


error1()
# Fatal: the comparison path is the current directory. Exits with status 1.
{
printf '\n'
fgred
printf '%s\n' '"path_to_filenames_to_compare" cannot be your current directory unless using override.'
fgdefault
printf '\n'
exit 1
}

error2()
# Fatal: the comparison directory does not exist. Exits with status 255.
# The directory name is taken from the global dirpath, not from arguments.
{
printf '\n'
fgred
printf '%s\n' "Cannot find directory $dirpath"
fgdefault
printf '\n'
exit 255
}

error3()
# Fatal: unrecognised command line. Echoes the offending command line
# (global cmdline), shows the short help and exits with status 255.
{
printf '\n'
fgred
printf '%s\n' "Invalid parameter $cmdline"
fgdefault
HELP_SHORT
printf '\n'
exit 255
}

error4()
# Fatal: no comparison path was given. Exits with status 255.
{
printf '\n'
fgred
printf '%s\n' '"path_to_filenames_to_compare" missing.'
fgdefault
printf '\n'
exit 255
}

error5()
# Fatal: malformed extension override. Prints an example of the expected
# syntax and exits with status 255.
{
printf '\n'
fgred
printf '%s\n' "Invalid parameter"
fgdefault
printf '%s\n' \
    "Override example: deldup +mp3=ogg+ /some/path/here/with/ogg/files." \
    "Matching mp3 files at current directory will be renamed/deleted." \
    ""
exit 255
}

check_vars()
# Sanity check of an override command line held in the global dirpath:
# it must contain at least one "=" and exactly two "+" characters,
# otherwise error5 is called (which terminates the script).
# Globals written: is_eq, is_plus (the two character counts)
{
# Deleting every other character leaves only the ones to be counted.
local only_eq="${dirpath//[!=]/}"
local only_plus="${dirpath//[!+]/}"
is_eq="${#only_eq}"
is_plus="${#only_plus}"
if [ "$is_eq" -ne 0 ] && [ "$is_plus" -eq 2 ] ; then
    return 0
fi
error5
}


drop_extension()
# Cuts off the last file extension of path $1 and stores the result in the
# global newname. Only the file name itself is examined, so a dot in the
# directory part (e.g. the leading "./" produced by find) is never mistaken
# for an extension; a name without extension is returned unchanged.
{
local path="$1" dir="" base
base="${path##*/}"              # file name without directories
if [ "$base" != "$path" ] ; then
    dir="${path%/*}/"           # directory part, including trailing slash
fi
newname="${dir}${base%.*}"      # strip shortest ".something" suffix
}

override()
# Swaps the extension of file name $1 for the one held in the global ext.
# Usage: set "ext", then call "override <filename>"; the result is left in
# the global newname.
{
drop_extension "$1"     # leaves the bare name in newname
newname+=".${ext}"
}

check_size_match()
# Compares the size in bytes of $filename (current dir) with $fullpath
# (comparison dir).
# Globals read:    filename, fullpath, logfile
# Globals written: filesize_1, filesize_2, size_mismatches
# Returns 0 when both sizes could be read and are equal. Otherwise logs the
# problem to $logfile, increments size_mismatches and returns 255.
# A size that cannot be read counts as a mismatch: a file that cannot be
# verified must never be treated as a safe-to-delete duplicate.
{
if ! filesize_1=$(stat -c%s -- "$filename") ||   #In current dir
   ! filesize_2=$(stat -c%s -- "$fullpath") ; then  #In list dir
    size_mismatches=$(( size_mismatches + 1 ))
    echo "$filename file size could not be read" >> "$logfile"
    return 255
fi
if [ "$filesize_1" -ne "$filesize_2" ] ; then
    size_mismatches=$(( size_mismatches + 1 ))
    echo "$filename file size mismatch" >> "$logfile"
    return 255
fi
return 0
}

rendup()
# Renames file $1 of the current directory to "<name>.dup" when a matching
# file exists in $dirpath. Prints the name of every renamed file and counts
# it in the global matches.
#
# Standard mode prerequisites:
#   $original_extensions="yes", $check_size="yes", $dirpath
#   A match is a file with the same name; with $check_size="yes" the sizes
#   must also agree (see check_size_match).
# Override mode specifics:
#   $original_extensions="no", $check_size="no", $ext_cur, $ext_path
#   Only files ending in .$ext_cur are considered; a match is the same name
#   with extension .$ext_path in $dirpath.
#
# Globals written: filename, filename_c, fullpath, matches and, in override
# mode, ext, fn_cur, fn_path (newname is set through override).
{
filename="$1"
filename_c="${filename#./}"     # remove possibly leading ./
fullpath="$dirpath/$filename_c"

if [ "$original_extensions" = "yes" ] ; then
    [ -f "$fullpath" ] || return 0
    # Decide on THIS file's comparison result. Testing the running
    # size_mismatches total would block every rename after the first
    # mismatch.
    if [ "$check_size" = "yes" ] && ! check_size_match ; then
        return 0                # Mismatch is logged; file is left intact
    fi
    if mv -- "$filename" "$filename.dup" ; then   # Count only real renames
        echo "$filename"
        matches=$(( matches + 1 ))
    fi
    return 0
fi

#File extensions override
ext="$ext_cur"        #User given extension for files at current dir
override "$filename"  # Extension removed and replaced by user given one
fn_cur="$newname"     # Acceptable filename
[ "$filename" = "$fn_cur" ] || return 0   #Accept only files with user given extension

ext="$ext_path"       #Extension for file at path-to-search
override "$filename"
fn_path="$newname"
fullpath="$dirpath/$fn_path"
if [ -f "$fullpath" ] && mv -- "$fn_cur" "$fn_cur.dup" ; then
    echo "$fn_cur"
    matches=$(( matches + 1 ))
fi
return 0
}


############################################################
# Main
############################################################
# Flow: version/install check -> option parsing -> validation of the
# comparison path (and optional +ext1=ext2+ override) -> rename duplicates
# to .dup -> optionally delete them -> optionally rename leftovers back.

#Version/update control ----------------------
cmdline="$*"        #Whole command line as one string (used in messages)
installname="deldup"
installer_run="no"  #Installer will change this
GET_VERSION
ST="$INSTALLED_VER"
CONV_IN
INSTALLED_VER_IN="$RET"
ST="$THIS_VER"
CONV_IN
THIS_VER_IN="$RET"
args="$#"
if [ "$args" = 0 ] ; then
    INSTALL
    if [ "$installer_run" = "no" ] ; then
        error4
    fi
    HELP_SHORT; exit 1  #No command line parameters
fi
#-------------------------------------------------------------

case "$1" in
    -h | --help)       HELP;exit 0 ;;
    -u | --uninstall)  UNINSTALL;exit 0 ;;
    -v | --version)     echo "$installname v$THIS_VER";exit 0 ;;
    -*)                error3 ;;   # Catch junk arguments & exit
esac

#-------------------------------------------------------------
# Global defaults
#
dirpath="$*"        #All arguments joined: a path may be typed without quotes
matches=0
check_size="yes"
original_extensions="yes"
newname=""
ext=""
ext_cur=""
ext_path=""
size_mismatches=0
datetimenow="$(date "+%F_%T")"
mkdir -p ~/delduplog 2>/dev/null
logfile=~/delduplog/deldup_"$datetimenow".log
echo "Deldup started at $datetimenow" >> "$logfile"

# Hidden files are renamed to .dup as well (find lists them), so the *.dup
# globs below must see them too or they would be stranded.
shopt -s dotglob

# The current directory pattern is quoted so that glob characters in the
# directory name are compared literally.
case "$dirpath" in
    . | ./ | ./. | "$(pwd)")  error1 ;;
esac


# Command line not allowed to start with [:alnum:] or "="
first_char="${dirpath:0:1}"
first_alnum=$(printf '%s' "$first_char" | tr -d "[:punct:]")
if [ -n "$first_alnum" ] || [ "$first_char" = "=" ] ; then
    error3
fi

if [ "$first_char" = "+" ] ; then  #Override mode
    check_vars "$dirpath"           # Guarantees exactly two "+" from here on
    spec_and_path="${dirpath#+}"
    ext_vars="${spec_and_path%%+*}"    # Text between the two plus signs
    dirpath="${spec_and_path#*+}"      # Everything after the second plus
    # Cut off leading white space only; spaces inside the path must survive
    dirpath="${dirpath#"${dirpath%%[![:space:]]*}"}"
    if [ "$dirpath" = '' ] ; then error4; fi
    case "$ext_vars" in
        *=*) ;;
        *)   error5 ;;              # The "=" was somewhere in the path instead
    esac
    ext_cur="${ext_vars%%=*}"       # First field
    ext_path="${ext_vars#*=}"
    ext_path="${ext_path%%=*}"      # Second field
    if [ -z "$ext_cur" ] || [ -z "$ext_path" ] ; then
        error5                      # An empty extension can never match sensibly
    fi
    original_extensions="no"
    check_size="no"
    echo "Filename extension overdrive mode"
fi

if [ ! -d "$dirpath" ]; then error2 "$dirpath"; fi
echo
echo "Found directory: $dirpath"
echo -n "Start to search for duplicate files in current directory $PWD y/n ?"
read -r answer
echo
if [ "$answer" = "y" ] || [ "$answer" = "Y" ]; then
    # Get file name list from current dir. The list is collected completely
    # before anything is renamed, and read NUL-delimited so that names with
    # white space or glob characters arrive unchanged.
    flist=()
    while IFS= read -r -d '' entry; do
        flist+=("$entry")
    done < <(find . -maxdepth 1 -type f -print0)

    for entry in "${flist[@]}"; do
        rendup "$entry"  # Check & rename each duplicate in current dir
    done

    if [ "$matches" -gt 0 ] ; then
        echo -n "$matches files renamed to .dup, only capital R will remove them permanently, R/n ? "
        read -r answer
        if [ "$answer" = "R" ]; then
            rm -- ./*.dup
        else
            echo "Files not deleted but left intact as .dup files."
            echo
        fi
    else
        echo "No duplicates found."
    fi
fi

if [ "$size_mismatches" -ne 0 ] ; then
    fgred
    echo "File size mismatch was detected with $size_mismatches file(s),"
    echo "see details at <homedir>/delduplog."
    fgdefault
    echo
fi

# Offer to restore any .dup files still present. Having none is the normal
# end of a run, so leave with success status.
dup_files=(./*.dup)
if [ ! -e "${dup_files[0]}" ] ; then
    exit 0
fi
echo -n "Files renamed to .dup available, rename them back to original y/n ?"
read -r answer
revcount=0
if [ "$answer" = "y" ] || [ "$answer" = "Y" ]; then
    for f in "${dup_files[@]}"; do
        if mv -- "$f" "${f%.dup}" ; then    # Count only successful renames
            revcount=$(( revcount + 1 ))
        fi
    done
    echo "\".dup\" extension removed from $revcount files."
    echo
fi

