Skip to navigation

Removing duplicate lines from a file

Case-insensitive:

# Keep the first occurrence of each line, comparing whole lines ($0) with case folded.
# "$file" is opened for reading before rm unlinks it, so awk still sees the old contents.
{ rm -- "$file" && awk '!x[tolower($0)]++' > "$file"; } < "$file"

Case-sensitive:

# Keep the first occurrence of each line, comparing whole lines ($0) exactly.
# "$file" is opened for reading before rm unlinks it, so awk still sees the old contents.
{ rm -- "$file" && awk '!x[$0]++' > "$file"; } < "$file"

Here's a shell script you could use; as it stands, it only works on one file at a time:

#!/bin/bash                                                                                  

# Print the help text (script name, purpose and supported options) to stdout.
# Takes no arguments; callers decide the exit status themselves.
usage()
{
# ${0##*/} strips the directory part of $0 without word-splitting, so the
# script name is shown correctly even when its path contains spaces.
cat << EOF

usage: ${0##*/} options filename

Removes duplicates from a file

OPTIONS:
   -h      Show this message
   -i      case insensitive
EOF
}

# Comparison mode read by the de-duplication step:
# "yes" folds case before comparing, "no" compares lines exactly.
insensitive="no"

# Parse the command-line flags. getopts reports any unrecognised flag as "?",
# which the catch-all arm turns into the usage text and a failing exit status.
# (getopts never produces an empty option name, so no separate arm is needed.)
while getopts "hi" option; do
    case "$option" in
        h)
            usage ; exit 0
            ;;
        i)
            insensitive="yes"
            ;;
        *)
            usage ; exit 1
            ;;
    esac
done

# Drop the parsed flags so that $1 is the first non-option argument.
shift $((OPTIND - 1))

# The first argument remaining after option parsing is the file to de-duplicate.
filename="$1"

if [ -z "$filename" ] ; then
    # No file given: show the help text and fail.
    usage ; exit 1
elif [ ! -f "$filename" ] ; then
    # -f is false for missing paths and for non-regular files (e.g. directories),
    # so the message covers both cases; diagnostics belong on stderr.
    printf '%s\n' "File $filename does not exist or is not a regular file" >&2 ; exit 1
fi

# make a backup; stop here if it cannot be written, because the next step
# unlinks the original and the backup would be the only remaining copy
if ! cp -- "$filename" "$filename"~ ; then
    printf '%s\n' "Could not create backup $filename~" >&2 ; exit 1
fi

# Rewrite the file in place, keeping only the first occurrence of each line.
# The group's stdin is opened on the old file before rm unlinks it, so awk
# still reads the original contents while the output redirection creates a
# fresh file under the same name.
# The awk key is the whole line ($0): keying on $1 would treat lines that
# merely share their first field as duplicates.
if [ "$insensitive" == "yes" ] ; then
    { rm -- "$filename" && awk '!x[tolower($0)]++' > "$filename"; } < "$filename"
else
    { rm -- "$filename" && awk '!x[$0]++' > "$filename"; } < "$filename"
fi

Here's some nice concise information about getopts and shift.

Leave a Reply