Skip to navigation

Removing duplicate lines from a file

Case-insensitive:

# Keep the first occurrence of each line, comparing whole lines ($0) without regard to case.
# The input stays readable after rm because the redirection opened it before it was unlinked.
{ rm "$file" && awk '!x[tolower($0)]++' > "$file"; } < "$file"

Case-sensitive:

# Keep the first occurrence of each line, comparing whole lines ($0) exactly.
# The input stays readable after rm because the redirection opened it before it was unlinked.
{ rm "$file" && awk '!x[$0]++' > "$file"; } < "$file"

Here's a shell script you could use; as it stands, it only works for one file at a time:

#!/bin/bash                                                                                  
                                                                                             
# Print the help text for this script on standard output.
# Callers decide the exit status and may redirect the text to stderr.
usage() {
    # The EOF terminator must sit alone on its line with no surrounding
    # whitespace, otherwise the here-document never ends.
    cat << EOF

usage: ${0##*/} options filename

Removes duplicates from a file

OPTIONS:
   -h      Show this message
   -i      case insensitive
EOF
}
                                                                                             
# "yes" when -i was given: duplicates are then compared ignoring case.
insensitive="no"

# Parse the command-line flags. getopts sets $option to "?" for an
# unknown flag and prints its own diagnostic, so the catch-all arm only
# needs to show the usage text and fail.
while getopts "hi" option; do
    case "$option" in
        h)
            usage
            exit 0
            ;;
        i)
            insensitive="yes"
            ;;
        *)
            usage >&2
            exit 1
            ;;
    esac
done

# Drop the parsed options so that $1 is the file to process.
shift $((OPTIND - 1))

filename="$1"

# Require exactly the things the rest of the script relies on: a
# non-empty argument that names an existing regular file.
# Diagnostics go to stderr so they never mix with regular output.
if [[ -z "$filename" ]]; then
    usage >&2
    exit 1
elif [[ ! -f "$filename" ]]; then
    printf 'File %s does not exist\n' "$filename" >&2
    exit 1
fi

# Remove duplicate lines from a file in place, keeping the first
# occurrence of each line and preserving the original line order.
# Arguments:
#   $1 - path of the file to rewrite
#   $2 - "yes" to compare lines case-insensitively, anything else for
#        an exact comparison
# Side effects: writes a backup copy of the original to "<file>~".
# Returns: non-zero if the backup or the rewrite fails.
dedupe_lines() {
    local file=$1 ignore_case=$2
    local backup="${file}~"

    # Key on the whole line ($0); keying on $1 would treat lines that
    # merely share their first word as duplicates.
    local awk_prog='!seen[$0]++'
    if [[ "$ignore_case" == "yes" ]]; then
        awk_prog='!seen[tolower($0)]++'
    fi

    # make a backup; never touch the original unless the backup succeeded
    cp -- "$file" "$backup" || return 1

    # Read from the backup and overwrite the original in place, which
    # keeps the original file's inode and permissions.
    awk "$awk_prog" < "$backup" > "$file"
}

# Last command of the script: its status becomes the script's exit status.
dedupe_lines "$filename" "$insensitive"

Here's some nice concise information about getopts and shift.

Comments are closed.