Linux Awk Comparing Two CSV Files and Creating a New File with a Flag

linux awk comparing two csv files and creating a new file with a flag

This might work for you:

diff  -W999 --side-by-side OLD NEW |
sed '/^[^\t]*\t\s*|\t\(.*\)/{s//\1 U/;b};/^\([^\t]*\)\t*\s*<$/{s//\1 D/;b};/^.*>\t\(.*\)/{s//\1 N/;b};d'
DTL,11111111,1111111111111111,11111111111,Y,Y,xx,xx U
DTL,22222222,2222222222222222,22222222222,Y,N,cc,cc U
DTL,33333333,3333333333333333,33333333333,Y,Y,dd,dd D
DTL,77777777,7777777777777777,77777777777,N,N,ee,ee N

an awk solution along the same lines:

diff -W999 --side-by-side OLD NEW |
awk '/[|][\t]/{split($0,a,"[|][\t]");print a[2]" U"};/[\t] *<$/{split($0,a,"[\t]* *<$");print a[1]" D"};/>[\t]/{split($0,a,">[\t]");print a[2]" N"}'
DTL,11111111,1111111111111111,11111111111,Y,Y,xx,xx U
DTL,22222222,2222222222222222,22222222222,Y,N,cc,cc U
DTL,33333333,3333333333333333,33333333333,Y,Y,dd,dd D
DTL,77777777,7777777777777777,77777777777,N,N,ee,ee N

Comparing two csv files and updating a field according to the comparison using awk

You can use awk like this:

cat csv.awk

BEGIN{FS=OFS=","}                
FNR==NR {
   a[$1 SUBSEP $5]
   next
}
{
   $5 = (($1 SUBSEP $5) in a)?"Match":"No Match"
}
1

Then use it as:

awk -f csv.awk file2.csv file1.csv

ID, Name, Address, Phone,Match
1, Jim, 12 Main,123-456-7890,No Match
2, Kim,11 Bush,987-654-3210,Match
3, Tim,33 Main,111-111-1111,Match
4, Zim,66 Main,111-555-1111,No Match

and also:

awk -f csv.awk file1.csv file2.csv

ID, Name, Address, Phone,Match, Blood Type, Left/Right Handed
1, Jon, 122 Main,333-456-7890,No Match, A Pos, Right
2, Tom,111 Bush,999-654-3210,Match, A Neg, Right
3, Tam,333 Main,111-222-1111,Match, O Neg, Left
4, Zam,99 Main,555-555-1111,No Match, A Pos, Left

Update: If you want to do it a single awk command then here it is.

cat csv.awk 

BEGIN{FS=OFS=","}
{
   key = $1 SUBSEP $5 
}
FNR == NR { 
   a[key]
   next
}
FILENAME == ARGV[2] { 
   if (key in a) {
      $5 = "Match"
      b[key]
   }
   else
      $5 = "No Match"
   print > "_" ARGV[2]
   next
}
{
   $5 = (key in b)?"Match":"No Match"
   print > "_" ARGV[3]
}

Then execute it as:

awk -f csv.awk file2.csv file1.csv file2.csv &&
mv _file1.csv file1.csv &&
mv _filw2.csv file2.csv

Compare two text files and if the second file has a row which contains both the columns of first file delete that row

This assumes the pairs in file1 never have the same value in both fields:

$ cat tst.awk
NR==FNR {
    pairs1[NR] = $1
    pairs2[NR] = $2
    next
}
{
    orig = $0
    gsub(/[[:space:],]+/," ")
    delete vals
    for (i=1; i<=NF; i++) {
        vals[$i]
    }
    for (nr in pairs1) {
        if ( (pairs1[nr] in vals) && (pairs2[nr] in vals) ) {
            next
        }
    }
    print orig
}

$ awk -f tst.awk file1 file2
         2002,       5052,       7001,       1500,       2500
         2003,       5051,       3500,       4500,       4952

Compare two columns from two files and append the output in any of the two files as a new column

Could you please try following, written and tested with shown samples in GNU awk.

awk '
BEGIN{
  FS=OFS=","
}
FNR==NR{
  arr[$2]=$NF
  next
}
{
  print $1,(($1 in arr)?($NF>arr[$1]?"Old_file":"New_File"):"No_Match")
}
'  Input_file1  Input_file2

Explanation: Adding detailed explanation for above.

awk '                           ##Starting awk program from here.
BEGIN{                          ##Starting BEGIN section of this program from here.
  FS=OFS=","                    ##Setting field separator and outpout field separator as comma.
}
FNR==NR{                        ##Checking condition if FNR==NR when Input_file1 is being read.
  arr[$2]=$NF                   ##Creating arr with index of 2nd field and having value as last field.
  next                          ##next will skip further statements from here.
}
{
  print $1,($1 in arr)?($NF>arr[$1]?"Old_file":"New_File"):"No_Match"
                                ##printing 1st field and checking condition if 1st field is present in arr then check
                                ##if last field is greater than arr value then print Old_file else print
                                ##New_file OR if 1st field is NOT in arr then print No_Match.
}
'  Input_file1  Input_file2     ##Mentioning Input_file names here.

Shell script: Check if data in columns `X` from two CSV files are matched

[ "$(cut -d' ' -f1 a.txt)" = "$(cut -d' ' -f1 b.txt)" ]; echo $?

Explanation:

[ "string1" = "string2" ] - The test command. If the string1 equals to the string2, it returns 0, else 1. See man test for another information.
cut -d' ' -f1 a.txt - cut the first column from the file a.txt.
- -d' ' - set the field delimiter to the space.
- -f1 - select only the field number 1. You can use a variable, instead of the number 1 in this case, like the num=1; [ "$(cut -d' ' -f$num a.txt)" = "$(cut -d' ' -f$num b.txt)" ]; echo $?.
echo $? - print the exit status of the last executed program.

Match common IDs between two huge csv files

Can do this with standard join utility

file1.txt

1 a
2 b
3 c

file2.txt

1 002
2 x90
5 d07

join example

join -1 1 -2 1 -o 1.1,1.2,2.2 file1.txt file2.txt

here join is joining from file1.field1 to file2.field2 and outputting the fields specified with the -o flag

output

1 a 002
2 b x90

Compare two files and output results according to conditionals

Using any awk in any shell on every Unix box and assuming that your quoted fields don't contain ;s and that you don't care about the output order:

$ cat tst.awk
BEGIN { FS=";" }
NR==FNR {
    first[$1]
    next
}
{
    print
    delete first[$1]
}
END {
    for ( i in first ) {
        print i
    }
}

$ awk -f tst.awk file1.csv file2.csv
"router51";"DatacenterA - cab1";"Prod - Tenant12"
"switch33";"DatacenterB - cab14";"Prod - Tenant4"
"switch2";"DatacenterA - cab3";"Dev - Tenant5"
"router44";"DatacenterC - cab2";"Test - Tenant2"
"switch6"
"router12"
"switch10"

If you do care about the output order you could use the decorate/sort/undecorate idiom with any awk+sort+cut:

$ cat tst.awk
BEGIN { FS=OFS=";" }
NR==FNR {
    first[$1]
    next
}
{
    prt($0)
    delete first[$1]
}
END {
    for ( i in first ) {
        prt(i)
    }
}

function prt(str,       arr, alpha, numeric) {
    split(str,arr)
    alpha = numeric = arr[1]
    sub(/[0-9].*/,"",alpha)
    gsub(/[^0-9]/,"",numeric)
    print alpha, numeric, str
}

$ awk -f tst.awk file1.csv file2.csv | sort -t';' -k1,1 -k2,2n | cut -d';' -f3-
"router12"
"router44";"DatacenterC - cab2";"Test - Tenant2"
"router51";"DatacenterA - cab1";"Prod - Tenant12"
"switch2";"DatacenterA - cab3";"Dev - Tenant5"
"switch6"
"switch10"
"switch33";"DatacenterB - cab14";"Prod - Tenant4"

Linux Awk Comparing Two CSV Files and Creating a New File with a Flag