linux awk comparing two csv files and creating a new file with a flag
This might work for you:
diff -W999 --side-by-side OLD NEW |
sed '/^[^\t]*\t\s*|\t\(.*\)/{s//\1 U/;b};/^\([^\t]*\)\t*\s*<$/{s//\1 D/;b};/^.*>\t\(.*\)/{s//\1 N/;b};d'
DTL,11111111,1111111111111111,11111111111,Y,Y,xx,xx U
DTL,22222222,2222222222222222,22222222222,Y,N,cc,cc U
DTL,33333333,3333333333333333,33333333333,Y,Y,dd,dd D
DTL,77777777,7777777777777777,77777777777,N,N,ee,ee N
an awk solution along the same lines:
diff -W999 --side-by-side OLD NEW |
awk '/[|][\t]/{split($0,a,"[|][\t]");print a[2]" U"};/[\t] *<$/{split($0,a,"[\t]* *<$");print a[1]" D"};/>[\t]/{split($0,a,">[\t]");print a[2]" N"}'
DTL,11111111,1111111111111111,11111111111,Y,Y,xx,xx U
DTL,22222222,2222222222222222,22222222222,Y,N,cc,cc U
DTL,33333333,3333333333333333,33333333333,Y,Y,dd,dd D
DTL,77777777,7777777777777777,77777777777,N,N,ee,ee N
Comparing two csv files and updating a field according to the comparison using awk
You can use awk like this:
cat csv.awk
# Flag every record of the second input file according to whether its
# (field 1, field 5) pair also occurs in the first input file.
# Input and output fields are comma-separated.
BEGIN{FS=OFS=","}
# Rule for the first file on the command line (FNR==NR holds while it is
# being read): remember each (field 1, field 5) pair as an array key and
# print nothing.
FNR==NR {
a[$1 SUBSEP $5]
next
}
# Remaining input: overwrite field 5 with the result of the lookup.
{
$5 = (($1 SUBSEP $5) in a)?"Match":"No Match"
}
# Always-true pattern without an action: print the (modified) record.
1
Then use it as:
awk -f csv.awk file2.csv file1.csv
ID, Name, Address, Phone,Match
1, Jim, 12 Main,123-456-7890,No Match
2, Kim,11 Bush,987-654-3210,Match
3, Tim,33 Main,111-111-1111,Match
4, Zim,66 Main,111-555-1111,No Match
and also:
awk -f csv.awk file1.csv file2.csv
ID, Name, Address, Phone,Match, Blood Type, Left/Right Handed
1, Jon, 122 Main,333-456-7890,No Match, A Pos, Right
2, Tom,111 Bush,999-654-3210,Match, A Neg, Right
3, Tam,333 Main,111-222-1111,Match, O Neg, Left
4, Zam,99 Main,555-555-1111,No Match, A Pos, Left
Update: If you want to do it in a single awk command, then here it is.
cat csv.awk
# Three-pass variant: meant to be run with three file arguments, the third
# being the same file as the first (fileA fileB fileA). The flagged copies are
# written to "_" ARGV[2] and "_" ARGV[3] instead of standard output.
BEGIN{FS=OFS=","}
# Runs for every record of every file: build the lookup key from fields 1 and 5
# before any later rule overwrites field 5.
{
key = $1 SUBSEP $5
}
# Pass 1 (first file argument): collect all keys, print nothing.
FNR == NR {
a[key]
next
}
# Pass 2 (records whose FILENAME equals ARGV[2]): set field 5 to the lookup
# result, remember in b every key that matched, and write the record to
# the file named "_" ARGV[2].
FILENAME == ARGV[2] {
if (key in a) {
$5 = "Match"
b[key]
}
else
$5 = "No Match"
print > "_" ARGV[2]
next
}
# Pass 3 (all remaining records): flag field 5 using the matches recorded in b
# during pass 2 and write the record to the file named "_" ARGV[3].
{
$5 = (key in b)?"Match":"No Match"
print > "_" ARGV[3]
}
Then execute it as:
# csv.awk writes the flagged copies to _file1.csv and _file2.csv; the
# originals are replaced only if awk and every preceding mv succeeded.
awk -f csv.awk file2.csv file1.csv file2.csv &&
mv _file1.csv file1.csv &&
mv _file2.csv file2.csv
Compare two text files and if the second file has a row which contains both the columns of first file delete that row
This assumes the pairs in file1 never have the same value in both fields:
$ cat tst.awk
# Print only those lines of the second file that do NOT contain both values
# of any pair listed in the first file.

# First file: store the two columns of each line, indexed by line number.
NR==FNR {
pairs1[NR] = $1
pairs2[NR] = $2
next
}
{
# Keep the untouched line for output; the gsub below rewrites the record.
orig = $0
# Replace every run of whitespace and/or commas with a single space so that
# the record is re-split into plain values.
gsub(/[[:space:],]+/," ")
# Rebuild the set of values present on the current line.
delete vals
for (i=1; i<=NF; i++) {
vals[$i]
}
# Skip the line as soon as one pair has both of its values on it.
for (nr in pairs1) {
if ( (pairs1[nr] in vals) && (pairs2[nr] in vals) ) {
next
}
}
# No pair matched completely: emit the line exactly as it was read.
print orig
}
$ awk -f tst.awk file1 file2
2002, 5052, 7001, 1500, 2500
2003, 5051, 3500, 4500, 4952
Compare two columns from two files and append the output in any of the two files as a new column
Could you please try the following, written and tested with the shown samples in GNU awk.
awk '
BEGIN{
FS=OFS=","
}
FNR==NR{
arr[$2]=$NF
next
}
{
print $1,(($1 in arr)?($NF>arr[$1]?"Old_file":"New_File"):"No_Match")
}
' Input_file1 Input_file2
Explanation: Adding detailed explanation for above.
awk ' ##Starting awk program from here.
BEGIN{ ##Starting BEGIN section of this program from here.
FS=OFS="," ##Setting field separator and output field separator to comma.
}
FNR==NR{ ##Checking condition FNR==NR, which is true while Input_file1 is being read.
arr[$2]=$NF ##Creating arr with the 2nd field as index and the last field as value.
next ##next will skip all further statements from here.
}
{
print $1,($1 in arr)?($NF>arr[$1]?"Old_file":"New_File"):"No_Match"
##Printing the 1st field, then checking if the 1st field is present in arr: if so,
##print Old_file when the last field is greater than the arr value, else print
##New_File; if the 1st field is NOT in arr, print No_Match.
}
' Input_file1 Input_file2 ##Mentioning Input_file names here.
Shell script: Check if data in columns `X` from two CSV files are matched
[ "$(cut -d' ' -f1 a.txt)" = "$(cut -d' ' -f1 b.txt)" ]; echo $?
Explanation:
- `[ "string1" = "string2" ]` - the test command. If `string1` is equal to `string2`, it returns `0`, else `1`. See `man test` for more information.
- `cut -d' ' -f1 a.txt` - cut the first column from the file `a.txt`.
- `-d' '` - set the field delimiter to the space.
- `-f1` - select only field number `1`. You can use a variable instead of the number `1` in this case, like this: `num=1; [ "$(cut -d' ' -f$num a.txt)" = "$(cut -d' ' -f$num b.txt)" ]; echo $?`.
- `echo $?` - print the exit status of the last executed program.
Match common IDs between two huge csv files
You can do this with the standard join utility.
file1.txt
1 a
2 b
3 c
file2.txt
1 002
2 x90
5 d07
join example
join -1 1 -2 1 -o 1.1,1.2,2.2 file1.txt file2.txt
Here join is joining from file1.field1 to file2.field1 and outputting the fields specified with the -o flag.
output
1 a 002
2 b x90
Compare two files and output results according to conditionals
Using any awk in any shell on every Unix box, and assuming that your quoted fields don't contain `;`s and that you don't care about the output order:
$ cat tst.awk
# Print every line of the second file, followed by the first fields that
# occur only in the first file. Fields are separated by ";".
BEGIN { FS=";" }
# First file: remember each first field as an array key, print nothing.
NR==FNR {
first[$1]
next
}
# Second file: print the line as is and drop its first field from the set,
# so that only keys missing from the second file remain.
{
print
delete first[$1]
}
# Print the keys that were never seen in the second file; the iteration
# order of "for (i in first)" is unspecified.
END {
for ( i in first ) {
print i
}
}
$ awk -f tst.awk file1.csv file2.csv
"router51";"DatacenterA - cab1";"Prod - Tenant12"
"switch33";"DatacenterB - cab14";"Prod - Tenant4"
"switch2";"DatacenterA - cab3";"Dev - Tenant5"
"router44";"DatacenterC - cab2";"Test - Tenant2"
"switch6"
"router12"
"switch10"
If you do care about the output order you could use the decorate/sort/undecorate idiom with any awk+sort+cut:
$ cat tst.awk
# Same merge as the unordered version, but every output line is prefixed with
# two sort keys (decorate step) so that it can be sorted and the keys removed
# again afterwards. Input and output fields are separated by ";".
BEGIN { FS=OFS=";" }
# First file: remember each first field as an array key, print nothing.
NR==FNR {
first[$1]
next
}
# Second file: emit the decorated line and drop its first field from the set.
{
prt($0)
delete first[$1]
}
# Emit, decorated, the keys that were never seen in the second file.
END {
for ( i in first ) {
prt(i)
}
}
# prt(str): print str preceded by two key fields derived from its first
# ";"-separated field. arr, alpha and numeric are local variables.
function prt(str, arr, alpha, numeric) {
split(str,arr)
alpha = numeric = arr[1]
# alpha: everything before the first digit of the first field.
sub(/[0-9].*/,"",alpha)
# numeric: only the digits of the first field.
gsub(/[^0-9]/,"",numeric)
print alpha, numeric, str
}
$ awk -f tst.awk file1.csv file2.csv | sort -t';' -k1,1 -k2,2n | cut -d';' -f3-
"router12"
"router44";"DatacenterC - cab2";"Test - Tenant2"
"router51";"DatacenterA - cab1";"Prod - Tenant12"
"switch2";"DatacenterA - cab3";"Dev - Tenant5"
"switch6"
"switch10"
"switch33";"DatacenterB - cab14";"Prod - Tenant4"
Related Topics
Unix/Linux Ipc: Reading from a Pipe. How to Know Length of Data at Runtime
Trouble with Installing Nloptr by Locally on Ubuntu
Why Can One Remove/Rename Open Files in Linux
Using Curl with Commands in Go
Git Clone Using Ssh Failed in Windows Due to Permission Issue
Error: You Must Install at Least One Postgresql-Client-<Version> Package
How to Get CPU Serial Under Linux Without Root Permissions
How to Specify Which Kernel to Build with Bitbake/Yocto
Ffmpeg Img to Video = Could Find No File with Path
Undelete The Deleted Command in Bash
Where Is G_Multi Configured in Beaglebone Black
How to Attach Domain Name to My Server
Ssh Command Output to Save in a Text File in Shell Script
How to Find List of Valid Locales in My Linux Using Perl