Comparing two files using difflib in python
Check whether the first character of each element is a + or a -
(which marks the line as having changed):
import difflib

# Read both files fully; the handles are closed as soon as the lines are in memory.
with open("compare.txt") as f, open("test.txt") as g:
    flines = f.readlines()
    glines = g.readlines()

d = difflib.Differ()
# Keep only the rows whose first character is '+' or '-', i.e. the lines
# that were added or removed; every other row produced by compare() is dropped.
diffs = [x for x in d.compare(flines, glines) if x.startswith(('+', '-'))]
if diffs:
    # all rows with changes
    for row in diffs:
        # Rows keep the newline read from the file; strip it so print()
        # does not emit a blank line after every row.
        print(row.rstrip('\n'))
else:
    print('No changes')
When comparing two .txt files, the difflib module tells me that a line is unique ('-') when in fact it is present in both files
diff
doesn't check whether a line is unique across the whole file, only whether the line is in the same place in the other file - so you should sort the lines first.
But if you want to check whether lines exist in both files or are unique to one file, it is better to convert the lines to set()
and compare the sets.
Minimal working code
# Two sample line lists to compare, ignoring position.
a = ['A', 'B', 'C']
b = ['A', 'C', 'D']

print('a:', a)
print('b:', b)

# Sets drop ordering and duplicates, so membership is all that is compared.
set_a = set(a)
set_b = set(b)

duplicated = set_a & set_b  # present in both lists
unique_a = set_a - set_b    # present only in a
unique_b = set_b - set_a    # present only in b

# One loop prints all three sections; sorting gives a stable output order.
for title, items in (
    ('--- duplicated ---', duplicated),
    ('--- unique a ---', unique_a),
    ('--- unique b ---', unique_b),
):
    print(title)
    for item in sorted(items):
        print(item)
Result
a: ['A', 'B', 'C']
b: ['A', 'C', 'D']
--- duplicated ---
A
C
--- unique a ---
B
--- unique b ---
D
Comparing two .txt files using difflib in Python
For starters, you need to pass strings to difflib.SequenceMatcher, not files:
# Pass the two strings themselves as the sequences to compare
difflib.SequenceMatcher(isjunk=None, a=str1, b=str2)
# Alternatively, pass the full contents read from the two open files
difflib.SequenceMatcher(isjunk=None, a=file1.read(), b=file2.read())
That'll fix your error.
To get the first non-matching string, see the difflib documentation.
Using python difflib to compare more than two files
Pure Python solution, no libraries or extra dependencies.
Note: this solution works because of a few assumptions:
- The order of lines does not matter
- A line either exists or is missing (there is no logic to check similarity between lines)
from collections import defaultdict
import re
def transform(input):
    """Normalize one block of text and return its lines in sorted order.

    Every run of exactly 32 characters from ``[a-zA-Z0-9_.-]`` that is
    followed by ``/``, and every parenthesised group made up only of those
    characters, is replaced with ``<>``.

    Args:
        input: The text to normalize. The name shadows the builtin but is
            kept so existing keyword callers continue to work.

    Returns:
        The normalized text split into lines and sorted.
    """
    # differing hashvalues from ldd can be ignored, we only care about version and path
    # Raw string: "\(" and "\/" in a plain literal are invalid escape
    # sequences and raise a SyntaxWarning on recent Python versions.
    normalized = re.sub(
        r"([a-zA-Z0-9_.-]{32}/|\([a-zA-Z0-9_.-]*\))", "<>", input
    )
    return sorted(normalized.splitlines())
def generate_diff(outputs: dict, common_threshold: int = 0) -> None:
    """Print a combined diff of several text outputs, one row per distinct line.

    Each output is normalized with ``transform``. For every distinct line:

    * it is printed with a leading space when every output contains it;
    * it is printed as ``- line (targets)`` when at least
      ``common_threshold`` outputs contain it, listing the outputs that
      lack it;
    * otherwise it is printed as ``+ line (targets)``, listing the outputs
      that contain it.

    Args:
        outputs: Mapping of a target label to that target's text output.
        common_threshold: how many outputs need to contain line to consider
            it common and mark outputs that do not have it as missing.

    Raises:
        AssertionError: If ``common_threshold`` exceeds the number of outputs.
    """
    assert common_threshold <= len(outputs)

    # line -> set of targets whose output contains that line
    mapping = defaultdict(set)
    for target, output in outputs.items():
        for line in transform(output):
            mapping[line].add(target)

    # Built once instead of once per line.
    all_targets = set(outputs)
    for line in sorted(mapping):
        found = mapping[line]
        if len(outputs) == len(found):
            print(' ' + line)
        elif len(found) >= common_threshold:
            # Sets iterate in arbitrary order; sort so repeated runs print
            # the same target list.
            missed_str = ",".join(sorted(map(str, all_targets - found)))
            print(f'- {line} ({missed_str})')
        else:
            added_str = ",".join(sorted(map(str, found)))
            print(f'+ {line} ({added_str})')
Sample execution
# Sample input for generate_diff: five text outputs keyed by the labels 'A'..'E'.
# Each value is a triple-quoted string that begins with a newline, so the
# first line of every output is empty.
my_ldd_outputs = {
'A': """
linux-vdso.so.1 (0x00007ffde4f09000)
libtinfo.so.6 => /lib/x86_64-linux-gnu/libtinfo.so.6 (0x00007fe0594f3000)
libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007fe0592cb000)
/lib64/ld-linux-x86-64.so.2 (0x00007fe059690000)
""",
'B': """
linux-vdso.so.1 (0x00007fff697b6000)
libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f1c54045000)
/lib64/ld-linux-x86-64.so.2 (0x00007f1c54299000)
""",
'C': """
linux-vdso.so.1 (0x00007fffd61f9000)
libcrypto.so.3 => /lib/x86_64-linux-gnu/libcrypto.so.3 (0x00007f08a51a3000)
libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f08a4f7b000)
/lib64/ld-linux-x86-64.so.2 (0x00007f08a5612000)
""",
'D': """
linux-vdso.so.1 (0x00007ffcf9ddd000)
libcrypto.so.3 => /lib/x86_64-linux-gnu/libcrypto.so.3 (0x00007fa2e381b000)
libselinux.so.1 => /lib/x86_64-linux-gnu/libselinux.so.1 (0x00007fa2e37ef000)
libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007fa2e35c7000)
libpcre2-8.so.0 => /lib/x86_64-linux-gnu/libpcre2-8.so.0 (0x00007fa2e3530000)
/lib64/ld-linux-x86-64.so.2 (0x00007fa2e3cd7000)
""",
'E': """
linux-vdso.so.1 (0x00007ffc2deab000)
libcrypto.so.3 => /lib/x86_64-linux-gnu/libcrypto.so.3 (0x00007f31fed91000)
libz.so.1 => /lib/x86_64-linux-gnu/libz.so.1 (0x00007f31fed75000)
libselinux.so.1 => /lib/x86_64-linux-gnu/libselinux.so.1 (0x00007f31fed49000)
libgssapi_krb5.so.2 => /lib/x86_64-linux-gnu/libgssapi_krb5.so.2 (0x00007f31fecf5000)
libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f31feacd000)
libpcre2-8.so.0 => /lib/x86_64-linux-gnu/libpcre2-8.so.0 (0x00007f31fea34000)
/lib64/ld-linux-x86-64.so.2 (0x00007f31ff2af000)
libkrb5.so.3 => /lib/x86_64-linux-gnu/libkrb5.so.3 (0x00007f31fe969000)
libk5crypto.so.3 => /lib/x86_64-linux-gnu/libk5crypto.so.3 (0x00007f31fe93a000)
libcom_err.so.2 => /lib/x86_64-linux-gnu/libcom_err.so.2 (0x00007f31fe934000)
libkrb5support.so.0 => /lib/x86_64-linux-gnu/libkrb5support.so.0 (0x00007f31fe926000)
libkeyutils.so.1 => /lib/x86_64-linux-gnu/libkeyutils.so.1 (0x00007f31fe91f000)
libresolv.so.2 => /lib/x86_64-linux-gnu/libresolv.so.2 (0x00007f31fe909000)
"""
}
# Threshold 2: a line found in at least two (but not all) outputs is printed
# with '-' and the outputs lacking it; a line found in only one output is
# printed with '+' and the output that has it.
generate_diff(my_ldd_outputs, 2)
Outputs
/lib64/ld-linux-x86-64.so.2 <>
libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 <>
+ libcom_err.so.2 => /lib/x86_64-linux-gnu/libcom_err.so.2 <> (E)
- libcrypto.so.3 => /lib/x86_64-linux-gnu/libcrypto.so.3 <> (B,A)
+ libgssapi_krb5.so.2 => /lib/x86_64-linux-gnu/libgssapi_krb5.so.2 <> (E)
+ libk5crypto.so.3 => /lib/x86_64-linux-gnu/libk5crypto.so.3 <> (E)
+ libkeyutils.so.1 => /lib/x86_64-linux-gnu/libkeyutils.so.1 <> (E)
+ libkrb5.so.3 => /lib/x86_64-linux-gnu/libkrb5.so.3 <> (E)
+ libkrb5support.so.0 => /lib/x86_64-linux-gnu/libkrb5support.so.0 <> (E)
- libpcre2-8.so.0 => /lib/x86_64-linux-gnu/libpcre2-8.so.0 <> (C,B,A)
+ libresolv.so.2 => /lib/x86_64-linux-gnu/libresolv.so.2 <> (E)
- libselinux.so.1 => /lib/x86_64-linux-gnu/libselinux.so.1 <> (C,B,A)
+ libtinfo.so.6 => /lib/x86_64-linux-gnu/libtinfo.so.6 <> (A)
+ libz.so.1 => /lib/x86_64-linux-gnu/libz.so.1 <> (E)
linux-vdso.so.1 <>
compare two files in python ignore comparing commented lines
I would eliminate the #-marked lines using a list comprehension:
...
# Compare the two files while skipping every line that begins with '#'.
with open('testfile1') as text1, open('testfile2') as text2:
    # Build both filtered line lists while the files are still open.
    kept_lines1 = [row for row in text1 if not row.startswith('#')]
    kept_lines2 = [row for row in text2 if not row.startswith('#')]
    diff = difflib.ndiff(kept_lines1, kept_lines2)
...
Related Topics
Oserror: [Winerror 193] %1 Is Not a Valid Win32 Application
How to Make an Image with a Transparent Background in Pygame
How to Break Up This Long Line in Python
How to Make a Barplot and a Lineplot in the Same Seaborn Plot with Different Y Axes Nicely
How to Extract Frequency Associated with Fft Values in Python
How to Convert 24 Hour Time to 12 Hour Time
Send Data from a Textbox into Flask
How to Rotate Selenium Webrowser Ip Address
Python SQLite Parameter Substitution with Wildcards in Like
Why Might Python's 'From' Form of an Import Statement Bind a Module Name
Can You Give a Django App a Verbose Name for Use Throughout the Admin
How to Check That Multiple Keys Are in a Dict in a Single Pass
Why Is the Value of _Name_ Changing After Assignment to Sys.Modules[_Name_]