Comparing XML files using python scripting
Try this:
from lxml import etree


def collect_values(root):
    """Return every attribute value and non-blank text found under each <catalog_item>.

    Walks each ``catalog_item`` element (itself included) and all of its
    descendants, gathering all attribute values followed by the stripped
    text of each element that has any.
    """
    values = []
    for node in root.findall('.//catalog_item'):
        for x in node.iter():
            # Take every attribute value, not just the first one, so that a
            # change in any attribute is visible to the comparison.
            values.extend(x.attrib.values())
            # `text` is None for elements without text content; treat that
            # the same as blank text instead of crashing on None.strip().
            text = (x.text or '').strip()
            if text:
                values.append(text)
    return values


root_1 = etree.parse('test1.xml').getroot()
root_2 = etree.parse('test2.xml').getroot()
d1 = collect_values(root_1)
d2 = collect_values(root_2)

# NOTE: comparing as sets ignores both ordering and how often a value occurs.
if set(d1) == set(d2):
    print('Data is same in both XML files')
else:
    print('Data is different in both XML files')
Another method
It stores which properties differ between the two files in a dictionary.
from lxml import etree
from collections import defaultdict


def collect_items(root):
    """Return one ``{name: [values]}`` dict per <catalog_item> under ``root``.

    For each ``catalog_item`` (itself included) and its descendants, every
    attribute is stored under the attribute name and every non-blank element
    text is stored under the element's tag.
    """
    items = []
    for node in root.findall('.//catalog_item'):
        item = defaultdict(list)
        for x in node.iter():
            # Record all attributes rather than only the first one, so a
            # change in any attribute shows up in the result.
            for name, value in x.attrib.items():
                item[name].append(value)
            # `text` is None for elements without text content; treat that
            # the same as blank text instead of crashing on None.strip().
            text = (x.text or '').strip()
            if text:
                item[x.tag].append(text)
        items.append(dict(item))
    return items


root_1 = etree.parse('test1.xml').getroot()
root_2 = etree.parse('test2.xml').getroot()

# Sort both sides by item number so corresponding items line up.
# NOTE: items are paired by sorted position; this assumes both files contain
# the same set of item numbers and that every item has an <item_number>.
d1 = sorted(collect_items(root_1), key=lambda x: x['item_number'])
d2 = sorted(collect_items(root_2), key=lambda x: x['item_number'])

res_dict = defaultdict(list)
for x, y in zip(d1, d2):
    # Walk x's keys first, then keys that only y has. Pairing the two key
    # sequences positionally would skip every property after the first key
    # that is missing or reordered on one side.
    all_keys = list(x) + [key for key in y if key not in x]
    for key in all_keys:
        left = x.get(key, [])
        right = y.get(key, [])
        if sorted(left) != sorted(right):
            res_dict[x['item_number'][0]].append({key: list(set(left) ^ set(right))})

if not res_dict:
    print('Data is same in both XML files')
else:
    print('Data is different in both XML files \n', dict(res_dict))
compare xml files using python
You can switch to the XMLFormatter
and manually filter out the results:
...
# Change formatter:
formatter = formatting.XMLFormatter(normalize=formatting.WS_BOTH)
...
# after `out` has been retrieved:
import re

# Keep only the output lines that carry a `diff:<name>` marker
# (such as `diff:delete`). The pattern is compiled once, outside the loop.
diff_marker = re.compile(r'\bdiff:\w+')
for i in out.splitlines():
    if diff_marker.search(i):
        print(i)
# Result:
# <type st="9999" diff:delete=""/>
Compare two values from two XML files
See below. The idea is to extract the 'key' data and the 'value' data from the two XML documents, then loop over the extracted pairs and compare them.
import xml.etree.ElementTree as ET

# Two sample <rates> documents. Each <item> is identified by its
# (<from>, <to>) pair and carries an <out> value to compare.
xml1 = '''<rates>
<item>
<from>method1</from>
<to>data1</to>
<in>1</in>
<tofee>0.0006</tofee>
<out>0.00006084091504736</out>
<amount>39.508888709782</amount>
<minamount>105</minamount>
<maxamount>10000</maxamount>
</item>
<item>
<from>method2</from>
<to>data1</to>
<in>1</in>
<tofee>0.0006</tofee>
<out>0.00007190617745180</out>
<amount>39.508888709782</amount>
<minamount>90</minamount>
<maxamount>10000 EUR</maxamount>
</item>
<item>
<from>method1</from>
<to>data2</to>
<in>1</in>
<tofee>0.0006</tofee>
<out>0.00225030292539380</out>
<amount>39.508888709782</amount>
<minamount>105</minamount>
<maxamount>10000</maxamount>
</item>
<item>
<from>method2</from>
<to>data2</to>
<in>1</in>
<tofee>0.0006</tofee>
<out>0.00266214200040956</out>
<amount>39.508888709782</amount>
<minamount>90</minamount>
<maxamount>10000</maxamount>
</item>
</rates>'''
xml2 = '''<rates>
<item>
<from>method1</from>
<to>data1</to>
<in>1</in>
<tofee>0.0006</tofee>
<out>0.00076084091504736</out>
<amount>39.508888709782</amount>
<minamount>105</minamount>
<maxamount>10000</maxamount>
</item>
<item>
<from>method2</from>
<to>data1</to>
<in>1</in>
<tofee>0.0006</tofee>
<out>0.00077190617745180</out>
<amount>39.508888709782</amount>
<minamount>90</minamount>
<maxamount>10000</maxamount>
</item>
<item>
<from>method1</from>
<to>data2</to>
<in>1</in>
<tofee>0.0006</tofee>
<out>0.07225030292539380</out>
<amount>39.508888709782</amount>
<minamount>105</minamount>
<maxamount>10000</maxamount>
</item>
<item>
<from>method2</from>
<to>data2</to>
<in>1</in>
<tofee>0.0006</tofee>
<out>0.0000007266214200040956</out>
<amount>39.508888709782</amount>
<minamount>90</minamount>
<maxamount>10000</maxamount>
</item>
</rates>'''


def _out_by_pair(items):
    """Map each item's (<from>, <to>) text pair to its <out> value as a float."""
    return {
        (e.find('from').text, e.find('to').text): float(e.find('out').text)
        for e in items
    }


root1 = ET.fromstring(xml1)
root2 = ET.fromstring(xml2)
items1 = root1.findall('.//item')
items2 = root2.findall('.//item')

# Goal: for every (from, to) pair, compare the "out" value in xml1 with the
# "out" value of the same pair in xml2, and alert when the xml1 value is
# greater than or equal to the xml2 value.
data1 = _out_by_pair(items1)
data2 = _out_by_pair(items2)

for k, v in data1.items():
    other = data2.get(k)
    if other is None:
        # A pair that exists only in xml1 has nothing to compare against;
        # report it instead of failing with KeyError.
        print(f'key: {k} -> xml1 out: {v} | xml2 out: missing')
        continue
    print(f'key: {k} -> xml1 out: {v} | xml2 out: {other}')
    if v >= other:
        print(f'Alert: {v} >= {other}')
output
key: ('method1', 'data1') -> xml1 out: 6.084091504736e-05 | xml2 out: 0.00076084091504736
key: ('method2', 'data1') -> xml1 out: 7.19061774518e-05 | xml2 out: 0.0007719061774518
key: ('method1', 'data2') -> xml1 out: 0.0022503029253938 | xml2 out: 0.0722503029253938
key: ('method2', 'data2') -> xml1 out: 0.00266214200040956 | xml2 out: 7.266214200040956e-07
Alert: 0.00266214200040956 >= 7.266214200040956e-07
Related Topics
How to Extract All Upper from a String - Python
How to Hide Tkinter Python Gui
How to Drop Rows of Pandas Dataframe Whose Value in a Certain Column Is Nan
Vscode: There Is No Pip Installer Available in the Selected Environment
Stripping Whitespaces from a List Inside the List of Tuples
How to Repeat a Function N Times
In Python, How to Check If a String Only Contains Certain Characters
Sort Array and Return Original Indexes of Sorted Array
Masking Horizontal and Vertical Lines With Open Cv
Best Way to Identify and Extract Dates from Text Python
Converting Text File into Json in a Specific Format ( Python )
How to Count Duplicate Rows in Pandas Dataframe
Correctly Reading Text from Windows-1252(Cp1252) File in Python
Is There a Short-Hand for Nth Root of X in Python
Deleting Dataframe Row in Pandas If a Combination of Column Values Equals a Tuple in a List