Comparing Two Xml Files in Python

Comparing XML files using python scripting

Try this:

from lxml import etree

def _collect_values(root):
    """Collect the comparable values found under every ``catalog_item``.

    For each node below a ``catalog_item`` element (the element itself
    included) this records the first attribute value, if the node has any
    attributes, followed by the node's stripped text, if it is non-empty.

    Args:
        root: Root element of a parsed XML document.

    Returns:
        A list of strings in document order.
    """
    values = []
    for node in root.findall('.//catalog_item'):
        for x in node.iter():
            if x.attrib:
                # Only the first attribute value is recorded.
                values.append(x.attrib.values()[0])
            # x.text is None for an element without text content, so fall
            # back to '' instead of calling .strip() on None.
            text = (x.text or '').strip()
            if text:
                values.append(text)
    return values


root_1 = etree.parse('test1.xml').getroot()
root_2 = etree.parse('test2.xml').getroot()

d1 = _collect_values(root_1)
d2 = _collect_values(root_2)

# Sets are compared, so ordering and duplicate values are ignored.
if set(d1) == set(d2):
    print('Data is same in both XML files')
else:
    print('Data is different in both XML files')

Another method

It stores the properties that differ between the two files in a dictionary, keyed by item number.

from lxml import etree
from collections import defaultdict

def _load_items(root):
    """Build one dictionary per ``catalog_item`` element.

    Each dictionary maps a key to the list of values seen for it. The first
    attribute of a node is stored under the attribute's name, and non-empty
    stripped text is stored under the node's tag.

    Args:
        root: Root element of a parsed XML document.

    Returns:
        A list of plain dictionaries, one per ``catalog_item``, in document
        order.
    """
    items = []
    for node in root.findall('.//catalog_item'):
        item = defaultdict(list)
        for x in node.iter():
            if x.attrib:
                # Only the first attribute of each node is recorded.
                item[x.attrib.keys()[0]].append(x.attrib.values()[0])
            # x.text is None for an element without text content, so fall
            # back to '' instead of calling .strip() on None.
            text = (x.text or '').strip()
            if text:
                item[x.tag].append(text)
        items.append(dict(item))
    return items


root_1 = etree.parse('test1.xml').getroot()
root_2 = etree.parse('test2.xml').getroot()

# Both lists are sorted by item_number because items are paired by position.
d1 = sorted(_load_items(root_1), key=lambda x: x['item_number'])
d2 = sorted(_load_items(root_2), key=lambda x: x['item_number'])

res_dict = defaultdict(list)
for x, y in zip(d1, d2):
    # Look every key up by name. Zipping the two key sequences would compare
    # keys by position, so a single extra or reordered key in one item would
    # silently hide every difference that follows it.
    for key in x:
        if key in y and sorted(x[key]) != sorted(y[key]):
            res_dict[x['item_number'][0]].append({key: list(set(x[key]) ^ set(y[key]))})

if not res_dict:
    print('Data is same in both XML files')
else:
    print('Data is different in both XML files \n', dict(res_dict))

compare xml files using python

You can switch to the XMLFormatter and manually filter out the results:

...
# Change formatter: use XMLFormatter so that the result can be filtered line
# by line for `diff:` markers afterwards.
# NOTE(review): `formatting` is presumably xmldiff's formatting module and
# WS_BOTH one of its whitespace-normalization flags; confirm against the
# elided imports.
formatter = formatting.XMLFormatter(normalize=formatting.WS_BOTH)

...

# after `out` has been retrieved:
import re
# Print only the lines of `out` that contain a `diff:<name>` marker.
diff_marker = re.compile(r'\bdiff:\w+')
for line in filter(diff_marker.search, out.splitlines()):
    print(line)

# Result:
# <type st="9999" diff:delete=""/>

Compare two values from two XML files

See below. The idea is to extract the 'key' data and the 'value' data from the two XML documents, then loop over both and compare them.

import xml.etree.ElementTree as ET

xml1 = '''<rates>
<item>
<from>method1</from>
<to>data1</to>
<in>1</in>
<tofee>0.0006</tofee>
<out>0.00006084091504736</out>
<amount>39.508888709782</amount>
<minamount>105</minamount>
<maxamount>10000</maxamount>
</item>
<item>
<from>method2</from>
<to>data1</to>
<in>1</in>
<tofee>0.0006</tofee>
<out>0.00007190617745180</out>
<amount>39.508888709782</amount>
<minamount>90</minamount>
<maxamount>10000 EUR</maxamount>
</item>
<item>
<from>method1</from>
<to>data2</to>
<in>1</in>
<tofee>0.0006</tofee>
<out>0.00225030292539380</out>
<amount>39.508888709782</amount>
<minamount>105</minamount>
<maxamount>10000</maxamount>
</item>
<item>
<from>method2</from>
<to>data2</to>
<in>1</in>
<tofee>0.0006</tofee>
<out>0.00266214200040956</out>
<amount>39.508888709782</amount>
<minamount>90</minamount>
<maxamount>10000</maxamount>
</item>
</rates>'''

xml2 = '''<rates>
<item>
<from>method1</from>
<to>data1</to>
<in>1</in>
<tofee>0.0006</tofee>
<out>0.00076084091504736</out>
<amount>39.508888709782</amount>
<minamount>105</minamount>
<maxamount>10000</maxamount>
</item>
<item>
<from>method2</from>
<to>data1</to>
<in>1</in>
<tofee>0.0006</tofee>
<out>0.00077190617745180</out>
<amount>39.508888709782</amount>
<minamount>90</minamount>
<maxamount>10000</maxamount>
</item>
<item>
<from>method1</from>
<to>data2</to>
<in>1</in>
<tofee>0.0006</tofee>
<out>0.07225030292539380</out>
<amount>39.508888709782</amount>
<minamount>105</minamount>
<maxamount>10000</maxamount>
</item>
<item>
<from>method2</from>
<to>data2</to>
<in>1</in>
<tofee>0.0006</tofee>
<out>0.0000007266214200040956</out>
<amount>39.508888709782</amount>
<minamount>90</minamount>
<maxamount>10000</maxamount>
</item>
</rates>'''


def compare_out_values(first, second):
    """Yield the report lines comparing the "out" values of two mappings.

    Every key of ``first`` produces one ``key: ...`` line. When the value in
    ``first`` is greater than or equal to the value stored under the same
    key in ``second``, an ``Alert: ...`` line follows it. A key that is
    absent from ``second`` is reported as missing instead of raising
    ``KeyError``.

    Args:
        first: Mapping of (from, to) pairs to "out" values of the first
            document.
        second: Mapping of (from, to) pairs to "out" values of the second
            document.

    Yields:
        The report lines, in the iteration order of ``first``.
    """
    for k, v in first.items():
        if k not in second:
            # The pair exists only in the first document, so there is
            # nothing to compare against.
            yield f'key: {k} -> xml1 out: {v} | xml2 out: missing'
            continue
        other = second[k]
        yield f'key: {k} -> xml1 out: {v} | xml2 out: {other}'
        if v >= other:
            yield f'Alert: {v} >= {other}'


root1 = ET.fromstring(xml1)
root2 = ET.fromstring(xml2)
items1 = root1.findall('.//item')
items2 = root2.findall('.//item')
# Each "out" value is keyed by its (from, to) pair so that the value from the
# first document can be compared with the matching value from the second one.
# An alert is printed when the first value is greater than or equal to the
# second.
data1 = {(e.find('from').text, e.find('to').text): float(e.find('out').text) for e in items1}
data2 = {(e.find('from').text, e.find('to').text): float(e.find('out').text) for e in items2}
for report_line in compare_out_values(data1, data2):
    print(report_line)

output

key: ('method1', 'data1') -> xml1 out: 6.084091504736e-05 | xml2 out: 0.00076084091504736
key: ('method2', 'data1') -> xml1 out: 7.19061774518e-05 | xml2 out: 0.0007719061774518
key: ('method1', 'data2') -> xml1 out: 0.0022503029253938 | xml2 out: 0.0722503029253938
key: ('method2', 'data2') -> xml1 out: 0.00266214200040956 | xml2 out: 7.266214200040956e-07
Alert: 0.00266214200040956 >= 7.266214200040956e-07


Related Topics



Leave a reply



Submit