How to Parse Xml Using the Sax Parser

Java: parse xml file using SAX

Basically, the characters method is where the values are read. In your case you were printing the value of only one tag, because of the check if (currentElement.equals("fileContent")). Follow the program below, which displays the values of all tags. Another thing to notice is that the characters method may deliver the text in chunks (of at most 2048 characters, if I remember correctly), so the best approach is to append each chunk to a buffer and process the complete value later in the endElement() method, as shown in the example. Please note that I'm using DatatypeConverter for Base64 decoding; you could use your own decoder.

import java.io.File;

import javax.xml.bind.DatatypeConverter;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Minimal SAX example that prints the text content of every element of {@code data.xml}.
 *
 * <p>The text of {@code fileContent} elements is treated as Base64 and decoded before it is
 * printed; the text of every other element is printed as it was read.
 */
public class SaxSample {

    /**
     * Parses {@code data.xml} from the working directory and prints one
     * {@code name = value} line for every closing tag.
     *
     * @param argv command-line arguments (not used)
     */
    public static void main(String[] argv) {

        try {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            SAXParser saxParser = factory.newSAXParser();

            DefaultHandler handler = new DefaultHandler() {

                // Text collected for the element currently being read. Initialised eagerly so
                // that characters() can never hit a null buffer.
                private final StringBuilder value = new StringBuilder();

                @Override
                public void startElement(String uri, String localName,
                        String qName, Attributes attributes)
                        throws SAXException {
                    // A new element starts: forget whatever text was buffered before it.
                    value.setLength(0);
                }

                @Override
                public void endElement(String uri, String localName,
                        String qName) throws SAXException {
                    if ("fileContent".equalsIgnoreCase(qName)) {
                        byte[] decoded = DatatypeConverter.parseBase64Binary(value.toString());
                        // Use an explicit charset so the result does not depend on the platform
                        // default. UTF-8 is an assumption about the payload; adjust if needed.
                        String decodedValue =
                                new String(decoded, java.nio.charset.StandardCharsets.UTF_8);
                        System.out.println(qName + " = " + decodedValue);
                    } else {
                        System.out.println(qName + " = " + value);
                    }
                    value.setLength(0);
                }

                @Override
                public void characters(char ch[], int start, int length)
                        throws SAXException {
                    // The parser may deliver one text node in several chunks, so only
                    // accumulate here; the complete value is handled in endElement().
                    value.append(ch, start, length);
                }

            };

            saxParser.parse(new File("data.xml"), handler);
        } catch (Exception e) {
            e.printStackTrace();
        }

    }

}

How to use xml sax parser to read and write a large xml?

You could use an xml.sax.saxutils.XMLFilterBase implementation to filter out your project1 nodes.

Instead of assembling the xml strings yourself you could use xml.sax.saxutils.XMLGenerator.

The following is Python3 code, adjust super if you require Python2.

from xml.sax import make_parser
from xml.sax.saxutils import XMLFilterBase, XMLGenerator


class Project1Filter(XMLFilterBase):
    """SAX filter that drops selected elements together with their content.

    Element and character events are passed on to the downstream
    ContentHandler only while the parser is outside every element whose
    name is listed in the 'tag_names_to_exclude' argument.
    """

    def __init__(self, tag_names_to_exclude, parent=None):
        super().__init__(parent)

        # names of the elements that must not reach the output
        self._excluded_tags = tag_names_to_exclude

        # how many excluded elements are currently open (they may nest)
        self._excluded_depth = 0

    def _forward_events(self):
        # True only while no excluded element is open
        return self._excluded_depth == 0

    def startElement(self, name, attrs):
        entering_excluded = name in self._excluded_tags
        if entering_excluded:
            self._excluded_depth += 1

        if not self._forward_events():
            return
        super().startElement(name, attrs)

    def endElement(self, name):
        # decide before the depth is lowered, so that the closing tag of an
        # excluded element is suppressed as well
        still_forwarding = self._forward_events()
        if still_forwarding:
            super().endElement(name)

        leaving_excluded = name in self._excluded_tags
        if leaving_excluded:
            self._excluded_depth -= 1

    def characters(self, content):
        if not self._forward_events():
            return
        super().characters(content)

    # override other content handler methods on XMLFilterBase as necessary


def main():
    """Copy 'input.xml' to 'out-small.xml' without the excluded elements."""
    tag_names_to_exclude = {'project1', 'project2', 'project3'}
    reader = Project1Filter(tag_names_to_exclude, make_parser())

    # XMLGenerator writes an XML declaration naming its 'encoding' argument
    # (default 'iso-8859-1') but does not re-encode a text-mode file. Open the
    # file with the same encoding that is declared so the two always agree.
    with open('out-small.xml', 'w', encoding='utf-8') as f:
        handler = XMLGenerator(f, encoding='utf-8')
        reader.setContentHandler(handler)
        reader.parse('input.xml')


if __name__ == "__main__":
    main()

xml parsing data from child tags using sax parser

Here is the complete parsing solution to extract the values from the xml data from the url.

A sample xml data from the provided link :

<?xml version="1.0" encoding="UTF-8"?>
<data>
<request>
<type>City</type>
<query>Peshawar, Pakistan</query>
</request>
<current_condition>
<observation_time>04:31 AM</observation_time>
<temp_C>7</temp_C>
<temp_F>45</temp_F>
<weatherCode>113</weatherCode>
<weatherIconUrl><![CDATA[http://www.worldweatheronline.com/images/wsymbols01_png_64/wsymbol_0001_sunny.png]]>
</weatherIconUrl>
<weatherDesc><![CDATA[Sunny]]>
</weatherDesc>
<windspeedMiles>0</windspeedMiles>
<windspeedKmph>0</windspeedKmph>
<winddirDegree>0</winddirDegree>
<winddir16Point>N</winddir16Point>
<precipMM>0.0</precipMM>
<humidity>70</humidity>
<visibility>4</visibility>
<pressure>1021</pressure>
<cloudcover>0</cloudcover>
</current_condition>
<weather>
<date>2013-01-26</date>
<tempMaxC>22</tempMaxC>
<tempMaxF>71</tempMaxF>
<tempMinC>7</tempMinC>
<tempMinF>45</tempMinF>
<windspeedMiles>8</windspeedMiles>
<windspeedKmph>13</windspeedKmph>
<winddirection>NW</winddirection>
<winddir16Point>NW</winddir16Point>
<winddirDegree>315</winddirDegree>
<weatherCode>113</weatherCode>
<weatherIconUrl><![CDATA[http://www.worldweatheronline.com/images/wsymbols01_png_64/wsymbol_0001_sunny.png]]>
</weatherIconUrl>
<weatherDesc><![CDATA[Sunny]]>
</weatherDesc>
<precipMM>0.0</precipMM>
</weather>
<weather>
<date>2013-01-27</date>
<tempMaxC>20</tempMaxC>
<tempMaxF>68</tempMaxF>
<tempMinC>8</tempMinC>
<tempMinF>46</tempMinF>
<windspeedMiles>7</windspeedMiles>
<windspeedKmph>12</windspeedKmph>
<winddirection>N</winddirection>
<winddir16Point>N</winddir16Point>
<winddirDegree>352</winddirDegree>
<weatherCode>113</weatherCode>
<weatherIconUrl><![CDATA[http://www.worldweatheronline.com/images/wsymbols01_png_64/wsymbol_0001_sunny.png]]>
</weatherIconUrl>
<weatherDesc><![CDATA[Sunny]]>
</weatherDesc>
<precipMM>0.0</precipMM>
</weather>
<weather>
<date>2013-01-28</date>
<tempMaxC>20</tempMaxC>
<tempMaxF>68</tempMaxF>
<tempMinC>7</tempMinC>
<tempMinF>45</tempMinF>
<windspeedMiles>9</windspeedMiles>
<windspeedKmph>14</windspeedKmph>
<winddirection>NW</winddirection>
<winddir16Point>NW</winddir16Point>
<winddirDegree>316</winddirDegree>
<weatherCode>113</weatherCode>
<weatherIconUrl><![CDATA[http://www.worldweatheronline.com/images/wsymbols01_png_64/wsymbol_0001_sunny.png]]>
</weatherIconUrl>
<weatherDesc><![CDATA[Sunny]]>
</weatherDesc>
<precipMM>0.0</precipMM>
</weather>
<weather>
<date>2013-01-29</date>
<tempMaxC>21</tempMaxC>
<tempMaxF>70</tempMaxF>
<tempMinC>8</tempMinC>
<tempMinF>46</tempMinF>
<windspeedMiles>6</windspeedMiles>
<windspeedKmph>9</windspeedKmph>
<winddirection>NW</winddirection>
<winddir16Point>NW</winddir16Point>
<winddirDegree>306</winddirDegree>
<weatherCode>113</weatherCode>
<weatherIconUrl><![CDATA[http://www.worldweatheronline.com/images/wsymbols01_png_64/wsymbol_0001_sunny.png]]>
</weatherIconUrl>
<weatherDesc><![CDATA[Sunny]]>
</weatherDesc>
<precipMM>0.0</precipMM>
</weather>
<weather>
<date>2013-01-30</date>
<tempMaxC>19</tempMaxC>
<tempMaxF>66</tempMaxF>
<tempMinC>8</tempMinC>
<tempMinF>47</tempMinF>
<windspeedMiles>8</windspeedMiles>
<windspeedKmph>13</windspeedKmph>
<winddirection>WNW</winddirection>
<winddir16Point>WNW</winddir16Point>
<winddirDegree>289</winddirDegree>
<weatherCode>116</weatherCode>
<weatherIconUrl><![CDATA[http://www.worldweatheronline.com/images/wsymbols01_png_64/wsymbol_0002_sunny_intervals.png]]>
</weatherIconUrl>
<weatherDesc><![CDATA[Partly Cloudy ]]>
</weatherDesc>
<precipMM>0.0</precipMM>
</weather>
</data>

HandlingXmlStuff.java :

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX handler that collects the values found below the {@code request},
 * {@code current_condition} and {@code weather} elements of the weather feed.
 *
 * <p>The children of {@code request} and of {@code current_condition} are stored as
 * tag-name to text entries in one map each. Every {@code weather} element produces
 * its own map, and those maps are kept in a list in document order.
 *
 * <p>Text is buffered while an element is being read and stored only when its closing
 * tag arrives, because the parser may report one text node through several
 * {@code characters()} calls. Storing each chunk on its own would keep just the last one.
 *
 * @author visruth
 */
public class HandlingXmlStuff extends DefaultHandler {

    /** Feed that {@link #parseDocument()} reads. */
    private static final String FEED_URL =
            "http://free.worldweatheronline.com/feed/weather.ashx?q=peshawar,pakistan&format=xml&num_of_days=5&key=eab9f57359164426132301";

    /** Text collected since the most recent opening tag. */
    private final StringBuilder text = new StringBuilder();
    private Map<String, String> request = new HashMap<String, String>();
    private Map<String, String> current_condition = new HashMap<String, String>();
    private List<Map<String, String>> weather = new ArrayList<Map<String, String>>();
    /** Map being filled for the {@code weather} element that is currently open. */
    private Map<String, String> weatherMap;
    private boolean requestStatus;
    private boolean current_conditionStatus;
    private boolean weatherStatus;

    /** @return the values read from the children of {@code current_condition} */
    public Map<String, String> getCurrent_condition() {
        return this.current_condition;
    }

    /** @return the values read from the children of {@code request} */
    public Map<String, String> getRequest() {
        return this.request;
    }

    /** @return one map per {@code weather} element, in document order */
    public List<Map<String, String>> getWeather() {
        return this.weather;
    }

    /** Starts a fresh text buffer and remembers which section has been entered. */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        text.setLength(0);
        if (qName.equals("request")) {
            requestStatus = true;
        } else if (qName.equals("current_condition")) {
            current_conditionStatus = true;
        } else if (qName.equals("weather")) {
            weatherMap = new HashMap<String, String>();
            weatherStatus = true;
        }
    }

    /** Appends the reported chunk to the buffer; the value is stored in {@code endElement}. */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        text.append(ch, start, length);
    }

    /**
     * Closes a section, or stores the buffered text of a child element in the map of the
     * section that is currently open. Blank values are not stored.
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        String value = text.toString().trim();
        // Whatever follows this closing tag belongs to no element of interest.
        text.setLength(0);

        if (qName.equals("request")) {
            requestStatus = false;
        } else if (qName.equals("current_condition")) {
            current_conditionStatus = false;
        } else if (qName.equals("weather")) {
            weather.add(weatherMap);
            weatherStatus = false;
        } else if (!value.isEmpty()) {
            Map<String, String> target = activeSection();
            if (target != null) {
                target.put(qName, value);
            }
        }
    }

    /** Returns the map of the section currently open, or {@code null} outside all sections. */
    private Map<String, String> activeSection() {
        if (requestStatus) {
            return request;
        }
        if (current_conditionStatus) {
            return current_condition;
        }
        if (weatherStatus) {
            return weatherMap;
        }
        return null;
    }

    /** Parses the default weather feed; see {@link #parseDocument(String)}. */
    public void parseDocument() {
        parseDocument(FEED_URL);
    }

    /**
     * Parses the document at the given URI with this object as the SAX handler.
     * Parser and I/O failures are reported with a stack trace and otherwise ignored,
     * so the getters then return whatever was collected before the failure.
     *
     * @param uri location of the XML document to read
     */
    public void parseDocument(String uri) {
        //get a factory
        SAXParserFactory spf = SAXParserFactory.newInstance();
        try {
            //get a new instance of parser
            SAXParser sp = spf.newSAXParser();
            //parse the document and also register this class for call backs
            sp.parse(uri, this);
        } catch (SAXException se) {
            se.printStackTrace();
        } catch (ParserConfigurationException pce) {
            pce.printStackTrace();
        } catch (IOException ie) {
            ie.printStackTrace();
        }
    }
}

You can retrieve the values using the following code :

// Run the parser once, then print everything it collected, section by section.
HandlingXmlStuff parser = new HandlingXmlStuff();
parser.parseDocument();

System.out.println("----values inside request tag-----");
for (Map.Entry<String, String> entry : parser.getRequest().entrySet()) {
    System.out.println(entry.getKey() + ":" + entry.getValue() + ":" + entry.getKey());
}

System.out.println("----values inside current_condition tag-----");
for (Map.Entry<String, String> entry : parser.getCurrent_condition().entrySet()) {
    System.out.println(entry.getKey() + ":" + entry.getValue() + ":" + entry.getKey());
}

// One block of output per <weather> element.
for (Map<String, String> day : parser.getWeather()) {
    System.out.println("\n----each set of weather-----start");
    for (Map.Entry<String, String> entry : day.entrySet()) {
        System.out.println(entry.getKey() + ":" + entry.getValue() + ":" + entry.getKey());
    }
    System.out.println("----each set of weather-----end");
}

From the above sample xml data you will get the below output :

----values inside request tag-----
query:Peshawar, Pakistan:query
type:City:type
----values inside current_condition tag-----
cloudcover:0:cloudcover
observation_time:04:31 AM:observation_time
pressure:1021:pressure
temp_C:7:temp_C
visibility:4:visibility
temp_F:45:temp_F
windspeedMiles:0:windspeedMiles
precipMM:0.0:precipMM
winddirDegree:0:winddirDegree
winddir16Point:N:winddir16Point
weatherIconUrl:http://www.worldweatheronline.com/images/wsymbols01_png_64/wsymbol_0001_sunny.png:weatherIconUrl
humidity:70:humidity
windspeedKmph:0:windspeedKmph
weatherCode:113:weatherCode
weatherDesc:Sunny:weatherDesc

----each set of weather-----start
windspeedMiles:8:windspeedMiles
winddirection:NW:winddirection
date:2013-01-26:date
precipMM:0.0:precipMM
winddir16Point:NW:winddir16Point
winddirDegree:315:winddirDegree
weatherIconUrl:http://www.worldweatheronline.com/images/wsymbols01_png_64/wsymbol_0001_sunny.png:weatherIconUrl
tempMinC:7:tempMinC
windspeedKmph:13:windspeedKmph
tempMaxC:22:tempMaxC
weatherCode:113:weatherCode
tempMaxF:71:tempMaxF
tempMinF:45:tempMinF
weatherDesc:Sunny:weatherDesc
----each set of weather-----end

----each set of weather-----start
windspeedMiles:7:windspeedMiles
winddirection:N:winddirection
date:2013-01-27:date
precipMM:0.0:precipMM
winddir16Point:N:winddir16Point
winddirDegree:352:winddirDegree
weatherIconUrl:_0001_sunny.png:weatherIconUrl
tempMinC:8:tempMinC
windspeedKmph:12:windspeedKmph
tempMaxC:20:tempMaxC
weatherCode:113:weatherCode
tempMaxF:68:tempMaxF
tempMinF:46:tempMinF
weatherDesc:Sunny:weatherDesc
----each set of weather-----end

----each set of weather-----start
windspeedMiles:9:windspeedMiles
winddirection:NW:winddirection
date:2013-01-28:date
precipMM:0.0:precipMM
winddir16Point:NW:winddir16Point
winddirDegree:316:winddirDegree
weatherIconUrl:http://www.worldweatheronline.com/images/wsymbols01_png_64/wsymbol_0001_sunny.png:weatherIconUrl
tempMinC:7:tempMinC
windspeedKmph:14:windspeedKmph
tempMaxC:20:tempMaxC
weatherCode:113:weatherCode
tempMaxF:68:tempMaxF
tempMinF:45:tempMinF
weatherDesc:Sunny:weatherDesc
----each set of weather-----end

----each set of weather-----start
windspeedMiles:6:windspeedMiles
winddirection:NW:winddirection
date:2013-01-29:date
precipMM:0.0:precipMM
winddir16Point:NW:winddir16Point
winddirDegree:306:winddirDegree
weatherIconUrl:http://www.worldweatheronline.com/images/wsymbols01_png_64/wsymbol_0001_sunny.png:weatherIconUrl
tempMinC:8:tempMinC
windspeedKmph:9:windspeedKmph
tempMaxC:21:tempMaxC
weatherCode:113:weatherCode
tempMaxF:70:tempMaxF
tempMinF:46:tempMinF
weatherDesc:Sunny:weatherDesc
----each set of weather-----end

----each set of weather-----start
windspeedMiles:8:windspeedMiles
winddirection:WNW:winddirection
date:2013-01-30:date
precipMM:0.0:precipMM
winddir16Point:WNW:winddir16Point
winddirDegree:289:winddirDegree
weatherIconUrl:unny_intervals.png:weatherIconUrl
tempMinC:8:tempMinC
windspeedKmph:13:windspeedKmph
tempMaxC:19:tempMaxC
weatherCode:116:weatherCode
tempMaxF:66:tempMaxF
tempMinF:47:tempMinF
weatherDesc:Partly Cloudy:weatherDesc
----each set of weather-----end

The startElement method is invoked when an opening tag is read, the endElement method is invoked when a closing tag is read, and the characters method is invoked for the text that follows each opening or closing tag (possibly more than once for a single run of text).

Parsing and updating xml using SAX parser in java

Anything is possible with SAX, it's just waaaaay harder than it has to be. It's pretty old school and there are many easier ways to do this (JAXB, XQuery, XPath, DOM etc ).

That said lets do it with SAX.

It sounds like the problem you are having is that you are not tracking the state of your progress through the document. SAX simply works by making the callbacks when it stumbles across an event within the document

This is a fairly crude way of parsing the doc and updating the relevant node using SAX. Basically I am checking when we hit an element with the value you want to update (Environment) and setting a flag so that when we get to the contents of the defaultValue node, the characters callback lets me remove the existing value and replace it with the new value.

import java.io.StringReader;
import java.util.Arrays;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Crude SAX-based update of one value in an XML document.
 *
 * <p>The handler looks for a {@code name} element whose text is {@code Environment} and
 * replaces the text of the next {@code defaultValue} element with a new value. The
 * replacement is done on the character buffer that the parser passes to
 * {@link #characters(char[], int, int)}.
 */
public class Q26897496 extends DefaultHandler {

    public static String xmlDoc = "<?xml version='1.0'?>"
            + "<properties>"
            + " <definition>"
            + " <name>IP</name>"
            + " <description></description>"
            + " <defaultValue>10.1.1.1</defaultValue>"
            + " </definition>"
            + " <definition>"
            + " <name>Name</name>"
            + " <description></description>"
            + " <defaultValue>MyName</defaultValue>"
            + " </definition>"
            + " <definition>"
            + " <name>Environment</name>"
            + " <description></description>"
            + " <defaultValue>Production</defaultValue>"
            + " </definition>"
            + "</properties>";


    /** Local name of the element whose text is being read, or {@code null} between elements. */
    String elementName;
    /** Set once the {@code Environment} name was seen; cleared when its value is handled. */
    boolean mark = false;
    /** The rewritten document, or {@code null} when nothing was replaced. */
    char[] updatedDoc;

    public static void main(String[] args) {

        Q26897496 q = new Q26897496();
        try {
            q.parse();
        } catch (Exception e) {
            e.printStackTrace();
        }

    }

    public Q26897496() {

    }

    /**
     * Parses {@link #xmlDoc} with this object as content handler and prints the result.
     * When no value was replaced the document is printed unchanged.
     *
     * @throws Exception if the parser cannot be created or the document cannot be parsed
     */
    public void parse() throws Exception {
        SAXParserFactory spf = SAXParserFactory.newInstance();
        spf.setNamespaceAware(true);
        SAXParser saxParser = spf.newSAXParser();
        XMLReader xml = saxParser.getXMLReader();
        xml.setContentHandler(this);
        xml.parse(new InputSource(new StringReader(xmlDoc)));

        // updatedDoc stays null when the document has no matching definition.
        String result = (updatedDoc == null) ? xmlDoc : new String(updatedDoc);
        System.out.println("new xml: \n" + result);
    }

    @Override
    public void startDocument() throws SAXException {
        System.out.println("starting");
        // Start every parse from a clean state so the handler can be reused.
        elementName = null;
        mark = false;
        updatedDoc = null;
    }

    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        this.elementName = localName;
    }

    /**
     * Forgets the current element, so that text between elements is not attributed to the
     * element that has just been closed, and drops a pending mark when the
     * {@code defaultValue} element turned out to have no text.
     */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        if ("defaultValue".equals(localName)) {
            mark = false;
        }
        this.elementName = null;
    }

    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {

        String value = new String(ch, start, length);
        if ("name".equals(elementName) && value.equals("Environment")) {
            this.mark = true;
        }

        if ("defaultValue".equals(elementName) && mark) {
            // update
            // NOTE(review): this treats 'ch' as if it held the whole document. That depends
            // on how the parser manages its buffer and is not guaranteed by SAX - confirm
            // for the parser and document sizes in use.
            String tmpDoc = new String(ch);
            String leading = tmpDoc.substring(0, start);
            String trailing = tmpDoc.substring(start + length, tmpDoc.length());
            this.updatedDoc = (leading + "NewValueForDefaulValue" + trailing).toCharArray();
            mark = false;
        }
    }
}

traversing xml document using SAX parser and printing output in desired format

/**
 * SAX handler that prints, at every closing tag preceded by character data, the path of
 * the open elements together with the text chunks read since the previous closing tag.
 * The same information is collected in {@link #hm} and printed again at the end of the
 * document.
 */
class UserHandler extends DefaultHandler
{
    /** Names of the elements that are currently open, outermost first. */
    List<String> li_elements = new ArrayList<String>();
    /** Text chunks reported since the previous closing tag. */
    List<String> li_values = new ArrayList<String>();

    /** Element path mapped to every text chunk reported for that path, in document order. */
    LinkedHashMap<List<String>,List<String>> hm = new LinkedHashMap<List<String>,List<String>>();
    boolean endElementFlag;

    @Override
    public void startDocument()
    {
        System.out.println("Document started");
        li_elements = new ArrayList<String>();
        li_values = new ArrayList<String>();
        // Must exist before endDocument() reads it; it used to be left null.
        hm = new LinkedHashMap<List<String>,List<String>>();
    }

    @Override
    public void endDocument()
    {
        System.out.println("Documents ended"+hm.size());
        for (Map.Entry<List<String>,List<String>> m : hm.entrySet())
        {
            System.out.println(m.getKey()+""+m.getValue());
        }

    }

    @Override
    public void startElement(String namespaceURI,String localName,String qname,Attributes atts) throws SAXException
    {

        li_elements.add(qname);

        //System.out.println("Element Started");
        //System.out.println(qname+" added in element list");
    }

    @Override
    public void endElement(String uri,String localName, String qname)
    {

        if (!li_values.isEmpty())
        {
            System.out.println("Element address list:-"+li_elements+"and Corresponding Value:-"+li_values);

            System.out.println();

            // Store copies: both working lists are modified right below.
            List<String> path = new ArrayList<String>(li_elements);
            List<String> recorded = hm.get(path);
            if (recorded == null)
            {
                recorded = new ArrayList<String>();
                hm.put(path, recorded);
            }
            recorded.addAll(li_values);
        }

        if (!li_elements.isEmpty())
        {
            li_elements.remove(li_elements.size()-1);
        }
        li_values.clear();

    }

    @Override
    public void characters(char[] ch, int start, int length)
    {

        String str = new String(ch,start,length);
        li_values.add(str);
    }

}

I was expecting something like this. this prints the output in a format that I was hoping for.

Parsing and modifying xml string with sax parser

This is working code; just add the missing imports. It uses SAX and changes <name>user1</name> to <name>user2</name>. If you figure out how it works and read the SAX API, you can do anything with your xml. Note that SAX had been considered the most efficient xml parser until StAX came into being.

/**
 * Streams a small document through a SAX filter that replaces the text of every
 * {@code name} element with {@code user2}, and prints the result to standard output.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if the reader cannot be created or the transformation fails
 */
public static void main(String[] args) throws Exception {
    String xml = "<users><user><name>user1</name></user></users>";
    XMLReader xr = new XMLFilterImpl(XMLReaderFactory.createXMLReader()) {
        // Name of the element whose text is being read; empty between elements.
        private String tagName = "";
        // True once the replacement text was emitted for the current element.
        private boolean replaced;

        @Override
        public void startElement(String uri, String localName, String qName, Attributes atts)
                throws SAXException {
            tagName = qName;
            replaced = false;
            super.startElement(uri, localName, qName, atts);
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            tagName = "";
            replaced = false;
            super.endElement(uri, localName, qName);
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (tagName.equals("name")) {
                if (replaced) {
                    // The parser may split one text node into several chunks; the new
                    // value was already written for this element, so drop the rest.
                    return;
                }
                replaced = true;
                ch = "user2".toCharArray();
                start = 0;
                length = ch.length;
            }
            super.characters(ch, start, length);
        }
    };
    Source src = new SAXSource(xr, new InputSource(new StringReader(xml)));
    Result res = new StreamResult(System.out);
    TransformerFactory.newInstance().newTransformer().transform(src, res);
}

Java SAX parsing of an XML String

Use SAXParser.parse(InputSource is, DefaultHandler dh) where

 // Wrap the string in a Reader so the parser takes its input from memory
 // (str is presumably the XML text to parse).
 InputSource is = new InputSource(new StringReader(str));


Related Topics



Leave a reply



Submit