Parsing Deep Xml file using DOM Parser



I have the following XML file:



<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE output SYSTEM "resources/schema/AISE.dtd">
<output xmlns='urn:columbia:ccls:aiSE:0.1'>
<sentence>
<rootToken value ='They'>
<token value ='They'>
<solution CI_class ='latin'/>
</token>
</rootToken>
<rootToken value ='are'>
<token value ='are'>
<solution CI_class ='FA' />
</token>
</rootToken>
<rootToken value ='the'>
<token value ='the'>
<solution CI_class ='FA' />
</token>
</rootToken>
<rootToken value ='same'>
<token value ='same'>
<solution CI_class ='FA' />
</token>
</rootToken>
<rootToken value =' thing.'>
<token value ='thing'>
<solution CI_class ='FA' />
</token>
<rootToken value ='.'>
<token value ='.'>
<solution CI_class ='FA' />
</token>
</rootToken>

<sentence>
<rootToken value ='We'>
<token value ='We'>
<solution CI_class ='FA' />
</token>
</rootToken>
<rootToken value ='can'>
<token value ='can'>
<solution CI_class ='FA' />
</token>
</rootToken>
<rootToken value ='wait'>
<token value ='wait'>
<solution CI_class ='FA' />
</token>
</rootToken>
<rootToken value ='for'>
<token value ='for'>
<solution CI_class ='FA' />
</token>
</rootToken>
<rootToken value ='you'>
<token value ='you'>
<solution CI_class ='FA' />
</token>
</rootToken>
<rootToken value ='to'>
<token value ='to'>
<solution CI_class ='FA' />
</token>
</rootToken>
<rootToken value ='move'>
<token value ='move'>
<solution CI_class ='FA' />
</token>
</rootToken>
<rootToken value ='back'>
<token value ='back'>
<solution CI_class ='FA' />
</token>
</rootToken>
</sentence>
</output>


I would like to parse this file and generate the following file with the following output:



Sentnece1: We FA
can FA
wait FA
For FA
you FA
to FA
move FA
back FA
. FA

Sentence2: They FA
are FA
the FA
same FA
thing FA


My problem is that I'm would like print the a new line character that splits the two sentences. I could get the above output without having any separator.So, I don't know what I'm missing.


This is the code I'm using to parse the XML file:



import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.w3c.dom.Element;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.SAXException;

public class Todelete{

public static void main(String[] args) throws ParserConfigurationException, IOException, SAXException {
// TODO Auto-generated method stub
//try {

File fXmlFile = new File("out333.xml");
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
Document doc = dBuilder.parse(fXmlFile);
FileWriter out = new FileWriter("OUT-TEST.txt");

doc.getDocumentElement().normalize();

System.out.println("Root element :" + doc.getDocumentElement().getNodeName());
out.write("Root element :"+ doc.getDocumentElement().getNodeName() +"\n");
NodeList sList = doc.getElementsByTagName("sentence");
System.out.println("----------------------------");
NodeList rtList = doc.getElementsByTagName("rootToken"); //Root token
NodeList tList = doc.getElementsByTagName("token"); //token
NodeList cList = doc.getElementsByTagName("solution"); //tag(class)

for (int temp2 = 0; temp2 < tList.getLength(); temp2++) { //token loop


Node tNode = tList.item(temp2); //token
Node rtNode = rtList.item(temp2); //Root token
Node cNode = cList.item(temp2); //tag(class)

if ((tNode.getNodeType() == Node.ELEMENT_NODE)) {

Element tElement = (Element) tNode;
System.out.println("Token : " + tElement.getAttribute("value"));
out.write(tElement.getAttribute("value")+"\t");

if(tNode.hasChildNodes()){

Element cElement = (Element) cNode;
System.out.println("class : " + cElement.getAttribute("CI_class"));
out.write(cElement.getAttribute("CI_class"));
out.write("\n");

}

}

}

out.close();
}

}

No comments:

Post a Comment