Parsing Deep Xml file using DOM Parser

I have the following XML file:


<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE output SYSTEM "resources/schema/AISE.dtd">
<output xmlns='urn:columbia:ccls:aiSE:0.1'>
    <sentence>
        <rootToken value ='They'>
            <token value ='They'>
                <solution CI_class ='latin'/>
            </token>
        </rootToken>   
        <rootToken value ='are'>
            <token value ='are'>
                <solution CI_class ='FA' />
            </token>
                </rootToken>
        <rootToken value ='the'>
            <token value ='the'>
                <solution CI_class ='FA' />
            </token>
        </rootToken>
        <rootToken value ='same'>
            <token value ='same'>
                <solution CI_class ='FA' />
            </token>
        </rootToken>
        <rootToken value =' thing.'>
            <token value ='thing'>
                <solution CI_class ='FA' />
            </token>
        <rootToken value ='.'>
            <token value ='.'>
                <solution CI_class ='FA' />
            </token>
        </rootToken>

    <sentence>
        <rootToken value ='We'>
            <token value ='We'>
                <solution CI_class ='FA' />
            </token>
        </rootToken>
        <rootToken value ='can'>
            <token value ='can'>
                        <solution CI_class ='FA' />
            </token>
        </rootToken>
        <rootToken value ='wait'>
            <token value ='wait'>
                        <solution CI_class ='FA' />
            </token>
        </rootToken>
        <rootToken value ='for'>
            <token value ='for'>
                <solution CI_class ='FA' />
                </token>
        </rootToken>
        <rootToken value ='you'>
            <token value ='you'>
                <solution CI_class ='FA' />
            </token>
        </rootToken>
        <rootToken value ='to'>
            <token value ='to'>
                <solution CI_class ='FA' />
            </token>
        </rootToken>
        <rootToken value ='move'>
            <token value ='move'>
                <solution CI_class ='FA' />
            </token>
        </rootToken>
        <rootToken value ='back'>
            <token value ='back'>
                <solution CI_class ='FA' />
            </token>
         </rootToken>
    </sentence>
</output>

I would like to parse this file and generate the following file with the following output:


    Sentnece1: We    FA
               can   FA
               wait  FA
               For   FA
               you   FA
               to    FA
               move  FA
               back  FA
               .     FA

    Sentence2: They   FA
               are    FA
               the    FA
               same   FA
               thing  FA

My problem is that I'm would like print the a new line character that splits the two sentences. I could get the above output without having any separator.So, I don't know what I'm missing.

This is the code I'm using to parse the XML file:


import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.w3c.dom.Element;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.SAXException;

public class Todelete{

public static void main(String[] args) throws ParserConfigurationException, IOException, SAXException {
    // TODO Auto-generated method stub
    //try {

        File fXmlFile = new File("out333.xml");
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
        Document doc = dBuilder.parse(fXmlFile);
        FileWriter out = new FileWriter("OUT-TEST.txt");

        doc.getDocumentElement().normalize();

        System.out.println("Root element :" + doc.getDocumentElement().getNodeName());
            out.write("Root element :"+ doc.getDocumentElement().getNodeName() +"\n");                                                
                    NodeList sList = doc.getElementsByTagName("sentence");                                     
                    System.out.println("----------------------------");                            
                    NodeList rtList = doc.getElementsByTagName("rootToken"); //Root token                                
                    NodeList tList = doc.getElementsByTagName("token"); //token                                 
                    NodeList cList = doc.getElementsByTagName("solution");  //tag(class)

                              for (int temp2 = 0; temp2 < tList.getLength(); temp2++) { //token loop


                                  Node tNode = tList.item(temp2); //token                               
                                  Node rtNode = rtList.item(temp2); //Root token                                
                                  Node cNode = cList.item(temp2); //tag(class)

                                  if ((tNode.getNodeType() == Node.ELEMENT_NODE)) {

                                      Element tElement = (Element) tNode;                                                                    
                                      System.out.println("Token : " + tElement.getAttribute("value"));                                                           
                                      out.write(tElement.getAttribute("value")+"\t");

                                      if(tNode.hasChildNodes()){

                                          Element cElement = (Element) cNode;                                   
                                          System.out.println("class : " + cElement.getAttribute("CI_class"));                                                                       
                                          out.write(cElement.getAttribute("CI_class"));                                    
                                          out.write("\n");

                                      }

                                  }   

                              }            

                              out.close();
    }

}

Parsing Deep Xml file using DOM Parser

No comments:

Post a Comment