Monday, 4 January 2016

XML : to create an xml from a text file in java

      Can anyone suggest me a way to create an xml document from a text file in java?         I have already coded some program it doesnt meet my needs. My code section is here       

I am trying to create a program that converts a pdf file to a ppt for that i need to convert pdf to text file and then that text file into xml to extract the features.But now i cant continue further because the xml file is not getting correctly

   This code generates an xml document but the heading of the text file and       certain section are not being shown as intended and am using three tags for generating xml       Section,SubSection,SubSubSection and sentences                           public class TextToXml {                  StreamResult out;                    TransformerHandler th;                public void convrt(File f)            {            try{             String fname=f.getName().replaceAll("pdf", "txt");          FileInputStream fstream = new FileInputStream(fname);           DataInputStream in = new DataInputStream(fstream);          BufferedReader br = new BufferedReader(new InputStreamReader(in));          out = new StreamResult("djksgh.xml");          openXml();          String strLine;          int cnt=0;          char strarray[]=new char[250];          char c;            while ((strLine = br.readLine()) != null)   {                for(int i=0;i<strLine.length();i++)              {                  c=strLine.charAt(i);                  strarray[i]=c;              }            //  cnt=count(strLine);                if( (Character.isDigit(strarray[0]))&&(strarray[1]=='.')&&(Character.isWhitespace(strarray[2]))&&(Character.isLetter(strarray[3])))              {                  processhead(strLine);                }              else if((Character.isDigit(strarray[0]))&&(strarray[1]=='.')&&(Character.isDigit(strarray[2]))&&(strarray[3]=='.')&&(Character.isWhitespace(strarray[4]))&&(Character.isLetter(strarray[5])))              {                 processShead(strLine);               }              else if((Character.isDigit(strarray[0]))&&(strarray[1]=='.')&&(Character.isDigit(strarray[2]))&&(strarray[3]=='.')&&(Character.isDigit(strarray[4]))&&(Character.isWhitespace(strarray[5]))&&(Character.isLetter(strarray[6])))              {                  processSS(strLine);              }                else                {                   process(strLine);                  }          }                in.close();          closeXml();          }            catch (Exception e)          {            System.err.println("Error: " + e.getMessage());          }           }        public void openXml() throws ParserConfigurationException,         TransformerConfigurationException, SAXException {                SAXTransformerFactory tf = (SAXTransformerFactory) SAXTransformerFactory.newInstance();              th = tf.newTransformerHandler();                // pretty XML output              Transformer serializer = th.getTransformer();              serializer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");              serializer.setOutputProperty(OutputKeys.INDENT, "yes");                th.setResult(out);              th.startDocument();              th.startElement(null, null, "MyXml", null);          }          public static boolean isupper(String str) {              for(int i=0;i<str.length();i++)              {                  char c=str.charAt(i);                  if(c>=97&&c<=122)                  {                      return false;                  }              }              return true;          }          public void process(String s) throws SAXException {                      th.startElement(null, null, "Sentence", null);                  th.characters(s.toCharArray(), 0, s.length());                      th.endElement(null, null, "Sentence");                        }        public void processhead(String s) throws SAXException {                th.startElement(null, null, "Section", null);                th.characters(s.toCharArray(), 0, s.length());             th.startDocument();             th.startElement(null, null, "Section", null);                }      public void processShead(String s) throws SAXException {              th.startElement(null, null, "SubSection", null);              th.characters(s.toCharArray(), 0, s.length());              th.endElement(null, null, "SubSection");          }      public void processSS(String s) throws SAXException {              th.startElement(null, null, "SubSubSection", null);              th.characters(s.toCharArray(), 0, s.length());              th.endElement(null, null, "SubSubSection");          }              public void closeXml() throws SAXException {              th.endElement(null, null, "MyXml");              th.endDocument();          }                                   }    

No comments:

Post a Comment