import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

/**
 * Scans a line-oriented HTML table dump and prints XML-like deal fragments to standard output.
 *
 * <p>What is currently emitted:
 * <ul>
 *   <li>the text between {@code <h4>} and {@code </h4>} (printed without a trailing newline);</li>
 *   <li>{@code TELEPHONE}/{@code MOBILE} elements derived from the multi-line
 *       {@code <p>Address ...} paragraph that follows the first {@code <p>} line;</li>
 *   <li>empty {@code CATEGORY}, {@code DISCOUNT}, {@code LOCALPAGEURL} and {@code SITEURL}
 *       elements plus the closing {@code DEAL} tag for every {@code </tr>};</li>
 *   <li>the closing {@code DEALS} tag for {@code </table>}.</li>
 * </ul>
 *
 * <p>NOTE(review): earlier revisions carried commented-out output for the opening
 * {@code DEALS}/{@code DEAL} tags and for {@code IMAGELINK}, {@code TITLE}, {@code TAGS},
 * {@code DESCRIPTION} and {@code ADDRESS}, as well as for the telephone/mobile values when
 * a {@code TEL:} marker is present. That output is still disabled here; confirm whether it
 * should be restored.
 *
 * <p>NOTE(review): the leading {@code "t"} / {@code "tt"} in the emitted literals look like
 * tab escapes that lost their backslashes. They are reproduced unchanged; confirm the
 * intended indentation before altering them.
 */
public class GetXMLData {

    /** Input file read when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT_FILE = "htmltable.txt";

    /**
     * Program entry point.
     *
     * @param args optional; {@code args[0]} is the path of the file to read. When absent,
     *             {@code htmltable.txt} in the working directory is used.
     */
    public static void main(String[] args) {
        String inputFile = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_FILE;

        // try-with-resources closes the file even when reading fails part-way through.
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(new FileInputStream(inputFile)))) {
            convert(reader);
        } catch (IOException e) {
            System.err.println("Error: " + e.getMessage());
        }
    }

    /** Reads the input line by line and prints the fragments described on the class. */
    private static void convert(BufferedReader reader) throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.contains("<h4>")) {
                System.out.print(textBetween(line, "<h4>", "</h4>"));
            }

            if (line.contains("<p>")) {
                // The first <p> line is a description paragraph; the address paragraph is
                // searched for in the lines that follow it.
                if (!emitContactsFromAddressParagraph(reader)) {
                    // Input ended inside the paragraph section: nothing left to process.
                    break;
                }
                // The last line consumed is the bare "</p>" terminator, which cannot carry a
                // row or table end tag, so move straight on to the next line.
                continue;
            }

            if (line.contains("</tr>")) {
                System.out.println("tt<CATEGORY></CATEGORY>");
                System.out.println("tt<DISCOUNT></DISCOUNT>");
                System.out.println("tt<LOCALPAGEURL></LOCALPAGEURL>");
                System.out.println("tt<SITEURL></SITEURL>");
                System.out.println("t</DEAL>");
            }

            if (line.contains("</table>")) {
                System.out.println("</DEALS>");
            }
        }
    }

    /**
     * Skips forward to the next {@code <p>Address} line, gathers that paragraph up to a line
     * consisting solely of {@code </p>}, and prints its contact elements.
     *
     * <p>NOTE(review): every line before the address paragraph is consumed without being
     * inspected for {@code </tr>} or {@code </table>}, so a row lacking an address paragraph
     * absorbs the rows after it. This matches the previous behaviour; confirm it is intended.
     *
     * @return {@code true} if a complete address paragraph was processed, {@code false} if
     *         the input ended first
     */
    private static boolean emitContactsFromAddressParagraph(BufferedReader reader)
            throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            if (!line.contains("<p>Address")) {
                continue;
            }

            StringBuilder paragraph =
                    new StringBuilder(line.substring(line.indexOf("<p>") + 3));
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.equals("</p>")) {
                    printContacts(paragraph.toString());
                    return true;
                }
                paragraph.append(trimmed);
            }
            return false;
        }
        return false;
    }

    /**
     * Prints the telephone/mobile elements for one address paragraph.
     *
     * <p>Output depends on which markers the paragraph contains:
     * <ul>
     *   <li>{@code TEL:} and {@code MOBILE:} - nothing is printed;</li>
     *   <li>{@code TEL:} only - just the closing {@code MOBILE} tag is printed;</li>
     *   <li>{@code MOBILE:} only - an empty {@code TELEPHONE} element and a {@code MOBILE}
     *       element holding the text after the marker with {@code <br/>} removed;</li>
     *   <li>neither - empty {@code TELEPHONE} and {@code MOBILE} elements.</li>
     * </ul>
     */
    private static void printContacts(String paragraph) {
        int mobileAt = paragraph.indexOf("MOBILE:");

        if (paragraph.contains("TEL:")) {
            if (mobileAt < 0) {
                System.out.println("</MOBILE>");
            }
            return;
        }

        System.out.print("tt<TELEPHONE>");
        System.out.println("</TELEPHONE>");
        System.out.print("tt<MOBILE>");
        if (mobileAt >= 0) {
            System.out.print(paragraph.substring(mobileAt + 7).replace("<br/>", ""));
        }
        System.out.println("</MOBILE>");
    }

    /**
     * Returns the text following the first {@code open} marker up to the next {@code close}
     * marker, or up to the end of the line when the closing marker is missing.
     *
     * <p>Callers must ensure {@code line} contains {@code open}.
     */
    private static String textBetween(String line, String open, String close) {
        int start = line.indexOf(open) + open.length();
        int end = line.indexOf(close, start);
        return end < 0 ? line.substring(start) : line.substring(start, end);
    }
}