import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
/**
 * Scans an HTML table dump line by line and prints fragments of a DEALS/DEAL
 * XML-style listing to standard output.
 *
 * <p>The input is the file {@code htmltable.txt} in the working directory. Only
 * the following lines produce output:
 * <ul>
 *   <li>a line containing {@code <h4>}: the text between {@code <h4>} and
 *       {@code </h4>} is printed without any surrounding tag or newline;</li>
 *   <li>a line containing {@code <p>}: the following lines are consumed up to
 *       the end of the next {@code <p>Address} paragraph and its TELEPHONE /
 *       MOBILE elements are printed;</li>
 *   <li>a line containing {@code </tr>}: the empty CATEGORY, DISCOUNT,
 *       LOCALPAGEURL and SITEURL elements and the closing DEAL tag are printed;</li>
 *   <li>a line containing {@code </table>}: the closing DEALS tag is printed.</li>
 * </ul>
 * Lines containing {@code <table}, {@code <tr>} or {@code <img} produce no
 * output, and neither the description text nor the address text is emitted.
 *
 * <p>NOTE(review): the literal {@code "tt"} / {@code "t"} prefixes in the output
 * look like tab escapes that lost their backslashes - confirm before changing;
 * they are kept exactly as they were.
 */
public class GetXMLData {

    /** Input file, resolved against the current working directory. */
    private static final String INPUT_FILE = "htmltable.txt";

    /**
     * Reads {@link #INPUT_FILE} and prints the extracted fragments to
     * {@code System.out}.
     *
     * <p>Any exception (missing file, read failure, malformed title line) ends
     * the scan and is reported on {@code System.err} as
     * {@code "Error: " + message}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // try-with-resources closes the reader, and with it the file, on every
        // exit path - including the exceptional ones.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(INPUT_FILE)))) {
            String strLine;
            while ((strLine = br.readLine()) != null) {
                if (strLine.contains("<h4>")) {
                    printTitle(strLine);
                }
                if (strLine.contains("<p>")) {
                    printContactsOfNextAddress(br);
                    // The helper stops either on a bare "</p>" line or at end of
                    // input. Neither can hold "</tr>" or "</table>", so the row
                    // and table checks below are skipped; at end of input the
                    // next readLine() returns null and the loop finishes cleanly.
                    continue;
                }
                if (strLine.contains("</tr>")) {
                    printDealFooter();
                }
                if (strLine.contains("</table>")) {
                    System.out.println("</DEALS>");
                }
            }
        } catch (Exception e) {
            System.err.println("Error: " + e.getMessage());
        }
    }

    /**
     * Prints the text between the first {@code <h4>} and the first
     * {@code </h4>} of the line, without a trailing newline.
     *
     * @param line a line known to contain {@code <h4>}
     * @throws StringIndexOutOfBoundsException if {@code </h4>} is missing from
     *         the line or appears before the end of {@code <h4>}
     */
    private static void printTitle(String line) {
        int start = line.indexOf("<h4>") + 4;
        int end = line.indexOf("</h4>");
        System.out.print(line.substring(start, end));
    }

    /**
     * Consumes lines up to and including the bare {@code </p>} line that ends
     * the next {@code <p>Address} paragraph, then prints that paragraph's
     * contact elements.
     *
     * <p>Lines before the address paragraph are skipped without being
     * inspected further. The line that triggered this call has already been
     * read by the caller and is never treated as the address paragraph itself.
     * If the input ends before the address paragraph is found or closed,
     * nothing is printed.
     *
     * @param br reader positioned just after the triggering {@code <p>} line
     * @throws IOException if reading from {@code br} fails
     */
    private static void printContactsOfNextAddress(BufferedReader br) throws IOException {
        String line = br.readLine();
        while (line != null && !line.contains("<p>Address")) {
            line = br.readLine();
        }
        if (line == null) {
            return;
        }

        // The paragraph text starts right after the first "<p>" of its opening
        // line; every continuation line is appended trimmed, with no separator.
        StringBuilder paragraph =
                new StringBuilder(line.substring(line.indexOf("<p>") + 3));
        while ((line = br.readLine()) != null) {
            String trimmed = line.trim();
            if (trimmed.equals("</p>")) {
                printContacts(paragraph.toString());
                return;
            }
            paragraph.append(trimmed);
        }
    }

    /**
     * Prints the TELEPHONE / MOBILE output for one collected address paragraph.
     *
     * <ul>
     *   <li>{@code TEL:} and {@code MOBILE:} both present: nothing is printed;</li>
     *   <li>only {@code TEL:} present: a lone {@code </MOBILE>} line is printed;</li>
     *   <li>only {@code MOBILE:} present: an empty TELEPHONE element, then a
     *       MOBILE element holding the text after {@code MOBILE:} with every
     *       {@code <br/>} removed;</li>
     *   <li>neither present: empty TELEPHONE and MOBILE elements.</li>
     * </ul>
     *
     * <p>NOTE(review): the two {@code TEL:} cases look like leftovers of
     * disabled output (the lone closing tag is unbalanced) - confirm the
     * intended format before changing them.
     *
     * @param paragraph the address paragraph text collected by the caller
     */
    private static void printContacts(String paragraph) {
        int mobileAt = paragraph.indexOf("MOBILE:");
        if (paragraph.contains("TEL:")) {
            if (mobileAt < 0) {
                System.out.println("</MOBILE>");
            }
            return;
        }

        System.out.println("tt<TELEPHONE></TELEPHONE>");
        System.out.print("tt<MOBILE>");
        if (mobileAt >= 0) {
            String mobile = paragraph.substring(mobileAt + 7);
            System.out.print(mobile.replace("<br/>", ""));
        }
        System.out.println("</MOBILE>");
    }

    /** Prints the fixed, empty trailing elements of a deal and its closing tag. */
    private static void printDealFooter() {
        System.out.println("tt<CATEGORY></CATEGORY>");
        System.out.println("tt<DISCOUNT></DISCOUNT>");
        System.out.println("tt<LOCALPAGEURL></LOCALPAGEURL>");
        System.out.println("tt<SITEURL></SITEURL>");
        System.out.println("t</DEAL>");
    }
}