Given an XML file that contains many contracts, this program splits it into
several output XML files with 10 contracts in each.
Here, a contract is everything between the <Contract> and </Contract> tags.
If your XML uses different tags, replace the Contract tag in the code with your own tag.
Example: if your input XML has 104 contracts, the output will be 11 XML files:
10 files with 10 contracts each, and an 11th file with the remaining 4 contracts.
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;

/**
 * Splits an XML file that contains many contract elements into several smaller XML files.
 *
 * <p>A contract is the run of lines that ends with a line containing {@code </Contract>}.
 * Every output file wraps up to {@link #CONTRACTS_PER_FILE} contracts in a {@code <root>}
 * element and is named {@code Life2012_<n>.xml}, numbered from 1, inside the output
 * directory. For example, 104 contracts produce 11 files: ten with 10 contracts and one
 * with the remaining 4.
 *
 * <p>Usage: {@code java SplitXML <input.xml> <output-directory>}
 *
 * <p>Files are read and written with the platform default charset.
 */
public class SplitXML {

    /** Maximum number of contracts written to a single output file. */
    private static final int CONTRACTS_PER_FILE = 10;

    /** Canonical spelling of the opening contract tag. */
    private static final String OPEN_TAG = "<Contract>";

    /** Canonical spelling of the closing contract tag; a line containing it ends a contract. */
    private static final String CLOSE_TAG = "</Contract>";

    /** Line terminator used for every line written to the output files. */
    private static final String NEWLINE = System.getProperty("line.separator");

    /**
     * Reads the input XML, groups its contracts into batches and writes one file per batch.
     *
     * @param args {@code args[0]} is the input XML file, {@code args[1]} is the existing
     *             directory that receives the output files
     * @throws IllegalArgumentException if fewer than two arguments are supplied
     * @throws Exception if the input cannot be read or an output file cannot be written
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            throw new IllegalArgumentException(
                    "Usage: java SplitXML <input.xml> <output-directory>");
        }
        long startTime = System.currentTimeMillis();

        List<String> contracts;
        // try-with-resources closes the reader even if reading fails part-way through.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(args[0])))) {
            contracts = readContracts(reader);
        }

        int count = contracts.size();
        System.out.println("count====" + count);

        int filesCreated = 0;
        for (int from = 0; from < count; from += CONTRACTS_PER_FILE) {
            // The last batch may be shorter than CONTRACTS_PER_FILE.
            int to = Math.min(from + CONTRACTS_PER_FILE, count);
            filesCreated++;
            writeBatch(args[1] + "/Life2012_" + filesCreated + ".xml",
                    contracts.subList(from, to));
        }

        System.out.println("total files created==" + filesCreated);
        System.out.println("done");
        System.out.println("Total Time Taken:"
                + (System.currentTimeMillis() - startTime) / 1000.0 + " Seconds");
    }

    /**
     * Collects the text of every contract found in the input.
     *
     * <p>Lines are accumulated until one contains {@code </Contract>}; the accumulated text
     * (including that closing line) becomes one list entry. Root wrapper tags and the XML
     * declaration are dropped, and anything after the last closing tag is discarded.
     *
     * @param reader source of the input XML, read line by line
     * @return the contracts in input order, each ending with a line terminator
     * @throws IOException if reading fails
     */
    private static List<String> readContracts(BufferedReader reader) throws IOException {
        List<String> contracts = new ArrayList<String>();
        StringBuilder current = new StringBuilder();
        String rawLine;
        while ((rawLine = reader.readLine()) != null) {
            if (isWrapperLine(rawLine)) {
                continue;
            }
            String line = normalizeTagLine(rawLine);
            current.append(line).append(NEWLINE);
            if (line.contains(CLOSE_TAG)) {
                // Store the whole accumulated contract, not just its closing line.
                contracts.add(current.toString());
                current.setLength(0);
            }
        }
        return contracts;
    }

    /**
     * Tells whether a line belongs to the input's outer wrapper rather than to a contract.
     *
     * <p>The output files supply their own {@code <root>} element, so the input's root tags
     * are dropped. An XML declaration is dropped as well, because copying it inside
     * {@code <root>} would make the output ill-formed.
     */
    private static boolean isWrapperLine(String line) {
        String trimmed = line.trim();
        return trimmed.equalsIgnoreCase("<ROOT>")
                || trimmed.equalsIgnoreCase("</ROOT>")
                || trimmed.startsWith("<?xml");
    }

    /**
     * Rewrites a line consisting only of a contract tag, in any letter case, to its
     * canonical spelling; every other line is returned unchanged.
     */
    private static String normalizeTagLine(String line) {
        String trimmed = line.trim();
        if (trimmed.equalsIgnoreCase(OPEN_TAG)) {
            return OPEN_TAG;
        }
        if (trimmed.equalsIgnoreCase(CLOSE_TAG)) {
            return CLOSE_TAG;
        }
        return line;
    }

    /**
     * Writes one batch of contracts, wrapped in a {@code <root>} element, to {@code path}.
     *
     * @param path      file to create or overwrite
     * @param contracts contract texts, each already ending with a line terminator
     * @throws IOException if the file cannot be created or written
     */
    private static void writeBatch(String path, List<String> contracts) throws IOException {
        // try-with-resources flushes and closes the writer even if a write fails.
        try (Writer output = new BufferedWriter(new FileWriter(path))) {
            output.write("<root>");
            output.write(NEWLINE);
            for (String contract : contracts) {
                output.write(contract);
            }
            output.write("</root>");
        }
        System.out.println("file created!!!!");
    }
}