Given an XML file that contains
many contracts, this program splits it into several output XML files,
each holding 10 contracts.
Here, each contract is
enclosed between <Contract> and </Contract>
tags. If your XML uses different tags, replace the Contract tag in the code with your own tag.
Example: if your
input XML has 104 contracts, the output will be 11 XML files: the first 10
files contain 10 contracts each, and the 11th file contains the remaining 4 contracts.
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

/**
 * Splits one large XML file of contracts into several smaller XML files.
 *
 * <p>The input is processed line by line (it is not parsed as XML). Every run of
 * lines up to and including a line that contains {@code </Contract>} is treated as
 * one contract. Contracts are written, in input order, to files named
 * {@code Life2012_1.xml}, {@code Life2012_2.xml}, ... inside the output directory,
 * each wrapped in a fresh {@code <root>} element.
 *
 * <p>Usage: {@code java SplitXML <input.xml> <outputDir> [contractsPerFile]}.
 * {@code contractsPerFile} defaults to 10. The output directory must already exist.
 */
public class SplitXML {

    /** Number of contracts per output file when no third argument is given. */
    private static final int DEFAULT_CONTRACTS_PER_FILE = 10;

    /** Canonical spelling of the contract tags used in the output. */
    private static final String OPEN_TAG = "<Contract>";
    private static final String CLOSE_TAG = "</Contract>";

    private static final String NEWLINE = System.getProperty("line.separator");

    /**
     * Program entry point.
     *
     * @param args {@code args[0]} input XML path, {@code args[1]} existing output
     *             directory, optional {@code args[2]} contracts per output file
     * @throws IllegalArgumentException if fewer than two arguments are given or the
     *                                  chunk size is not a positive integer
     * @throws Exception                on any I/O failure
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            throw new IllegalArgumentException(
                    "Usage: java SplitXML <input.xml> <outputDir> [contractsPerFile]");
        }
        int contractsPerFile = DEFAULT_CONTRACTS_PER_FILE;
        if (args.length > 2) {
            contractsPerFile = Integer.parseInt(args[2]);
            if (contractsPerFile <= 0) {
                throw new IllegalArgumentException(
                        "contractsPerFile must be positive: " + contractsPerFile);
            }
        }

        long startTime = System.currentTimeMillis();

        List<String> contracts;
        // try-with-resources: the reader is closed even if reading fails.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(args[0]), StandardCharsets.UTF_8))) {
            contracts = readContracts(reader);
        }
        System.out.println("count====" + contracts.size());

        int filesCreated = 0;
        // One pass handles both the full chunks and the shorter final chunk.
        for (int start = 0; start < contracts.size(); start += contractsPerFile) {
            int end = Math.min(start + contractsPerFile, contracts.size());
            filesCreated++;
            writeChunk(args[1] + "/Life2012_" + filesCreated + ".xml",
                    contracts.subList(start, end));
            System.out.println("file created!!!!");
        }

        System.out.println("tem value====" + contracts.size());
        System.out.println("total files created==" + filesCreated);
        System.out.println("done");
        System.out.println("Total Time Taken:"
                + (System.currentTimeMillis() - startTime) / 1000.0 + " Seconds");
    }

    /**
     * Reads the input and returns the text of each contract, one list entry per contract.
     *
     * <p>Lines consisting only of a root tag ({@code <root>} / {@code </root>}, any case)
     * and XML declaration lines are dropped, because each output file gets its own root
     * element. Lines consisting only of a contract tag in any letter case are rewritten
     * to the canonical {@code <Contract>} / {@code </Contract>} spelling. Any lines after
     * the last closing contract tag are discarded.
     */
    private static List<String> readContracts(BufferedReader reader) throws IOException {
        List<String> contracts = new ArrayList<String>();
        StringBuilder current = new StringBuilder();
        String line;
        while ((line = reader.readLine()) != null) {
            String trimmed = line.trim();
            if (trimmed.equalsIgnoreCase("<root>")
                    || trimmed.equalsIgnoreCase("</root>")
                    || trimmed.startsWith("<?xml ")) {
                continue;
            }
            if (trimmed.equalsIgnoreCase(OPEN_TAG)) {
                line = OPEN_TAG;
            } else if (trimmed.equalsIgnoreCase(CLOSE_TAG)) {
                line = CLOSE_TAG;
            }
            current.append(line).append(NEWLINE);
            if (line.contains(CLOSE_TAG)) {
                // Capture the text BEFORE clearing the buffer, so the whole contract
                // (not just its closing tag) is kept.
                contracts.add(current.toString());
                current.setLength(0);
            }
        }
        return contracts;
    }

    /** Writes the given contracts to {@code path}, wrapped in a single root element. */
    private static void writeChunk(String path, List<String> contracts) throws IOException {
        try (Writer output = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(path), StandardCharsets.UTF_8))) {
            output.write("<root>");
            output.write(NEWLINE);
            for (String contract : contracts) {
                output.write(contract);
            }
            output.write("</root>");
        }
    }
}