This is a reusable code written in Java with a simple Standalone program. User can just run this program with the two command line arguments, Out of these two arguments, First one should be the Input file name along with the path (which includes Drive and Folder) and Second one should the output file name along with the path (which includes Drive and Folder).
POI jars should be included in the classpath.
import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.util.HashSet; import java.util.Iterator; import java.util.Set; import java.util.StringTokenizer; import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFSheet; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.poifs.filesystem.POIFSFileSystem; /** * TODO To change the template for this generated type comment go to Window - * Preferences - Java - Code Style - Code Templates */ public class RemoveDuplicates { /** * */ public RemoveDuplicates() { super(); // TODO Auto-generated constructor stub } public static void main(String[] args) { HSSFWorkbook workBook = null; POIFSFileSystem fs = null; HSSFSheet sheet = null; try { fs = new POIFSFileSystem(new FileInputStream(args[0])); workBook = new HSSFWorkbook(fs); sheet = workBook.getSheetAt(0); int rows = sheet.getPhysicalNumberOfRows(); System.out.println(rows); Set s=new HashSet(); String str=""; for (int i = 0; i < rows; i++) { str=""; HSSFRow row = sheet.getRow((short) i); int columns = row.getPhysicalNumberOfCells(); for (int j = 0; j < columns; ++j) { HSSFCell cell0 = row.getCell((short) j); int type=cell0.getCellType(); if(type==0){ double intValue= cell0.getNumericCellValue(); str=str+String.valueOf(intValue)+","; }else if(type==1){ String stringValue=cell0.getStringCellValue(); str=str+stringValue+","; } } str=str .replace(str.charAt(str .lastIndexOf(",")), ' '); s.add(str.trim()); } StringTokenizer st=null; String result=""; Iterator iter=s.iterator(); //Create a new workbook for the output excel HSSFWorkbook workBookOut = new HSSFWorkbook(); //Create a new Sheet in the output excel workbook HSSFSheet sheetOut = workBookOut.createSheet("Remove Duplicates"); HSSFRow[] row = new HSSFRow[s.size()]; int rowCount=0; int cellCount=0; while(iter.hasNext()){ cellCount=0; row[rowCount] = sheetOut.createRow(rowCount); result=iter.next().toString(); System.out.println(result); st=new StringTokenizer(result," "); HSSFCell[] cell= new HSSFCell[st.countTokens()]; while(st.hasMoreTokens()){ cell[cellCount]=row[rowCount].createCell((short)cellCount); cell[cellCount].setCellValue(st.nextToken()); ++cellCount; } ++rowCount; } FileOutputStream fileOut = new FileOutputStream(args[1]); workBookOut.write(fileOut); fileOut.close(); } catch (IOException ioe) { ioe.printStackTrace(); } }}