poi-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Greg Woolsey <greg.wool...@gmail.com>
Subject Re: Need help in reading large XLSX file using poi 3.16
Date Tue, 02 May 2017 15:35:08 GMT
What are your jvm parameters? For large workbooks you really need to set
-Xmx2g or more.  I've gone as high as 16g.  If you can break up the data
into multiple sheets that helps too.

On Tue, May 2, 2017, 01:35 Mubasshar Ahmad <shamnot@msn.com> wrote:

> Hello,
>
>
> I spent a lot of time on searching the solution but not succeeded. Thats
> why I am writing to you.
>
>
> Actually I have to read a large XLSX file haing 200 columns and 30000
> rows. I have tried both SAX parsing (
> http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api) approach-1
> and new XSSFWorkbook() approach-2 as follows:
>
>
> XSSFWorkbook workbook = new XSSFWorkbook (file);
> XSSFSheet sheet = workbook.getSheetAt(0);
> Iterator ite = sheet.rowIterator();
> while(ite.hasNext()){
>     Row row = ite.next();
>     Iterator<Cell> cite = row.cellIterator();
>     while(cite.hasNext())
>
>     {
>         Cell c = cite.next();
>         System.out.print(c.toString() +"  ");
>     }
> }
>
>
> But the problem is that in approach 1, it takes too much time for parsing
> the file, 10 minutes for 2000 rows. And in approach-2, it got stuck on the
> first line of the code snippet and throws OutOfMemoryException.
>
>
> Please suggest me something so that I may parse a very large XLSX file
> more than 2 GB size.
>
>
> Here is the handler code.
>
>
> ////////////////////////////////////////////////////////   START
>  ////////////////////////////////////////////////
>
>
> public void startElement(String uri, String localName, String name,
> Attributes attributes) throws SAXException {
> // c => cell, row ==> row, r ==> cell referrence
> if(name.equals(C))
> {
> cellType = attributes.getValue(T);
> }
> if(name.equalsIgnoreCase(ROW2))
> {
> //reset rowcount for loading header of new file import
> if(handler.getClass().getSimpleName().equals(EXCEL_COLUMN_HELPER) &&
> rowCount > 0 && canContinue && counterReset){
> //rowcount has value because of an old import file
> rowCount = 0;
> counterReset=false;
> }
>
> row = new ArrayList<TableCellValue<?>>();
> isRowStarted = true;
> rowNumber++;
> }
> }
> public void endElement(String uri, String localName, String name)
> throws SAXException {
> if(name.equalsIgnoreCase(ROW2))
> {
> isRowStarted = false;
> if(canContinue)
> {
> if(handler.getClass().getSimpleName().equals(EXCEL_COLUMN_HELPER))
> {
> //processing header row
> //its a header row
> if(rowNumber==headerRow)
> {
> canContinue = false;
> handler.acceptRow(row);
> }
> rowCount = rowCount + 1;
> }
> if(handler.getClass().getSimpleName().equals(EXCEL_TABLE_PASTER))
> {
> // accept the data rows. These are the rows after the header row
> if(rowNumber > headerRow)
> {
> rowData.add(row);
> if((rowData.size() == blockSize) || (rowNumber == rowCount))
> {
> List<List<TableCellValue<?>>> tempRowData = new
> ArrayList<List<TableCellValue<?>>>(rowData);
> handler.acceptRows(tempRowData);
> rowData.clear();
> }
> }
> }
> }else
> {
> if(handler.getClass().getSimpleName().equals(EXCEL_COLUMN_HELPER)){
> rowCount = rowCount + 1;
> }
> }
> }
> if(name.equals(C) && canContinue){
> if(contents == null || contents.length() < 0){
> //it is a blank cell
> row.add( new TableCellValue<Object>(null));
> }
> else
> {
> processRecord();
> }
> contents = EMPTY; //clear contents after value edited cell closed
> }
> }
>
> /**
> * Cache contents of a cell
> * It is called for some cells more than once
> * we append to get full value of a cell
> */
> public void characters(char[] ch, int start, int length)
> throws SAXException {
> if(!(new String(ch, start, length).equalsIgnoreCase(NEW_LINE)))
> {
> contents += new String(ch, start, length); //save contents of specific cell
> }
> }
> /**
> * process each record
> */
> private void processRecord(){
> try
> {
> if(canContinue)
> {
> if(rowNumber == headerRow)
> {
> if(cellType == null)
> {
> if(isRowStarted)
> {
> row.add( new TableCellValue<Object>(contents) );
> }
>
> }
> else
> {
> //its headers of table
> if(reader.getSharedStringsTable().getCount() > 0)
> {
> //its shared strings table format
> if
> (!reader.getSharedStringsTable().getItems().get(Integer.parseInt(contents)).isSetT())
> {
> List<CTRElt> list =
> reader.getSharedStringsTable().getItems().get(Integer.parseInt(contents)).getRList();
> contents = EMPTY;
> for (CTRElt c : list)
> {
> contents += c.getT();
> }
> }
> else
> {
> contents =
> reader.getSharedStringsTable().getItems().get(Integer.parseInt(contents)).getT();
> }
> }
> if(cellType.equalsIgnoreCase(INLINE_STR) || cellType.equalsIgnoreCase(S))
> {
> if(isRowStarted){
> row.add( new TableCellValue<Object>(contents) );
> }
> }
> }
> }
> else
> {
> if(cellType != null)
> {
> if(reader.getSharedStringsTable().getCount() > 0 &&
> contents.trim().length() > 0)
> {
> //rows are saved in sharedTable
> contents = (new
> XSSFRichTextString(reader.getSharedStringsTable().getEntryAt((Integer.parseInt(contents))))).toString();
> }
> if(cellType.equalsIgnoreCase(S))
> {
> row.add( new TableCellValue<Object>(contents) );
>
> }
> else if(cellType.equalsIgnoreCase(N))
> {
> Number number = null;
> try
> {
> number = NumberFormat.getInstance().parse(contents );
> } catch (ParseException e)
> {
> e.printStackTrace();
> }
> row.add( new TableCellValue<Object>(
> NumbersUtil.convertToAppropriateType(number) ) );
> }
> else if(cellType.equals(B))
> {
> row.add( new TableCellValue<Object>(new Boolean(contents) ));
> }
> else if(cellType.equals(STR))
> {
> row.add( new TableCellValue<Object>(contents));
> }
> else if(cellType.equals(INLINE_STR))
> {
> row.add( new TableCellValue<Object>(contents));
> }
> else
> {
> row.add( new TableCellValue<Object>(null));
> }
> }
> else
> {
> if(reader.getSharedStringsTable().getCount() > 0)
> {
> row.add( new TableCellValue<Object>(contents) );
> }
> }
> }
> }
> }catch(Exception ex){
> System.out.println(ex.getMessage());
> }
> }
>
>
> ///////////////////////////////////////////   END
>  ///////////////////////////////////////////////////
>
>
>
> Best Regards,
>
>
> Mubasshar Ahmad
>
>

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message