poi-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Mubasshar Ahmad <sham...@msn.com>
Subject Need help in reading large XLSX file using poi 3.16
Date Tue, 02 May 2017 11:29:17 GMT
Hello,


I spent a lot of time on searching the solution but not succeeded. Thats why I am writing
to you.


Actually I have to read a large XLSX file haing 200 columns and 30000 rows. I have tried both
SAX parsing (http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api) approach-1 and new
XSSFWorkbook() approach-2 as follows:


XSSFWorkbook workbook = new XSSFWorkbook (file);
XSSFSheet sheet = workbook.getSheetAt(0);
Iterator ite = sheet.rowIterator();
while(ite.hasNext()){
    Row row = ite.next();
    Iterator<Cell> cite = row.cellIterator();
    while(cite.hasNext())

    {
        Cell c = cite.next();
        System.out.print(c.toString() +"  ");
    }
}


But the problem is that in approach 1, it takes too much time for parsing the file, 10 minutes
for 2000 rows. And in approach-2, it got stuck on the first line of the code snippet and throws
OutOfMemoryException.


Please suggest me something so that I may parse a very large XLSX file more than 2 GB size.


Here is the handler code.


////////////////////////////////////////////////////////   START   ////////////////////////////////////////////////


public void startElement(String uri, String localName, String name,
Attributes attributes) throws SAXException {
// c => cell, row ==> row, r ==> cell referrence
if(name.equals(C))
{
cellType = attributes.getValue(T);
}
if(name.equalsIgnoreCase(ROW2))
{
//reset rowcount for loading header of new file import
if(handler.getClass().getSimpleName().equals(EXCEL_COLUMN_HELPER) &&  rowCount >
0 && canContinue && counterReset){
//rowcount has value because of an old import file
rowCount = 0;
counterReset=false;
}

row = new ArrayList<TableCellValue<?>>();
isRowStarted = true;
rowNumber++;
}
}
public void endElement(String uri, String localName, String name)
throws SAXException {
if(name.equalsIgnoreCase(ROW2))
{
isRowStarted = false;
if(canContinue)
{
if(handler.getClass().getSimpleName().equals(EXCEL_COLUMN_HELPER))
{
//processing header row
//its a header row
if(rowNumber==headerRow)
{
canContinue = false;
handler.acceptRow(row);
}
rowCount = rowCount + 1;
}
if(handler.getClass().getSimpleName().equals(EXCEL_TABLE_PASTER))
{
// accept the data rows. These are the rows after the header row
if(rowNumber > headerRow)
{
rowData.add(row);
if((rowData.size() == blockSize) || (rowNumber == rowCount))
{
List<List<TableCellValue<?>>> tempRowData = new ArrayList<List<TableCellValue<?>>>(rowData);
handler.acceptRows(tempRowData);
rowData.clear();
}
}
}
}else
{
if(handler.getClass().getSimpleName().equals(EXCEL_COLUMN_HELPER)){
rowCount = rowCount + 1;
}
}
}
if(name.equals(C) && canContinue){
if(contents == null || contents.length() < 0){
//it is a blank cell
row.add( new TableCellValue<Object>(null));
}
else
{
processRecord();
}
contents = EMPTY; //clear contents after value edited cell closed
}
}

/**
* Cache contents of a cell
* It is called for some cells more than once
* we append to get full value of a cell
*/
public void characters(char[] ch, int start, int length)
throws SAXException {
if(!(new String(ch, start, length).equalsIgnoreCase(NEW_LINE)))
{
contents += new String(ch, start, length); //save contents of specific cell
}
}
/**
* process each record
*/
private void processRecord(){
try
{
if(canContinue)
{
if(rowNumber == headerRow)
{
if(cellType == null)
{
if(isRowStarted)
{
row.add( new TableCellValue<Object>(contents) );
}

}
else
{
//its headers of table
if(reader.getSharedStringsTable().getCount() > 0)
{
//its shared strings table format
if (!reader.getSharedStringsTable().getItems().get(Integer.parseInt(contents)).isSetT())
{
List<CTRElt> list = reader.getSharedStringsTable().getItems().get(Integer.parseInt(contents)).getRList();
contents = EMPTY;
for (CTRElt c : list)
{
contents += c.getT();
}
}
else
{
contents = reader.getSharedStringsTable().getItems().get(Integer.parseInt(contents)).getT();
}
}
if(cellType.equalsIgnoreCase(INLINE_STR) || cellType.equalsIgnoreCase(S))
{
if(isRowStarted){
row.add( new TableCellValue<Object>(contents) );
}
}
}
}
else
{
if(cellType != null)
{
if(reader.getSharedStringsTable().getCount() > 0 && contents.trim().length() >
0)
{
//rows are saved in sharedTable
contents = (new XSSFRichTextString(reader.getSharedStringsTable().getEntryAt((Integer.parseInt(contents))))).toString();
}
if(cellType.equalsIgnoreCase(S))
{
row.add( new TableCellValue<Object>(contents) );

}
else if(cellType.equalsIgnoreCase(N))
{
Number number = null;
try
{
number = NumberFormat.getInstance().parse(contents );
} catch (ParseException e)
{
e.printStackTrace();
}
row.add( new TableCellValue<Object>( NumbersUtil.convertToAppropriateType(number) )
);
}
else if(cellType.equals(B))
{
row.add( new TableCellValue<Object>(new Boolean(contents) ));
}
else if(cellType.equals(STR))
{
row.add( new TableCellValue<Object>(contents));
}
else if(cellType.equals(INLINE_STR))
{
row.add( new TableCellValue<Object>(contents));
}
else
{
row.add( new TableCellValue<Object>(null));
}
}
else
{
if(reader.getSharedStringsTable().getCount() > 0)
{
row.add( new TableCellValue<Object>(contents) );
}
}
}
}
}catch(Exception ex){
System.out.println(ex.getMessage());
}
}


///////////////////////////////////////////   END   ///////////////////////////////////////////////////



Best Regards,


Mubasshar Ahmad


Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message