tika-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From n...@apache.org
Subject svn commit: r1221119 - /tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
Date Tue, 20 Dec 2011 06:44:38 GMT
Author: nick
Date: Tue Dec 20 06:44:38 2011
New Revision: 1221119

URL: http://svn.apache.org/viewvc?rev=1221119&view=rev
Log:
TIKA-816 The Excel (XLS) Parser should format numeric formula cell values, and handle string formula cell values

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java?rev=1221119&r1=1221118&r2=1221119&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java	(original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java	Tue Dec 20 06:44:38 2011
@@ -51,6 +51,7 @@ import org.apache.poi.hssf.record.Number
 import org.apache.poi.hssf.record.RKRecord;
 import org.apache.poi.hssf.record.Record;
 import org.apache.poi.hssf.record.SSTRecord;
+import org.apache.poi.hssf.record.StringRecord;
 import org.apache.poi.hssf.record.TextObjectRecord;
 import org.apache.poi.hssf.record.chart.SeriesTextRecord;
 import org.apache.poi.hssf.record.common.UnicodeString;
@@ -181,6 +182,7 @@ public class ExcelExtractor extends Abst
         private Exception exception = null;
 
         private SSTRecord sstRecord;
+        private FormulaRecord stringFormulaRecord;
         
         private short previousSid;
 
@@ -274,6 +276,7 @@ public class ExcelExtractor extends Abst
                 hssfRequest.addListener(formatListener, LabelSSTRecord.sid);
                 hssfRequest.addListener(formatListener, NumberRecord.sid);
                 hssfRequest.addListener(formatListener, RKRecord.sid);
+                hssfRequest.addListener(formatListener, StringRecord.sid);
                 hssfRequest.addListener(formatListener, HyperlinkRecord.sid);
                 hssfRequest.addListener(formatListener, TextObjectRecord.sid);
                 hssfRequest.addListener(formatListener, SeriesTextRecord.sid);
@@ -375,7 +378,22 @@ public class ExcelExtractor extends Abst
 
             case FormulaRecord.sid: // Cell value from a formula
                 FormulaRecord formula = (FormulaRecord) record;
-                addCell(record, new NumberCell(formula.getValue(), format));
+                if (formula.hasCachedResultString()) {
+                   // The String itself should be the next record
+                   stringFormulaRecord = formula;
+                } else {
+                   addTextCell(record, formatListener.formatNumberDateCell(formula));
+                }
+                break;
+                
+            case StringRecord.sid:
+                if (previousSid == FormulaRecord.sid) {
+                   // Cached string value of a string formula
+                   StringRecord sr = (StringRecord) record;
+                   addTextCell(stringFormulaRecord, sr.getString());
+                } else {
+                   // Some other string not associated with a cell, skip
+                }
                 break;
 
             case LabelRecord.sid: // strings stored directly in the cell
@@ -435,6 +453,10 @@ public class ExcelExtractor extends Abst
             }
 
             previousSid = record.getSid();
+            
+            if (stringFormulaRecord != record) {
+               stringFormulaRecord = null;
+            }
         }
 
         private void processExtraText() throws SAXException {



Mime
View raw message