tika-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From n...@apache.org
Subject svn commit: r1221111 - /tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
Date Tue, 20 Dec 2011 06:08:15 GMT
Author: nick
Date: Tue Dec 20 06:08:15 2011
New Revision: 1221111

URL: http://svn.apache.org/viewvc?rev=1221111&view=rev
Log:
TIKA-757 Tidy the Word Extractor picture locating code

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java?rev=1221111&r1=1221110&r2=1221111&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java	(original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java	Tue Dec 20 06:08:15 2011
@@ -535,15 +535,10 @@ public class WordExtractor extends Abstr
           picturesTable = doc.getPicturesTable();
           all = picturesTable.getAllPictures();
           
-          // Compute the Offset-Picture lookup
+          // Build the Offset-Picture lookup map
           lookup = new HashMap<Integer, Picture>();
           for(Picture p : all) {
-             // TODO Make this nicer when POI 3.7 is out
-             String name = p.suggestFullFileName();
-             if(name.indexOf('.') > -1)
-                name = name.substring(0, name.indexOf('.'));
-             int offset = Integer.parseInt(name, 16);
-             lookup.put(offset, p);
+             lookup.put(p.getStartOffset(), p);
           }
           
           // Work out which Pictures aren't referenced by



Mime
View raw message