jackrabbit-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mreut...@apache.org
Subject svn commit: r617385 - in /jackrabbit/trunk: jackrabbit-text-extractors/pom.xml jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/MsWordTextExtractor.java pom.xml
Date Fri, 01 Feb 2008 09:17:41 GMT
Author: mreutegg
Date: Fri Feb  1 01:17:38 2008
New Revision: 617385

URL: http://svn.apache.org/viewvc?rev=617385&view=rev
Log:
JCR-1290: tm-extractors.jar blocks usage of newer poi versions

Modified:
    jackrabbit/trunk/jackrabbit-text-extractors/pom.xml
    jackrabbit/trunk/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/MsWordTextExtractor.java
    jackrabbit/trunk/pom.xml

Modified: jackrabbit/trunk/jackrabbit-text-extractors/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-text-extractors/pom.xml?rev=617385&r1=617384&r2=617385&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-text-extractors/pom.xml (original)
+++ jackrabbit/trunk/jackrabbit-text-extractors/pom.xml Fri Feb  1 01:17:38 2008
@@ -47,16 +47,16 @@
 
   <dependencies>
     <dependency>
-      <groupId>poi</groupId>
+      <groupId>org.apache.poi</groupId>
       <artifactId>poi</artifactId>
     </dependency>
     <dependency>
-      <groupId>pdfbox</groupId>
-      <artifactId>pdfbox</artifactId>
+      <groupId>org.apache.poi</groupId>
+      <artifactId>poi-scratchpad</artifactId>
     </dependency>
     <dependency>
-      <groupId>org.textmining</groupId>
-      <artifactId>tm-extractors</artifactId>
+      <groupId>pdfbox</groupId>
+      <artifactId>pdfbox</artifactId>
     </dependency>
     <dependency>
       <groupId>nekohtml</groupId>

Modified: jackrabbit/trunk/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/MsWordTextExtractor.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/MsWordTextExtractor.java?rev=617385&r1=617384&r2=617385&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/MsWordTextExtractor.java (original)
+++ jackrabbit/trunk/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/MsWordTextExtractor.java Fri Feb  1 01:17:38 2008
@@ -18,7 +18,7 @@
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import org.textmining.text.extraction.WordExtractor;
+import org.apache.poi.hwpf.HWPFDocument;
 
 import java.io.Reader;
 import java.io.InputStream;
@@ -40,7 +40,7 @@
      * Force loading of dependent class.
      */
     static {
-        WordExtractor.class.getName();
+        HWPFDocument.class.getName();
     }
 
     /**
@@ -61,11 +61,8 @@
                               String type,
                               String encoding) throws IOException {
         try {
-            WordExtractor extractor = new WordExtractor();
-
-            // This throws raw Exception - not nice
-            String text = extractor.extractText(stream);
-
+            HWPFDocument doc = new HWPFDocument(stream);
+            String text = doc.getRange().text();
             return new StringReader(text);
         } catch (Exception e) {
             logger.warn("Failed to extract Word text content", e);

Modified: jackrabbit/trunk/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/pom.xml?rev=617385&r1=617384&r2=617385&view=diff
==============================================================================
--- jackrabbit/trunk/pom.xml (original)
+++ jackrabbit/trunk/pom.xml Fri Feb  1 01:17:38 2008
@@ -828,19 +828,19 @@
         <version>10.2.1.6</version>
       </dependency>
       <dependency>
-        <groupId>poi</groupId>
+        <groupId>org.apache.poi</groupId>
         <artifactId>poi</artifactId>
-        <version>2.5.1-final-20040804</version>
+        <version>3.0.1-FINAL</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.poi</groupId>
+        <artifactId>poi-scratchpad</artifactId>
+        <version>3.0.1-FINAL</version>
       </dependency>
       <dependency>
         <groupId>pdfbox</groupId>
         <artifactId>pdfbox</artifactId>
         <version>0.6.4</version>
-      </dependency>
-      <dependency>
-        <groupId>org.textmining</groupId>
-        <artifactId>tm-extractors</artifactId>
-        <version>0.4</version>
       </dependency>
       <dependency>
         <groupId>nekohtml</groupId>



Mime
View raw message