jackrabbit-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mreut...@apache.org
Subject svn commit: r647114 - /jackrabbit/trunk/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/MsPowerPointTextExtractor.java
Date Fri, 11 Apr 2008 11:18:19 GMT
Author: mreutegg
Date: Fri Apr 11 04:18:06 2008
New Revision: 647114

URL: http://svn.apache.org/viewvc?rev=647114&view=rev
Log:
JCR-1530: MsPowerPointTextExtractor does not extract from PPTs with € sign

Modified:
    jackrabbit/trunk/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/MsPowerPointTextExtractor.java

Modified: jackrabbit/trunk/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/MsPowerPointTextExtractor.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/MsPowerPointTextExtractor.java?rev=647114&r1=647113&r2=647114&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/MsPowerPointTextExtractor.java	(original)
+++ jackrabbit/trunk/jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/MsPowerPointTextExtractor.java	Fri Apr 11 04:18:06 2008
@@ -17,20 +17,13 @@
 package org.apache.jackrabbit.extractor;
 
 import org.apache.poi.poifs.eventfilesystem.POIFSReader;
-import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
-import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
-import org.apache.poi.poifs.filesystem.DocumentInputStream;
-import org.apache.poi.util.LittleEndian;
+import org.apache.poi.hslf.extractor.PowerPointExtractor;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.Reader;
 import java.io.InputStream;
 import java.io.IOException;
-import java.io.OutputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.InputStreamReader;
-import java.io.ByteArrayInputStream;
 import java.io.StringReader;
 
 /**
@@ -68,51 +61,15 @@
                               String type,
                               String encoding) throws IOException {
         try {
-            ByteArrayOutputStream baos = new ByteArrayOutputStream();
-            MsPowerPointListener listener = new MsPowerPointListener(baos);
-            POIFSReader reader = new POIFSReader();
-            reader.registerListener(listener);
-            reader.read(stream);
-            return new InputStreamReader(
-                    new ByteArrayInputStream(baos.toByteArray()));
+            PowerPointExtractor extractor = new PowerPointExtractor(stream);
+            return new StringReader(extractor.getText(true, true));
         } catch (RuntimeException e) {
             logger.warn("Failed to extract PowerPoint text content", e);
             return new StringReader("");
         } finally {
-            stream.close();
-        }
-    }
-
-    //------------------------------------------------< MsPowerPointListener >
-
-    /**
-     * Reader listener.
-     */
-    private class MsPowerPointListener implements POIFSReaderListener {
-        private OutputStream os;
-
-        MsPowerPointListener(OutputStream os) {
-            this.os = os;
-        }
-
-        public void processPOIFSReaderEvent(POIFSReaderEvent event) {
             try {
-                if (!event.getName().equalsIgnoreCase("PowerPoint Document")) {
-                    return;
-                }
-                DocumentInputStream input = event.getStream();
-                byte[] buffer = new byte[input.available()];
-                input.read(buffer, 0, input.available());
-                for (int i = 0; i < buffer.length - 20; i++) {
-                    long type = LittleEndian.getUShort(buffer, i + 2);
-                    long size = LittleEndian.getUInt(buffer, i + 4);
-                    if (type == 4008) {
-                        os.write(buffer, i + 4 + 1, (int) size + 3);
-                        i = i + 4 + 1 + (int) size - 1;
-                    }
-                }
-            } catch (Exception e) {
-
+                stream.close();
+            } catch (IOException ignored) {
             }
         }
     }



Mime
View raw message