tika-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From max...@apache.org
Subject svn commit: r1304247 - in /tika/trunk/tika-server/src: main/java/org/apache/tika/server/ test/java/org/apache/tika/server/
Date Fri, 23 Mar 2012 09:45:34 GMT
Author: maxcom
Date: Fri Mar 23 09:45:34 2012
New Revision: 1304247

URL: http://svn.apache.org/viewvc?rev=1304247&view=rev
Log:
Newly rewritten UnpackerResource for TIKA-593:

1) Support for TAR output (in addition to ZIP)
2) Fix for the empty OLE attachments problem (similar to TIKA-877)
3) "/all" resource to get text + metadata + embeddings in one request


Added:
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/TarWriter.java
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipWriter.java
Removed:
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/PartExtractor.java
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipOutput.java
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipUtils.java
Modified:
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
    tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java

Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java?rev=1304247&r1=1304246&r2=1304247&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java (original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java Fri
Mar 23 09:45:34 2012
@@ -55,16 +55,22 @@ public class MetadataResource {
 
     return new StreamingOutput() {
       public void write(OutputStream outputStream) throws IOException, WebApplicationException
{
-        CSVWriter writer = new CSVWriter(new OutputStreamWriter(outputStream));
-        for (String name : metadata.names()) {
-          String[] values = metadata.getValues(name);
-          ArrayList<String> list = new ArrayList<String>(values.length+1);
-          list.add(name);
-          list.addAll(Arrays.asList(values));
-          writer.writeNext(list.toArray(values));
-        }
-        writer.close();
+        metadataToCsv(metadata, outputStream);
       }
     };
   }
+
+  public static void metadataToCsv(Metadata metadata, OutputStream outputStream) throws IOException
{
+    CSVWriter writer = new CSVWriter(new OutputStreamWriter(outputStream, "UTF-8"));
+
+    for (String name : metadata.names()) {
+      String[] values = metadata.getValues(name);
+      ArrayList<String> list = new ArrayList<String>(values.length+1);
+      list.add(name);
+      list.addAll(Arrays.asList(values));
+      writer.writeNext(list.toArray(values));
+    }
+
+    writer.close();
+  }
 }

Added: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TarWriter.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TarWriter.java?rev=1304247&view=auto
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TarWriter.java (added)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TarWriter.java Fri Mar 23
09:45:34 2012
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.server;
+
+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
+
+import javax.ws.rs.Produces;
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.MultivaluedMap;
+import javax.ws.rs.ext.MessageBodyWriter;
+import javax.ws.rs.ext.Provider;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.lang.annotation.Annotation;
+import java.lang.reflect.Type;
+import java.util.Map;
+
+@Provider
+@Produces("application/x-tar")
+public class TarWriter implements MessageBodyWriter<Map<String, byte[]>> {
+  private static void tarStoreBuffer(TarArchiveOutputStream zip, String name, byte[] dataBuffer)
throws IOException {
+    TarArchiveEntry entry = new TarArchiveEntry(name);
+
+    entry.setSize(dataBuffer.length);
+
+    zip.putArchiveEntry(entry);
+
+    zip.write(dataBuffer);
+
+    zip.closeArchiveEntry();
+  }
+
+  public boolean isWriteable(Class<?> type, Type genericType, Annotation[] annotations,
MediaType mediaType) {
+    return Map.class.isAssignableFrom(type);
+  }
+
+  public long getSize(Map<String, byte[]> stringMap, Class<?> type, Type genericType,
Annotation[] annotations, MediaType mediaType) {
+    return -1;
+  }
+
+  public void writeTo(Map<String, byte[]> parts, Class<?> type, Type genericType,
Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, Object> httpHeaders,
OutputStream entityStream) throws IOException, WebApplicationException {
+    TarArchiveOutputStream zip = new TarArchiveOutputStream(entityStream);
+
+    for (Map.Entry<String, byte[]> entry : parts.entrySet()) {
+      tarStoreBuffer(zip, entry.getKey(), entry.getValue());
+    }
+
+    zip.close();
+  }
+}

Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java?rev=1304247&r1=1304246&r2=1304247&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java (original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java Fri
Mar 23 09:45:34 2012
@@ -20,19 +20,19 @@ package org.apache.tika.server;
 import org.apache.commons.lang.mutable.MutableInt;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.poi.poifs.filesystem.Ole10Native;
-import org.apache.poi.poifs.filesystem.Ole10NativeException;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.poifs.filesystem.*;
 import org.apache.poi.util.IOUtils;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaMetadataKeys;
 import org.apache.tika.mime.MimeTypeException;
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.microsoft.OfficeParser;
+import org.apache.tika.sax.BodyContentHandler;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.DefaultHandler;
@@ -41,17 +41,19 @@ import javax.ws.rs.PUT;
 import javax.ws.rs.Path;
 import javax.ws.rs.Produces;
 import javax.ws.rs.WebApplicationException;
-import javax.ws.rs.core.*;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Collections;
-import java.util.zip.ZipOutputStream;
+import javax.ws.rs.core.Context;
+import javax.ws.rs.core.HttpHeaders;
+import javax.ws.rs.core.Response;
+import javax.ws.rs.core.UriInfo;
+import java.io.*;
+import java.util.HashMap;
+import java.util.Map;
 
-@Path("/unpacker{id:(/.*)?}")
+@Path("/")
 public class UnpackerResource {
   private static final Log logger = LogFactory.getLog(UnpackerResource.class);
+  public static final String TEXT_FILENAME = "__TEXT__";
+  private static final String META_FILENAME = "__METADATA__";
 
   private final TikaConfig tikaConfig;
 
@@ -59,13 +61,34 @@ public class UnpackerResource {
     tikaConfig = TikaConfig.getDefaultConfig();
   }
 
+  @Path("unpacker{id:(/.*)?}")
   @PUT
-  @Produces("application/zip")
-  public StreamingOutput getText(
+  @Produces({"application/zip", "application/x-tar"})
+  public Map<String, byte[]> unpack(
           InputStream is,
           @Context HttpHeaders httpHeaders,
           @Context UriInfo info
   ) throws Exception {
+    return process(is, httpHeaders, info, false);
+  }
+
+  @Path("all{id:(/.*)?}")
+  @PUT
+  @Produces({"application/zip", "application/x-tar"})
+  public Map<String, byte[]> unpackAll(
+          InputStream is,
+          @Context HttpHeaders httpHeaders,
+          @Context UriInfo info
+  ) throws Exception {
+    return process(is, httpHeaders, info, true);
+  }
+
+  private Map<String, byte[]> process(
+          InputStream is,
+          @Context HttpHeaders httpHeaders,
+          @Context UriInfo info,
+          boolean saveAll
+  ) throws Exception {
     Metadata metadata = new Metadata();
 
     AutoDetectParser parser = TikaResource.createParser();
@@ -73,14 +96,21 @@ public class UnpackerResource {
     TikaResource.fillMetadata(parser, metadata, httpHeaders);
     TikaResource.logRequest(logger, info, metadata);
 
-    ContentHandler ch = new DefaultHandler();
+    ContentHandler ch;
+    ByteArrayOutputStream text = new ByteArrayOutputStream();
+
+    if (saveAll) {
+      ch = new BodyContentHandler(new RichTextContentHandler(new OutputStreamWriter(text,
"UTF-8")));
+    } else {
+      ch = new DefaultHandler();
+    }
 
     ParseContext pc = new ParseContext();
 
-    ZipOutput zout = new ZipOutput();
+    Map<String, byte[]> files = new HashMap<String, byte[]>();
     MutableInt count = new MutableInt();
 
-    pc.set(EmbeddedDocumentExtractor.class, new MyEmbeddedDocumentExtractor(count, zout));
+    pc.set(EmbeddedDocumentExtractor.class, new MyEmbeddedDocumentExtractor(count, files));
 
     try {
       parser.parse(is, ch, metadata, pc);
@@ -89,20 +119,31 @@ public class UnpackerResource {
               "%s: Unpacker failed",
               info.getPath()
       ), ex);
+
+      throw ex;
     }
 
-    if (count.intValue() == 0) {
+    if (count.intValue() == 0 && !saveAll) {
       throw new WebApplicationException(Response.Status.NO_CONTENT);
     }
 
-    return zout;
+    if (saveAll) {
+      files.put(TEXT_FILENAME, text.toByteArray());
+
+      ByteArrayOutputStream metaStream = new ByteArrayOutputStream();
+      MetadataResource.metadataToCsv(metadata, metaStream);
+
+      files.put(META_FILENAME, metaStream.toByteArray());
+    }
+
+    return files;
   }
 
   private class MyEmbeddedDocumentExtractor implements EmbeddedDocumentExtractor {
     private final MutableInt count;
-    private final ZipOutput zout;
+    private final Map<String, byte[]> zout;
 
-    MyEmbeddedDocumentExtractor(MutableInt count, ZipOutput zout) {
+    MyEmbeddedDocumentExtractor(MutableInt count, Map<String, byte[]> zout) {
       this.count = count;
       this.zout = zout;
     }
@@ -123,7 +164,7 @@ public class UnpackerResource {
         name = Integer.toString(count.intValue());
       }
 
-      if (!name.contains(".")) {
+      if (!name.contains(".") && contentType!=null) {
         try {
           String ext = tikaConfig.getMimeRepository().forName(contentType).getExtension();
 
@@ -159,17 +200,48 @@ public class UnpackerResource {
         } else {
           name += '.' + type.getExtension();
         }
-      }      
+      }
 
       final String finalName = name;
 
-      zout.put(new PartExtractor<byte[]>() {
-        public void extract(byte[] part, ZipOutputStream output) throws IOException {
-          ZipUtils.zipStoreBuffer(output, finalName, part);
+      if (data.length > 0) {
+        zout.put(finalName, data);
+
+        count.increment();
+      } else {
+        if (inputStream instanceof TikaInputStream) {
+          TikaInputStream tin = (TikaInputStream)  inputStream;
+
+          if (tin.getOpenContainer()!=null && tin.getOpenContainer() instanceof DirectoryEntry)
{
+            POIFSFileSystem fs = new POIFSFileSystem();
+            copy((DirectoryEntry) tin.getOpenContainer(), fs.getRoot());
+            ByteArrayOutputStream bos2 = new ByteArrayOutputStream();
+            fs.writeFilesystem(bos2);
+            bos2.close();
+
+            zout.put(finalName, bos2.toByteArray());
+          }
         }
-      }, Collections.singletonList(data));
+      }
+    }
 
-      count.increment();
+    protected void copy(DirectoryEntry sourceDir, DirectoryEntry destDir)
+            throws IOException {
+      for (Entry entry : sourceDir) {
+        if (entry instanceof DirectoryEntry) {
+          // Need to recurse
+          DirectoryEntry newDir = destDir.createDirectory(entry.getName());
+          copy((DirectoryEntry) entry, newDir);
+        } else {
+          // Copy entry
+          InputStream contents = new DocumentInputStream((DocumentEntry) entry);
+          try {
+            destDir.createDocument(entry.getName(), contents);
+          } finally {
+            contents.close();
+          }
+        }
+      }
     }
   }
-}
+}
\ No newline at end of file

Added: tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipWriter.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipWriter.java?rev=1304247&view=auto
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipWriter.java (added)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipWriter.java Fri Mar 23
09:45:34 2012
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.server;
+
+import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
+
+import javax.ws.rs.Produces;
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.MultivaluedMap;
+import javax.ws.rs.ext.MessageBodyWriter;
+import javax.ws.rs.ext.Provider;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.lang.annotation.Annotation;
+import java.lang.reflect.Type;
+import java.util.Map;
+import java.util.UUID;
+import java.util.zip.CRC32;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipException;
+import java.util.zip.ZipOutputStream;
+
+@Provider
+@Produces("application/zip")
+public class ZipWriter implements MessageBodyWriter<Map<String, byte[]>> {
+  private static void zipStoreBuffer(ZipArchiveOutputStream zip, String name, byte[] dataBuffer)
throws IOException {
+    ZipEntry zipEntry = new ZipEntry(name!=null?name: UUID.randomUUID().toString());
+    zipEntry.setMethod(ZipOutputStream.STORED);
+
+    zipEntry.setSize(dataBuffer.length);
+    CRC32 crc32 = new CRC32();
+    crc32.update(dataBuffer);
+    zipEntry.setCrc(crc32.getValue());
+
+    try {
+      zip.putArchiveEntry(new ZipArchiveEntry(zipEntry));
+    } catch (ZipException ex) {
+      if (name!=null) {
+        zipStoreBuffer(zip, "x-"+name, dataBuffer);
+        return;
+      }
+    }
+
+    zip.write(dataBuffer);
+
+    zip.closeArchiveEntry();
+  }
+
+  public boolean isWriteable(Class<?> type, Type genericType, Annotation[] annotations,
MediaType mediaType) {
+    return Map.class.isAssignableFrom(type);
+  }
+
+  public long getSize(Map<String, byte[]> stringMap, Class<?> type, Type genericType,
Annotation[] annotations, MediaType mediaType) {
+    return -1;
+  }
+
+  public void writeTo(Map<String, byte[]> parts, Class<?> type, Type genericType,
Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, Object> httpHeaders,
OutputStream entityStream) throws IOException, WebApplicationException {
+    ZipArchiveOutputStream zip = new ZipArchiveOutputStream(entityStream);
+
+    zip.setMethod(ZipArchiveOutputStream.STORED);
+
+    for (Map.Entry<String, byte[]> entry : parts.entrySet()) {
+      zipStoreBuffer(zip, entry.getKey(), entry.getValue());
+    }
+
+    zip.close();
+  }
+}

Modified: tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java?rev=1304247&r1=1304246&r2=1304247&view=diff
==============================================================================
--- tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java
(original)
+++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java
Fri Mar 23 09:45:34 2012
@@ -17,21 +17,27 @@
 
 package org.apache.tika.server;
 
+import com.sun.jersey.api.client.ClientResponse;
 import com.sun.jersey.test.framework.JerseyTest;
 import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.compress.archivers.ArchiveEntry;
+import org.apache.commons.compress.archivers.ArchiveInputStream;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
 import org.apache.tika.io.IOUtils;
 import org.junit.Test;
 
-import java.io.*;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
 import java.util.HashMap;
 import java.util.Map;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipInputStream;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.*;
 
 public class UnpackerResourceTest extends JerseyTest {
   private static final String UNPACKER_PATH = "/unpacker";
+  private static final String ALL_PATH = "/all";
 
   private static final String TEST_DOC_WAV = "Doc1_ole.doc";
   private static final String WAV1_MD5 = "bdd0a78a54968e362445364f95d8dc96";
@@ -74,12 +80,32 @@ public class UnpackerResourceTest extend
                     .type(APPLICATION_MSWORD)
                     .put(InputStream.class, ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV));
 
-    ZipInputStream zip = new ZipInputStream(is);
+    ArchiveInputStream zip = new ZipArchiveInputStream(is);
 
-    Map<String, String> data = readZip(zip);
+    Map<String, String> data = readArchive(zip);
 
     assertEquals(WAV1_MD5, data.get(WAV1_NAME));
     assertEquals(WAV2_MD5, data.get(WAV2_NAME));
+
+    assertFalse(data.containsKey(UnpackerResource.TEXT_FILENAME));
+  }
+
+  @Test
+  public void testDocWAVText() throws Exception {
+    InputStream is =
+            resource()
+                    .path(ALL_PATH)
+                    .type(APPLICATION_MSWORD)
+                    .put(InputStream.class, ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV));
+
+    ArchiveInputStream zip = new ZipArchiveInputStream(is);
+
+    Map<String, String> data = readArchive(zip);
+
+    assertEquals(WAV1_MD5, data.get(WAV1_NAME));
+    assertEquals(WAV2_MD5, data.get(WAV2_NAME));
+
+    assertTrue(data.containsKey(UnpackerResource.TEXT_FILENAME));
   }
 
   @Test
@@ -90,9 +116,9 @@ public class UnpackerResourceTest extend
                     .type(APPLICATION_MSWORD)
                     .put(InputStream.class, ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV));
 
-    ZipInputStream zip = new ZipInputStream(is);
+    ZipArchiveInputStream zip = new ZipArchiveInputStream(is);
 
-    Map<String, String> data = readZip(zip);
+    Map<String, String> data = readArchive(zip);
 
     assertEquals(JPG_MD5, data.get(JPG_NAME));
   }
@@ -105,9 +131,9 @@ public class UnpackerResourceTest extend
                     .type(APPLICATION_MSWORD)
                     .put(InputStream.class, ClassLoader.getSystemResourceAsStream("2pic.doc"));
 
-    ZipInputStream zip = new ZipInputStream(is);
+    ZipArchiveInputStream zip = new ZipArchiveInputStream(is);
 
-    Map<String, String> data = readZip(zip);
+    Map<String, String> data = readArchive(zip);
 
     assertEquals(JPG2_MD5, data.get(JPG2_NAME));
   }
@@ -119,15 +145,26 @@ public class UnpackerResourceTest extend
                     .path(UNPACKER_PATH)
                     .put(InputStream.class, ClassLoader.getSystemResourceAsStream(TEST_DOCX_IMAGE));
 
-    ZipInputStream zip = new ZipInputStream(is);
+    ZipArchiveInputStream zip = new ZipArchiveInputStream(is);
 
-    Map<String, String> data = readZip(zip);
+    Map<String, String> data = readArchive(zip);
 
     assertEquals(DOCX_IMAGE1_MD5, data.get(DOCX_IMAGE1_NAME));
     assertEquals(DOCX_IMAGE2_MD5, data.get(DOCX_IMAGE2_NAME));
   }
 
   @Test
+  public void test415() throws Exception {
+    ClientResponse cr =
+            resource()
+                    .path(UNPACKER_PATH)
+                    .type("xxx/xxx")
+                    .put(ClientResponse.class, ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV));
+
+    assertEquals(415, cr.getStatus());
+  }
+
+  @Test
   public void testExeDOCX() throws Exception {
     String TEST_DOCX_EXE = "2exe.docx";
     InputStream is =
@@ -135,29 +172,14 @@ public class UnpackerResourceTest extend
                     .path(UNPACKER_PATH)
                     .put(InputStream.class, ClassLoader.getSystemResourceAsStream(TEST_DOCX_EXE));
 
-    ZipInputStream zip = new ZipInputStream(is);
+    ZipArchiveInputStream zip = new ZipArchiveInputStream(is);
 
-    Map<String, String> data = readZip(zip);
+    Map<String, String> data = readArchive(zip);
 
     assertEquals(DOCX_EXE1_MD5, data.get(DOCX_EXE1_NAME));
     assertEquals(DOCX_EXE2_MD5, data.get(DOCX_EXE2_NAME));
   }
-/*
-  @Test
-  public void testImageXSLX() throws Exception {
-    InputStream is =
-            webResource
-                    .path(UNPACKER_PATH)
-                    .put(InputStream.class, ClassLoader.getSystemResourceAsStream("pic.xlsx"));
-
-    ZipInputStream zip = new ZipInputStream(is);
 
-    Map<String, String> data = readZip(zip);
-
-    assertEquals(XSL_IMAGE1_MD5, data.get(XSLX_IMAGE1_NAME));
-    assertEquals(XSL_IMAGE2_MD5, data.get(XSLX_IMAGE2_NAME));
-  }
-*/
   @Test
   public void testImageXSL() throws Exception {
     InputStream is =
@@ -165,19 +187,19 @@ public class UnpackerResourceTest extend
                     .path(UNPACKER_PATH)
                     .put(InputStream.class, ClassLoader.getSystemResourceAsStream("pic.xls"));
 
-    ZipInputStream zip = new ZipInputStream(is);
+    ZipArchiveInputStream zip = new ZipArchiveInputStream(is);
 
-    Map<String, String> data = readZip(zip);
+    Map<String, String> data = readArchive(zip);
 
     assertEquals(XSL_IMAGE1_MD5, data.get("0.jpg"));
     assertEquals(XSL_IMAGE2_MD5, data.get("1.jpg"));
   }
 
-  private static Map<String, String> readZip(ZipInputStream zip) throws IOException
{
+  private static Map<String, String> readArchive(ArchiveInputStream zip) throws IOException
{
     Map<String, String> data = new HashMap<String, String>();
 
     while (true) {
-      ZipEntry entry = zip.getNextEntry();
+      ArchiveEntry entry = zip.getNextEntry();
 
       if (entry==null) {
         break;
@@ -192,4 +214,55 @@ public class UnpackerResourceTest extend
 
     return data;
   }
+
+  private static String readArchiveText(ArchiveInputStream zip) throws IOException {
+    while (true) {
+      ArchiveEntry entry = zip.getNextEntry();
+
+      if (entry==null) {
+        break;
+      }
+
+      if (!entry.getName().equals(UnpackerResource.TEXT_FILENAME)) {
+        continue;
+      }
+
+      ByteArrayOutputStream bos = new ByteArrayOutputStream();
+
+      IOUtils.copy(zip, bos);
+
+      return bos.toString("UTF-8");
+    }
+
+    return null;
+  }
+
+  @Test
+  public void testTarDocPicture() throws Exception {
+    InputStream is =
+            resource()
+                    .path(UNPACKER_PATH)
+                    .type(APPLICATION_MSWORD)
+                    .accept("application/x-tar")
+                    .put(InputStream.class, ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV));
+
+    ArchiveInputStream zip = new TarArchiveInputStream(is);
+
+    Map<String, String> data = readArchive(zip);
+
+    assertEquals(JPG_MD5, data.get(JPG_NAME));
+  }
+
+  @Test
+  public void testText() throws IOException {
+    InputStream is
+            = resource()
+                    .path(ALL_PATH)
+                    .header(CONTENT_TYPE, APPLICATION_XML)
+                    .put(InputStream.class, ClassLoader.getSystemResourceAsStream("test.doc"));
+    String responseMsg = readArchiveText(new ZipArchiveInputStream(is));
+
+    assertNotNull(responseMsg);
+    assertTrue(responseMsg.contains("test"));
+  }
 }



Mime
View raw message