tika-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From amy...@apache.org
Subject svn commit: r1220696 [1/2] - in /tika/trunk: tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java tika-parsers/src/test/resources/test-documents/testWEBARCHIVE.webarchive
Date Mon, 19 Dec 2011 11:27:07 GMT
Author: amylka
Date: Mon Dec 19 11:27:06 2011
New Revision: 1220696

URL: http://svn.apache.org/viewvc?rev=1220696&view=rev
Log:
TIKA-813 Support for detection of Apple "bplist" files (Binary Property List) and webarchive
files - a special case of bplists.

Added:
    tika/trunk/tika-parsers/src/test/resources/test-documents/testWEBARCHIVE.webarchive
Modified:
    tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java

Modified: tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1220696&r1=1220695&r2=1220696&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original)
+++ tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Mon Dec 19 11:27:06 2011
@@ -2256,6 +2256,17 @@
     </magic>
     <glob pattern="*.torrent"/>
   </mime-type>
+  
+  <mime-type type="application/x-bplist">
+    <!-- The priority is 60, as .webarchive files often contain 
+         (X)HTML content. The bplist magic must trump the XHTML 
+         magics further within the file. This must also be 
+         independent of the internal ordering of patterns within 
+         MimeTypes -->
+    <magic priority="60">
+      <match value="bplist" type="string" offset="0"/>
+    </magic>
+  </mime-type>
 
   <mime-type type="application/x-bzip">
     <magic priority="40">
@@ -2814,6 +2825,10 @@
   <mime-type type="application/x-wais-source">
     <glob pattern="*.src"/>
   </mime-type>
+  <mime-type type="application/x-webarchive">
+    <sub-class-of type="application/x-bplist"/>
+    <glob pattern="*.webarchive"/>
+  </mime-type>
   <mime-type type="application/x-x509-ca-cert">
     <glob pattern="*.der"/>
     <glob pattern="*.crt"/>

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=1220696&r1=1220695&r2=1220696&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java Mon Dec 19 11:27:06 2011
@@ -529,6 +529,12 @@ public class TestMimeTypes extends TestC
         assertType("application/x-msaccess", "testACCESS.mdb");
         assertType("application/x-font-ttf", "testTrueType.ttf");
     }
+    
+    public void testWebArchiveDetection() throws Exception {
+        assertTypeByName("application/x-webarchive","x.webarchive");
+        assertTypeByData("application/x-bplist","testWEBARCHIVE.webarchive");
+        assertTypeByNameAndData("application/x-webarchive", "testWEBARCHIVE.webarchive");
+    }
 
     private void assertType(String expected, String filename) throws Exception {
         InputStream stream = TestMimeTypes.class.getResourceAsStream(



Mime
View raw message