tika-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From n...@apache.org
Subject svn commit: r1221581 - in /tika/trunk/tika-core/src: main/java/org/apache/tika/mime/MediaType.java test/java/org/apache/tika/mime/MediaTypeTest.java
Date Wed, 21 Dec 2011 03:03:18 GMT
Author: nick
Date: Wed Dec 21 03:03:17 2011
New Revision: 1221581

URL: http://svn.apache.org/viewvc?rev=1221581&view=rev
Log:
TIKA-822 - Handle quoted parameters on media types

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java
    tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MediaTypeTest.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java?rev=1221581&r1=1221580&r2=1221581&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaType.java Wed Dec 21 03:03:17 2011
@@ -17,10 +17,8 @@
 package org.apache.tika.mime;
 
 import java.io.Serializable;
-import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
-import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.SortedMap;
@@ -163,6 +161,9 @@ public final class MediaType implements 
             return Collections.<String, String>emptyMap();
         }
 
+        // Extracts k1=v1, k2=v2 from mime/type; k1=v1; k2=v2
+        // Note - this logic isn't fully RFC2045 compliant yet, as it
+        //  doesn't fully handle quoted keys or values (eg containing ; or =)
         Map<String, String> parameters = new HashMap<String, String>();
         while (string.length() > 0) {
             String key = string;
@@ -184,11 +185,22 @@ public final class MediaType implements 
 
             key = key.trim();
             if (key.length() > 0) {
-                parameters.put(key, value.trim());
+                parameters.put(key, unquote(value.trim()));
             }
         }
         return parameters;
     }
+    
+    // Strips one matching pair of enclosing " or ' quotes from s, if present.
+    private static String unquote(String s) {
+        // Require 2+ chars: a lone quote char would make substring(1, 0) throw.
+        if (s.length() < 2) {
+            return s;
+        }
+        char q = s.charAt(0);
+        boolean quoted = (q == '"' || q == '\'') && s.charAt(s.length() - 1) == q;
+        return quoted ? s.substring(1, s.length() - 1) : s;
+    }
 
     /**
      * Canonical string representation of this media type.

Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MediaTypeTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MediaTypeTest.java?rev=1221581&r1=1221580&r2=1221581&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MediaTypeTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MediaTypeTest.java Wed Dec 21 03:03:17 2011
@@ -21,6 +21,8 @@ import java.util.Map;
 
 import junit.framework.TestCase;
 
+import static java.util.Collections.singletonMap;
+
 public class MediaTypeTest extends TestCase {
 
     public void testBasics() {
@@ -132,6 +134,28 @@ public class MediaTypeTest extends TestC
     }
 
     /**
+     * Per http://tools.ietf.org/html/rfc2045#section-5.1, charset can be in quotes
+     */
+    public void testParseWithParamsAndQuotedCharset() {
+        // Typical case: charset value in double quotes; the parsed value must not contain them
+        String mimeStringWithParams = "text/html;charset=\"UTF-8\"";
+
+        MediaType type = MediaType.parse(mimeStringWithParams);
+        assertNotNull(type);
+        assertEquals(singletonMap("charset", "UTF-8"), type.getParameters());
+        
+        // Complex case: single-quoted, double-quoted and unquoted values in one media type string
+        mimeStringWithParams = "text/html;charset=\'UTF-8\';test=\"true\";unquoted=here";
+
+        type = MediaType.parse(mimeStringWithParams);
+        assertNotNull(type);
+        assertEquals(3, type.getParameters().size());
+        assertEquals("UTF-8", type.getParameters().get("charset"));
+        assertEquals("true", type.getParameters().get("test"));
+        assertEquals("here", type.getParameters().get("unquoted"));
+    }
+
+    /**
      * @since TIKA-121
      */
     public void testParseNoParams() {



Mime
View raw message