commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ggreg...@apache.org
Subject svn commit: r1814505 - in /commons/proper/codec/trunk/src: changes/changes.xml main/java/org/apache/commons/codec/net/PercentCodec.java test/java/org/apache/commons/codec/net/PercentCodecTest.java
Date Tue, 07 Nov 2017 16:09:28 GMT
Author: ggregory
Date: Tue Nov  7 16:09:28 2017
New Revision: 1814505

URL: http://svn.apache.org/viewvc?rev=1814505&view=rev
Log:
[CODEC-240] Add Percent-Encoding Codec (described in RFC3986 and RFC7578).

Added:
    commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/PercentCodec.java
    commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/PercentCodecTest.java
Modified:
    commons/proper/codec/trunk/src/changes/changes.xml

Modified: commons/proper/codec/trunk/src/changes/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/changes/changes.xml?rev=1814505&r1=1814504&r2=1814505&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/changes/changes.xml (original)
+++ commons/proper/codec/trunk/src/changes/changes.xml Tue Nov  7 16:09:28 2017
@@ -44,6 +44,7 @@ The <action> type attribute can be add,u
   <body>
     <release version="1.12" date="2017-MM-DD" description="Feature and fix release.">
       <action issue="CODEC-244" dev="ggregory" type="update">Update from Java 6 to
Java 7</action>
+      <action issue="CODEC-240" dev="ggregory" type="add" due-to="Ioannis Sermetziadis">Add
Percent-Encoding Codec (described in RFC3986 and RFC7578)</action>
     </release>
     <release version="1.11" date="2017-10-20" description="Feature and fix release.">
       <!-- The first attribute below should be the issue id; makes it easier to navigate
in the IDE outline -->

Added: commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/PercentCodec.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/PercentCodec.java?rev=1814505&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/PercentCodec.java
(added)
+++ commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/PercentCodec.java
Tue Nov  7 16:09:28 2017
@@ -0,0 +1,260 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.codec.net;
+
+import java.nio.ByteBuffer;
+import java.util.BitSet;
+import org.apache.commons.codec.BinaryDecoder;
+import org.apache.commons.codec.BinaryEncoder;
+import org.apache.commons.codec.DecoderException;
+import org.apache.commons.codec.EncoderException;
+
+/**
+ * Implements the Percent-Encoding scheme, as described in HTTP 1.1 specification. For extensibility,
an array of
+ * special US-ASCII characters can be specified in order to perform proper URI encoding for
the different parts
+ * of the URI.
+ * <p>
+ * This class is immutable. It is also thread-safe besides using BitSet which is not thread-safe,
but its public
+ * interface only call the access
+ * </p>
+ *
+ * @see <a href="https://tools.ietf.org/html/rfc3986#section-2.1">Percent-Encoding</a>
+ * @since 1.11
+ */
+public class PercentCodec implements BinaryEncoder, BinaryDecoder {
+
+    /**
+     * The escape character used by the Percent-Encoding in order to introduce an encoded
character.
+     */
+    private final byte ESCAPE_CHAR = '%';
+    /**
+     * The bit set used to store the character that should be always encoded
+     */
+    private final BitSet alwaysEncodeChars = new BitSet();
+    /**
+     * The flag defining if the space character should be encoded as '+'
+     */
+    private final boolean plusForSpace;
+    /**
+     * The minimum and maximum code of the bytes that is inserted in the bit set, used to
prevent look-ups
+     */
+    private int alwaysEncodeCharsMin = Integer.MAX_VALUE, alwaysEncodeCharsMax = Integer.MIN_VALUE;
+
+    /**
+     * Constructs a Percent coded that will encode all the non US-ASCII characters using
the Percent-Encoding
+     * while it will not encode all the US-ASCII characters, except for character '%' that
is used as escape
+     * character for Percent-Encoding.
+     */
+    public PercentCodec() {
+        this.plusForSpace = false;
+        insertAlwaysEncodeChar(ESCAPE_CHAR);
+    }
+
+    /**
+     * Constructs a Percent codec by specifying the characters that belong to US-ASCII that
should
+     * always be encoded. The rest US-ASCII characters will not be encoded, except for character
'%' that
+     * is used as escape character for Percent-Encoding.
+     *
+     * @param alwaysEncodeChars the unsafe characters that should always be encoded
+     * @param plusForSpace      the flag defining if the space character should be encoded
as '+'
+     */
+    public PercentCodec(final byte[] alwaysEncodeChars, final boolean plusForSpace) {
+        this.plusForSpace = plusForSpace;
+        insertAlwaysEncodeChars(alwaysEncodeChars);
+    }
+
+    /**
+     * Adds the byte array into a BitSet for faster lookup
+     *
+     * @param alwaysEncodeChars
+     */
+    private void insertAlwaysEncodeChars(final byte[] alwaysEncodeChars) {
+        if (alwaysEncodeChars != null) {
+            for (byte b : alwaysEncodeChars) {
+                insertAlwaysEncodeChar(b);
+            }
+        }
+        insertAlwaysEncodeChar(ESCAPE_CHAR);
+    }
+
+    /**
+     * Inserts a single character into a BitSet and maintains the min and max of the characters
of the
+     * {@code BitSet alwaysEncodeChars} in order to avoid look-ups when a byte is out of
this range.
+     *
+     * @param b the byte that is candidate for min and max limit
+     */
+    private void insertAlwaysEncodeChar(final byte b) {
+        this.alwaysEncodeChars.set(b);
+        if (b < alwaysEncodeCharsMin) {
+            alwaysEncodeCharsMin = b;
+        }
+        if (b > alwaysEncodeCharsMax) {
+            alwaysEncodeCharsMax = b;
+        }
+    }
+
+    /**
+     * Percent-Encoding based on RFC 3986. The non US-ASCII characters are encoded, as well
as the
+     * US-ASCII characters that are configured to be always encoded.
+     */
+    @Override
+    public byte[] encode(final byte[] bytes) throws EncoderException {
+        if (bytes == null) {
+            return null;
+        }
+
+        int expectedEncodingBytes = expectedEncodingBytes(bytes);
+        boolean willEncode = expectedEncodingBytes != bytes.length;
+        if (willEncode || (plusForSpace && containsSpace(bytes))) {
+            return doEncode(bytes, expectedEncodingBytes, willEncode);
+        } else {
+            return bytes;
+        }
+    }
+
+    private byte[] doEncode(final byte[] bytes, int expectedLength, boolean willEncode) {
+        final ByteBuffer buffer = ByteBuffer.allocate(expectedLength);
+        for (final byte b : bytes) {
+            if (willEncode && canEncode(b)) {
+                byte bb = b;
+                if (bb < 0) {
+                    bb = (byte) (256 + bb);
+                }
+                final char hex1 = Utils.hexDigit(bb >> 4);
+                final char hex2 = Utils.hexDigit(bb);
+                buffer.put(ESCAPE_CHAR);
+                buffer.put((byte) hex1);
+                buffer.put((byte) hex2);
+            } else {
+                if (plusForSpace && b == ' ') {
+                    buffer.put((byte) '+');
+                } else {
+                    buffer.put(b);
+                }
+            }
+        }
+        return buffer.array();
+    }
+
+    private int expectedEncodingBytes(final byte[] bytes) {
+        int byteCount = 0;
+        for (final byte b : bytes) {
+            byteCount += canEncode(b) ? 3: 1;
+        }
+        return byteCount;
+    }
+
+    private boolean containsSpace(final byte[] bytes) {
+        for (final byte b : bytes) {
+            if (b == ' ') {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    private boolean canEncode(final byte c) {
+        return !isAsciiChar(c) || (inAlwaysEncodeCharsRange(c) && alwaysEncodeChars.get(c));
+    }
+
+    private boolean inAlwaysEncodeCharsRange(final byte c) {
+        return c >= alwaysEncodeCharsMin && c <= alwaysEncodeCharsMax;
+    }
+
+    private boolean isAsciiChar(final byte c) {
+        return c >= 0;
+    }
+
+    /**
+     * Decode bytes encoded with Percent-Encoding based on RFC 3986. The reverse process
is performed in order to
+     * decode the encoded characters to Unicode.
+     */
+    @Override
+    public byte[] decode(final byte[] bytes) throws DecoderException {
+        if (bytes == null) {
+            return null;
+        }
+
+        final ByteBuffer buffer = ByteBuffer.allocate(expectedDecodingBytes(bytes));
+        for (int i = 0; i < bytes.length; i++) {
+            final byte b = bytes[i];
+            if (b == ESCAPE_CHAR) {
+                try {
+                    final int u = Utils.digit16(bytes[++i]);
+                    final int l = Utils.digit16(bytes[++i]);
+                    buffer.put((byte) ((u << 4) + l));
+                } catch (final ArrayIndexOutOfBoundsException e) {
+                    throw new DecoderException("Invalid percent decoding: ", e);
+                }
+            } else {
+                if (plusForSpace && b == '+') {
+                    buffer.put((byte) ' ');
+                } else {
+                    buffer.put(b);
+                }
+            }
+        }
+        return buffer.array();
+    }
+
+    private int expectedDecodingBytes(final byte[] bytes) {
+        int byteCount = 0;
+        for (int i = 0; i < bytes.length; ) {
+            byte b = bytes[i];
+            i += b == ESCAPE_CHAR ? 3: 1;
+            byteCount++;
+        }
+        return byteCount;
+    }
+
+    /**
+     * Encodes an object into using the Percent-Encoding. Only byte[] objects are accepted.
+     *
+     * @param obj the object to encode
+     * @return the encoding result byte[] as Object
+     * @throws EncoderException
+     */
+    @Override
+    public Object encode(final Object obj) throws EncoderException {
+        if (obj == null) {
+            return null;
+        } else if (obj instanceof byte[]) {
+            return encode((byte[]) obj);
+        } else {
+            throw new EncoderException("Objects of type " + obj.getClass().getName() + "
cannot be Percent encoded");
+        }
+    }
+
+    /**
+     * Decodes a byte[] Object, whose bytes are encoded with Percent-Encoding.
+     *
+     * @param obj the object to decode
+     * @return the decoding result byte[] as Object
+     * @throws DecoderException
+     */
+    @Override
+    public Object decode(final Object obj) throws DecoderException {
+        if (obj == null) {
+            return null;
+        } else if (obj instanceof byte[]) {
+            return decode((byte[]) obj);
+        } else {
+            throw new DecoderException("Objects of type " + obj.getClass().getName() + "
cannot be Percent decoded");
+        }
+    }
+}

Added: commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/PercentCodecTest.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/PercentCodecTest.java?rev=1814505&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/PercentCodecTest.java
(added)
+++ commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/PercentCodecTest.java
Tue Nov  7 16:09:28 2017
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.codec.net;
+
+import java.nio.charset.Charset;
+import java.util.Arrays;
+import org.apache.commons.codec.DecoderException;
+import org.apache.commons.codec.EncoderException;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import org.junit.Test;
+
+/**
+ * Percent codec test cases.
+ */
+public class PercentCodecTest {
+
+    /** The charset used for every String to byte[] conversion in these tests. */
+    private static final Charset UTF_8 = Charset.forName("UTF-8");
+
+    /** Plain US-ASCII letters must pass through the default codec unchanged in both directions. */
+    @Test
+    public void testBasicEncodeDecode() throws Exception {
+        final PercentCodec percentCodec = new PercentCodec();
+        final String input = "abcdABCD";
+        final byte[] encoded = percentCodec.encode(input.getBytes(UTF_8));
+        final String encodedS = new String(encoded, UTF_8);
+        final byte[] decoded = percentCodec.decode(encoded);
+        final String decodedS = new String(decoded, UTF_8);
+        assertEquals("Basic PercentCodec encoding test", input, encodedS);
+        assertEquals("Basic PercentCodec decoding test", input, decodedS);
+    }
+
+    /** Safe characters must pass through the Object-based API unchanged in both directions. */
+    @Test
+    public void testSafeCharEncodeDecodeObject() throws Exception {
+        final PercentCodec percentCodec = new PercentCodec(null, true);
+        final String input = "abc123_-.*";
+        final Object encoded = percentCodec.encode((Object) input.getBytes(UTF_8));
+        final String encodedS = new String((byte[]) encoded, UTF_8);
+        final Object decoded = percentCodec.decode(encoded);
+        final String decodedS = new String((byte[]) decoded, UTF_8);
+        assertEquals("Basic PercentCodec safe char encoding test", input, encodedS);
+        assertEquals("Basic PercentCodec safe char decoding test", input, decodedS);
+    }
+
+    /** Non US-ASCII bytes and '%' are escaped by the default codec, while the space is left as is. */
+    @Test
+    public void testUnsafeCharEncodeDecode() throws Exception {
+        final PercentCodec percentCodec = new PercentCodec();
+        final String input = "\u03B1\u03B2\u03B3\u03B4\u03B5\u03B6% ";
+        final byte[] encoded = percentCodec.encode(input.getBytes(UTF_8));
+        final String encodedS = new String(encoded, UTF_8);
+        final byte[] decoded = percentCodec.decode(encoded);
+        final String decodedS = new String(decoded, UTF_8);
+        assertEquals("Basic PercentCodec unsafe char encoding test",
+            "%CE%B1%CE%B2%CE%B3%CE%B4%CE%B5%CE%B6%25 ", encodedS);
+        assertEquals("Basic PercentCodec unsafe char decoding test", input, decodedS);
+    }
+
+    /** Characters registered as always-encoded are escaped in addition to the non US-ASCII ones. */
+    @Test
+    public void testConfigurablePercentEncoder() throws Exception {
+        final String input = "abc123_-.*\u03B1\u03B2";
+        final PercentCodec percentCodec = new PercentCodec("abcdef".getBytes(UTF_8), false);
+        final byte[] encoded = percentCodec.encode(input.getBytes(UTF_8));
+        final String encodedS = new String(encoded, UTF_8);
+        assertEquals("Configurable PercentCodec encoding test", "%61%62%63123_-.*%CE%B1%CE%B2", encodedS);
+        final byte[] decoded = percentCodec.decode(encoded);
+        assertEquals("Configurable PercentCodec decoding test", input, new String(decoded, UTF_8));
+    }
+
+    /** Null input yields null; empty input yields an empty result. */
+    @Test
+    public void testPercentEncoderDecoderWithNullOrEmptyInput() throws Exception {
+        final PercentCodec percentCodec = new PercentCodec(null, true);
+        assertEquals("Null input value encoding test", null, percentCodec.encode(null));
+        assertEquals("Null input value decoding test", null, percentCodec.decode(null));
+        final byte[] emptyInput = "".getBytes(UTF_8);
+        // encode returns the very same array instance when nothing has to be encoded
+        assertEquals("Empty input value encoding test", emptyInput, percentCodec.encode(emptyInput));
+        assertTrue("Empty input value decoding test", Arrays.equals(percentCodec.decode(emptyInput), emptyInput));
+    }
+
+    /** With plusForSpace enabled, spaces are encoded as '+' and decoded back to spaces. */
+    @Test
+    public void testPercentEncoderDecoderWithPlusForSpace() throws Exception {
+        final String input = "a b c d";
+        final PercentCodec percentCodec = new PercentCodec(null, true);
+        final byte[] encoded = percentCodec.encode(input.getBytes(UTF_8));
+        final String encodedS = new String(encoded, UTF_8);
+        assertEquals("PercentCodec plus for space encoding test", "a+b+c+d", encodedS);
+        final byte[] decoded = percentCodec.decode(encoded);
+        assertEquals("PercentCodec plus for space decoding test", input, new String(decoded, UTF_8));
+    }
+
+    @Test(expected = EncoderException.class)
+    public void testEncodeUnsupportedObject() throws Exception {
+        final PercentCodec percentCodec = new PercentCodec();
+        percentCodec.encode("test");
+    }
+
+    @Test
+    public void testEncodeNullObject() throws Exception {
+        final PercentCodec percentCodec = new PercentCodec();
+        assertEquals(null, percentCodec.encode((Object) null));
+    }
+
+    @Test(expected = DecoderException.class)
+    public void testDecodeUnsupportedObject() throws Exception {
+        final PercentCodec percentCodec = new PercentCodec();
+        percentCodec.decode("test");
+    }
+
+    @Test
+    public void testDecodeNullObject() throws Exception {
+        final PercentCodec percentCodec = new PercentCodec();
+        assertEquals(null, percentCodec.decode((Object) null));
+    }
+
+    /** Decoding input whose last escape sequence is truncated must fail with a DecoderException. */
+    @Test
+    public void testDecodeInvalidEncodedResultDecoding() throws Exception {
+        final String inputS = "\u03B1\u03B2";
+        final PercentCodec percentCodec = new PercentCodec();
+        final byte[] encoded = percentCodec.encode(inputS.getBytes(UTF_8));
+        DecoderException thrown = null;
+        try {
+            percentCodec.decode(Arrays.copyOf(encoded, encoded.length - 1)); // exclude one byte
+        } catch (final DecoderException e) {
+            thrown = e;
+        }
+        // Checked outside the try block so that the test also fails when no exception is thrown at all.
+        assertTrue("Truncated input must be rejected with a DecoderException", thrown != null);
+        assertTrue("The cause must be the out-of-bounds read",
+            thrown.getCause() instanceof ArrayIndexOutOfBoundsException);
+    }
+
+}



Mime
View raw message