Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 5EFB1200D35 for ; Tue, 7 Nov 2017 17:09:34 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id 5DA0D160BED; Tue, 7 Nov 2017 16:09:34 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 539301609C8 for ; Tue, 7 Nov 2017 17:09:33 +0100 (CET) Received: (qmail 81144 invoked by uid 500); 7 Nov 2017 16:09:32 -0000 Mailing-List: contact commits-help@commons.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@commons.apache.org Delivered-To: mailing list commits@commons.apache.org Received: (qmail 81135 invoked by uid 99); 7 Nov 2017 16:09:32 -0000 Received: from Unknown (HELO svn01-us-west.apache.org) (209.188.14.144) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 07 Nov 2017 16:09:32 +0000 Received: from svn01-us-west.apache.org (localhost [127.0.0.1]) by svn01-us-west.apache.org (ASF Mail Server at svn01-us-west.apache.org) with ESMTP id D67303A0225 for ; Tue, 7 Nov 2017 16:09:30 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: svn commit: r1814505 - in /commons/proper/codec/trunk/src: changes/changes.xml main/java/org/apache/commons/codec/net/PercentCodec.java test/java/org/apache/commons/codec/net/PercentCodecTest.java Date: Tue, 07 Nov 2017 16:09:28 -0000 To: commits@commons.apache.org From: ggregory@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20171107160930.D67303A0225@svn01-us-west.apache.org> archived-at: Tue, 07 Nov 2017 16:09:34 -0000 Author: ggregory Date: Tue Nov 7 16:09:28 2017 New Revision: 1814505 URL: http://svn.apache.org/viewvc?rev=1814505&view=rev Log: [CODEC-240] 
Add Percent-Encoding Codec (described in RFC3986 and RFC7578). Added: commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/PercentCodec.java commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/PercentCodecTest.java Modified: commons/proper/codec/trunk/src/changes/changes.xml Modified: commons/proper/codec/trunk/src/changes/changes.xml URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/changes/changes.xml?rev=1814505&r1=1814504&r2=1814505&view=diff ============================================================================== --- commons/proper/codec/trunk/src/changes/changes.xml (original) +++ commons/proper/codec/trunk/src/changes/changes.xml Tue Nov 7 16:09:28 2017 @@ -44,6 +44,7 @@ The type attribute can be add,u Update from Java 6 to Java 7 + Add Percent-Encoding Codec (described in RFC3986 and RFC7578) Added: commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/PercentCodec.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/PercentCodec.java?rev=1814505&view=auto ============================================================================== --- commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/PercentCodec.java (added) +++ commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/PercentCodec.java Tue Nov 7 16:09:28 2017 @@ -0,0 +1,260 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.codec.net; + +import java.nio.ByteBuffer; +import java.util.BitSet; +import org.apache.commons.codec.BinaryDecoder; +import org.apache.commons.codec.BinaryEncoder; +import org.apache.commons.codec.DecoderException; +import org.apache.commons.codec.EncoderException; + +/** + * Implements the Percent-Encoding scheme, as described in HTTP 1.1 specification. For extensibility, an array of + * special US-ASCII characters can be specified in order to perform proper URI encoding for the different parts + * of the URI. + *

+ * This class is immutable. It is also thread-safe: it uses a BitSet, which is not itself thread-safe, but its public + * interface only reads from the BitSet after construction. + *

+ * + * @see Percent-Encoding + * @since 1.11 + */ +public class PercentCodec implements BinaryEncoder, BinaryDecoder { + + /** + * The escape character used by the Percent-Encoding in order to introduce an encoded character. + */ + private final byte ESCAPE_CHAR = '%'; + /** + * The bit set used to store the characters that should always be encoded + */ + private final BitSet alwaysEncodeChars = new BitSet(); + /** + * The flag defining if the space character should be encoded as '+' + */ + private final boolean plusForSpace; + /** + * The minimum and maximum codes of the bytes that are inserted in the bit set, used to prevent look-ups + */ + private int alwaysEncodeCharsMin = Integer.MAX_VALUE, alwaysEncodeCharsMax = Integer.MIN_VALUE; + + /** + * Constructs a Percent codec that will encode all the non US-ASCII characters using the Percent-Encoding + * while it will not encode any of the US-ASCII characters, except for the character '%' that is used as escape + * character for Percent-Encoding. + */ + public PercentCodec() { + this.plusForSpace = false; + insertAlwaysEncodeChar(ESCAPE_CHAR); + } + + /** + * Constructs a Percent codec by specifying the characters that belong to US-ASCII that should + * always be encoded. The rest of the US-ASCII characters will not be encoded, except for character '%' that + * is used as escape character for Percent-Encoding. 
+ * + * @param alwaysEncodeChars the unsafe characters that should always be encoded + * @param plusForSpace the flag defining if the space character should be encoded as '+' + */ + public PercentCodec(final byte[] alwaysEncodeChars, final boolean plusForSpace) { + this.plusForSpace = plusForSpace; + insertAlwaysEncodeChars(alwaysEncodeChars); + } + + /** + * Adds the byte array into a BitSet for faster lookup + * + * @param alwaysEncodeChars + */ + private void insertAlwaysEncodeChars(final byte[] alwaysEncodeChars) { + if (alwaysEncodeChars != null) { + for (byte b : alwaysEncodeChars) { + insertAlwaysEncodeChar(b); + } + } + insertAlwaysEncodeChar(ESCAPE_CHAR); + } + + /** + * Inserts a single character into a BitSet and maintains the min and max of the characters of the + * {@code BitSet alwaysEncodeChars} in order to avoid look-ups when a byte is out of this range. + * + * @param b the byte that is candidate for min and max limit + */ + private void insertAlwaysEncodeChar(final byte b) { + this.alwaysEncodeChars.set(b); + if (b < alwaysEncodeCharsMin) { + alwaysEncodeCharsMin = b; + } + if (b > alwaysEncodeCharsMax) { + alwaysEncodeCharsMax = b; + } + } + + /** + * Percent-Encoding based on RFC 3986. The non US-ASCII characters are encoded, as well as the + * US-ASCII characters that are configured to be always encoded. 
+ */ + @Override + public byte[] encode(final byte[] bytes) throws EncoderException { + if (bytes == null) { + return null; + } + + int expectedEncodingBytes = expectedEncodingBytes(bytes); + boolean willEncode = expectedEncodingBytes != bytes.length; + if (willEncode || (plusForSpace && containsSpace(bytes))) { + return doEncode(bytes, expectedEncodingBytes, willEncode); + } else { + return bytes; + } + } + + private byte[] doEncode(final byte[] bytes, int expectedLength, boolean willEncode) { + final ByteBuffer buffer = ByteBuffer.allocate(expectedLength); + for (final byte b : bytes) { + if (willEncode && canEncode(b)) { + byte bb = b; + if (bb < 0) { + bb = (byte) (256 + bb); + } + final char hex1 = Utils.hexDigit(bb >> 4); + final char hex2 = Utils.hexDigit(bb); + buffer.put(ESCAPE_CHAR); + buffer.put((byte) hex1); + buffer.put((byte) hex2); + } else { + if (plusForSpace && b == ' ') { + buffer.put((byte) '+'); + } else { + buffer.put(b); + } + } + } + return buffer.array(); + } + + private int expectedEncodingBytes(final byte[] bytes) { + int byteCount = 0; + for (final byte b : bytes) { + byteCount += canEncode(b) ? 3: 1; + } + return byteCount; + } + + private boolean containsSpace(final byte[] bytes) { + for (final byte b : bytes) { + if (b == ' ') { + return true; + } + } + return false; + } + + private boolean canEncode(final byte c) { + return !isAsciiChar(c) || (inAlwaysEncodeCharsRange(c) && alwaysEncodeChars.get(c)); + } + + private boolean inAlwaysEncodeCharsRange(final byte c) { + return c >= alwaysEncodeCharsMin && c <= alwaysEncodeCharsMax; + } + + private boolean isAsciiChar(final byte c) { + return c >= 0; + } + + /** + * Decode bytes encoded with Percent-Encoding based on RFC 3986. The reverse process is performed in order to + * decode the encoded characters to Unicode. 
+ */ + @Override + public byte[] decode(final byte[] bytes) throws DecoderException { + if (bytes == null) { + return null; + } + + final ByteBuffer buffer = ByteBuffer.allocate(expectedDecodingBytes(bytes)); + for (int i = 0; i < bytes.length; i++) { + final byte b = bytes[i]; + if (b == ESCAPE_CHAR) { + try { + final int u = Utils.digit16(bytes[++i]); + final int l = Utils.digit16(bytes[++i]); + buffer.put((byte) ((u << 4) + l)); + } catch (final ArrayIndexOutOfBoundsException e) { + throw new DecoderException("Invalid percent decoding: ", e); + } + } else { + if (plusForSpace && b == '+') { + buffer.put((byte) ' '); + } else { + buffer.put(b); + } + } + } + return buffer.array(); + } + + private int expectedDecodingBytes(final byte[] bytes) { + int byteCount = 0; + for (int i = 0; i < bytes.length; ) { + byte b = bytes[i]; + i += b == ESCAPE_CHAR ? 3: 1; + byteCount++; + } + return byteCount; + } + + /** + * Encodes an object into using the Percent-Encoding. Only byte[] objects are accepted. + * + * @param obj the object to encode + * @return the encoding result byte[] as Object + * @throws EncoderException + */ + @Override + public Object encode(final Object obj) throws EncoderException { + if (obj == null) { + return null; + } else if (obj instanceof byte[]) { + return encode((byte[]) obj); + } else { + throw new EncoderException("Objects of type " + obj.getClass().getName() + " cannot be Percent encoded"); + } + } + + /** + * Decodes a byte[] Object, whose bytes are encoded with Percent-Encoding. 
+ * + * @param obj the object to decode + * @return the decoding result byte[] as Object + * @throws DecoderException + */ + @Override + public Object decode(final Object obj) throws DecoderException { + if (obj == null) { + return null; + } else if (obj instanceof byte[]) { + return decode((byte[]) obj); + } else { + throw new DecoderException("Objects of type " + obj.getClass().getName() + " cannot be Percent decoded"); + } + } +} Added: commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/PercentCodecTest.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/PercentCodecTest.java?rev=1814505&view=auto ============================================================================== --- commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/PercentCodecTest.java (added) +++ commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/PercentCodecTest.java Tue Nov 7 16:09:28 2017 @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.commons.codec.net; + +import java.nio.charset.Charset; +import java.util.Arrays; +import org.apache.commons.codec.DecoderException; +import org.apache.commons.codec.EncoderException; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import org.junit.Test; + +/** + * Percent codec test cases + */ +public class PercentCodecTest { + + @Test + public void testBasicEncodeDecode() throws Exception { + PercentCodec percentCodec = new PercentCodec(); + final String input = "abcdABCD"; + byte[] encoded = percentCodec.encode(input.getBytes(Charset.forName("UTF-8"))); + final String encodedS = new String(encoded, "UTF-8"); + byte[] decoded = percentCodec.decode(encoded); + final String decodedS = new String(decoded, "UTF-8"); + assertEquals("Basic PercentCodec encoding test", input, encodedS); + assertEquals("Basic PercentCodec decoding test", input, decodedS); + } + + @Test + public void testSafeCharEncodeDecodeObject() throws Exception { + PercentCodec percentCodec = new PercentCodec(null, true); + final String input = "abc123_-.*"; + Object encoded = percentCodec.encode((Object) input.getBytes(Charset.forName("UTF-8"))); + final String encodedS = new String((byte[]) encoded, "UTF-8"); + Object decoded = percentCodec.decode(encoded); + final String decodedS = new String((byte[]) decoded, "UTF-8"); + assertEquals("Basic PercentCodec safe char encoding test", input, encodedS); + assertEquals("Basic PercentCodec safe char decoding test", input, decodedS); + } + + @Test + public void testUnsafeCharEncodeDecode() throws Exception { + PercentCodec percentCodec = new PercentCodec(); + final String input = "\u03B1\u03B2\u03B3\u03B4\u03B5\u03B6% "; + byte[] encoded = percentCodec.encode(input.getBytes(Charset.forName("UTF-8"))); + final String encodedS = new String(encoded, "UTF-8"); + byte[] decoded = percentCodec.decode(encoded); + final String decodedS = new String(decoded, "UTF-8"); + assertEquals("Basic 
PercentCodec unsafe char encoding test", "%CE%B1%CE%B2%CE%B3%CE%B4%CE%B5%CE%B6%25 ", encodedS); + assertEquals("Basic PercentCodec unsafe char decoding test", input, decodedS); + } + + @Test + public void testConfigurablePercentEncoder() throws Exception { + final String input = "abc123_-.*\u03B1\u03B2"; + PercentCodec percentCodec = new PercentCodec("abcdef".getBytes("UTF-8"), false); + byte[] encoded = percentCodec.encode(input.getBytes(Charset.forName("UTF-8"))); + final String encodedS = new String(encoded, "UTF-8"); + assertEquals("Configurable PercentCodec encoding test", "%61%62%63123_-.*%CE%B1%CE%B2", encodedS); + final byte[] decoded = percentCodec.decode(encoded); + assertEquals("Configurable PercentCodec decoding test", new String(decoded, "UTF-8"), input); + } + + @Test + public void testPercentEncoderDecoderWithNullOrEmptyInput() throws Exception { + PercentCodec percentCodec = new PercentCodec(null, true); + assertEquals("Null input value encoding test", percentCodec.encode(null), null); + assertEquals("Null input value decoding test", percentCodec.decode(null), null); + byte[] emptyInput = "".getBytes("UTF-8"); + assertEquals("Empty input value encoding test", percentCodec.encode(emptyInput), emptyInput); + assertTrue("Empty input value decoding test", Arrays.equals(percentCodec.decode(emptyInput), emptyInput)); + } + + @Test + public void testPercentEncoderDecoderWithPlusForSpace() throws Exception { + final String input = "a b c d"; + PercentCodec percentCodec = new PercentCodec(null, true); + byte[] encoded = percentCodec.encode(input.getBytes(Charset.forName("UTF-8"))); + final String encodedS = new String(encoded, "UTF-8"); + assertEquals("PercentCodec plus for space encoding test", "a+b+c+d", encodedS); + byte[] decode = percentCodec.decode(encoded); + assertEquals("PercentCodec plus for space decoding test", new String(decode, "UTF-8"), input); + } + + @Test(expected = EncoderException.class) + public void testEncodeUnsupportedObject() throws 
Exception { + PercentCodec percentCodec = new PercentCodec(); + percentCodec.encode("test"); + } + + @Test + public void testEncodeNullObject() throws Exception { + PercentCodec percentCodec = new PercentCodec(); + assertEquals(percentCodec.encode((Object) null), null); + } + + @Test(expected = DecoderException.class) + public void testDecodeUnsupportedObject() throws Exception { + PercentCodec percentCodec = new PercentCodec(); + percentCodec.decode("test"); + } + + @Test + public void testDecodeNullObject() throws Exception { + PercentCodec percentCodec = new PercentCodec(); + assertEquals(percentCodec.decode((Object) null), null); + } + + @Test + public void testDecodeInvalidEncodedResultDecoding() throws Exception { + String inputS = "\u03B1\u03B2"; + PercentCodec percentCodec = new PercentCodec(); + byte[] encoded = percentCodec.encode(inputS.getBytes("UTF-8")); + try { + percentCodec.decode(Arrays.copyOf(encoded, encoded.length-1)); //exclude one byte + } catch (Exception e) { + assertTrue(DecoderException.class.isInstance(e) && + ArrayIndexOutOfBoundsException.class.isInstance(e.getCause())); + } + } + +}