commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bode...@apache.org
Subject [04/12] commons-compress git commit: COMPRESS-410 Remove Non-NIO character set encoders. As a special case, the UTF-8 encoder will replace malformed / unmappable input with '?'. This behavior is required for compatibility with existing behavior.
Date Wed, 05 Jul 2017 15:34:23 GMT
COMPRESS-410 Remove Non-NIO character set encoders. As a special case, the UTF-8 encoder will
replace malformed / unmappable input with '?'. This behavior is required for compatibility
with existing behavior.

Signed-off-by: Simon Spero <sesuncedu@gmail.com>

(cherry picked from commit 0d41ac4)
Signed-off-by: Simon Spero <sesuncedu@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/cec72ce6
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/cec72ce6
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/cec72ce6

Branch: refs/heads/master
Commit: cec72ce690353c90f3867191d7e657ba59ed2612
Parents: 60a459a
Author: Simon Spero <sesuncedu@gmail.com>
Authored: Fri Jun 16 20:17:13 2017 -0400
Committer: Stefan Bodewig <bodewig@apache.org>
Committed: Wed Jul 5 16:30:00 2017 +0200

----------------------------------------------------------------------
 .../archivers/zip/FallbackZipEncoding.java      |  96 -------
 .../compress/archivers/zip/HasCharset.java      |  12 +
 .../archivers/zip/Simple8BitZipEncoding.java    | 279 -------------------
 .../archivers/zip/ZipEncodingHelper.java        | 165 ++---------
 .../compress/archivers/zip/ZipEncodingTest.java |  51 +++-
 5 files changed, 77 insertions(+), 526 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/cec72ce6/src/main/java/org/apache/commons/compress/archivers/zip/FallbackZipEncoding.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/FallbackZipEncoding.java b/src/main/java/org/apache/commons/compress/archivers/zip/FallbackZipEncoding.java
deleted file mode 100644
index 757bcbd..0000000
--- a/src/main/java/org/apache/commons/compress/archivers/zip/FallbackZipEncoding.java
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.commons.compress.archivers.zip;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-/**
- * A fallback ZipEncoding, which uses a java.io means to encode names.
- *
- * <p>This implementation is not suitable for encodings other than
- * UTF-8, because java.io encodes unmappable character as question
- * marks leading to unreadable ZIP entries on some operating
- * systems.</p>
- * 
- * <p>Furthermore this implementation is unable to tell whether a
- * given name can be safely encoded or not.</p>
- * 
- * <p>This implementation acts as a last resort implementation, when
- * neither {@link Simple8BitZipEnoding} nor {@link NioZipEncoding} is
- * available.</p>
- * 
- * <p>The methods of this class are reentrant.</p>
- * @Immutable
- */
-class FallbackZipEncoding implements ZipEncoding {
-    private final String charsetName;
-
-    /**
-     * Construct a fallback zip encoding, which uses the platform's
-     * default charset.
-     */
-    public FallbackZipEncoding() {
-        this.charsetName = null;
-    }
-
-    /**
-     * Construct a fallback zip encoding, which uses the given charset.
-     * 
-     * @param charsetName The name of the charset or {@code null} for
-     *                the platform's default character set.
-     */
-    public FallbackZipEncoding(final String charsetName) {
-        this.charsetName = charsetName;
-    }
-
-    /**
-     * @see
-     * org.apache.commons.compress.archivers.zip.ZipEncoding#canEncode(java.lang.String)
-     */
-    @Override
-    public boolean canEncode(final String name) {
-        return true;
-    }
-
-    /**
-     * @see
-     * org.apache.commons.compress.archivers.zip.ZipEncoding#encode(java.lang.String)
-     */
-    @Override
-    public ByteBuffer encode(final String name) throws IOException {
-        if (this.charsetName == null) { // i.e. use default charset, see no-args constructor
-            return ByteBuffer.wrap(name.getBytes());
-        }
-        return ByteBuffer.wrap(name.getBytes(this.charsetName));
-    }
-
-    /**
-     * @see
-     * org.apache.commons.compress.archivers.zip.ZipEncoding#decode(byte[])
-     */
-    @Override
-    public String decode(final byte[] data) throws IOException {
-        if (this.charsetName == null) { // i.e. use default charset, see no-args constructor
-            return new String(data);
-        }
-        return new String(data,this.charsetName);
-    }
-}

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/cec72ce6/src/main/java/org/apache/commons/compress/archivers/zip/HasCharset.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/HasCharset.java b/src/main/java/org/apache/commons/compress/archivers/zip/HasCharset.java
new file mode 100644
index 0000000..09dfced
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/HasCharset.java
@@ -0,0 +1,12 @@
+package org.apache.commons.compress.archivers.zip;
+
+import java.nio.charset.Charset;
+
+public interface HasCharset {
+
+    /**
+     *
+     * @return the character set associated with this object
+     */
+    Charset getCharset();
+}

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/cec72ce6/src/main/java/org/apache/commons/compress/archivers/zip/Simple8BitZipEncoding.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/Simple8BitZipEncoding.java b/src/main/java/org/apache/commons/compress/archivers/zip/Simple8BitZipEncoding.java
deleted file mode 100644
index 1bd0f9c..0000000
--- a/src/main/java/org/apache/commons/compress/archivers/zip/Simple8BitZipEncoding.java
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.commons.compress.archivers.zip;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
-/**
- * This ZipEncoding implementation implements a simple 8bit character
- * set, which mets the following restrictions:
- * 
- * <ul>
- * <li>Characters 0x0000 to 0x007f are encoded as the corresponding
- *        byte values 0x00 to 0x7f.</li>
- * <li>All byte codes from 0x80 to 0xff are mapped to a unique unicode
- *       character in the range 0x0080 to 0x7fff. (No support for
- *       UTF-16 surrogates)
- * </ul>
- * 
- * <p>These restrictions most notably apply to the most prominent
- * omissions of java-1.4's {@link java.nio.charset.Charset Charset}
- * implementation, Cp437 and Cp850.</p>
- * 
- * <p>The methods of this class are reentrant.</p>
- * @Immutable
- */
-class Simple8BitZipEncoding implements ZipEncoding {
-
-    /**
-     * A character entity, which is put to the reverse mapping table
-     * of a simple encoding.
-     */
-    private static final class Simple8BitChar implements Comparable<Simple8BitChar> {
-        public final char unicode;
-        public final byte code;
-
-        Simple8BitChar(final byte code, final char unicode) {
-            this.code = code;
-            this.unicode = unicode;
-        }
-
-        @Override
-        public int compareTo(final Simple8BitChar a) {
-            return this.unicode - a.unicode;
-        }
-
-        @Override
-        public String toString() {
-            return "0x" + Integer.toHexString(0xffff & unicode)
-                + "->0x" + Integer.toHexString(0xff & code);
-        }
-
-        @Override
-        public boolean equals(final Object o) {
-            if (o instanceof Simple8BitChar) {
-                final Simple8BitChar other = (Simple8BitChar) o;
-                return unicode == other.unicode && code == other.code;
-            }
-            return false;
-        }
-
-        @Override
-        public int hashCode() {
-            return unicode;
-        }
-    }
-
-    /**
-     * The characters for byte values of 128 to 255 stored as an array of
-     * 128 chars.
-     */
-    private final char[] highChars;
-
-    /**
-     * A list of {@link Simple8BitChar} objects sorted by the unicode
-     * field.  This list is used to binary search reverse mapping of
-     * unicode characters with a character code greater than 127.
-     */
-    private final List<Simple8BitChar> reverseMapping;
-
-    /**
-     * @param highChars The characters for byte values of 128 to 255
-     * stored as an array of 128 chars.
-     */
-    public Simple8BitZipEncoding(final char[] highChars) {
-        this.highChars = highChars.clone();
-        final List<Simple8BitChar> temp =
-            new ArrayList<>(this.highChars.length);
-
-        byte code = 127;
-
-        for (final char highChar : this.highChars) {
-            temp.add(new Simple8BitChar(++code, highChar));
-        }
-
-        Collections.sort(temp);
-        this.reverseMapping = Collections.unmodifiableList(temp);
-    }
-
-    /**
-     * Return the character code for a given encoded byte.
-     * 
-     * @param b The byte to decode.
-     * @return The associated character value.
-     */
-    public char decodeByte(final byte b) {
-        // code 0-127
-        if (b >= 0) {
-            return (char) b;
-        }
-
-        // byte is signed, so 128 == -128 and 255 == -1
-        return this.highChars[128 + b];
-    }
-
-    /**
-     * @param c The character to encode.
-     * @return Whether the given unicode character is covered by this encoding.
-     */
-    public boolean canEncodeChar(final char c) {
-
-        if (c >= 0 && c < 128) {
-            return true;
-        }
-
-        final Simple8BitChar r = this.encodeHighChar(c);
-        return r != null;
-    }
-
-    /**
-     * Pushes the encoded form of the given character to the given byte buffer.
-     * 
-     * @param bb The byte buffer to write to.
-     * @param c The character to encode.
-     * @return Whether the given unicode character is covered by this encoding.
-     *         If {@code false} is returned, nothing is pushed to the
-     *         byte buffer. 
-     */
-    public boolean pushEncodedChar(final ByteBuffer bb, final char c) {
-
-        if (c >= 0 && c < 128) {
-            bb.put((byte) c);
-            return true;
-        }
-
-        final Simple8BitChar r = this.encodeHighChar(c);
-        if (r == null) {
-            return false;
-        }
-        bb.put(r.code);
-        return true;
-    }
-
-    /**
-     * @param c A unicode character in the range from 0x0080 to 0x7f00
-     * @return A Simple8BitChar, if this character is covered by this encoding.
-     *         A {@code null} value is returned, if this character is not
-     *         covered by this encoding.
-     */
-    private Simple8BitChar encodeHighChar(final char c) {
-        // for performance an simplicity, yet another reincarnation of
-        // binary search...
-        int i0 = 0;
-        int i1 = this.reverseMapping.size();
-
-        while (i1 > i0) {
-
-            final int i = i0 + (i1 - i0) / 2;
-
-            final Simple8BitChar m = this.reverseMapping.get(i);
-
-            if (m.unicode == c) {
-                return m;
-            }
-
-            if (m.unicode < c) {
-                i0 = i + 1;
-            } else {
-                i1 = i;
-            }
-        }
-
-        if (i0 >= this.reverseMapping.size()) {
-            return null;
-        }
-
-        final Simple8BitChar r = this.reverseMapping.get(i0);
-
-        if (r.unicode != c) {
-            return null;
-        }
-
-        return r;
-    }
-
-    /**
-     * @see
-     * org.apache.commons.compress.archivers.zip.ZipEncoding#canEncode(java.lang.String)
-     */
-    @Override
-    public boolean canEncode(final String name) {
-
-        for (int i=0;i<name.length();++i) {
-
-            final char c = name.charAt(i);
-
-            if (!this.canEncodeChar(c)) {
-                return false;
-            }
-        }
-
-        return true;
-    }
-
-    /**
-     * @see
-     * org.apache.commons.compress.archivers.zip.ZipEncoding#encode(java.lang.String)
-     */
-    @Override
-    public ByteBuffer encode(final String name) {
-        ByteBuffer out = ByteBuffer.allocate(name.length()
-                                             + 6 + (name.length() + 1) / 2);
-
-        for (int i=0;i<name.length();++i) {
-
-            final char c = name.charAt(i);
-
-            if (out.remaining() < 6) {
-                out = ZipEncodingHelper.growBuffer(out,out.position() + 6);
-            }
-
-            if (!this.pushEncodedChar(out,c)) {
-
-                ZipEncodingHelper.appendSurrogate(out,c);
-            }
-        }
-
-        out.limit(out.position());
-        out.rewind();
-        return out;
-    }
-
-    /**
-     * @see
-     * org.apache.commons.compress.archivers.zip.ZipEncoding#decode(byte[])
-     */
-    @Override
-    public String decode(final byte[] data) throws IOException {
-        final char [] ret = new char[data.length];
-
-        for (int i=0;i<data.length;++i) {
-            ret[i] = this.decodeByte(data[i]);
-        }
-
-        return new String(ret);
-    }
-
-
-}

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/cec72ce6/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
index bcfb8cf..18ad103 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
@@ -21,126 +21,17 @@ package org.apache.commons.compress.archivers.zip;
 import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
 import java.nio.charset.UnsupportedCharsetException;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-
 import org.apache.commons.compress.utils.Charsets;
 
 /**
- * Static helper functions for robustly encoding filenames in zip files. 
+ * Static helper functions for robustly encoding filenames in zip files.
  */
 public abstract class ZipEncodingHelper {
 
     /**
-     * A class, which holds the high characters of a simple encoding
-     * and lazily instantiates a Simple8BitZipEncoding instance in a
-     * thread-safe manner.
-     */
-    private static class SimpleEncodingHolder {
-
-        private final char [] highChars;
-        private Simple8BitZipEncoding encoding;
-
-        /**
-         * Instantiate a simple encoding holder.
-         * 
-         * @param highChars The characters for byte codes 128 to 255.
-         * 
-         * @see Simple8BitZipEncoding#Simple8BitZipEncoding(char[])
-         */
-        SimpleEncodingHolder(final char [] highChars) {
-            this.highChars = highChars;
-        }
-
-        /**
-         * @return The associated {@link Simple8BitZipEncoding}, which
-         *         is instantiated if not done so far.
-         */
-        public synchronized Simple8BitZipEncoding getEncoding() {
-            if (this.encoding == null) {
-                this.encoding = new Simple8BitZipEncoding(this.highChars);
-            }
-            return this.encoding;
-        }
-    }
-
-    private static final Map<String, SimpleEncodingHolder> simpleEncodings;
-
-    static {
-        final Map<String, SimpleEncodingHolder> se =
-            new HashMap<>();
-
-        final char[] cp437_high_chars =
-            new char[] { 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0,
-                         0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef,
-                         0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6,
-                         0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
-                         0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5,
-                         0x20a7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa,
-                         0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x2310,
-                         0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
-                         0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561,
-                         0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557,
-                         0x255d, 0x255c, 0x255b, 0x2510, 0x2514, 0x2534,
-                         0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f,
-                         0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550,
-                         0x256c, 0x2567, 0x2568, 0x2564, 0x2565, 0x2559,
-                         0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518,
-                         0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
-                         0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3,
-                         0x00b5, 0x03c4, 0x03a6, 0x0398, 0x03a9, 0x03b4,
-                         0x221e, 0x03c6, 0x03b5, 0x2229, 0x2261, 0x00b1,
-                         0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248,
-                         0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2,
-                         0x25a0, 0x00a0 };
-
-        final SimpleEncodingHolder cp437 = new SimpleEncodingHolder(cp437_high_chars);
-
-        se.put("CP437", cp437);
-        se.put("Cp437", cp437);
-        se.put("cp437", cp437);
-        se.put("IBM437", cp437);
-        se.put("ibm437", cp437);
-
-        final char[] cp850_high_chars =
-            new char[] { 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0,
-                         0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef,
-                         0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6,
-                         0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
-                         0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8,
-                         0x00d7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa,
-                         0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x00ae,
-                         0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
-                         0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1,
-                         0x00c2, 0x00c0, 0x00a9, 0x2563, 0x2551, 0x2557,
-                         0x255d, 0x00a2, 0x00a5, 0x2510, 0x2514, 0x2534,
-                         0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3,
-                         0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550,
-                         0x256c, 0x00a4, 0x00f0, 0x00d0, 0x00ca, 0x00cb,
-                         0x00c8, 0x0131, 0x00cd, 0x00ce, 0x00cf, 0x2518,
-                         0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580,
-                         0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5,
-                         0x00b5, 0x00fe, 0x00de, 0x00da, 0x00db, 0x00d9,
-                         0x00fd, 0x00dd, 0x00af, 0x00b4, 0x00ad, 0x00b1,
-                         0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8,
-                         0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2,
-                         0x25a0, 0x00a0 };
-
-        final SimpleEncodingHolder cp850 = new SimpleEncodingHolder(cp850_high_chars);
-
-        se.put("CP850", cp850);
-        se.put("Cp850", cp850);
-        se.put("cp850", cp850);
-        se.put("IBM850", cp850);
-        se.put("ibm850", cp850);
-        simpleEncodings = Collections.unmodifiableMap(se);
-    }
-
-    /**
      * Grow a byte buffer, so it has a minimal capacity or at least
-     * the double capacity of the original buffer 
-     * 
+     * the double capacity of the original buffer
+     *
      * @param b The original buffer.
      * @param newCapacity The minimal requested new capacity.
      * @return A byte buffer <code>r</code> with
@@ -160,7 +51,7 @@ public abstract class ZipEncodingHelper {
         return on;
     }
 
- 
+
     /**
      * The hexadecimal digits <code>0,...,9,A,...,F</code> encoded as
      * ASCII bytes.
@@ -174,7 +65,7 @@ public abstract class ZipEncodingHelper {
     /**
      * Append <code>%Uxxxx</code> to the given byte buffer.
      * The caller must assure, that <code>bb.remaining()&gt;=6</code>.
-     * 
+     *
      * @param bb The byte buffer to write to.
      * @param c The character to write.
      */
@@ -198,47 +89,37 @@ public abstract class ZipEncodingHelper {
     /**
      * name of the encoding UTF-8
      */
-    static final ZipEncoding UTF8_ZIP_ENCODING = new FallbackZipEncoding(UTF8);
+    static final ZipEncoding UTF8_ZIP_ENCODING = getZipEncoding("UTF-8");
 
     /**
-     * Instantiates a zip encoding.
-     * 
+     * Instantiates a zip encoding. An NIO based character set encoder/decoder will be returned.
+     * As a special case, if the character set is UTF-8, the nio encoder will be configured replace malformed and
+     * unmappable characters with '?'. This matches existing behavior from the older fallback encoder.
+     * <p>
+     *     If the requested characer set cannot be found, the platform default will
+     *     be used instead.
+     * </p>
      * @param name The name of the zip encoding. Specify {@code null} for
      *             the platform's default encoding.
      * @return A zip encoding for the given encoding name.
      */
     public static ZipEncoding getZipEncoding(final String name) {
- 
-        // fallback encoding is good enough for UTF-8.
-        if (isUTF8(name)) {
-            return UTF8_ZIP_ENCODING;
-        }
-
-        if (name == null) {
-            return new FallbackZipEncoding();
-        }
-
-        final SimpleEncodingHolder h = simpleEncodings.get(name);
-
-        if (h!=null) {
-            return h.getEncoding();
+        Charset cs = Charset.defaultCharset();
+        if (name != null) {
+            try {
+                cs = Charset.forName(name);
+            } catch (UnsupportedCharsetException e) {
+            }
         }
+        boolean useReplacement = cs.name().equals("UTF-8");
+        return new NioZipEncoding(cs, useReplacement);
 
-        try {
-
-            final Charset cs = Charset.forName(name);
-            return new NioZipEncoding(cs);
-
-        } catch (final UnsupportedCharsetException e) {
-            return new FallbackZipEncoding(name);
-        }
     }
 
     /**
     * Returns whether a given encoding is UTF-8. If the given name is null, then check the platform's default encoding.
-     * 
-     * @param charsetName
-     *            If the given name is null, then check the platform's default encoding.
+     *
+     * @param charsetName If the given name is null, then check the platform's default encoding.
      */
     static boolean isUTF8(String charsetName) {
         if (charsetName == null) {

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/cec72ce6/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
index f0c049a..f3e5127 100644
--- a/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
@@ -19,13 +19,16 @@
 
 package org.apache.commons.compress.archivers.zip;
 
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
+
 import java.io.IOException;
 import java.nio.ByteBuffer;
-
+import java.nio.charset.Charset;
 import org.apache.commons.compress.utils.CharsetNames;
-
-import static org.junit.Assert.*;
-
+import org.hamcrest.core.IsInstanceOf;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -33,6 +36,7 @@ import org.junit.Test;
  * Test zip encodings.
  */
 public class ZipEncodingTest {
+
     private static final String UNENC_STRING = "\u2016";
 
     // stress test for internal grow method.
@@ -43,15 +47,44 @@ public class ZipEncodingTest {
         "%U2016%U2015%U2016%U2015%U2016%U2015%U2016%U2015%U2016%U2015%U2016";
 
     @Test
-    public void testSimpleCp437Encoding() throws IOException {
-
-        doSimpleEncodingTest("Cp437", null);
+    public void testNothingToMakeCoverallsHappier() {
+        Object o = new ZipEncodingHelper() {
+        };
+        assertNotNull(o);
+    }
+    @Test
+    public void testGetNonexistentEncodng() throws IOException {
+        ZipEncoding ze = ZipEncodingHelper.getZipEncoding("I-am-a-banana");
+        assertNotNull(ze);
+        if (ze instanceof HasCharset) {
+            HasCharset hasCharset = (HasCharset) ze;
+            Assert.assertEquals(Charset.defaultCharset(),hasCharset.getCharset());
+        }
     }
 
     @Test
+    public void testIsUTF8() throws IOException {
+       assertTrue(ZipEncodingHelper.isUTF8("UTF-8"));
+       assertTrue(ZipEncodingHelper.isUTF8("UTF8"));
+       Assert.assertEquals(Charset.defaultCharset().name().equals("UTF-8"),ZipEncodingHelper.isUTF8(null));
+    }
+    @Test
+    public void testSimpleCp437Encoding() throws IOException {
+        doSimpleEncodingsTest(437);
+    }
+    @Test
     public void testSimpleCp850Encoding() throws IOException {
+        doSimpleEncodingsTest(850);
+    }
+
 
-        doSimpleEncodingTest("Cp850", null);
+    private void doSimpleEncodingsTest(int n) throws IOException {
+
+        doSimpleEncodingTest("Cp" + n, null);
+        doSimpleEncodingTest("cp" + n, null);
+        doSimpleEncodingTest("CP" + n, null);
+        doSimpleEncodingTest("IBM" + n, null);
+        doSimpleEncodingTest("ibm" + n, null);
     }
 
     @Test
@@ -127,7 +160,7 @@ public class ZipEncodingTest {
         throws IOException {
 
         final ZipEncoding enc = ZipEncodingHelper.getZipEncoding(name);
-
+        assertThat(enc, IsInstanceOf.instanceOf(NioZipEncoding.class));
         if (testBytes == null) {
 
             testBytes = new byte[256];


Mime
View raw message