xerces-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mrgla...@apache.org
Subject svn commit: r581336 - in /xerces/java/trunk/tests/io: UTF16.java UTF8.java
Date Tue, 02 Oct 2007 18:15:20 GMT
Author: mrglavas
Date: Tue Oct  2 11:15:19 2007
New Revision: 581336

URL: http://svn.apache.org/viewvc?rev=581336&view=rev
Log:
A new UTF-16 test derived from the UTF-8 one.

Added:
    xerces/java/trunk/tests/io/UTF16.java   (with props)
Modified:
    xerces/java/trunk/tests/io/UTF8.java

Added: xerces/java/trunk/tests/io/UTF16.java
URL: http://svn.apache.org/viewvc/xerces/java/trunk/tests/io/UTF16.java?rev=581336&view=auto
==============================================================================
--- xerces/java/trunk/tests/io/UTF16.java (added)
+++ xerces/java/trunk/tests/io/UTF16.java Tue Oct  2 11:15:19 2007
@@ -0,0 +1,382 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+
+import org.apache.xerces.impl.io.UTF16Reader;
+import org.apache.xerces.util.XMLChar;
+
+/**
+ * This program tests the customized UTF-16 reader for the parser,
+ * comparing it with the Java UTF-16 reader.
+ *
+ * @version $Id$
+ */
+public class UTF16 {
+    
+    //
+    // MAIN
+    //
+
+    /** Main program entry. */
+    public static void main(String[] argv) throws Exception {
+        testUTF16Decoder(true);
+        testUTF16Decoder(false);
+    } // main(String[])
+    
+    //
+    // Public static methods
+    //
+    
+    public static void testUTF16Decoder(boolean isBigEndian) throws Exception {
+        
+        final int BLOCK_READ_SIZE = 2048;
+        final String encoding = isBigEndian ? "UnicodeBig" : "UnicodeLittle";
+        final String shortName = isBigEndian ? "BE" : "LE";
+
+        //
+        // Test Java reference implementation of UTF-16 decoder
+        //
+
+        System.err.println("#");
+        System.err.println("# Testing Java UTF-16" + shortName + " decoder");
+        System.err.println("#");
+
+        // test character by character
+        try {
+            InputStream stream = new UTF16Producer(isBigEndian);
+            Reader reader = new InputStreamReader(stream, encoding);
+            long time = testCharByChar(reader);
+            System.err.println("PASS ("+time+" ms)");
+            reader.close();
+        } 
+        catch (IOException e) {
+            System.err.println("FAIL: "+e.getMessage());
+        }
+        
+        // test character array
+        try {
+            InputStream stream = new UTF16Producer(isBigEndian);
+            Reader reader = new InputStreamReader(stream, encoding);
+            long time = testCharArray(reader, BLOCK_READ_SIZE);
+            System.err.println("PASS ("+time+" ms)");
+            reader.close();
+        } 
+        catch (IOException e) {
+            System.err.println("FAIL: "+e.getMessage());
+        }
+        
+        //
+        // Test custom implementation of UTF-16 decoder
+        //
+
+        System.err.println("#");
+        System.err.println("# Testing custom UTF-16" + shortName + " decoder");
+        System.err.println("#");
+
+        // test character by character
+        try {
+            InputStream stream = new UTF16Producer(isBigEndian);
+            Reader reader = new UTF16Reader(stream, isBigEndian);
+            long time = testCharByChar(reader);
+            System.err.println("PASS ("+time+" ms)");
+            reader.close();
+        } 
+        catch (IOException e) {
+            System.err.println("FAIL: "+e.getMessage());
+        }
+        
+        // test character array
+        try {
+            InputStream stream = new UTF16Producer(isBigEndian);
+            Reader reader = new UTF16Reader(stream, isBigEndian);
+            long time = testCharArray(reader, BLOCK_READ_SIZE);
+            System.err.println("PASS ("+time+" ms)");
+            reader.close();
+        } 
+        catch (IOException e) {
+            System.err.println("FAIL: "+e.getMessage());
+        }
+    }
+
+    /** This function tests the specified reader character by character. */
+    public static long testCharByChar(Reader reader) throws Exception {
+
+        long before = System.currentTimeMillis();
+        System.err.println("# Testing character by character");
+
+        System.err.println("testing 0x000000 -> 0x00D7FF");
+        for (int i = 0; i < 0xD800; i++) {
+            int c = reader.read();
+            if (c != i) {
+                UTF8.expectedChar(null, i, c);
+            }
+        }
+        System.err.println("testing 0x00E000 -> 0x00FFFD");
+        for (int i = 0xE000; i < 0xFFFE; i++) {
+            int c = reader.read();
+            if (c != i) {
+                UTF8.expectedChar(null, i, c);
+            }
+        }
+        System.err.println("testing 0x010000 -> 0x10FFFF");
+        for (int i = 0x10000; i < 0x110000; i++) {
+            // vars
+            int uuuuu = (i >> 16) & 0x001F;
+            int wwww = uuuuu - 1;
+            int zzzz = (i >> 12) & 0x000F;
+            int yyyyyy = (i >> 6) & 0x003F;
+            int xxxxxx = i & 0x003F;
+            int hs = 0xD800 | (wwww << 6) | (zzzz << 2) | (yyyyyy >> 4);
+            int ls = 0xDC00 | ((yyyyyy << 6) & 0x03C0) | xxxxxx;
+            // high surrogate
+            int c = reader.read();
+            if (c != hs) {
+                UTF8.expectedChar("high surrogate", hs, c);
+            }
+            // low surrogate
+            c = reader.read();
+            if (c != ls) {
+                UTF8.expectedChar("low surrogate", ls, c);
+            }
+        }
+        System.err.println("checking EOF");
+        int c = reader.read();
+        if (c != -1) {
+            UTF8.extraChar(c);
+        }
+        long after = System.currentTimeMillis();
+
+        return after - before;
+
+    } // testCharByChar(Reader):long
+
+    /**
+     * This function tests the given reader by performing block character
+     * reads of the specified size.
+     */
+    public static long testCharArray(Reader reader, int size) throws Exception {
+
+        long before = System.currentTimeMillis();
+        System.err.println("# Testing character array of size "+size);
+
+        char[] ch = new char[size];
+        int count = 0;
+        int position = 0;
+
+        System.err.println("testing 0x000000 -> 0x00D7FF");
+        for (int i = 0; i < 0xD800; i++) {
+            if (position == count) {
+                count = UTF8.load(reader, ch);
+                position = 0;
+            }
+            int c = ch[position++];
+            if (c != i) {
+                UTF8.expectedChar(null, i, c);
+            }
+        }
+        System.err.println("testing 0x00E000 -> 0x00FFFD");
+        for (int i = 0xE000; i < 0xFFFE; i++) {
+            if (position == count) {
+                count = UTF8.load(reader, ch);
+                position = 0;
+            }
+            int c = ch[position++];
+            if (c != i) {
+                UTF8.expectedChar(null, i, c);
+            }
+        }
+        System.err.println("testing 0x010000 -> 0x110000");
+        for (int i = 0x10000; i < 0x110000; i++) {
+            // vars
+            int uuuuu = (i >> 16) & 0x001F;
+            int wwww = uuuuu - 1;
+            int zzzz = (i >> 12) & 0x000F;
+            int yyyyyy = (i >> 6) & 0x003F;
+            int xxxxxx = i & 0x003F;
+            int hs = 0xD800 | (wwww << 6) | (zzzz << 2) | (yyyyyy >> 4);
+            int ls = 0xDC00 | ((yyyyyy << 6) & 0x03C0) | xxxxxx;
+            // high surrogate
+            if (position == count) {
+                count = UTF8.load(reader, ch);
+                position = 0;
+            }
+            int c = ch[position++];
+            if (c != hs) {
+                UTF8.expectedChar("high surrogate", hs, c);
+            }
+            // low surrogate
+            if (position == count) {
+                count = UTF8.load(reader, ch);
+                position = 0;
+            }
+            c = ch[position++];
+            if (c != ls) {
+                UTF8.expectedChar("low surrogate", ls, c);
+            }
+        }
+        System.err.println("checking EOF");
+        if (position == count) {
+            count = UTF8.load(reader, ch);
+            position = 0;
+        }
+        if (count != -1) {
+            UTF8.extraChar(ch[position]);
+        }
+        long after = System.currentTimeMillis();
+
+        return after - before;
+
+    } // testCharArray(Reader,int):long
+    
+    //
+    // Classes
+    //
+    
+    /**
+     * This class produces a stream of UTF-16 byte sequences for all
+     * valid Unicode characters.
+     */
+    public static class UTF16Producer
+        extends InputStream {
+        
+        //
+        // Data
+        //
+
+        /** The current code point. */
+        private int fCodePoint;
+
+        /** The current byte of the current code point. */
+        private int fByte;
+        
+        /** Endianness. */
+        private final boolean fIsBigEndian;
+        
+        //
+        // Constructors
+        //
+        
+        public UTF16Producer(boolean isBigEndian) {
+            fIsBigEndian = isBigEndian;
+        }
+        
+        //
+        // InputStream methods
+        //
+
+        /** Reads the next byte of the UTF-16 encoded stream, or -1 at the end. */
+        public int read() throws IOException {
+            
+            if (fCodePoint < 0xFFFE) {
+                // skip surrogate blocks
+                if (fCodePoint == 0xD800) {
+                    fCodePoint = 0xE000;
+                }
+                switch (fByte) {
+                    case 0: {
+                        final int b;
+                        if (fIsBigEndian) {
+                            b = fCodePoint >> 8;
+                        }
+                        else {
+                            b = fCodePoint & 0xff;
+                        }
+                        fByte++;
+                        return b;
+                    }
+                    case 1: {
+                        final int b;
+                        if (fIsBigEndian) {
+                            b = fCodePoint & 0xff;
+                        }
+                        else {
+                            b = fCodePoint >> 8;
+                        }
+                        fCodePoint++;
+                        fByte = 0;
+                        return b;
+                    }
+                    default: {
+                        throw new RuntimeException("byte "+fByte+" of 2 byte UTF-8 sequence");
+                    }
+                }
+            }
+            if (fCodePoint == 0xFFFE) {
+                fCodePoint = 0x10000;
+            }
+            if (fCodePoint < 0x110000) {
+                switch (fByte) {
+                    case 0: {
+                        final int b;
+                        if (fIsBigEndian) {
+                            b = XMLChar.highSurrogate(fCodePoint) >> 8;
+                        }
+                        else {
+                            b = XMLChar.highSurrogate(fCodePoint) & 0xff;
+                        }
+                        fByte++;
+                        return b;
+                    }
+                    case 1: {
+                        final int b;
+                        if (fIsBigEndian) {
+                            b = XMLChar.highSurrogate(fCodePoint) & 0xff;
+                        }
+                        else {
+                            b = XMLChar.highSurrogate(fCodePoint) >> 8;
+                        }
+                        fByte++;
+                        return b;
+                    }
+                    case 2: {
+                        final int b;
+                        if (fIsBigEndian) {
+                            b = XMLChar.lowSurrogate(fCodePoint) >> 8;
+                        }
+                        else {
+                            b = XMLChar.lowSurrogate(fCodePoint) & 0xff;
+                        }
+                        fByte++;
+                        return b;
+                    }
+                    case 3: {
+                        final int b;
+                        if (fIsBigEndian) {
+                            b = XMLChar.lowSurrogate(fCodePoint) & 0xff;
+                        }
+                        else {
+                            b = XMLChar.lowSurrogate(fCodePoint) >> 8;
+                        }
+                        fCodePoint++;
+                        fByte = 0;
+                        return b;
+                    }
+                    default: {
+                        throw new RuntimeException("byte "+fByte+" of 2 byte UTF-8 sequence");
+                    }
+                }
+            }
+            return -1;
+        }
+    }
+}

Propchange: xerces/java/trunk/tests/io/UTF16.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: xerces/java/trunk/tests/io/UTF16.java
------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Modified: xerces/java/trunk/tests/io/UTF8.java
URL: http://svn.apache.org/viewvc/xerces/java/trunk/tests/io/UTF8.java?rev=581336&r1=581335&r2=581336&view=diff
==============================================================================
--- xerces/java/trunk/tests/io/UTF8.java (original)
+++ xerces/java/trunk/tests/io/UTF8.java Tue Oct  2 11:15:19 2007
@@ -240,7 +240,7 @@
                 expectedChar(null, i, c);
             }
         }
-        System.err.println("testing 0x010000 -> 0x110000");
+        System.err.println("testing 0x010000 -> 0x10FFFF");
         for (int i = 0x10000; i < 0x110000; i++) {
             // vars
             int uuuuu = (i >> 16) & 0x001F;
@@ -284,17 +284,17 @@
     } // testCharArray(Reader):long
 
     //
-    // Private static methods
+    // Package private static methods
     //
 
     /** Loads another block of characters from the reader. */
-    private static int load(Reader reader, char[] ch) throws IOException {
+    static int load(Reader reader, char[] ch) throws IOException {
         int count = reader.read(ch, 0, ch.length);
         return count;
     } // load(Reader,char[]):int
 
     /** Creates an I/O exception for expected character. */
-    private static void expectedChar(String prefix, int ec, int fc) throws IOException {
+    static void expectedChar(String prefix, int ec, int fc) throws IOException {
         StringBuffer str = new StringBuffer();
         str.append("expected ");
         if (prefix != null) {
@@ -315,7 +315,7 @@
     } // expectedChar(String,int,int)
 
     /** Creates an I/O exception for extra character. */
-    private static void extraChar(int c) throws IOException {
+    static void extraChar(int c) throws IOException {
         StringBuffer str = new StringBuffer();
         str.append("found extra character 0x");
         str.append(Integer.toHexString(c));



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@xerces.apache.org
For additional commands, e-mail: commits-help@xerces.apache.org


Mime
View raw message