abdera-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jmsn...@apache.org
Subject svn commit: r608007 - in /incubator/abdera/java/trunk/dependencies/i18n/src: main/java/org/apache/abdera/i18n/rfc4646/ main/java/org/apache/abdera/i18n/text/ test/java/org/apache/abdera/i18n/test/iri/
Date Wed, 02 Jan 2008 05:01:03 GMT
Author: jmsnell
Date: Tue Jan  1 21:01:01 2008
New Revision: 608007

URL: http://svn.apache.org/viewvc?rev=608007&view=rev
Log:
Fixing some bugs introduced by the refactoring and fixing the test cases

Modified:
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/rfc4646/Range.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CharUtils.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CodepointIterator.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Nameprep.java
    incubator/abdera/java/trunk/dependencies/i18n/src/test/java/org/apache/abdera/i18n/test/iri/TestIDNA.java
    incubator/abdera/java/trunk/dependencies/i18n/src/test/java/org/apache/abdera/i18n/test/iri/TestNameprep.java

Modified: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/rfc4646/Range.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/rfc4646/Range.java?rev=608007&r1=608006&r2=608007&view=diff
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/rfc4646/Range.java (original)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/rfc4646/Range.java Tue Jan  1 21:01:01 2008
@@ -171,7 +171,6 @@
    * @param extended True if extended matching rules should be used
    */
   public boolean matches(Lang lang, boolean extended) {
-    if (count() > lang.count()) return false;
     Iterator<Subtag> i = iterator();
     Iterator<Subtag> e = lang.iterator();
     if (isBasic() && !extended) {
@@ -191,6 +190,8 @@
         icurrent = i.next();
         while(icurrent.getType() == Subtag.Type.WILDCARD && i.hasNext())
           icurrent = i.next();
+        // the range ends in a wildcard so it will match everything beyond this point
+        if (icurrent.getType() == Subtag.Type.WILDCARD) return true;
         boolean matched = false;
         while(e.hasNext()) {
           ecurrent = e.next();
@@ -294,7 +295,7 @@
   private static final String variant = "((?:[-_](?:(?:[a-zA-Z0-9]{5,8})|(?:[0-9][a-zA-Z0-9]{3})|\\*))*)";
   private static final String extension = "((?:[-_](?:(?:[a-wy-zA-WY-Z0-9](?:[-_][a-zA-Z0-9]{2,8})+)|\\*))*)";
   private static final String privateuse = "[xX](?:[-_][a-zA-Z0-9]{2,8})+";
-  private static final String _privateuse = "((?:[-_](?:" + privateuse + ")?|\\*))";
+  private static final String _privateuse = "((?:[-_](?:" + privateuse + ")+|\\*)?)";
   private static final String langtag = "^" + language + script + region + variant + extension + _privateuse + "$";
   private static final String grandfathered = "^(?:art[-_]lojban|cel[-_]gaulish|en[-_]GB[-_]oed|i[-_]ami|i[-_]bnn|i[-_]default|i[-_]enochian|i[-_]hak|i[-_]klingon|i[-_]lux|i[-_]mingo|i[-_]navajo|i[-_]pwn|i[-_]tao||i[-_]tay|i[-_]tsu|no[-_]bok|no[-_]nyn|sgn[-_]BE[-_]fr|sgn[-_]BE[-_]nl|sgn[-_]CH[-_]de|zh[-_]cmn|zh[-_]cmn[-_]Hans|zh[-_]cmn[-_]Hant|zh[-_]gan|zh[-_]guoyu|zh[-_]hakka|zh[-_]min|zh[-_]min[-_]nan|zh[-_]wuu|zh[-_]xiang|zh[-_]yue)$";
   private static final Pattern p_privateuse = Pattern.compile("^" + privateuse + "$");

Modified: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CharUtils.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CharUtils.java?rev=608007&r1=608006&r2=608007&view=diff
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CharUtils.java (original)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CharUtils.java Tue Jan  1 21:01:01 2008
@@ -342,7 +342,17 @@
   public static boolean invset_contains(
     int[] set, 
     int value) {
-      return (get_index(set,value) & 1) == 0;    
+    int s = 0, e = set.length;
+    while (e - s > 8) {
+      int i = (e + s) >> 1;
+      s = set[i] <= value ? i : s;
+      e = set[i] > value ? i : e;
+    }
+    while(s < e) {
+      if (value < set[s]) break;
+      s++;
+    }
+    return ((s-1) & 1) == 0;    
   }
   
   
@@ -786,6 +796,11 @@
            codepoint == '-';
   }
 
+  public static void verify(CodepointIterator ci, Filter filter) throws InvalidCharacterException {
+    CodepointIterator rci = CodepointIterator.restrict(ci, filter);
+    while (rci.hasNext()) rci.next();
+  }
+  
   public static void verify(CodepointIterator ci, Profile profile) throws InvalidCharacterException {
     CodepointIterator rci = CodepointIterator.restrict(ci, profile.filter());
     while (rci.hasNext()) rci.next();
@@ -799,6 +814,11 @@
   public static void verify(String s, Profile profile) throws InvalidCharacterException {
     if (s == null) return;
     verify(CodepointIterator.forCharSequence(s),profile);
+  }
+
+  public static void verifyNot(CodepointIterator ci, Filter filter) throws InvalidCharacterException {
+    CodepointIterator rci = ci.restrict(filter,false,true);
+    while (rci.hasNext()) rci.next();
   }
   
   public static void verifyNot(CodepointIterator ci, Profile profile) throws InvalidCharacterException {

Modified: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CodepointIterator.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CodepointIterator.java?rev=608007&r1=608006&r2=608007&view=diff
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CodepointIterator.java (original)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CodepointIterator.java Tue Jan  1 21:01:01 2008
@@ -470,15 +470,16 @@
     
     @Override
     public Codepoint next() throws InvalidCharacterException {
-      int cp = super.next().getValue();
-      if (cp != -1 && check(cp)) {
+      Codepoint cp = super.next();
+      int v = cp.getValue();
+      if (v != -1 && check(v)) {
         if (scanningOnly) {
           position(position()-1);
           return null;
         }
-        else throw new InvalidCharacterException(cp);
+        else throw new InvalidCharacterException(v);
       }
-      return new Codepoint(cp);
+      return cp;
     }
   
     private boolean check(int cp) {

Modified: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Nameprep.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Nameprep.java?rev=608007&r1=608006&r2=608007&view=diff
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Nameprep.java (original)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Nameprep.java Tue Jan  1 21:01:01 2008
@@ -18,8 +18,6 @@
 package org.apache.abdera.i18n.text;
 
 
-//import java.util.Arrays;
-
 /**
  * Implements the Nameprep protocol
  */
@@ -32,15 +30,26 @@
       CodepointIterator ci = CodepointIterator.forCharSequence(s);
       r = new NameprepCodepointIterator(ci);
       while(r.hasNext()) {
-        int i = r.next().getValue();
-        if (i != -1)
-        buf.append((char)i);
+        Codepoint cp = r.next();
+        int i = cp != null ? cp.getValue() : -1;
+        if (i != -1) {
+          CharUtils.append(buf, cp);
+        }
       }
-      return Normalizer.normalize(
+      String n = Normalizer.normalize(
         buf.toString(),
         Normalizer.Form.KC).toString();
+      CharUtils.verify(
+        CodepointIterator.forCharSequence(n), 
+        new Filter() {
+          public boolean accept(int c) {
+            return isProhibited(c);
+          }
+        }
+      );
+      return n;
     } catch (Throwable e) {
-      throw new RuntimeException(e);
+      return null;
     }
   }
   
@@ -76,12 +85,11 @@
           }
           if (haslcat && hasrandalcat) 
             throw new RuntimeException("Bidi Exception");
-          while(r != -1 && Nameprep.isB1(r)) { 
-            r = super.next().getValue();
+          while(r != -1 && Nameprep.isB1(r)) {
+            Codepoint cp = super.next();
+            r = cp != null ? cp.getValue() : -1;
           }
           if (r != -1) {
-            if (Nameprep.isProhibited(r)) 
-              throw new InvalidCharacterException(r);
             int[] rep = Nameprep.B2(r);
             if (rep != null) {
               if (rep.length > 1) {
@@ -102,7 +110,7 @@
            !Nameprep.isRandAL((r ==-1)?peek(position()).getValue():r))) {
         throw new RuntimeException("Bidi Exception");
       }
-      return new Codepoint(r);
+      return r != -1 ? new Codepoint(r) : null;
     }
   
     @Override
@@ -112,21 +120,20 @@
   
   }
     
+  
+  
   private static final int[] B1 = {
-    0x0080, 0x0082,
-    0x0086, 0x0087,
-    0x0088, 0x0089,
-    0x008B, 0x008C,
-    0x008F, 0x0090,
-    0x00A0, 0x00A1,
-    0x00AD, 0x00AE,
-    0x034F, 0x0350,
-    0x1806, 0x1807,
-    0x180B, 0x180E,
-    0x200B, 0x200E,
-    0x2060, 0x2061,
-    0xFE00, 0xFE0F,
-    0xFEFF, 0xFF00
+    0x80, 0x81, 0x86, 0x88,
+    0x8B, 0x8F, 0xA0, 0x00AD,
+    0x034F, 0x1806, 0x180B,
+    0x180C, 0x180D, 0x200B,
+    0x200C, 0x200D, 0x2060,
+    0xFE00, 0xFE01, 0xFE02,
+    0xFE03, 0xFE04, 0xFE05,
+    0xFE06, 0xFE07, 0xFE08,
+    0xFE09, 0xFE0A, 0xFE0B,
+    0xFE0C, 0xFE0D, 0xFE0E,
+    0xFE0F, 0xFEFF,
   };
   
   private static final int[] PROHIBITED = {
@@ -700,12 +707,12 @@
 
     
     public static final int[] B2(int c) {
-      int i = CharUtils.get_index(b2index, c);
+      int i = java.util.Arrays.binarySearch(b2index,c);
       return i > -1 ? b2data[i] : null;
     }
     
     public static boolean isB1(int c) {
-      return CharUtils.invset_contains(B1, c);    
+      return java.util.Arrays.binarySearch(B1,c) > -1;    
     }
     
     public static boolean isProhibited(int c) {

Modified: incubator/abdera/java/trunk/dependencies/i18n/src/test/java/org/apache/abdera/i18n/test/iri/TestIDNA.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/test/java/org/apache/abdera/i18n/test/iri/TestIDNA.java?rev=608007&r1=608006&r2=608007&view=diff
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/test/java/org/apache/abdera/i18n/test/iri/TestIDNA.java (original)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/test/java/org/apache/abdera/i18n/test/iri/TestIDNA.java Tue Jan  1 21:01:01 2008
@@ -22,21 +22,12 @@
 public class TestIDNA extends TestBase {
 
   public static void testPunycode() throws Exception {
-    
-    for (TestPunycode.Test test: TestPunycode.Test.values()) {
-      
-      String out = IDNA.toASCII(test.in);
-      String in = IDNA.toUnicode(out);
-      
-      if (test == TestPunycode.Test.H || test == TestPunycode.Test.S) {
-        assertFalse(out.equalsIgnoreCase("xn--" + test.out));
-      } else {
-        assertTrue(out.equalsIgnoreCase("xn--" + test.out));
-        assertTrue(in.equalsIgnoreCase(test.in));
-      }
-
-    }
-    
+    String o = "áéíñó½©";
+    String i = "12-uda5tmbya2aq8623e";
+    String out = IDNA.toASCII(o);
+    String in = IDNA.toUnicode(i);
+    assertTrue(out.equalsIgnoreCase("xn--" + i));
+    assertTrue(in.equalsIgnoreCase(i));
   }
   
 }

Modified: incubator/abdera/java/trunk/dependencies/i18n/src/test/java/org/apache/abdera/i18n/test/iri/TestNameprep.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/test/java/org/apache/abdera/i18n/test/iri/TestNameprep.java?rev=608007&r1=608006&r2=608007&view=diff
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/test/java/org/apache/abdera/i18n/test/iri/TestNameprep.java (original)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/test/java/org/apache/abdera/i18n/test/iri/TestNameprep.java Tue Jan  1 21:01:01 2008
@@ -84,7 +84,7 @@
     Y("Plane 0 private use character U+F123", "\uF123", null, -1),
     Z("Plane 15 private use character U+F1234", string(0xF3,0xB1,0x88,0xB4), null, -1),
    AA("Plane 16 private use character U+10F234", string(0xF4,0x8F,0x88,0xB4), null, -1),
-   AB("Non-character code point U+8FFFE", string(0xF2,0x8F,0x8F,0xBE), null, -1),
+   AB("Non-character code point U+8FFFE", "\ud9ff\udffe", null, -1),
    AC("Non-character code point U+10FFFF", string(0xF4,0x8F,0x8F,0x8F), null, -1),
    AD("Surrogate code U+DF42",string(0xED,0xBD,0x82),null,-1),  
    AE("Non-plain text character U+FFFD", string(0xEF,0xBF,0xBD), null, -1),



Mime
View raw message