abdera-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jmsn...@apache.org
Subject svn commit: r614366 - in /incubator/abdera/java/trunk: dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/ examples/src/main/java/org/apache/abdera/examples/ext/ examples/src/main/java/org/apache/abdera/examples/simple/ extensions/json/src/mai...
Date Tue, 22 Jan 2008 22:57:54 GMT
Author: jmsnell
Date: Tue Jan 22 14:57:51 2008
New Revision: 614366

URL: http://svn.apache.org/viewvc?rev=614366&view=rev
Log:
Move the bidi guessing algorithms into the i18n module. This is mainly to keep related code
together in one spot to make it easier to maintain

Oh, and I've verified the correctness of the guessing algorithms against the Common Locale
Data Repository (CLDR)

Added:
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Bidi.java
Modified:
    incubator/abdera/java/trunk/examples/src/main/java/org/apache/abdera/examples/ext/Bidi.java
    incubator/abdera/java/trunk/examples/src/main/java/org/apache/abdera/examples/simple/i18nExample.java
    incubator/abdera/java/trunk/extensions/json/src/main/java/org/apache/abdera/ext/json/JSONUtil.java
    incubator/abdera/java/trunk/extensions/main/src/main/java/org/apache/abdera/ext/bidi/BidiHelper.java
    incubator/abdera/java/trunk/extensions/main/src/test/java/org/apache/abdera/test/ext/bidi/BidiTest.java

Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Bidi.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Bidi.java?rev=614366&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Bidi.java	(added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Bidi.java	Tue Jan 22 14:57:51 2008
@@ -0,0 +1,110 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text;
+
+import java.text.AttributedString;
+import java.util.Arrays;
+
+import org.apache.abdera.i18n.rfc4646.Lang;
+
+/**
+ * Bidi guessing algorithms
+ */
+public class Bidi {
+
+  public enum Direction { UNSPECIFIED, LTR, RTL };
+  
+  private static final String[] RTL_LANGS = {
+    "ar",
+    "dv",
+    "fa",
+    "he",
+    "ps",
+    "syr",
+    "ur",
+    "yi"};
+  
+  private static final String[] RTL_SCRIPTS = {
+    "arab","avst","hebr","hung","lydi","mand",
+    "mani","mero","mong","nkoo","orkh","phlv",
+    "phnx","samr","syrc","syre","syrj","syrn",
+    "tfng","thaa"
+  };
+  // charset encodings that one may typically expect to be RTL
+  private static final String[] RTL_ENCODINGS = {
+    "iso-8859-6", "iso-8859-6-bidi", 
+    "iso-8859-6-i", "iso-ir-127", 
+    "ecma-114", "asmo-708", "arabic", 
+    "csisolatinarabic", "windows-1256", 
+    "ibm-864", "macarabic", "macfarsi", 
+    "iso-8859-8-i", "iso-8859-8-bidi", 
+    "windows-1255", "iso-8859-8", "ibm-862", 
+    "machebrew", "asmo-449", "iso-9036", 
+    "arabic7", "iso-ir-89", "csiso89asmo449", 
+    "iso-unicode-ibm-1264", "csunicodeibm1264", 
+    "iso_8859-8:1988", "iso-ir-138", "hebrew", 
+    "csisolatinhebrew", "iso-unicode-ibm-1265", 
+    "csunicodeibm1265", "cp862", "862", 
+    "cspc862latinhebrew"
+  };
+  
+  public static Direction guessDirectionFromLanguage(Lang lang) {
+    if (lang.getScript() != null) {
+      String script = lang.getScript().getName();
+      if (Arrays.binarySearch(RTL_SCRIPTS, script.toLowerCase()) > -1)
+        return Direction.RTL;
+    }
+    String primary = lang.getLanguage().getName();
+    if (Arrays.binarySearch(RTL_LANGS, primary.toLowerCase()) > -1) 
+          return Direction.RTL;
+    return Direction.UNSPECIFIED;
+  }
+ 
+  public static Direction guessDirectionFromEncoding(String charset) {
+    if (charset == null) return Direction.UNSPECIFIED;
+    charset = charset.replace('_', '-');
+    Arrays.sort(RTL_ENCODINGS);
+    if (Arrays.binarySearch(RTL_ENCODINGS, charset.toLowerCase()) > -1) 
+      return Direction.RTL;
+    return Direction.UNSPECIFIED;
+  }
+  
+  public static Direction guessDirectionFromTextProperties(String text) {
+    if (text != null && text.length() > 0) {
+      if (text.charAt(0) == 0x200F) return Direction.RTL; // if using the unicode right-to-left mark
+      if (text.charAt(0) == 0x200E) return Direction.LTR; // if using the unicode left-to-right mark
+      int c = 0;
+      for (int n = 0; n < text.length(); n++) {
+        char ch = text.charAt(n);
+        if (java.text.Bidi.requiresBidi(new char[] {ch}, 0, 1)) c++;
+        else c--;
+      }
+      return c > 0 ? Direction.RTL : Direction.LTR;
+    }
+    return Direction.UNSPECIFIED;
+  }
+  
+  public static Direction guessDirectionFromJavaBidi(String text) {
+    if (text != null) {
+      AttributedString s = new AttributedString(text);
+      java.text.Bidi bidi = new java.text.Bidi(s.getIterator());
+      return bidi.baseIsLeftToRight() ? Direction.LTR : Direction.RTL;
+    }
+    return Direction.UNSPECIFIED;
+  }
+}

Modified: incubator/abdera/java/trunk/examples/src/main/java/org/apache/abdera/examples/ext/Bidi.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/examples/src/main/java/org/apache/abdera/examples/ext/Bidi.java?rev=614366&r1=614365&r2=614366&view=diff
==============================================================================
--- incubator/abdera/java/trunk/examples/src/main/java/org/apache/abdera/examples/ext/Bidi.java	(original)
+++ incubator/abdera/java/trunk/examples/src/main/java/org/apache/abdera/examples/ext/Bidi.java	Tue Jan 22 14:57:51 2008
@@ -19,8 +19,8 @@
 
 import org.apache.abdera.Abdera;
 import org.apache.abdera.ext.bidi.BidiHelper;
+import org.apache.abdera.i18n.text.Bidi.Direction;
 import org.apache.abdera.model.Entry;
-
 /**
  * The Atom Bidi Extension is described in an IETF Internet-Draft and is used
  * to communicate information about the base directionality of text in an Atom
@@ -34,7 +34,7 @@
     
     Abdera abdera = new Abdera();
     Entry entry = abdera.newEntry();
-    BidiHelper.setDirection(BidiHelper.Direction.RTL, entry);
+    BidiHelper.setDirection(Direction.RTL, entry);
     
     entry.setTitle(text);
     

Modified: incubator/abdera/java/trunk/examples/src/main/java/org/apache/abdera/examples/simple/i18nExample.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/examples/src/main/java/org/apache/abdera/examples/simple/i18nExample.java?rev=614366&r1=614365&r2=614366&view=diff
==============================================================================
--- incubator/abdera/java/trunk/examples/src/main/java/org/apache/abdera/examples/simple/i18nExample.java	(original)
+++ incubator/abdera/java/trunk/examples/src/main/java/org/apache/abdera/examples/simple/i18nExample.java	Tue Jan 22 14:57:51 2008
@@ -21,6 +21,7 @@
 
 import org.apache.abdera.Abdera;
 import org.apache.abdera.ext.bidi.BidiHelper;
+import org.apache.abdera.i18n.text.Bidi.Direction;
 import org.apache.abdera.model.Element;
 import org.apache.abdera.model.Entry;
 import org.apache.abdera.model.Feed;
@@ -38,7 +39,7 @@
     
     // Set the language context and default text direction
     feed.setLanguage("ar");  // Arabic
-    BidiHelper.setDirection(BidiHelper.Direction.RTL, feed);
+    BidiHelper.setDirection(Direction.RTL, feed);
     
     feed.setBaseUri("http://\u0645\u062b\u0627\u0644.org/ar/feed.xml");
     feed.setId("tag:\u0645\u062b\u0627\u0644.org,2007:/\u0645\u062b\u0627\u0644");

Modified: incubator/abdera/java/trunk/extensions/json/src/main/java/org/apache/abdera/ext/json/JSONUtil.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/json/src/main/java/org/apache/abdera/ext/json/JSONUtil.java?rev=614366&r1=614365&r2=614366&view=diff
==============================================================================
--- incubator/abdera/java/trunk/extensions/json/src/main/java/org/apache/abdera/ext/json/JSONUtil.java	(original)
+++ incubator/abdera/java/trunk/extensions/json/src/main/java/org/apache/abdera/ext/json/JSONUtil.java	Tue Jan 22 14:57:51 2008
@@ -30,6 +30,7 @@
 import org.apache.abdera.ext.thread.ThreadHelper;
 import org.apache.abdera.i18n.iri.IRI;
 import org.apache.abdera.i18n.text.UrlEncoding;
+import org.apache.abdera.i18n.text.Bidi.Direction;
 import org.apache.abdera.i18n.text.CharUtils.Profile;
 import org.apache.abdera.model.Base;
 import org.apache.abdera.model.Categories;
@@ -507,14 +508,14 @@
   }
 
   private static boolean needToWriteDir(Element element) {
-    BidiHelper.Direction parentdir = BidiHelper.Direction.UNSPECIFIED;
-    BidiHelper.Direction dir = BidiHelper.getDirection(element);
+    Direction parentdir = Direction.UNSPECIFIED;
+    Direction dir = BidiHelper.getDirection(element);
     if (element.getParentElement() != null) {
       Base parent = element.getParentElement();
       if (parent instanceof Element)
         parentdir = BidiHelper.getDirection((Element)parent);
     }
-    return dir != BidiHelper.Direction.UNSPECIFIED && !dir.equals(parentdir);
+    return dir != Direction.UNSPECIFIED && !dir.equals(parentdir);
   }
   
   private static void writeLanguageFields(
@@ -526,7 +527,7 @@
       jstream.writeField("lang",lang);
     }
     if (needToWriteDir(element)) {
-      BidiHelper.Direction dir = BidiHelper.getDirection(element);
+      Direction dir = BidiHelper.getDirection(element);
       jstream.writeField("dir", dir.name().toLowerCase());
     }
   }

Modified: incubator/abdera/java/trunk/extensions/main/src/main/java/org/apache/abdera/ext/bidi/BidiHelper.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/main/src/main/java/org/apache/abdera/ext/bidi/BidiHelper.java?rev=614366&r1=614365&r2=614366&view=diff
==============================================================================
--- incubator/abdera/java/trunk/extensions/main/src/main/java/org/apache/abdera/ext/bidi/BidiHelper.java	(original)
+++ incubator/abdera/java/trunk/extensions/main/src/main/java/org/apache/abdera/ext/bidi/BidiHelper.java	Tue Jan 22 14:57:51 2008
@@ -17,15 +17,14 @@
 */
 package org.apache.abdera.ext.bidi;
 
-import java.text.AttributedString;
-import java.text.Bidi;
-import java.util.Arrays;
 import java.util.Locale;
 
 import javax.xml.namespace.QName;
 
 import org.apache.abdera.i18n.rfc4646.Lang;
+import org.apache.abdera.i18n.text.Bidi;
 import org.apache.abdera.i18n.text.CharUtils;
+import org.apache.abdera.i18n.text.Bidi.Direction;
 import org.apache.abdera.model.Base;
 import org.apache.abdera.model.Document;
 import org.apache.abdera.model.Element;
@@ -72,8 +71,6 @@
   
   BidiHelper() {}
   
-  public enum Direction { UNSPECIFIED, LTR, RTL};
-  
   /**
    * Set the value of dir attribute
    */
@@ -183,40 +180,6 @@
     return guessDirectionFromLanguage(element, false);
   }
   
-  private static final String[] RTL_LANGS = {
-    "ar",
-    "dv",
-    "fa",
-    "he",
-    "ps",
-    "syr",
-    "ur",
-    "yi"};
-  
-  private static final String[] RTL_SCRIPTS = {
-    "arab","avst","hebr","hung","lydi","mand",
-    "mani","mero","mong","nkoo","orkh","phlv",
-    "phnx","samr","syrc","syre","syrj","syrn",
-    "tfng","thaa"
-  };
-  // charset encodings that one may typically expect to be RTL
-  private static final String[] RTL_ENCODINGS = {
-    "iso-8859-6", "iso-8859-6-bidi", 
-    "iso-8859-6-i", "iso-ir-127", 
-    "ecma-114", "asmo-708", "arabic", 
-    "csisolatinarabic", "windows-1256", 
-    "ibm-864", "macarabic", "macfarsi", 
-    "iso-8859-8-i", "iso-8859-8-bidi", 
-    "windows-1255", "iso-8859-8", "ibm-862", 
-    "machebrew", "asmo-449", "iso-9036", 
-    "arabic7", "iso-ir-89", "csiso89asmo449", 
-    "iso-unicode-ibm-1264", "csunicodeibm1264", 
-    "iso_8859-8:1988", "iso-ir-138", "hebrew", 
-    "csisolatinhebrew", "iso-unicode-ibm-1265", 
-    "csunicodeibm1265", "cp862", "862", 
-    "cspc862latinhebrew"
-  };
-  
   /**
    * Attempt to guess the base direction using the in-scope language.  
    * Implements the method used by Internet Explorer 7's feed view
@@ -239,15 +202,7 @@
       language != null ? 
         new Lang(language) :
         new Lang(Locale.getDefault());
-    if (lang.getScript() != null) {
-      String script = lang.getScript().getName();
-      if (Arrays.binarySearch(RTL_SCRIPTS, script.toLowerCase()) > -1)
-        return Direction.RTL;
-    }
-    String primary = lang.getLanguage().getName();
-    if (Arrays.binarySearch(RTL_LANGS, primary.toLowerCase()) > -1) 
-          return Direction.RTL;
-    return Direction.UNSPECIFIED;
+    return Bidi.guessDirectionFromLanguage(lang);
   }
 
   /**
@@ -267,13 +222,7 @@
     if (!ignoredir && hasDirection(element)) return getDirection(element);
     Document doc = element.getDocument();
     if (doc == null) return Direction.UNSPECIFIED;
-    String charset = doc.getCharset();
-    if (charset == null) return Direction.UNSPECIFIED;
-    charset = charset.replace('_', '-');
-    Arrays.sort(RTL_ENCODINGS);
-    if (Arrays.binarySearch(RTL_ENCODINGS, charset.toLowerCase()) > -1) 
-      return Direction.RTL;
-    return Direction.UNSPECIFIED;
+    return Bidi.guessDirectionFromEncoding(doc.getCharset());
   }
   
   /**
@@ -304,21 +253,8 @@
    * ignoredir to true.
    */
   public static <T extends Element>Direction guessDirectionFromTextProperties(T element, boolean ignoredir) {
-    Direction dir = Direction.UNSPECIFIED;
     if (!ignoredir && hasDirection(element)) return getDirection(element);
-    String text = element.getText();
-    if (text != null && text.length() > 0) {
-      if (text.charAt(0) == 0x200F) return Direction.RTL; // if using the unicode right-to-left mark
-      if (text.charAt(0) == 0x200E) return Direction.LTR; // if using the unicode left-to-right mark
-      int c = 0;
-      for (int n = 0; n < text.length(); n++) {
-        char ch = text.charAt(n);
-        if (Bidi.requiresBidi(new char[] {ch}, 0, 1)) c++;
-        else c--;
-      }
-      dir = (c > 0) ? Direction.RTL : Direction.LTR;
-    }
-    return dir;
+    return Bidi.guessDirectionFromTextProperties(element.getText());
   }
 
   /**
@@ -343,30 +279,22 @@
    * ignoredir to true.
    */
   public static <T extends Element>Direction guessDirectionFromJavaBidi(T element, boolean ignoredir) {
-    Direction dir = Direction.UNSPECIFIED;
     if (!ignoredir && hasDirection(element)) return getDirection(element);
-    String text = element.getText();
-    if (text != null) {
-      AttributedString s = new AttributedString(text);
-      Bidi bidi = new Bidi(s.getIterator());
-      dir = (bidi.baseIsLeftToRight()) ? Direction.LTR : Direction.RTL;
-    }
-    return dir;
+    return Bidi.guessDirectionFromJavaBidi(element.getText());
   }
   
   private static <T extends Element>boolean hasDirection(T element) {
-    boolean answer = false;
     String dir = element.getAttributeValue("dir");
     if (dir != null && dir.length() > 0)
-      answer = true;
+      return true;
     else if (dir == null) {
       // if the direction is unspecified on this element, 
       // let's see if we've inherited it
       Base parent = element.getParentElement(); 
       if (parent != null && 
           parent instanceof Element)
-        answer = hasDirection((Element)parent);
+        return hasDirection((Element)parent);
     }
-    return answer;
+    return false;
   }
 }

Modified: incubator/abdera/java/trunk/extensions/main/src/test/java/org/apache/abdera/test/ext/bidi/BidiTest.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/extensions/main/src/test/java/org/apache/abdera/test/ext/bidi/BidiTest.java?rev=614366&r1=614365&r2=614366&view=diff
==============================================================================
--- incubator/abdera/java/trunk/extensions/main/src/test/java/org/apache/abdera/test/ext/bidi/BidiTest.java	(original)
+++ incubator/abdera/java/trunk/extensions/main/src/test/java/org/apache/abdera/test/ext/bidi/BidiTest.java	Tue Jan 22 14:57:51 2008
@@ -21,7 +21,7 @@
 
 import org.apache.abdera.Abdera;
 import org.apache.abdera.ext.bidi.BidiHelper;
-import org.apache.abdera.ext.bidi.BidiHelper.Direction;
+import org.apache.abdera.i18n.text.Bidi.Direction;
 import org.apache.abdera.model.Entry;
 import org.apache.abdera.model.Feed;
 



Mime
View raw message