commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From st...@apache.org
Subject [03/45] commons-rdf git commit: COMMONSRDF-51: compare language tags in lower case
Date Fri, 10 Feb 2017 16:55:05 GMT
COMMONSRDF-51: compare language tags in lower case


Project: http://git-wip-us.apache.org/repos/asf/commons-rdf/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-rdf/commit/3064d219
Tree: http://git-wip-us.apache.org/repos/asf/commons-rdf/tree/3064d219
Diff: http://git-wip-us.apache.org/repos/asf/commons-rdf/diff/3064d219

Branch: refs/heads/COMMONSRDF-47
Commit: 3064d219606cbe42c0150d81dbf6cdbc74bf7491
Parents: 0e1969a
Author: Stian Soiland-Reyes <stain@apache.org>
Authored: Thu Jan 12 14:51:26 2017 +0000
Committer: Stian Soiland-Reyes <stain@apache.org>
Committed: Thu Jan 12 14:51:26 2017 +0000

----------------------------------------------------------------------
 .../org/apache/commons/rdf/api/Literal.java     |  27 +++--
 .../apache/commons/rdf/api/AbstractRDFTest.java | 118 ++++++++++++++++++-
 .../commons/rdf/jena/impl/JenaLiteralImpl.java  |  13 +-
 .../commons/rdf/jsonldjava/JsonLdLiteral.java   |  13 +-
 .../commons/rdf/rdf4j/impl/LiteralImpl.java     |  15 ++-
 .../apache/commons/rdf/simple/LiteralImpl.java  |   8 +-
 6 files changed, 169 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-rdf/blob/3064d219/api/src/main/java/org/apache/commons/rdf/api/Literal.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/commons/rdf/api/Literal.java b/api/src/main/java/org/apache/commons/rdf/api/Literal.java
index a434a73..ea6e3a4 100644
--- a/api/src/main/java/org/apache/commons/rdf/api/Literal.java
+++ b/api/src/main/java/org/apache/commons/rdf/api/Literal.java
@@ -18,6 +18,7 @@
 package org.apache.commons.rdf.api;
 
 import java.io.Serializable;
+import java.util.Locale;
 import java.util.Objects;
 import java.util.Optional;
 
@@ -72,7 +73,13 @@ public interface Literal extends RDFTerm {
      * <a href="http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"
      * >http://www.w3.org/1999/02/22-rdf-syntax-ns#langString</a>, this method
      * must return {@link Optional#empty()}.
-     *
+     * <p>
+     * The value space of language tags is always in lower case; although 
+     * RDF implementations MAY convert all language tags to lower case,
+     * safe comparisons of language tags should be done using
+     * {@link String#toLowerCase(Locale)} with the locale
+     * {@link Locale#ROOT}. 
+     * <p>
      * Implementation note: If your application requires {@link Serializable}
      * objects, it is best not to store an {@link Optional} in a field. It is
      * recommended to use {@link Optional#ofNullable(Object)} to create the
@@ -80,8 +87,8 @@ public interface Literal extends RDFTerm {
      *
      * @return The {@link Optional} language tag for this literal. If
      *         {@link Optional#isPresent()} returns true, the value returned by
-     *         {@link Optional#get()} must be a non-empty string conforming to
-     *         BCP47.
+     *         {@link Optional#get()} must be a non-empty language tag string
+     *         conforming to BCP47.
      * @see <a href=
      *      "http://www.w3.org/TR/rdf11-concepts/#dfn-language-tag">RDF-1.1
      *      Literal language tag</a>
@@ -89,14 +96,20 @@ public interface Literal extends RDFTerm {
     Optional<String> getLanguageTag();
 
     /**
-     * Check it this Literal is equal to another Literal. <blockquote>
+     * Check if this Literal is equal to another Literal.
+     * <blockquote>
      * <a href="http://www.w3.org/TR/rdf11-concepts/#dfn-literal-term">Literal
-     * term equality</a>: Two literals are term-equal (the same RDF literal) if
+     * term equality</a>: 
+     * Two literals are term-equal (the same RDF literal) if
      * and only if the two lexical forms, the two datatype IRIs, and the two
      * language tags (if any) compare equal, character by character. Thus, two
      * literals can have the same value without being the same RDF term.
      * </blockquote>
-     *
+     * As the value space for language tags is lower case, if they are present,
+     * they MUST be compared character by character
+     * using the equivalent of {@link String#toLowerCase(java.util.Locale)} with
+     * the locale {@link Locale#ROOT}.
+     * <p>
      * Implementations MUST also override {@link #hashCode()} so that two equal
      * Literals produce the same hash code.
      *
@@ -114,7 +127,7 @@ public interface Literal extends RDFTerm {
      * The returned hash code MUST be equal to the result of
      * {@link Objects#hash(Object...)} with the arguments
      * {@link #getLexicalForm()}, {@link #getDatatype()},
-     * {@link #getLanguageTag()}.
+     * {@link #getLanguageTag()}<code>.map(s-&gt;s.toLowerCase(Locale.ROOT))</code>.
      * <p>
      * This method MUST be implemented in conjunction with
      * {@link #equals(Object)} so that two equal Literals produce the same hash

http://git-wip-us.apache.org/repos/asf/commons-rdf/blob/3064d219/api/src/test/java/org/apache/commons/rdf/api/AbstractRDFTest.java
----------------------------------------------------------------------
diff --git a/api/src/test/java/org/apache/commons/rdf/api/AbstractRDFTest.java b/api/src/test/java/org/apache/commons/rdf/api/AbstractRDFTest.java
index 85e7b61..5efcee1 100644
--- a/api/src/test/java/org/apache/commons/rdf/api/AbstractRDFTest.java
+++ b/api/src/test/java/org/apache/commons/rdf/api/AbstractRDFTest.java
@@ -17,13 +17,13 @@
  */
 package org.apache.commons.rdf.api;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotEquals;
-import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.*;
 
+import java.util.Locale;
 import java.util.Objects;
+import java.util.Optional;
 
+import org.junit.Assume;
 import org.junit.Before;
 import org.junit.Test;
 
@@ -139,7 +139,7 @@ public abstract class AbstractRDFTest {
         assertEquals("<http://example.com/vocab#term>", term.ntriplesString());
 
         // and now for the international fun!
-
+        // make sure this file is edited/compiled as UTF-8
         final IRI latin1 = factory.createIRI("http://accént.example.com/première");
         assertEquals("http://accént.example.com/première", latin1.getIRIString());
         assertEquals("<http://accént.example.com/première>", latin1.ntriplesString());
@@ -194,6 +194,114 @@ public abstract class AbstractRDFTest {
         assertEquals("\"Herbert Van de Sompel\"@vls", vls.ntriplesString());
     }
 
+    public void testCreateLiteralLangCaseInsensitive() throws Exception {
+        // COMMONSRDF-51: Literal langtag may not be in lowercase, but
+        // must be COMPARED (aka .equals and .hashCode()) in lowercase
+        // as the language space is lower case.       
+        final Literal lower = factory.createLiteral("Hello", "en-gb"); 
+        final Literal upper = factory.createLiteral("Hello", "EN-GB"); 
+        final Literal mixed = factory.createLiteral("Hello", "en-GB");
+
+        
+        assertEquals("en-gb", lower.getLanguageTag().get());
+     
+        // NOTE: the RDF framework is free to lowercase the language tag 
+        // or leave it as-is, so we can't assume: 
+        /*
+         assertEquals("en-gb", upper.getLanguageTag().get());
+         assertEquals("en-gb", mixed.getLanguageTag().get());
+        */
+        // ..unless we do a case-insensitive comparison:
+        assertEquals("en-gb",
+                upper.getLanguageTag().get().toLowerCase(Locale.ROOT));
+        assertEquals("en-gb",
+                mixed.getLanguageTag().get().toLowerCase(Locale.ROOT));
+                
+        // However these should all be true
+        assertEquals(lower, lower);
+        assertEquals(lower, upper);
+        assertEquals(lower, mixed);
+        assertEquals(upper, lower);
+        assertEquals(upper, upper);
+        assertEquals(upper, mixed);
+        assertEquals(mixed, lower);
+        assertEquals(mixed, upper);
+        assertEquals(mixed, mixed);
+
+        // And then by java.lang.Object contract, also the hashcode:
+        assertEquals(lower.hashCode(), upper.hashCode());
+        assertEquals(lower.hashCode(), mixed.hashCode());        
+    }
+
+    @Test
+    public void testCreateLiteralLangCaseInsensitiveOther() throws Exception {
+        // COMMONSRDF-51: Ensure the Literal is using case insensitive
+        // comparison against more 'liberal' literal implementations
+        // which may not have done .toLowerCase() in their constructor
+        final Literal lower = factory.createLiteral("Hello", "en-gb"); 
+        final Literal upper = factory.createLiteral("Hello", "EN-GB"); 
+        final Literal mixed = factory.createLiteral("Hello", "en-GB");
+        
+        Literal otherLiteral = new Literal() {
+            @Override
+            public String ntriplesString() {
+                return "Hello@en-GB";
+            }
+            @Override
+            public String getLexicalForm() {
+                return "Hello";
+            }
+            @Override
+            public Optional<String> getLanguageTag() {
+                return Optional.of("en-GB");
+            }
+            @Override
+            public IRI getDatatype() {
+                return factory.createIRI("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString");
+            }
+            @Override
+            public boolean equals(Object obj) {
+                throw new RuntimeException("Wrong way comparison of literal");
+            }
+        };
+
+        assertEquals(mixed, otherLiteral);
+        assertEquals(lower, otherLiteral);
+        assertEquals(upper, otherLiteral);
+    }
+
+    @Test
+    public void testCreateLiteralLangCaseInsensitiveInTurkish() throws Exception {
+        // COMMONSRDF-51: Special test for the Turkish locale issue where
+        // "I".toLowerCase() != "i" (it yields the dotless i, U+0131, instead)
+        // See also:
+        // https://garygregory.wordpress.com/2015/11/03/java-lowercase-conversion-turkey/
+        Locale defaultLocale = Locale.getDefault();
+        try { 
+            Locale turkish = Locale.forLanguageTag("TR");
+            Locale.setDefault(turkish);
+            Assume.assumeFalse("FI".toLowerCase().equals("fi"));
+
+            final Literal lower = factory.createLiteral("moi", "fi"); 
+            final Literal upper = factory.createLiteral("moi", "FI"); 
+            final Literal mixed = factory.createLiteral("moi", "fI");
+
+            assertEquals(lower, lower);
+            assertEquals(lower, upper);
+            assertEquals(lower, mixed);
+            assertEquals(upper, lower);
+            assertEquals(upper, upper);
+            assertEquals(upper, mixed);
+            assertEquals(mixed, lower);
+            assertEquals(mixed, upper);
+            assertEquals(mixed, mixed);
+            assertEquals(lower.hashCode(), upper.hashCode());
+            assertEquals(lower.hashCode(), mixed.hashCode());        
+        } finally {
+            Locale.setDefault(defaultLocale);
+        }
+    }
+    
     @Test
     public void testCreateLiteralString() throws Exception {
         final Literal example = factory.createLiteral("Example",

http://git-wip-us.apache.org/repos/asf/commons-rdf/blob/3064d219/jena/src/main/java/org/apache/commons/rdf/jena/impl/JenaLiteralImpl.java
----------------------------------------------------------------------
diff --git a/jena/src/main/java/org/apache/commons/rdf/jena/impl/JenaLiteralImpl.java b/jena/src/main/java/org/apache/commons/rdf/jena/impl/JenaLiteralImpl.java
index faee060..8a0c9ee 100644
--- a/jena/src/main/java/org/apache/commons/rdf/jena/impl/JenaLiteralImpl.java
+++ b/jena/src/main/java/org/apache/commons/rdf/jena/impl/JenaLiteralImpl.java
@@ -18,6 +18,7 @@
 
 package org.apache.commons.rdf.jena.impl;
 
+import java.util.Locale;
 import java.util.Objects;
 import java.util.Optional;
 
@@ -37,6 +38,10 @@ class JenaLiteralImpl extends AbstractJenaRDFTerm implements JenaLiteral {
             throw new IllegalArgumentException("Node is not a literal: " + node);
         }
     }
+    
+    private static String lowerCase(String langTag) { 
+        return langTag.toLowerCase(Locale.ROOT);
+    }
 
     @Override
     public boolean equals(final Object other) {
@@ -50,8 +55,10 @@ class JenaLiteralImpl extends AbstractJenaRDFTerm implements JenaLiteral {
             return false;
         }
         final Literal literal = (Literal) other;
-        return getLexicalForm().equals(literal.getLexicalForm()) && getLanguageTag().equals(literal.getLanguageTag())
-                && getDatatype().equals(literal.getDatatype());
+        return getLexicalForm().equals(literal.getLexicalForm()) &&
+                getDatatype().equals(literal.getDatatype()) &&
+                getLanguageTag().map(JenaLiteralImpl::lowerCase).equals(
+                        literal.getLanguageTag().map(JenaLiteralImpl::lowerCase));
     }
 
     @Override
@@ -75,6 +82,6 @@ class JenaLiteralImpl extends AbstractJenaRDFTerm implements JenaLiteral {
 
     @Override
     public int hashCode() {
-        return Objects.hash(getLexicalForm(), getDatatype(), getLanguageTag());
+        return Objects.hash(getLexicalForm(), getDatatype(), getLanguageTag().map(JenaLiteralImpl::lowerCase));
     }
 }

http://git-wip-us.apache.org/repos/asf/commons-rdf/blob/3064d219/jsonld-java/src/main/java/org/apache/commons/rdf/jsonldjava/JsonLdLiteral.java
----------------------------------------------------------------------
diff --git a/jsonld-java/src/main/java/org/apache/commons/rdf/jsonldjava/JsonLdLiteral.java b/jsonld-java/src/main/java/org/apache/commons/rdf/jsonldjava/JsonLdLiteral.java
index 2414087..0d63c29 100644
--- a/jsonld-java/src/main/java/org/apache/commons/rdf/jsonldjava/JsonLdLiteral.java
+++ b/jsonld-java/src/main/java/org/apache/commons/rdf/jsonldjava/JsonLdLiteral.java
@@ -17,6 +17,7 @@
  */
 package org.apache.commons.rdf.jsonldjava;
 
+import java.util.Locale;
 import java.util.Objects;
 import java.util.Optional;
 
@@ -38,6 +39,10 @@ class JsonLdLiteralImpl extends JsonLdTermImpl implements JsonLdLiteral {
         }
     }
 
+    private static String lowerCase(String langTag) { 
+        return langTag.toLowerCase(Locale.ROOT);
+    }
+    
     @Override
     public String ntriplesString() {
         final StringBuilder sb = new StringBuilder();
@@ -76,9 +81,8 @@ class JsonLdLiteralImpl extends JsonLdTermImpl implements JsonLdLiteral {
 
     @Override
     public int hashCode() {
-        // Should be the same as
-        // Objects.hash(getLexicalForm(), getDatatype(), getLanguageTag());
-        return Objects.hash(node.getValue(), node.getDatatype(), node.getLanguage());
+        return Objects.hash(node.getValue(), node.getDatatype(), 
+                getLanguageTag().map(JsonLdLiteralImpl::lowerCase));
     }
 
     @Override
@@ -90,7 +94,8 @@ class JsonLdLiteralImpl extends JsonLdTermImpl implements JsonLdLiteral {
         if (obj instanceof Literal) {
             final Literal other = (Literal) obj;
             return getLexicalForm().equals(other.getLexicalForm()) && getDatatype().equals(other.getDatatype())
-                    && getLanguageTag().equals(other.getLanguageTag());
+                    && getLanguageTag().map(JsonLdLiteralImpl::lowerCase)
+                    .equals(other.getLanguageTag().map(JsonLdLiteralImpl::lowerCase));
         }
         return false;
 

http://git-wip-us.apache.org/repos/asf/commons-rdf/blob/3064d219/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/impl/LiteralImpl.java
----------------------------------------------------------------------
diff --git a/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/impl/LiteralImpl.java b/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/impl/LiteralImpl.java
index 16d70b8..253b645 100644
--- a/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/impl/LiteralImpl.java
+++ b/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/impl/LiteralImpl.java
@@ -17,6 +17,7 @@
  */
 package org.apache.commons.rdf.rdf4j.impl;
 
+import java.util.Locale;
 import java.util.Objects;
 import java.util.Optional;
 
@@ -32,6 +33,10 @@ final class LiteralImpl extends AbstractRDFTerm<org.eclipse.rdf4j.model.Literal>
         super(literal);
     }
 
+    private static String lowerCase(String langTag) { 
+        return langTag.toLowerCase(Locale.ROOT);
+    }
+    
     @Override
     public boolean equals(final Object obj) {
         if (obj == this) {
@@ -39,9 +44,10 @@ final class LiteralImpl extends AbstractRDFTerm<org.eclipse.rdf4j.model.Literal>
         }
         if (obj instanceof org.apache.commons.rdf.api.Literal) {
             final org.apache.commons.rdf.api.Literal other = (org.apache.commons.rdf.api.Literal) obj;
-            return getLexicalForm().equals(other.getLexicalForm()) && getDatatype().equals(other.getDatatype())
-                    && getLanguageTag().equals(other.getLanguageTag());
-
+            return getLexicalForm().equals(other.getLexicalForm()) && 
+                    getDatatype().equals(other.getDatatype()) &&
+                    getLanguageTag().map(LiteralImpl::lowerCase).equals(
+                            other.getLanguageTag().map(LiteralImpl::lowerCase));
         }
         return false;
     }
@@ -63,7 +69,8 @@ final class LiteralImpl extends AbstractRDFTerm<org.eclipse.rdf4j.model.Literal>
 
     @Override
     public int hashCode() {
-        return Objects.hash(value.getLabel(), value.getDatatype(), value.getLanguage());
+        return Objects.hash(value.getLabel(), value.getDatatype(), 
+                getLanguageTag().map(LiteralImpl::lowerCase));
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/commons-rdf/blob/3064d219/simple/src/main/java/org/apache/commons/rdf/simple/LiteralImpl.java
----------------------------------------------------------------------
diff --git a/simple/src/main/java/org/apache/commons/rdf/simple/LiteralImpl.java b/simple/src/main/java/org/apache/commons/rdf/simple/LiteralImpl.java
index 3cca4c6..763a629 100644
--- a/simple/src/main/java/org/apache/commons/rdf/simple/LiteralImpl.java
+++ b/simple/src/main/java/org/apache/commons/rdf/simple/LiteralImpl.java
@@ -52,7 +52,7 @@ final class LiteralImpl implements Literal, SimpleRDF.SimpleRDFTerm {
 
     public LiteralImpl(final String literal, final String languageTag) {
         this.lexicalForm = Objects.requireNonNull(literal);
-        this.languageTag = Objects.requireNonNull(languageTag).toLowerCase(Locale.ENGLISH);
+        this.languageTag = Objects.requireNonNull(lowerCase(languageTag));
         if (languageTag.isEmpty()) {
             // TODO: Check against
             // http://www.w3.org/TR/n-triples/#n-triples-grammar
@@ -116,6 +116,10 @@ final class LiteralImpl implements Literal, SimpleRDF.SimpleRDFTerm {
         return Objects.hash(lexicalForm, dataType, languageTag);
     }
 
+    private static String lowerCase(String langTag) { 
+        return langTag.toLowerCase(Locale.ROOT);
+    }
+    
     @Override
     public boolean equals(final Object obj) {
         if (this == obj) {
@@ -126,7 +130,7 @@ final class LiteralImpl implements Literal, SimpleRDF.SimpleRDFTerm {
         }
         final Literal literal = (Literal) obj;
         return getDatatype().equals(literal.getDatatype()) && getLexicalForm().equals(literal.getLexicalForm())
-                && getLanguageTag().equals(literal.getLanguageTag());
+                && getLanguageTag().equals(literal.getLanguageTag().map(LiteralImpl::lowerCase));
     }
 
 }


Mime
View raw message