lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From uschind...@apache.org
Subject svn commit: r893655 - in /lucene/java/trunk: CHANGES.txt common-build.xml src/java/org/apache/lucene/analysis/CharArraySet.java src/test/org/apache/lucene/analysis/TestCharArraySet.java
Date Wed, 23 Dec 2009 23:24:38 GMT
Author: uschindler
Date: Wed Dec 23 23:24:37 2009
New Revision: 893655

URL: http://svn.apache.org/viewvc?rev=893655&view=rev
Log:
LUCENE-2169, LUCENE-2179: Improvements to CharArraySet.copy() and clear() is now working

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/common-build.xml
    lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArraySet.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=893655&r1=893654&r2=893655&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Wed Dec 23 23:24:37 2009
@@ -25,6 +25,9 @@
 * LUCENE-1923: Made IndexReader.toString() produce something
   meaningful (Tim Smith via Mike McCandless)
 
+* LUCENE-2179: CharArraySet.clear() is now functional.
+  (Robert Muir, Uwe Schindler)
+
 API Changes
 
 * LUCENE-2076: Rename FSDirectory.getFile -> getDirectory.  (George
@@ -137,6 +140,9 @@
   reported by Runtime.getRuntime().availableProcessors() (Mike
   McCandless)
 
+* LUCENE-2169: Improved CharArraySet.copy(), if source set is
+  also a CharArraySet.  (Simon Willnauer via Uwe Schindler)
+
 Build
 
  * LUCENE-2124: Moved the JDK-based collation support from contrib/collation 

Modified: lucene/java/trunk/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/java/trunk/common-build.xml?rev=893655&r1=893654&r2=893655&view=diff
==============================================================================
--- lucene/java/trunk/common-build.xml (original)
+++ lucene/java/trunk/common-build.xml Wed Dec 23 23:24:37 2009
@@ -42,7 +42,7 @@
   <property name="Name" value="Lucene"/>
   <property name="dev.version" value="3.1-dev"/>
   <property name="version" value="${dev.version}"/>
-  <property name="compatibility.tag" value="lucene_3_0_back_compat_tests_20091218"/>
+  <property name="compatibility.tag" value="lucene_3_0_back_compat_tests_20091223"/>
   <property name="spec.version" value="${version}"/>	
   <property name="year" value="2000-${current.year}"/>
   <property name="final.name" value="lucene-${name}-${version}"/>

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java?rev=893655&r1=893654&r2=893655&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/CharArraySet.java Wed Dec 23 23:24:37
2009
@@ -1,5 +1,6 @@
 package org.apache.lucene.analysis;
 
+import java.util.Arrays;
 import java.util.AbstractSet;
 import java.util.Collection;
 import java.util.Collections;
@@ -143,6 +144,13 @@
     this.charUtils = CharacterUtils.getInstance(matchVersion);
     this.matchVersion = matchVersion;
   }
+  
+  /** Clears all entries in this set. This method is supported for reusing, but not {@link
Set#remove}. */
+  @Override
+  public void clear() {
+    count = 0;
+    Arrays.fill(entries, null);
+  }
 
   /** true if the <code>len</code> chars of <code>text</code> starting
at <code>off</code>
    * are in the set */
@@ -369,35 +377,49 @@
    * @param set
    *          a set to copy
    * @return a copy of the given set as a {@link CharArraySet}. If the given set
-   *         is a {@link CharArraySet} the ignoreCase property will be
+   *         is a {@link CharArraySet} the ignoreCase and matchVersion property will be
    *         preserved.
-   * @deprecated use {@link #copy(Version, Set)} instead
+   * @deprecated use {@link #copy(Version, Set)} instead.
    */
-  public static CharArraySet copy(Set<?> set) {
-    return copy(Version.LUCENE_30, set);
+  public static CharArraySet copy(final Set<?> set) {
+    if(set == EMPTY_SET)
+      return EMPTY_SET;
+    return (set instanceof CharArraySet) ? copy((CharArraySet) set) : copy(Version.LUCENE_30,
set);
   }
   
   /**
    * Returns a copy of the given set as a {@link CharArraySet}. If the given set
    * is a {@link CharArraySet} the ignoreCase property will be preserved.
+   * <p>
+   * <b>Note:</b> If you intend to create a copy of another {@link CharArraySet}
where
+   * the {@link Version} of the source set differs from its copy
+   * {@link #CharArraySet(Version, Collection, boolean)} should be used instead.
+   * The {@link #copy(Version, Set)} will preserve the {@link Version} of the
+   * source set it is an instance of {@link CharArraySet}.
+   * </p>
    * 
    * @param matchVersion
    *          compatibility match version see <a href="#version">Version
-   *          note</a> above for details.
+   *          note</a> above for details. This argument will be ignored if the
+   *          given set is a {@link CharArraySet}.
    * @param set
    *          a set to copy
    * @return a copy of the given set as a {@link CharArraySet}. If the given set
-   *         is a {@link CharArraySet} the ignoreCase property will be
-   *         preserved.
+   *         is a {@link CharArraySet} the ignoreCase property as well as the
+   *         matchVersion will be of the given set will be preserved.
    */
-  public static CharArraySet copy(Version matchVersion, Set<?> set) {
-    if (set == null)
-      throw new NullPointerException("Given set is null");
+  public static CharArraySet copy(final Version matchVersion, final Set<?> set) {
     if(set == EMPTY_SET)
       return EMPTY_SET;
-    final boolean ignoreCase = set instanceof CharArraySet ? ((CharArraySet) set).ignoreCase
-        : false;
-    return new CharArraySet(matchVersion, set, ignoreCase);
+    if(set instanceof CharArraySet) {
+      final CharArraySet source = (CharArraySet) set;
+      // use fast path instead of iterating all values
+      // this is even on very small sets ~10 times faster than iterating
+      final char[][] entries = new char[source.entries.length][];
+      System.arraycopy(source.entries, 0, entries, 0, entries.length);
+      return new CharArraySet(source.matchVersion, entries, source.ignoreCase, source.count);
+    }
+    return new CharArraySet(matchVersion, set, false);
   }
   
 
@@ -465,6 +487,11 @@
     }
 
     @Override
+    public void clear() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
     public boolean add(Object o){
       throw new UnsupportedOperationException();
     }

Modified: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArraySet.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArraySet.java?rev=893655&r1=893654&r2=893655&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArraySet.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestCharArraySet.java Wed Dec 23
23:24:37 2009
@@ -17,11 +17,16 @@
  * limitations under the License.
  */
 
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
 
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.Version;
 
+
 public class TestCharArraySet extends LuceneTestCase {
   
   static final String[] TEST_STOP_WORDS = {
@@ -61,24 +66,29 @@
     Integer val = Integer.valueOf(1);
     set.add(val);
     assertTrue(set.contains(val));
-    assertTrue(set.contains(Integer.valueOf(1)));
+    assertTrue(set.contains(new Integer(1))); // another integer
+    assertTrue(set.contains("1"));
+    assertTrue(set.contains(new char[]{'1'}));
     // test unmodifiable
     set = CharArraySet.unmodifiableSet(set);
     assertTrue(set.contains(val));
-    assertTrue(set.contains(Integer.valueOf(1)));
+    assertTrue(set.contains(new Integer(1))); // another integer
+    assertTrue(set.contains("1"));
+    assertTrue(set.contains(new char[]{'1'}));
   }
   
   public void testClear(){
     CharArraySet set=new CharArraySet(Version.LUCENE_CURRENT, 10,true);
     set.addAll(Arrays.asList(TEST_STOP_WORDS));
     assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
-    try{
-      set.clear();
-      fail("remove is not supported");
-    }catch (UnsupportedOperationException e) {
-      // expected
-      assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
-    }
+    set.clear();
+    assertEquals("not empty", 0, set.size());
+    for(int i=0;i<TEST_STOP_WORDS.length;i++)
+      assertFalse(set.contains(TEST_STOP_WORDS[i]));
+    set.addAll(Arrays.asList(TEST_STOP_WORDS));
+    assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
+    for(int i=0;i<TEST_STOP_WORDS.length;i++)
+      assertTrue(set.contains(TEST_STOP_WORDS[i]));
   }
   
   public void testModifyOnUnmodifiable(){
@@ -165,9 +175,16 @@
   public void testUnmodifiableSet(){
     CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 10,true);
     set.addAll(Arrays.asList(TEST_STOP_WORDS));
+    set.add(Integer.valueOf(1));
     final int size = set.size();
     set = CharArraySet.unmodifiableSet(set);
     assertEquals("Set size changed due to unmodifiableSet call" , size, set.size());
+    for (String stopword : TEST_STOP_WORDS) {
+      assertTrue(set.contains(stopword));
+    }
+    assertTrue(set.contains(Integer.valueOf(1)));
+    assertTrue(set.contains("1"));
+    assertTrue(set.contains(new char[]{'1'}));
     
     try{
       CharArraySet.unmodifiableSet(null);
@@ -301,4 +318,88 @@
           .contains(lowerArr[i]));
     }
   }
+  
+  /**
+   * Test the static #copy() function with a CharArraySet as a source
+   */
+  public void testCopyCharArraySet() {
+    CharArraySet setIngoreCase = new CharArraySet(Version.LUCENE_CURRENT, 10, true);
+    CharArraySet setCaseSensitive = new CharArraySet(Version.LUCENE_CURRENT, 10, false);
+
+    List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
+    List<String> stopwordsUpper = new ArrayList<String>();
+    for (String string : stopwords) {
+      stopwordsUpper.add(string.toUpperCase());
+    }
+    setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
+    setIngoreCase.add(Integer.valueOf(1));
+    setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS));
+    setCaseSensitive.add(Integer.valueOf(1));
+
+    CharArraySet copy = CharArraySet.copy(Version.LUCENE_CURRENT, setIngoreCase);
+    CharArraySet copyCaseSens = CharArraySet.copy(Version.LUCENE_CURRENT, setCaseSensitive);
+
+    assertEquals(setIngoreCase.size(), copy.size());
+    assertEquals(setCaseSensitive.size(), copy.size());
+
+    assertTrue(copy.containsAll(stopwords));
+    assertTrue(copy.containsAll(stopwordsUpper));
+    assertTrue(copyCaseSens.containsAll(stopwords));
+    for (String string : stopwordsUpper) {
+      assertFalse(copyCaseSens.contains(string));
+    }
+    // test adding terms to the copy
+    List<String> newWords = new ArrayList<String>();
+    for (String string : stopwords) {
+      newWords.add(string+"_1");
+    }
+    copy.addAll(newWords);
+    
+    assertTrue(copy.containsAll(stopwords));
+    assertTrue(copy.containsAll(stopwordsUpper));
+    assertTrue(copy.containsAll(newWords));
+    // new added terms are not in the source set
+    for (String string : newWords) {
+      assertFalse(setIngoreCase.contains(string));  
+      assertFalse(setCaseSensitive.contains(string));  
+
+    }
+  }
+  
+  /**
+   * Test the static #copy() function with a JDK {@link Set} as a source
+   */
+  public void testCopyJDKSet() {
+    Set<String> set = new HashSet<String>();
+
+    List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
+    List<String> stopwordsUpper = new ArrayList<String>();
+    for (String string : stopwords) {
+      stopwordsUpper.add(string.toUpperCase());
+    }
+    set.addAll(Arrays.asList(TEST_STOP_WORDS));
+
+    CharArraySet copy = CharArraySet.copy(Version.LUCENE_CURRENT, set);
+
+    assertEquals(set.size(), copy.size());
+    assertEquals(set.size(), copy.size());
+
+    assertTrue(copy.containsAll(stopwords));
+    for (String string : stopwordsUpper) {
+      assertFalse(copy.contains(string));
+    }
+    
+    List<String> newWords = new ArrayList<String>();
+    for (String string : stopwords) {
+      newWords.add(string+"_1");
+    }
+    copy.addAll(newWords);
+    
+    assertTrue(copy.containsAll(stopwords));
+    assertTrue(copy.containsAll(newWords));
+    // new added terms are not in the source set
+    for (String string : newWords) {
+      assertFalse(set.contains(string));  
+    }
+  }
 }



Mime
View raw message