lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From yo...@apache.org
Subject svn commit: r801344 - in /lucene/java/trunk: ./ contrib/highlighter/src/java/org/apache/lucene/search/highlight/ contrib/instantiated/src/java/org/apache/lucene/store/instantiated/ contrib/miscellaneous/src/java/org/apache/lucene/index/ contrib/miscell...
Date Wed, 05 Aug 2009 18:05:07 GMT
Author: yonik
Date: Wed Aug  5 18:05:06 2009
New Revision: 801344

URL: http://svn.apache.org/viewvc?rev=801344&view=rev
Log:
LUCENE-1607: String.intern() cache StringHelper.intern()

Added:
    lucene/java/trunk/src/java/org/apache/lucene/util/SimpleStringInterner.java   (with props)
    lucene/java/trunk/src/java/org/apache/lucene/util/StringInterner.java   (with props)
    lucene/java/trunk/src/test/org/apache/lucene/util/TestStringIntern.java   (with props)
Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java
    lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
    lucene/java/trunk/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
    lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/index/FieldNormModifier.java
    lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/index/TermVectorAccessor.java
    lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/misc/LengthNormModifier.java
    lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
    lucene/java/trunk/src/java/org/apache/lucene/document/AbstractField.java
    lucene/java/trunk/src/java/org/apache/lucene/document/Field.java
    lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfos.java
    lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java
    lucene/java/trunk/src/java/org/apache/lucene/index/Term.java
    lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheImpl.java
    lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeQuery.java
    lucene/java/trunk/src/java/org/apache/lucene/search/SortField.java
    lucene/java/trunk/src/java/org/apache/lucene/search/TermRangeTermEnum.java
    lucene/java/trunk/src/java/org/apache/lucene/util/StringHelper.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestStressIndexing2.java
    lucene/java/trunk/src/test/org/apache/lucene/search/SampleComparable.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=801344&r1=801343&r2=801344&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Wed Aug  5 18:05:06 2009
@@ -717,6 +717,12 @@
     optional scorers, or not enough optional scorers to satisfy
     minShouldMatch).  (Shai Erera via Mike McCandless)
 
+12. LUCENE-1607: To speed up string interning for commonly used
+    strings, the StringHelper.intern() interface was added with a
+    default implementation that uses a lockless cache.
+    (Earwin Burrfoot, yonik)
+    
+
 Documentation
 
 Build

Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java?rev=801344&r1=801343&r2=801344&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java Wed Aug  5 18:05:06 2009
@@ -11,6 +11,7 @@
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.util.StringHelper;
 
 /**
  * {@link Scorer} implementation which scores text fragments by the number of
@@ -67,7 +68,7 @@
    */
   public QueryScorer(Query query, IndexReader reader, String field, String defaultField)
     throws IOException {
-    this.defaultField = defaultField.intern();
+    this.defaultField = StringHelper.intern(defaultField);
     init(query, field, reader, true);
   }
 
@@ -75,7 +76,7 @@
    * @param defaultField - The default field for queries with the field name unspecified
    */
   public QueryScorer(Query query, String field, String defaultField) {
-    this.defaultField = defaultField.intern();
+    this.defaultField = StringHelper.intern(defaultField);
     init(query, field, null, true);
   }
 

Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java?rev=801344&r1=801343&r2=801344&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java Wed Aug  5 18:05:06 2009
@@ -26,6 +26,7 @@
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.FilteredQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.util.StringHelper;
 
 /**
  * Utility class used to extract the terms used in a query, plus any weights.
@@ -95,7 +96,7 @@
 		HashSet terms=new HashSet();
 		if(fieldName!=null)
 		{
-		    fieldName=fieldName.intern();
+		    fieldName= StringHelper.intern(fieldName);
 		}
 		getTerms(query,terms,prohibited,fieldName);
 		return (WeightedTerm[]) terms.toArray(new WeightedTerm[0]);

Modified: lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java?rev=801344&r1=801343&r2=801344&view=diff
==============================================================================
--- lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (original)
+++ lucene/java/trunk/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java Wed Aug  5 18:05:06 2009
@@ -50,6 +50,7 @@
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.util.StringHelper;
 
 /**
  * Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether Terms from the query are contained in a supplied TokenStream.
@@ -68,7 +69,7 @@
 
   public WeightedSpanTermExtractor(String defaultField) {
     if (defaultField != null) {
-      this.defaultField = defaultField.intern();
+      this.defaultField = StringHelper.intern(defaultField);
     }
   }
 
@@ -362,7 +363,7 @@
   public Map getWeightedSpanTerms(Query query, TokenStream tokenStream,
       String fieldName) throws IOException {
     if (fieldName != null) {
-      this.fieldName = fieldName.intern();
+      this.fieldName = StringHelper.intern(fieldName);
     }
 
     Map terms = new PositionCheckingMap();

Modified: lucene/java/trunk/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java?rev=801344&r1=801343&r2=801344&view=diff
==============================================================================
--- lucene/java/trunk/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java (original)
+++ lucene/java/trunk/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java Wed Aug  5 18:05:06 2009
@@ -43,6 +43,7 @@
 import org.apache.lucene.index.TermVectorOffsetInfo;
 import org.apache.lucene.search.DefaultSimilarity;
 import org.apache.lucene.search.Similarity;
+import org.apache.lucene.util.StringHelper;
 
 /**
  * This class, similar to {@link org.apache.lucene.index.IndexWriter}, has no locking mechanism.
@@ -458,7 +459,7 @@
       FieldSetting fieldSetting = fieldSettingsByFieldName.get(field.name());
       if (fieldSetting == null) {
         fieldSetting = new FieldSetting();
-        fieldSetting.fieldName = field.name().intern();
+        fieldSetting.fieldName = StringHelper.intern(field.name());
         fieldSettingsByFieldName.put(fieldSetting.fieldName, fieldSetting);
         fieldNameBuffer.add(fieldSetting.fieldName);
       }

Modified: lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/index/FieldNormModifier.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/index/FieldNormModifier.java?rev=801344&r1=801343&r2=801344&view=diff
==============================================================================
--- lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/index/FieldNormModifier.java (original)
+++ lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/index/FieldNormModifier.java Wed Aug  5 18:05:06 2009
@@ -23,6 +23,7 @@
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.StringHelper;
 
 /**
  * Given a directory and a list of fields, updates the fieldNorms in place for every document.
@@ -103,7 +104,7 @@
    * @param field the field whose norms should be reset
    */
   public void reSetNorms(String field) throws IOException {
-    String fieldName = field.intern();
+    String fieldName = StringHelper.intern(field);
     int[] termCounts = new int[0];
     byte[] fakeNorms = new byte[0];
     

Modified: lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/index/TermVectorAccessor.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/index/TermVectorAccessor.java?rev=801344&r1=801343&r2=801344&view=diff
==============================================================================
--- lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/index/TermVectorAccessor.java (original)
+++ lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/index/TermVectorAccessor.java Wed Aug  5 18:05:06 2009
@@ -1,5 +1,7 @@
 package org.apache.lucene.index;
 
+import org.apache.lucene.util.StringHelper;
+
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
@@ -52,7 +54,7 @@
    */
   public void accept(IndexReader indexReader, int documentNumber, String fieldName, TermVectorMapper mapper) throws IOException {
 
-    fieldName = fieldName.intern();
+    fieldName = StringHelper.intern(fieldName);
 
     decoratedMapper.decorated = mapper;
     decoratedMapper.termVectorStored = false;

Modified: lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/misc/LengthNormModifier.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/misc/LengthNormModifier.java?rev=801344&r1=801343&r2=801344&view=diff
==============================================================================
--- lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/misc/LengthNormModifier.java (original)
+++ lucene/java/trunk/contrib/miscellaneous/src/java/org/apache/lucene/misc/LengthNormModifier.java Wed Aug  5 18:05:06 2009
@@ -23,6 +23,7 @@
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.StringHelper;
 
 import java.io.File;
 import java.io.IOException;
@@ -105,7 +106,7 @@
    * @param field the field whose norms should be reset
    */
   public void reSetNorms(String field) throws IOException {
-    String fieldName = field.intern();
+    String fieldName = StringHelper.intern(field);
     int[] termCounts = new int[0];
     
     IndexReader reader = null;

Modified: lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java?rev=801344&r1=801343&r2=801344&view=diff
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java (original)
+++ lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java Wed Aug  5 18:05:06 2009
@@ -23,6 +23,7 @@
 
 import org.apache.lucene.index.TermEnum;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.util.StringHelper;
 
 import java.io.*;
 
@@ -42,7 +43,7 @@
 
   public LuceneDictionary(IndexReader reader, String field) {
     this.reader = reader;
-    this.field = field.intern();
+    this.field = StringHelper.intern(field);
   }
 
   public final Iterator getWordsIterator() {

Modified: lucene/java/trunk/src/java/org/apache/lucene/document/AbstractField.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/document/AbstractField.java?rev=801344&r1=801343&r2=801344&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/document/AbstractField.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/document/AbstractField.java Wed Aug  5 18:05:06 2009
@@ -17,7 +17,8 @@
 
 import org.apache.lucene.search.PhraseQuery; // for javadocs
 import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.analysis.TokenStream; // for javadocs
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.util.StringHelper; // for javadocs
 
 
 /**
@@ -54,7 +55,7 @@
   protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) {
     if (name == null)
       throw new NullPointerException("name cannot be null");
-    this.name = name.intern();        // field names are interned
+    this.name = StringHelper.intern(name);        // field names are interned
 
     if (store == Field.Store.YES){
       this.isStored = true;

Modified: lucene/java/trunk/src/java/org/apache/lucene/document/Field.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/document/Field.java?rev=801344&r1=801343&r2=801344&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/document/Field.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/document/Field.java Wed Aug  5 18:05:06 2009
@@ -20,6 +20,7 @@
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.index.IndexWriter;   // for javadoc
 import org.apache.lucene.util.Parameter;
+import org.apache.lucene.util.StringHelper;
 
 import java.io.Reader;
 import java.io.Serializable;
@@ -337,7 +338,7 @@
          + "for a field that is not indexed");
           
     if (internName) // field names are optionally interned
-      name = name.intern();        
+      name = StringHelper.intern(name);
     
     this.name = name; 
     
@@ -417,7 +418,7 @@
     if (reader == null)
       throw new NullPointerException("reader cannot be null");
     
-    this.name = name.intern();        // field names are interned
+    this.name = StringHelper.intern(name);        // field names are interned
     this.fieldsData = reader;
     
     this.isStored = false;
@@ -464,7 +465,7 @@
     if (tokenStream == null)
       throw new NullPointerException("tokenStream cannot be null");
     
-    this.name = name.intern();        // field names are interned
+    this.name = StringHelper.intern(name);        // field names are interned
     this.fieldsData = null;
     this.tokenStream = tokenStream;
 
@@ -509,7 +510,7 @@
     if (value == null)
       throw new IllegalArgumentException("value cannot be null");
     
-    this.name = name.intern();
+    this.name = StringHelper.intern(name);        // field names are interned
     fieldsData = value;
     
     if (store == Store.YES) {

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfos.java?rev=801344&r1=801343&r2=801344&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldInfos.java Wed Aug  5 18:05:06 2009
@@ -22,6 +22,7 @@
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.StringHelper;
 
 import java.io.IOException;
 import java.util.*;
@@ -248,7 +249,7 @@
   private FieldInfo addInternal(String name, boolean isIndexed,
                                 boolean storeTermVector, boolean storePositionWithTermVector,
                                 boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
-    name = name.intern();
+    name = StringHelper.intern(name);
     FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector,
                                  storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
     byNumber.add(fi);
@@ -352,7 +353,7 @@
     }
 
     for (int i = 0; i < size; i++) {
-      String name = input.readString().intern();
+      String name = StringHelper.intern(input.readString());
       byte bits = input.readByte();
       boolean isIndexed = (bits & IS_INDEXED) != 0;
       boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java?rev=801344&r1=801343&r2=801344&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java Wed Aug  5 18:05:06 2009
@@ -24,6 +24,7 @@
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.BufferedIndexInput;
 import org.apache.lucene.util.CloseableThreadLocal;
+import org.apache.lucene.util.StringHelper;
 
 import java.io.IOException;
 import java.io.Reader;
@@ -643,7 +644,7 @@
 
       this.isTokenized = tokenize;
 
-      this.name = fi.name.intern();
+      this.name = StringHelper.intern(fi.name);
       this.isIndexed = fi.isIndexed;
       this.omitNorms = fi.omitNorms;          
       this.omitTermFreqAndPositions = fi.omitTermFreqAndPositions;

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/Term.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/Term.java?rev=801344&r1=801343&r2=801344&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/Term.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/Term.java Wed Aug  5 18:05:06 2009
@@ -1,5 +1,7 @@
 package org.apache.lucene.index;
 
+import org.apache.lucene.util.StringHelper;
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -33,7 +35,8 @@
    * <p>Note that a null field or null text value results in undefined
    * behavior for most Lucene APIs that accept a Term parameter. */
   public Term(String fld, String txt) {
-    this(fld, txt, true);
+    field = StringHelper.intern(fld);
+    text = txt;
   }
 
   /** Constructs a Term with the given field and empty text.
@@ -47,8 +50,8 @@
   }
 
   Term(String fld, String txt, boolean intern) {
-    field = intern ? fld.intern() : fld;	  // field names are interned
-    text = txt;					  // unless already known to be
+    field = intern ? StringHelper.intern(fld) : fld;	  // field names are interned
+    text = txt;					          // unless already known to be
   }
 
   /** Returns the field of this term, an interned string.   The field indicates
@@ -130,6 +133,6 @@
     throws java.io.IOException, ClassNotFoundException
   {
       in.defaultReadObject();
-      field = field.intern();
+      field = StringHelper.intern(field);
   }
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheImpl.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheImpl.java?rev=801344&r1=801343&r2=801344&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheImpl.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheImpl.java Wed Aug  5 18:05:06 2009
@@ -21,6 +21,7 @@
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermDocs;
 import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.util.StringHelper;
 
 import java.io.IOException;
 import java.util.HashMap;
@@ -103,7 +104,7 @@
 
     /** Creates one of these objects. */
     Entry (String field, int type, Locale locale) {
-      this.field = field.intern();
+      this.field = StringHelper.intern(field);
       this.type = type;
       this.custom = null;
       this.locale = locale;
@@ -111,7 +112,7 @@
 
     /** Creates one of these objects for a custom comparator/parser. */
     Entry (String field, Object custom) {
-      this.field = field.intern();
+      this.field = StringHelper.intern(field);
       this.type = SortField.CUSTOM;
       this.custom = custom;
       this.locale = null;
@@ -119,7 +120,7 @@
 
     /** Creates one of these objects for a custom type with parser, needed by FieldSortedHitQueue. */
     Entry (String field, int type, Parser parser) {
-      this.field = field.intern();
+      this.field = StringHelper.intern(field);
       this.type = type;
       this.custom = parser;
       this.locale = null;
@@ -463,7 +464,7 @@
 
     protected Object createValue(IndexReader reader, Object fieldKey)
         throws IOException {
-      String field = ((String) fieldKey).intern();
+      String field = StringHelper.intern((String) fieldKey);
       final String[] retArray = new String[reader.maxDoc()];
       TermDocs termDocs = reader.termDocs();
       TermEnum termEnum = reader.terms (new Term (field));
@@ -495,7 +496,7 @@
 
     protected Object createValue(IndexReader reader, Object fieldKey)
         throws IOException {
-      String field = ((String) fieldKey).intern();
+      String field = StringHelper.intern((String) fieldKey);
       final int[] retArray = new int[reader.maxDoc()];
       String[] mterms = new String[reader.maxDoc()+1];
       TermDocs termDocs = reader.termDocs();
@@ -574,7 +575,7 @@
 
     protected Object createValue(IndexReader reader, Object fieldKey)
         throws IOException {
-      String field = ((String)fieldKey).intern();
+      String field = StringHelper.intern((String) fieldKey);
       TermEnum enumerator = reader.terms (new Term (field));
       try {
         Term term = enumerator.term();

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeQuery.java?rev=801344&r1=801343&r2=801344&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeQuery.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/NumericRangeQuery.java Wed Aug  5 18:05:06 2009
@@ -24,6 +24,7 @@
 import org.apache.lucene.document.NumericField; // for javadocs
 import org.apache.lucene.util.NumericUtils;
 import org.apache.lucene.util.ToStringUtils;
+import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 
@@ -151,7 +152,7 @@
     assert (valSize == 32 || valSize == 64);
     if (precisionStep < 1)
       throw new IllegalArgumentException("precisionStep must be >=1");
-    this.field = field.intern();
+    this.field = StringHelper.intern(field);
     this.precisionStep = precisionStep;
     this.valSize = valSize;
     this.min = min;

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/SortField.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/SortField.java?rev=801344&r1=801343&r2=801344&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/SortField.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/SortField.java Wed Aug  5 18:05:06 2009
@@ -25,6 +25,7 @@
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.util.StringHelper;
 
 /**
  * Stores information about how to sort documents by terms in an individual
@@ -272,7 +273,7 @@
       if (type != SCORE && type != DOC)
         throw new IllegalArgumentException("field can only be null when type is SCORE or DOC");
     } else {
-      this.field = field.intern();
+      this.field = StringHelper.intern(field);
     }
   }
 
@@ -510,7 +511,7 @@
    * @deprecated
    */
   static int detectFieldType(IndexReader reader, String fieldKey) throws IOException {
-    String field = fieldKey.intern();
+    String field = StringHelper.intern(fieldKey);
     TermEnum enumerator = reader.terms(new Term(field));
     try {
       Term term = enumerator.term();

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/TermRangeTermEnum.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/TermRangeTermEnum.java?rev=801344&r1=801343&r2=801344&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/TermRangeTermEnum.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/TermRangeTermEnum.java Wed Aug  5 18:05:06 2009
@@ -22,6 +22,7 @@
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.util.StringHelper;
 
 /**
  * Subclass of FilteredTermEnum for enumerating all terms that match the
@@ -75,7 +76,7 @@
     this.lowerTermText = lowerTermText;
     this.includeLower = includeLower;
     this.includeUpper = includeUpper;
-    this.field = field.intern();
+    this.field = StringHelper.intern(field);
     
     // do a little bit of normalization...
     // open ended range queries should always be inclusive.

Added: lucene/java/trunk/src/java/org/apache/lucene/util/SimpleStringInterner.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/SimpleStringInterner.java?rev=801344&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/util/SimpleStringInterner.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/util/SimpleStringInterner.java Wed Aug  5 18:05:06 2009
@@ -0,0 +1,82 @@
+package org.apache.lucene.util;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/**
+ * Simple lockless and memory barrier free String intern cache that is guaranteed
+ * to return the same String instance as String.intern() does.
+ */
+public class SimpleStringInterner extends StringInterner {
+
+  private static class Entry {
+    final private String str;
+    final private int hash;
+    private Entry next;
+    private Entry(String str, int hash, Entry next) {
+      this.str = str;
+      this.hash = hash;
+      this.next = next;
+    }
+  }
+
+  private final Entry[] cache;
+  private final int maxChainLength;
+
+  /**
+   * @param tableSize  Size of the hash table, should be a power of two.
+   * @param maxChainLength  Maximum length of each bucket, after which the oldest item inserted is dropped.
+   */
+  public SimpleStringInterner(int tableSize, int maxChainLength) {
+    cache = new Entry[Math.max(1,BitUtil.nextHighestPowerOfTwo(tableSize))];
+    this.maxChainLength = Math.max(2,maxChainLength);
+  }
+
+  // @Override
+  public String intern(String s) {
+    int h = s.hashCode();
+    // In the future, it may be worth augmenting the string hash
+    // if the lower bits need better distribution.
+    int slot = h & (cache.length-1);
+
+    Entry first = this.cache[slot];
+    Entry nextToLast = null;
+
+    int chainLength = 0;
+
+    for(Entry e=first; e!=null; e=e.next) {
+      if (e.hash == h && (e.str == s || e.str.compareTo(s)==0)) {
+      // if (e.str == s || (e.hash == h && e.str.compareTo(s)==0)) {
+        return e.str;
+      }
+
+      chainLength++;
+      if (e.next != null) {
+        nextToLast = e;
+      }
+    }
+
+    // insertion-order cache: add new entry at head
+    s = s.intern();
+    this.cache[slot] = new Entry(s, h, first);
+    if (chainLength >= maxChainLength) {
+      // prune last entry
+      nextToLast.next = null;
+    }
+    return s;
+  }
+}
\ No newline at end of file

Propchange: lucene/java/trunk/src/java/org/apache/lucene/util/SimpleStringInterner.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/trunk/src/java/org/apache/lucene/util/SimpleStringInterner.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: lucene/java/trunk/src/java/org/apache/lucene/util/SimpleStringInterner.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Modified: lucene/java/trunk/src/java/org/apache/lucene/util/StringHelper.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/StringHelper.java?rev=801344&r1=801343&r2=801344&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/util/StringHelper.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/util/StringHelper.java Wed Aug  5 18:05:06 2009
@@ -24,6 +24,17 @@
  * $Id$
  */
 public abstract class StringHelper {
+  /**
+   * Expert:
+   * The StringInterner implementation used by Lucene.
+   * This shouldn't be changed to an incompatible implementation after other Lucene APIs have been used.
+   */
+  public static StringInterner interner = new SimpleStringInterner(1024,8);
+
+  /**
+   * Interns <code>s</code> through the currently configured {@link #interner},
+   * so that all equal strings are represented by the same String object.
+   */
+  public static String intern(String s) {
+    final StringInterner active = interner;
+    return active.intern(s);
+  }
 
   /**
    * Compares two byte[] arrays, element by element, and returns the

Added: lucene/java/trunk/src/java/org/apache/lucene/util/StringInterner.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/StringInterner.java?rev=801344&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/util/StringInterner.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/util/StringInterner.java Wed Aug  5 18:05:06 2009
@@ -0,0 +1,37 @@
+package org.apache.lucene.util;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Base interner: maps every group of equal strings to one shared String
+ * object.  Subclasses are required to honor the same contract, although the
+ * object they hand back need not be the one String.intern() would return.
+ *
+ * This base class itself simply delegates to String.intern().
+ */
+public class StringInterner {
+  /** Returns a single object instance for each equal string. */
+  public String intern(String s) {
+    final String canonical = s.intern();
+    return canonical;
+  }
+
+  /**
+   * Returns a single object instance for each equal string, where the string
+   * is given as <code>len</code> chars of <code>arr</code> starting at
+   * <code>offset</code>.  The chars are copied into a new String which is
+   * then passed to {@link #intern(String)}.
+   */
+  public String intern(char[] arr, int offset, int len) {
+    final String materialized = new String(arr, offset, len);
+    return intern(materialized);
+  }
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/util/StringInterner.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/trunk/src/java/org/apache/lucene/util/StringInterner.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: lucene/java/trunk/src/java/org/apache/lucene/util/StringInterner.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestStressIndexing2.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestStressIndexing2.java?rev=801344&r1=801343&r2=801344&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestStressIndexing2.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestStressIndexing2.java Wed Aug  5 18:05:06 2009
@@ -20,6 +20,7 @@
 
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.search.TermQuery;
 
 import java.util.*;
@@ -266,7 +267,7 @@
     TermDocs termDocs2 = r2.termDocs();
 
     // create mapping from id2 space to id2 based on idField
-    idField = idField.intern();
+    idField = StringHelper.intern(idField);
     TermEnum termEnum = r1.terms (new Term (idField, ""));
     do {
       Term term = termEnum.term();

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/SampleComparable.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/SampleComparable.java?rev=801344&r1=801343&r2=801344&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/SampleComparable.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/SampleComparable.java Wed Aug  5 18:05:06 2009
@@ -21,6 +21,7 @@
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermDocs;
 import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.util.StringHelper;
 
 import java.io.IOException;
 import java.io.Serializable;
@@ -69,7 +70,7 @@
     return new SortComparatorSource () {
       public ScoreDocComparator newComparator (final IndexReader reader, String fieldname)
       throws IOException {
-        final String field = fieldname.intern ();
+        final String field = StringHelper.intern(fieldname);
         final TermEnum enumerator = reader.terms (new Term (fieldname, ""));
         try {
           return new ScoreDocComparator () {
@@ -104,7 +105,7 @@
        */
       protected Comparable[] fillCache (IndexReader reader, TermEnum enumerator, String fieldname)
       throws IOException {
-        final String field = fieldname.intern ();
+        final String field = StringHelper.intern(fieldname);
         Comparable[] retArray = new Comparable[reader.maxDoc ()];
         if (retArray.length > 0) {
           TermDocs termDocs = reader.termDocs ();

Added: lucene/java/trunk/src/test/org/apache/lucene/util/TestStringIntern.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/util/TestStringIntern.java?rev=801344&view=auto
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/util/TestStringIntern.java (added)
+++ lucene/java/trunk/src/test/org/apache/lucene/util/TestStringIntern.java Wed Aug  5 18:05:06 2009
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util;
+
+import junit.framework.TestCase;
+
+import java.util.Random;
+
+/**
+ * Multi-threaded stress test for StringHelper.intern(): many threads intern
+ * randomly chosen strings (sometimes fresh copies) and check that equal
+ * strings always come back as the very same object.
+ */
+public class TestStringIntern extends LuceneTestCase {
+  String[] testStrings;
+  String[] internedStrings;
+  Random r = newRandom();
+
+  /** Builds a random string of <code>len</code> lowercase letters 'a'..'z'. */
+  private String randStr(int len) {
+    char[] arr = new char[len];
+    for (int i=0; i<len; i++) {
+      arr[i] = (char)('a' + r.nextInt(26));
+    }
+    return new String(arr);
+  }
+
+  /**
+   * Fills testStrings with <code>sz</code> random strings of length 3..10 and
+   * allocates an empty internedStrings array of the same size.
+   */
+  private void makeStrings(int sz) {
+    testStrings = new String[sz];
+    internedStrings = new String[sz];
+    for (int i=0; i<sz; i++) {
+      testStrings[i] = randStr(r.nextInt(8)+3);
+    }
+  }
+
+  /**
+   * Interns random test strings from 20 threads and fails if any thread ever
+   * sees two different objects for the same test string, either compared with
+   * its own earlier result or with the result last published by any thread.
+   */
+  public void testStringIntern() throws InterruptedException {
+    makeStrings(1024*10);  // something greater than the capacity of the default cache size
+    // makeStrings(100);  // realistic for perf testing
+    int nThreads = 20;
+    // final int iter=100000;
+    final int iter=1000000;
+    final boolean newStrings=true;
+
+    // try native intern
+    // StringHelper.interner = new StringInterner();
+
+    // A failure thrown inside a worker thread only terminates that thread;
+    // the JUnit runner never sees it and the test would still pass.  Workers
+    // therefore record the first mismatch here and the main thread reports it
+    // after joining them (Thread.join() makes the workers' writes visible).
+    final String[] failure = new String[1];
+
+    Thread[] threads = new Thread[nThreads];
+    for (int i=0; i<nThreads; i++) {
+      final int seed = i;
+      threads[i] = new Thread() {
+        public void run() {
+          Random rand = new Random(seed);
+          String[] myInterned = new String[testStrings.length];
+          for (int j=0; j<iter; j++) {
+            int idx = rand.nextInt(testStrings.length);
+            String s = testStrings[idx];
+            if (newStrings && rand.nextBoolean()) s = new String(s); // make a copy half of the time
+            String interned = StringHelper.intern(s);
+            String prevInterned = myInterned[idx];
+            String otherInterned = internedStrings[idx];
+
+            // test against other threads
+            if (otherInterned != null && otherInterned != interned) {
+              failure[0] = "thread " + seed + ": intern(\"" + s
+                  + "\") returned a different object than another thread saw";
+              return;
+            }
+            internedStrings[idx] = interned;
+
+            // test against local copy
+            if (prevInterned != null && prevInterned != interned) {
+              failure[0] = "thread " + seed + ": intern(\"" + s
+                  + "\") returned a different object than on an earlier call";
+              return;
+            }
+            myInterned[idx] = interned;
+          }
+        }
+      };
+
+      threads[i].start();
+    }
+
+    for (int i=0; i<nThreads; i++) {
+      threads[i].join();
+    }
+
+    // Report any worker-side mismatch from the test thread so JUnit sees it.
+    if (failure[0] != null) {
+      TestCase.fail(failure[0]);
+    }
+  }
+}

Propchange: lucene/java/trunk/src/test/org/apache/lucene/util/TestStringIntern.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/trunk/src/test/org/apache/lucene/util/TestStringIntern.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: lucene/java/trunk/src/test/org/apache/lucene/util/TestStringIntern.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL



Mime
View raw message