lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From uschind...@apache.org
Subject svn commit: r1375508 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/CHANGES.txt lucene/core/ lucene/core/src/java/org/apache/lucene/document/Field.java
Date Tue, 21 Aug 2012 12:46:55 GMT
Author: uschindler
Date: Tue Aug 21 12:46:55 2012
New Revision: 1375508

URL: http://svn.apache.org/viewvc?rev=1375508&view=rev
Log:
Merged revision(s) 1375507 from lucene/dev/trunk:
LUCENE-4317: Improve reuse of internal TokenStreams in oal.document.Field.

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/document/Field.java

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1375508&r1=1375507&r2=1375508&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Tue Aug 21 12:46:55 2012
@@ -88,6 +88,11 @@ Bug Fixes
   containing non-BMP Unicode characters.  (Dawid Weiss, Robert Muir,
   Mike McCandless)
 
+Optimizations
+
+* LUCENE-4317: Improve reuse of internal TokenStreams in oal.document.Field.
+  (Uwe Schindler, Chris Male, Robert Muir)
+
 Build
 
 * LUCENE-3985: Upgrade to randomizedtesting 2.0.0. Added support for 

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/document/Field.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/document/Field.java?rev=1375508&r1=1375507&r2=1375508&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/document/Field.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/document/Field.java
Tue Aug 21 12:46:55 2012
@@ -73,7 +73,7 @@ public class Field implements IndexableF
   // customize how it's tokenized:
   protected TokenStream tokenStream;
 
-  protected transient NumericTokenStream numericTokenStream;
+  protected transient TokenStream internalTokenStream;
 
   protected float boost = 1.0f;
 
@@ -283,9 +283,6 @@ public class Field implements IndexableF
     if (!(fieldsData instanceof Byte)) {
       throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName()
+ " to Byte");
     }
-    if (numericTokenStream != null) {
-      numericTokenStream.setIntValue(value);
-    }
     fieldsData = Byte.valueOf(value);
   }
 
@@ -293,9 +290,6 @@ public class Field implements IndexableF
     if (!(fieldsData instanceof Short)) {
       throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName()
+ " to Short");
     }
-    if (numericTokenStream != null) {
-      numericTokenStream.setIntValue(value);
-    }
     fieldsData = Short.valueOf(value);
   }
 
@@ -303,9 +297,6 @@ public class Field implements IndexableF
     if (!(fieldsData instanceof Integer)) {
       throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName()
+ " to Integer");
     }
-    if (numericTokenStream != null) {
-      numericTokenStream.setIntValue(value);
-    }
     fieldsData = Integer.valueOf(value);
   }
 
@@ -313,9 +304,6 @@ public class Field implements IndexableF
     if (!(fieldsData instanceof Long)) {
       throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName()
+ " to Long");
     }
-    if (numericTokenStream != null) {
-      numericTokenStream.setLongValue(value);
-    }
     fieldsData = Long.valueOf(value);
   }
 
@@ -323,9 +311,6 @@ public class Field implements IndexableF
     if (!(fieldsData instanceof Float)) {
       throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName()
+ " to Float");
     }
-    if (numericTokenStream != null) {
-      numericTokenStream.setFloatValue(value);
-    }
     fieldsData = Float.valueOf(value);
   }
 
@@ -333,9 +318,6 @@ public class Field implements IndexableF
     if (!(fieldsData instanceof Double)) {
       throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName()
+ " to Double");
     }
-    if (numericTokenStream != null) {
-      numericTokenStream.setDoubleValue(value);
-    }
     fieldsData = Double.valueOf(value);
   }
 
@@ -433,62 +415,44 @@ public class Field implements IndexableF
 
     final NumericType numericType = fieldType().numericType();
     if (numericType != null) {
-      if (numericTokenStream == null) {
+      if (!(internalTokenStream instanceof NumericTokenStream)) {
         // lazy init the TokenStream as it is heavy to instantiate
         // (attributes,...) if not needed (stored field loading)
-        numericTokenStream = new NumericTokenStream(type.numericPrecisionStep());
-        // initialize value in TokenStream
-        final Number val = (Number) fieldsData;
-        switch (numericType) {
-        case INT:
-          numericTokenStream.setIntValue(val.intValue());
-          break;
-        case LONG:
-          numericTokenStream.setLongValue(val.longValue());
-          break;
-        case FLOAT:
-          numericTokenStream.setFloatValue(val.floatValue());
-          break;
-        case DOUBLE:
-          numericTokenStream.setDoubleValue(val.doubleValue());
-          break;
-        default:
-          assert false : "Should never get here";
-        }
-      } else {
-        // OK -- previously cached and we already updated if
-        // setters were called.
+        internalTokenStream = new NumericTokenStream(type.numericPrecisionStep());
       }
-
-      return numericTokenStream;
+      final NumericTokenStream nts = (NumericTokenStream) internalTokenStream;
+      // initialize value in TokenStream
+      final Number val = (Number) fieldsData;
+      switch (numericType) {
+      case INT:
+        nts.setIntValue(val.intValue());
+        break;
+      case LONG:
+        nts.setLongValue(val.longValue());
+        break;
+      case FLOAT:
+        nts.setFloatValue(val.floatValue());
+        break;
+      case DOUBLE:
+        nts.setDoubleValue(val.doubleValue());
+        break;
+      default:
+        assert false : "Should never get here";
+      }
+      return internalTokenStream;
     }
 
     if (!fieldType().tokenized()) {
       if (stringValue() == null) {
         throw new IllegalArgumentException("Non-Tokenized Fields must have a String value");
       }
-
-      return new TokenStream() {
-        CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
-        OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
-        boolean used;
-
-        @Override
-        public boolean incrementToken() {
-          if (used) {
-            return false;
-          }
-          termAttribute.setEmpty().append(stringValue());
-          offsetAttribute.setOffset(0, stringValue().length());
-          used = true;
-          return true;
-        }
-
-        @Override
-        public void reset() {
-          used = false;
-        }
-      };
+      if (!(internalTokenStream instanceof StringTokenStream)) {
+        // lazy init the TokenStream as it is heavy to instantiate
+        // (attributes,...) if not needed (stored field loading)
+        internalTokenStream = new StringTokenStream();
+      }
+      ((StringTokenStream) internalTokenStream).setValue(stringValue());
+      return internalTokenStream;
     }
 
     if (tokenStream != null) {
@@ -502,7 +466,49 @@ public class Field implements IndexableF
     throw new IllegalArgumentException("Field must have either TokenStream, String, Reader
or Number value");
   }
   
-  /** Specifies whether a field's value should be stored. */
+  static final class StringTokenStream extends TokenStream {
+    private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
+    private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
+    private boolean used = false;
+    private String value = null;
+    
+    /** Creates a new TokenStream that returns a String as single token.
+     * <p>Warning: Does not initialize the value, you must call
+     * {@link #setValue()} afterwards!
+     */
+    StringTokenStream() {
+    }
+    
+    /** Sets the string value. */
+    void setValue(String value) {
+      this.value = value;
+    }
+
+    @Override
+    public boolean incrementToken() {
+      if (used) {
+        return false;
+      }
+      clearAttributes();
+      termAttribute.append(value);
+      offsetAttribute.setOffset(0, value.length());
+      used = true;
+      return true;
+    }
+
+    @Override
+    public void end() {
+      final int finalOffset = value.length();
+      offsetAttribute.setOffset(finalOffset, finalOffset);
+    }
+    
+    @Override
+    public void reset() {
+      used = false;
+    }
+  }
+
+  /** Specifies whether and how a field should be stored. */
   public static enum Store {
 
     /** Store the original field value in the index. This is useful for short texts



Mime
View raw message