lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r1407249 - in /lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index: BytesDVWriter.java DocFieldProcessor.java DocFieldProcessorPerField.java NumberDVWriter.java
Date Thu, 08 Nov 2012 20:04:19 GMT
Author: mikemccand
Date: Thu Nov  8 20:04:19 2012
New Revision: 1407249

URL: http://svn.apache.org/viewvc?rev=1407249&view=rev
Log:
LUCENE-4547: add numeric buffering too

Added:
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/NumberDVWriter.java
  (with props)
Modified:
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/BytesDVWriter.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/BytesDVWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/BytesDVWriter.java?rev=1407249&r1=1407248&r2=1407249&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/BytesDVWriter.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/BytesDVWriter.java Thu Nov  8 20:04:19 2012
@@ -39,8 +39,6 @@ class BytesDVWriter {
   private int bytesUsed;
   private final FieldInfo fieldInfo;
 
-  private static final BytesRef EMPTY = new BytesRef(BytesRef.EMPTY_BYTES);
-
   // -2 means not set yet; -1 means length isn't fixed;
   // -otherwise it's the fixed length seen so far:
   int fixedLength = -2;
@@ -93,8 +91,9 @@ class BytesDVWriter {
       consumer.add(value);
     }
     final int maxDoc = state.segmentInfo.getDocCount();
+    value.length = 0;
     for(int docID=bufferedDocCount;docID<maxDoc;docID++) {
-      consumer.add(EMPTY);
+      consumer.add(value);
     }
     reset();
     //System.out.println("FLUSH");

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java?rev=1407249&r1=1407248&r2=1407249&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java Thu Nov  8 20:04:19 2012
@@ -91,7 +91,8 @@ final class DocFieldProcessor extends Do
       while(field != null) {
         // nocommit maybe we should sort by .... somethign?
         // field name?  field number?  else this is hash order!!
-        if (field.bytesDVWriter != null) {
+        if (field.bytesDVWriter != null || field.numberDVWriter != null) {
+
           if (dvConsumer == null) {
             SimpleDocValuesFormat fmt =  state.segmentInfo.getCodec().simpleDocValuesFormat();
             // nocommit once we make
@@ -104,10 +105,19 @@ final class DocFieldProcessor extends Do
 
             dvConsumer = fmt.fieldsConsumer(state.directory, state.segmentInfo, state.fieldInfos, state.context);
           }
-          field.bytesDVWriter.flush(field.fieldInfo, state,
-                                    dvConsumer.addBinaryField(field.fieldInfo,
-                                                              field.bytesDVWriter.fixedLength >= 0,
-                                                              field.bytesDVWriter.maxLength));
+
+          if (field.bytesDVWriter != null) {
+            field.bytesDVWriter.flush(field.fieldInfo, state,
+                                      dvConsumer.addBinaryField(field.fieldInfo,
+                                                                field.bytesDVWriter.fixedLength >= 0,
+                                                                field.bytesDVWriter.maxLength));
+          }
+          if (field.numberDVWriter != null) {
+            field.numberDVWriter.flush(field.fieldInfo, state,
+                                       dvConsumer.addNumericField(field.fieldInfo,
+                                                                  field.numberDVWriter.minValue,
+                                                                  field.numberDVWriter.maxValue));
+          }
         }
         field = field.next;
       }
@@ -282,6 +292,13 @@ final class DocFieldProcessor extends Do
         case BYTES_VAR_STRAIGHT:
           fp.addBytesDVField(docState.docID, field.binaryValue());
           break;
+        case VAR_INTS:
+        case FIXED_INTS_8:
+        case FIXED_INTS_16:
+        case FIXED_INTS_32:
+        case FIXED_INTS_64:
+          fp.addNumberDVField(docState.docID, field.numericValue());
+          break;
         default:
           break;
         }

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java?rev=1407249&r1=1407248&r2=1407249&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java Thu Nov  8 20:04:19 2012
@@ -31,7 +31,11 @@ final class DocFieldProcessorPerField {
   final DocFieldConsumerPerField consumer;
   final FieldInfo fieldInfo;
   private final Counter bytesUsed;
+
+  // nocommit after flush we should null these out?  then we
+  // don't need reset() impl'd in each...
   BytesDVWriter bytesDVWriter;
+  NumberDVWriter numberDVWriter;
 
   DocFieldProcessorPerField next;
   int lastGen = -1;
@@ -53,6 +57,14 @@ final class DocFieldProcessorPerField {
     bytesDVWriter.addValue(docID, value);
   }
 
+  // nocommit make this generic chain through consumer?
+  public void addNumberDVField(int docID, Number value) {
+    if (numberDVWriter == null) {
+      numberDVWriter = new NumberDVWriter(fieldInfo, bytesUsed);
+    }
+    numberDVWriter.addValue(docID, value.longValue());
+  }
+
   public void addField(IndexableField field) {
     if (fieldCount == fields.length) {
       int newSize = ArrayUtil.oversize(fieldCount + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
@@ -69,5 +81,8 @@ final class DocFieldProcessorPerField {
     if (bytesDVWriter != null) {
       bytesDVWriter.abort();
     }
+    if (numberDVWriter != null) {
+      numberDVWriter.abort();
+    }
   }
 }

Added: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/NumberDVWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/NumberDVWriter.java?rev=1407249&view=auto
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/NumberDVWriter.java (added)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/NumberDVWriter.java Thu Nov  8 20:04:19 2012
@@ -0,0 +1,110 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.codecs.NumericDocValuesConsumer;
+import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.RamUsageEstimator;
+
+// nocommit pick numeric or number ... then fix all places ...
+
+/** Buffers up pending long per doc, then flushes when
+ *  segment flushes. */
+// nocommit name?
+// nocommit make this a consumer in the chain?
+class NumberDVWriter {
+
+  private final static Long MISSING = new Long(0);
+
+  // nocommit more ram efficient?
+  private final ArrayList<Long> pending = new ArrayList<Long>();
+  private final Counter iwBytesUsed;
+  private int bytesUsed;
+  private final FieldInfo fieldInfo;
+
+  long minValue;
+  long maxValue;
+  private boolean anyValues;
+
+  public NumberDVWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
+    this.fieldInfo = fieldInfo;
+    this.iwBytesUsed = iwBytesUsed;
+  }
+
+  public void addValue(int docID, long value) {
+    final int oldBytesUsed = bytesUsed;
+    mergeValue(value);
+
+    // Fill in any holes:
+    while(pending.size() < docID) {
+      pending.add(MISSING);
+      bytesUsed += RamUsageEstimator.NUM_BYTES_OBJECT_REF;
+      mergeValue(0);
+    }
+
+    pending.add(value);
+
+    // estimate 25% overhead for ArrayList:
+    bytesUsed += (int) (RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_LONG + (RamUsageEstimator.NUM_BYTES_OBJECT_REF * 1.25));
+    iwBytesUsed.addAndGet(bytesUsed - oldBytesUsed);
+    //System.out.println("ADD: " + value);
+  }
+
+  private void mergeValue(long value) {
+    if (!anyValues) {
+      anyValues = true;
+      minValue = maxValue = value;
+    } else {
+      maxValue = Math.max(value, maxValue);
+      minValue = Math.min(value, minValue);
+    }
+  }
+
+  public void flush(FieldInfo fieldInfo, SegmentWriteState state, NumericDocValuesConsumer consumer) throws IOException {
+    final int bufferedDocCount = pending.size();
+
+    for(int docID=0;docID<bufferedDocCount;docID++) {
+      consumer.add(pending.get(docID));
+    }
+    final int maxDoc = state.segmentInfo.getDocCount();
+    for(int docID=bufferedDocCount;docID<maxDoc;docID++) {
+      consumer.add(0);
+    }
+    reset();
+    //System.out.println("FLUSH");
+  }
+
+  public void abort() {
+    reset();
+  }
+
+  // nocommit do we really need this...?  can't parent alloc
+  // a new instance after flush?
+  private void reset() {
+    pending.clear();
+    pending.trimToSize();
+    iwBytesUsed.addAndGet(-bytesUsed);
+    anyValues = false;
+    minValue = maxValue = 0;
+    bytesUsed = 0;
+  }
+}
\ No newline at end of file



Mime
View raw message