lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r1199492 - in /lucene/dev/branches/lucene2621/lucene: ./ src/java/org/apache/lucene/index/codecs/simpletext/ src/java/org/apache/lucene/util/ src/resources/META-INF/services/ src/test-framework/java/org/apache/lucene/util/
Date Tue, 08 Nov 2011 22:20:30 GMT
Author: rmuir
Date: Tue Nov  8 22:20:30 2011
New Revision: 1199492

URL: http://svn.apache.org/viewvc?rev=1199492&view=rev
Log:
LUCENE-2621: add simpletext stored fields

Added:
    lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java
  (with props)
    lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextStoredFieldsFormat.java
  (with props)
    lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextStoredFieldsReader.java
  (with props)
    lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextStoredFieldsWriter.java
  (with props)
Modified:
    lucene/dev/branches/lucene2621/lucene/common-build.xml
    lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/util/ArrayUtil.java
    lucene/dev/branches/lucene2621/lucene/src/resources/META-INF/services/org.apache.lucene.index.codecs.Codec
    lucene/dev/branches/lucene2621/lucene/src/test-framework/java/org/apache/lucene/util/LuceneTestCase.java

Modified: lucene/dev/branches/lucene2621/lucene/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/common-build.xml?rev=1199492&r1=1199491&r2=1199492&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/common-build.xml (original)
+++ lucene/dev/branches/lucene2621/lucene/common-build.xml Tue Nov  8 22:20:30 2011
@@ -83,6 +83,7 @@
     </or>
   </condition>
   <property name="tests.multiplier" value="1" />
+  <property name="tests.codec" value="random" />
   <property name="tests.postingsformat" value="random" />
   <property name="tests.locale" value="random" />
   <property name="tests.timezone" value="random" />
@@ -555,6 +556,8 @@
               <!-- directory for formatter lock -->
 	      <sysproperty key="tests.lockdir" value="${tests.lockdir}"/>
               <!-- set the codec tests should run with -->
+	      <sysproperty key="tests.codec" value="${tests.codec}"/>
+              <!-- set the postingsformat tests should run with -->
 	      <sysproperty key="tests.postingsformat" value="${tests.postingsformat}"/>
               <!-- set the locale tests should run with -->
 	      <sysproperty key="tests.locale" value="${tests.locale}"/>

Added: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java?rev=1199492&view=auto
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java
(added)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java
Tue Nov  8 22:20:30 2011
@@ -0,0 +1,66 @@
+package org.apache.lucene.index.codecs.simpletext;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.DefaultDocValuesFormat;
+import org.apache.lucene.index.codecs.DefaultSegmentInfosFormat;
+import org.apache.lucene.index.codecs.DocValuesFormat;
+import org.apache.lucene.index.codecs.PostingsFormat;
+import org.apache.lucene.index.codecs.SegmentInfosFormat;
+import org.apache.lucene.index.codecs.StoredFieldsFormat;
+
+/**
+ * Codec that stores the index as plain text.
+ * <p>
+ * Postings and stored fields use the SimpleText implementations; doc values
+ * and segment infos still use the default formats.
+ * <p>
+ * <b><font color="red">FOR RECREATIONAL USE ONLY</font></b>
+ * @lucene.experimental
+ */
+public final class SimpleTextCodec extends Codec {
+  private final PostingsFormat postingsFormat = new SimpleTextPostingsFormat();
+  private final StoredFieldsFormat storedFieldsFormat = new SimpleTextStoredFieldsFormat();
+
+  // TODO: need a plain-text impl
+  private final DocValuesFormat docValuesFormat = new DefaultDocValuesFormat();
+  // TODO: need a plain-text impl
+  private final SegmentInfosFormat segmentInfosFormat = new DefaultSegmentInfosFormat();
+
+  /** Creates the codec, passing the name "SimpleText" to the {@link Codec} base class. */
+  public SimpleTextCodec() {
+    super("SimpleText");
+  }
+
+  /** Returns the plain-text postings format. */
+  @Override
+  public PostingsFormat postingsFormat() {
+    return postingsFormat;
+  }
+
+  /** Returns the default doc values format (no plain-text implementation yet). */
+  @Override
+  public DocValuesFormat docValuesFormat() {
+    return docValuesFormat;
+  }
+
+  /** Returns the plain-text stored fields format. */
+  @Override
+  public StoredFieldsFormat storedFieldsFormat() {
+    return storedFieldsFormat;
+  }
+
+  /** Returns the default segment infos format (no plain-text implementation yet). */
+  @Override
+  public SegmentInfosFormat segmentInfosFormat() {
+    return segmentInfosFormat;
+  }
+}

Added: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextStoredFieldsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextStoredFieldsFormat.java?rev=1199492&view=auto
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextStoredFieldsFormat.java
(added)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextStoredFieldsFormat.java
Tue Nov  8 22:20:30 2011
@@ -0,0 +1,53 @@
+package org.apache.lucene.index.codecs.simpletext;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Set;
+
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.codecs.StoredFieldsFormat;
+import org.apache.lucene.index.codecs.StoredFieldsReader;
+import org.apache.lucene.index.codecs.StoredFieldsWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+
+/**
+ * Stored fields format backed by the plain-text reader and writer of this package.
+ * <p>
+ * <b><font color="red">FOR RECREATIONAL USE ONLY</font></b>
+ * @lucene.experimental
+ */
+public class SimpleTextStoredFieldsFormat extends StoredFieldsFormat {
+
+  /** Creates a {@link SimpleTextStoredFieldsReader} for the given segment. */
+  @Override
+  public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
+    final StoredFieldsReader reader = new SimpleTextStoredFieldsReader(directory, si, fn, context);
+    return reader;
+  }
+
+  /** Creates a {@link SimpleTextStoredFieldsWriter} for the named segment. */
+  @Override
+  public StoredFieldsWriter fieldsWriter(Directory directory, String segment, IOContext context) throws IOException {
+    final StoredFieldsWriter writer = new SimpleTextStoredFieldsWriter(directory, segment, context);
+    return writer;
+  }
+
+  /** Adds the segment's stored fields file to {@code files}; delegates to the reader. */
+  @Override
+  public void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
+    SimpleTextStoredFieldsReader.files(dir, info, files);
+  }
+}

Added: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextStoredFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextStoredFieldsReader.java?rev=1199492&view=auto
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextStoredFieldsReader.java
(added)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextStoredFieldsReader.java
Tue Nov  8 22:20:30 2011
@@ -0,0 +1,197 @@
+package org.apache.lucene.index.codecs.simpletext;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Set;
+
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.StoredFieldVisitor;
+import org.apache.lucene.index.codecs.StoredFieldsReader;
+import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.UnicodeUtil;
+
+import static org.apache.lucene.index.codecs.simpletext.SimpleTextStoredFieldsWriter.*;
+
+/**
+ * reads plaintext stored fields
+ * <p>
+ * <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
+ * @lucene.experimental
+ */
+public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
+  private ArrayList<Long> offsets; /* docid -> offset in .fld file */
+  private IndexInput in;
+  private BytesRef scratch = new BytesRef();
+  private CharsRef scratchUTF16 = new CharsRef();
+  private final FieldInfos fieldInfos;
+
+  /**
+   * Opens the segment's stored fields file and scans it once to record the
+   * file offset at which each document starts.
+   *
+   * @param directory directory the stored fields file is opened from
+   * @param si        segment whose name selects the file
+   * @param fn        field infos used to resolve field numbers when visiting documents
+   * @param context   IO context handed to {@code directory.openInput}
+   * @throws IOException if the file cannot be opened or scanned; in that case
+   *         the input is closed again before the exception propagates
+   */
+  public SimpleTextStoredFieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
+    this.fieldInfos = fn;
+    boolean success = false;
+    try {
+      in = directory.openInput(IndexFileNames.segmentFileName(si.name, "", SimpleTextStoredFieldsWriter.FIELDS_EXTENSION), context);
+      // scan inside the guarded region: if building the offsets fails, the
+      // input opened above must still be released by the finally block
+      readIndex();
+      success = true;
+    } finally {
+      if (!success) {
+        close();
+      }
+    }
+  }
+  
+  // used by clone
+  SimpleTextStoredFieldsReader(ArrayList<Long> offsets, IndexInput in, FieldInfos fieldInfos)
{
+    this.offsets = offsets;
+    this.in = in;
+    this.fieldInfos = fieldInfos;
+  }
+  
+  // we don't actually write a .fdx-like index, instead we read the 
+  // stored fields file in entirety up-front and save the offsets 
+  // so we can seek to the documents later.
+  private void readIndex() throws IOException {
+    offsets = new ArrayList<Long>();
+    while (!scratch.equals(END)) {
+      readLine();
+      if (scratch.startsWith(DOC)) {
+        offsets.add(in.getFilePointer());
+      }
+    }
+  }
+  
+  @Override
+  public void visitDocument(int n, StoredFieldVisitor visitor) throws CorruptIndexException,
IOException {
+    in.seek(offsets.get(n));
+    readLine();
+    assert scratch.startsWith(NUM);
+    int numFields = parseIntAt(NUM.length);
+    
+    for (int i = 0; i < numFields; i++) {
+      readLine();
+      assert scratch.startsWith(FIELD);
+      int fieldNumber = parseIntAt(FIELD.length);
+      FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
+      readLine();
+      assert scratch.startsWith(NAME);
+      readLine();
+      assert scratch.startsWith(TYPE);
+      
+      final BytesRef type;
+      if (equalsAt(TYPE_STRING, scratch, TYPE.length)) {
+        type = TYPE_STRING;
+      } else if (equalsAt(TYPE_BINARY, scratch, TYPE.length)) {
+        type = TYPE_BINARY;
+      } else if (equalsAt(TYPE_INT, scratch, TYPE.length)) {
+        type = TYPE_INT;
+      } else if (equalsAt(TYPE_LONG, scratch, TYPE.length)) {
+        type = TYPE_LONG;
+      } else if (equalsAt(TYPE_FLOAT, scratch, TYPE.length)) {
+        type = TYPE_FLOAT;
+      } else if (equalsAt(TYPE_DOUBLE, scratch, TYPE.length)) {
+        type = TYPE_DOUBLE;
+      } else {
+        throw new RuntimeException("unknown field type");
+      }
+      
+      switch (visitor.needsField(fieldInfo)) {
+        case YES:  
+          readField(type, fieldInfo, visitor);
+          break;
+        case NO:   
+          readLine();
+          assert scratch.startsWith(VALUE);
+          break;
+        case STOP: return;
+      }
+    }
+  }
+  
+  private void readField(BytesRef type, FieldInfo fieldInfo, StoredFieldVisitor visitor)
throws IOException {
+    readLine();
+    assert scratch.startsWith(VALUE);
+    if (type == TYPE_STRING) {
+      visitor.stringField(fieldInfo, new String(scratch.bytes, scratch.offset+VALUE.length,
scratch.length-VALUE.length));
+    } else if (type == TYPE_BINARY) {
+      // TODO: who owns the bytes?
+      byte[] copy = new byte[scratch.length-VALUE.length];
+      System.arraycopy(scratch.bytes, scratch.offset+VALUE.length, copy, 0, copy.length);
+      visitor.binaryField(fieldInfo, copy, 0, copy.length);
+    } else if (type == TYPE_INT) {
+      UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+VALUE.length, scratch.length-VALUE.length,
scratchUTF16);
+      visitor.intField(fieldInfo, Integer.parseInt(scratchUTF16.toString()));
+    } else if (type == TYPE_LONG) {
+      UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+VALUE.length, scratch.length-VALUE.length,
scratchUTF16);
+      visitor.longField(fieldInfo, Long.parseLong(scratchUTF16.toString()));
+    } else if (type == TYPE_FLOAT) {
+      UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+VALUE.length, scratch.length-VALUE.length,
scratchUTF16);
+      visitor.floatField(fieldInfo, Float.parseFloat(scratchUTF16.toString()));
+    } else if (type == TYPE_DOUBLE) {
+      UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+VALUE.length, scratch.length-VALUE.length,
scratchUTF16);
+      visitor.doubleField(fieldInfo, Double.parseDouble(scratchUTF16.toString()));
+    }
+  }
+
+  @Override
+  public StoredFieldsReader clone() {
+    if (in == null) {
+      throw new AlreadyClosedException("this FieldsReader is closed");
+    }
+    return new SimpleTextStoredFieldsReader(offsets, (IndexInput) in.clone(), fieldInfos);
+  }
+  
+  @Override
+  public void close() throws IOException {
+    try {
+      IOUtils.close(in); 
+    } finally {
+      in = null;
+      offsets = null;
+    }
+  }
+  
+  public static void files(Directory dir, SegmentInfo info, Set<String> files) throws
IOException {
+    files.add(IndexFileNames.segmentFileName(info.name, "", SimpleTextStoredFieldsWriter.FIELDS_EXTENSION));
+  }
+  
+  private void readLine() throws IOException {
+    SimpleTextUtil.readLine(in, scratch);
+  }
+  
+  private int parseIntAt(int offset) throws IOException {
+    UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+offset, scratch.length-offset,
scratchUTF16);
+    return ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
+  }
+  
+  private boolean equalsAt(BytesRef a, BytesRef b, int bOffset) {
+    return a.length == b.length - bOffset && 
+        ArrayUtil.equals(a.bytes, a.offset, b.bytes, b.offset + bOffset, b.length - bOffset);
+  }
+}

Added: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextStoredFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextStoredFieldsWriter.java?rev=1199492&view=auto
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextStoredFieldsWriter.java
(added)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextStoredFieldsWriter.java
Tue Nov  8 22:20:30 2011
@@ -0,0 +1,201 @@
+package org.apache.lucene.index.codecs.simpletext;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.codecs.StoredFieldsWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * Writes plain-text stored fields.
+ * <p>
+ * <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
+ * @lucene.experimental
+ */
+public class SimpleTextStoredFieldsWriter extends StoredFieldsWriter {
+  private int numDocsWritten = 0;
+  private final Directory directory;
+  private final String segment;
+  private IndexOutput out;
+  
+  final static String FIELDS_EXTENSION = "fld";
+  
+  final static BytesRef TYPE_STRING = new BytesRef("string");
+  final static BytesRef TYPE_BINARY = new BytesRef("binary");
+  final static BytesRef TYPE_INT    = new BytesRef("int");
+  final static BytesRef TYPE_LONG   = new BytesRef("long");
+  final static BytesRef TYPE_FLOAT  = new BytesRef("float");
+  final static BytesRef TYPE_DOUBLE = new BytesRef("double");
+
+  final static BytesRef END     = new BytesRef("END");
+  final static BytesRef DOC     = new BytesRef("doc ");
+  final static BytesRef NUM     = new BytesRef("  numfields ");
+  final static BytesRef FIELD   = new BytesRef("  field ");
+  final static BytesRef NAME    = new BytesRef("    name ");
+  final static BytesRef TYPE    = new BytesRef("    type ");
+  final static BytesRef VALUE   = new BytesRef("    value ");
+  
+  private final BytesRef scratch = new BytesRef();
+  
+  public SimpleTextStoredFieldsWriter(Directory directory, String segment, IOContext context)
throws IOException {
+    this.directory = directory;
+    this.segment = segment;
+    boolean success = false;
+    try {
+      out = directory.createOutput(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION),
context);
+      success = true;
+    } finally {
+      if (!success) {
+        abort();
+      }
+    }
+  }
+
+  @Override
+  public void startDocument(int numStoredFields) throws IOException {
+    write(DOC);
+    write(Integer.toString(numDocsWritten));
+    newLine();
+    
+    write(NUM);
+    write(Integer.toString(numStoredFields));
+    newLine();
+    
+    numDocsWritten++;
+  }
+
+  @Override
+  public void writeField(int fieldNumber, IndexableField field) throws IOException {
+    write(FIELD);
+    write(Integer.toString(fieldNumber));
+    newLine();
+    
+    write(NAME);
+    write(field.name());
+    newLine();
+    
+    write(TYPE);
+    if (field.numeric()) {
+      switch (field.numericDataType()) {
+        case INT:
+          write(TYPE_INT);
+          newLine();
+          
+          write(VALUE);
+          write(Integer.toString(field.numericValue().intValue()));
+          newLine();
+          
+          break;
+        case LONG:
+          write(TYPE_LONG);
+          newLine();
+          
+          write(VALUE);
+          write(Long.toString(field.numericValue().longValue()));
+          newLine();
+          
+          break;
+        case FLOAT:
+          write(TYPE_FLOAT);
+          newLine();
+          
+          write(VALUE);
+          write(Float.toString(field.numericValue().floatValue()));
+          newLine();
+          
+          break;
+        case DOUBLE:
+          write(TYPE_DOUBLE);
+          newLine();
+          
+          write(VALUE);
+          write(Double.toString(field.numericValue().doubleValue()));
+          newLine();
+          
+          break;
+        default:
+          assert false : "Should never get here";
+      }
+    } else { 
+      BytesRef bytes = field.binaryValue();
+      if (bytes != null) {
+        write(TYPE_BINARY);
+        newLine();
+        
+        write(VALUE);
+        write(bytes);
+        newLine();
+      } else {
+        write(TYPE_STRING);
+        newLine();
+        
+        write(VALUE);
+        write(field.stringValue());
+        newLine();
+      }
+    }
+  }
+
+  @Override
+  public void abort() {
+    try {
+      close();
+    } catch (IOException ignored) {}
+    try {
+      directory.deleteFile(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION));
+    } catch (IOException ignored) {}
+  }
+
+  @Override
+  public void finish(int numDocs) throws IOException {
+    if (numDocsWritten != numDocs) {
+      throw new RuntimeException("mergeFields produced an invalid result: docCount is " +
numDocs 
+          + " but only saw " + numDocsWritten + " file=" + out.toString() + "; now aborting
this merge to prevent index corruption");
+    }
+    write(END);
+    newLine();
+  }
+
+  @Override
+  public void close() throws IOException {
+    try {
+      IOUtils.close(out);
+    } finally {
+      out = null;
+    }
+  }
+  
+  private void write(String s) throws IOException {
+    SimpleTextUtil.write(out, s, scratch);
+  }
+  
+  private void write(BytesRef bytes) throws IOException {
+    SimpleTextUtil.write(out, bytes);
+  }
+  
+  private void newLine() throws IOException {
+    SimpleTextUtil.writeNewline(out);
+  }
+}

Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/util/ArrayUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/util/ArrayUtil.java?rev=1199492&r1=1199491&r2=1199492&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/util/ArrayUtil.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/util/ArrayUtil.java Tue
Nov  8 22:20:30 2011
@@ -509,6 +509,32 @@ public final class ArrayUtil {
     }
     return false;
   }
+  
+  // Since Arrays.equals doesn't implement offsets for equals
+  /**
+   * See if two array slices are the same.
+   *
+   * @param left        The left array to compare
+   * @param offsetLeft  The offset into the left array.  Must be non-negative
+   * @param right       The right array to compare
+   * @param offsetRight The offset into the right array.  Must be non-negative
+   * @param length      The number of bytes to compare
+   * @return true if both slices lie within their arrays and hold the same bytes;
+   *         false if they differ or either slice would run past the end of its array
+   *
+   * @see java.util.Arrays#equals(byte[], byte[])
+   */
+  public static boolean equals(byte[] left, int offsetLeft, byte[] right, int offsetRight, int length) {
+    // compare against the remaining room rather than computing offset + length:
+    // that sum can overflow int and slip past the bounds check
+    if (length > left.length - offsetLeft || length > right.length - offsetRight) {
+      return false;
+    }
+    for (int i = 0; i < length; i++) {
+      if (left[offsetLeft + i] != right[offsetRight + i]) {
+        return false;
+      }
+    }
+    return true;
+  }
 
   /* DISABLE THIS FOR NOW: This has performance problems until Java creates intrinsics for
Class#getComponentType() and Array.newInstance()
   public static <T> T[] grow(T[] array, int minSize) {

Modified: lucene/dev/branches/lucene2621/lucene/src/resources/META-INF/services/org.apache.lucene.index.codecs.Codec
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/resources/META-INF/services/org.apache.lucene.index.codecs.Codec?rev=1199492&r1=1199491&r2=1199492&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/resources/META-INF/services/org.apache.lucene.index.codecs.Codec
(original)
+++ lucene/dev/branches/lucene2621/lucene/src/resources/META-INF/services/org.apache.lucene.index.codecs.Codec
Tue Nov  8 22:20:30 2011
@@ -15,3 +15,4 @@
 
 org.apache.lucene.index.codecs.lucene40.Lucene40Codec
 org.apache.lucene.index.codecs.lucene3x.Lucene3xCodec
+org.apache.lucene.index.codecs.simpletext.SimpleTextCodec

Modified: lucene/dev/branches/lucene2621/lucene/src/test-framework/java/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/test-framework/java/org/apache/lucene/util/LuceneTestCase.java?rev=1199492&r1=1199491&r2=1199492&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/test-framework/java/org/apache/lucene/util/LuceneTestCase.java
(original)
+++ lucene/dev/branches/lucene2621/lucene/src/test-framework/java/org/apache/lucene/util/LuceneTestCase.java
Tue Nov  8 22:20:30 2011
@@ -52,6 +52,7 @@ import org.apache.lucene.index.codecs.pe
 import org.apache.lucene.index.codecs.preflexrw.PreFlexRWCodec;
 import org.apache.lucene.index.codecs.preflexrw.PreFlexRWPostingsFormat;
 import org.apache.lucene.index.codecs.pulsing.PulsingPostingsFormat;
+import org.apache.lucene.index.codecs.simpletext.SimpleTextCodec;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.FieldCache;
 import org.apache.lucene.search.FieldCache.CacheEntry;
@@ -141,6 +142,8 @@ public abstract class LuceneTestCase ext
   // by default we randomly pick a different codec for
   // each test case (non-J4 tests) and each test class (J4
   // tests)
+  /** Gets the codec to run tests with. */
+  public static final String TEST_CODEC = System.getProperty("tests.codec", "random");
   /** Gets the postingsFormat to run tests with. */
   public static final String TEST_POSTINGSFORMAT = System.getProperty("tests.postingsformat",
"random");
   /** Gets the locale to run tests with */
@@ -284,9 +287,15 @@ public abstract class LuceneTestCase ext
     PREFLEX_IMPERSONATION_IS_ACTIVE = false;
     savedCodec = Codec.getDefault();
     final Codec codec;
-    if ("Lucene3x".equals(TEST_POSTINGSFORMAT) || ("random".equals(TEST_POSTINGSFORMAT) &&
random.nextInt(4) == 0)) { // preflex-only setup
+    int randomVal = random.nextInt(10);
+    
+    if ("Lucene3x".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal
<= 3)) { // preflex-only setup
       codec = new PreFlexRWCodec();
       PREFLEX_IMPERSONATION_IS_ACTIVE = true;
+    } else if ("SimpleText".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 9)) {
+      // randomVal is drawn with random.nextInt(10), i.e. 0..9, so 9 is the
+      // one slot in ten that selects SimpleText in random mode
+      codec = new SimpleTextCodec();
+    } else if (!"random".equals(TEST_CODEC)) {
+      codec = Codec.forName(TEST_CODEC);
     } else if ("random".equals(TEST_POSTINGSFORMAT)) {
       codec = new RandomCodec(random, useNoMemoryExpensiveCodec);
     } else {
@@ -1289,6 +1298,7 @@ public abstract class LuceneTestCase ext
   // extra params that were overridden needed to reproduce the command
   private static String reproduceWithExtraParams() {
     StringBuilder sb = new StringBuilder();
+    if (!TEST_CODEC.equals("random")) sb.append(" -Dtests.codec=").append(TEST_CODEC);
     if (!TEST_POSTINGSFORMAT.equals("random")) sb.append(" -Dtests.postingsformat=").append(TEST_POSTINGSFORMAT);
     if (!TEST_LOCALE.equals("random")) sb.append(" -Dtests.locale=").append(TEST_LOCALE);
     if (!TEST_TIMEZONE.equals("random")) sb.append(" -Dtests.timezone=").append(TEST_TIMEZONE);



Mime
View raw message