lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r1210306 - in /lucene/dev/branches/lucene3606/lucene/src: java/org/apache/lucene/index/ java/org/apache/lucene/index/codecs/simpletext/ test/org/apache/lucene/index/
Date Mon, 05 Dec 2011 01:45:30 GMT
Author: rmuir
Date: Mon Dec  5 01:45:30 2011
New Revision: 1210306

URL: http://svn.apache.org/viewvc?rev=1210306&view=rev
Log:
LUCENE-3606: SimpleText norms

Added:
    lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsFormat.java   (with props)
    lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsReader.java   (with props)
    lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsWriter.java   (with props)
Modified:
    lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java
    lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java
    lucene/dev/branches/lucene3606/lucene/src/test/org/apache/lucene/index/TestOmitNorms.java

Modified: lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java?rev=1210306&r1=1210305&r2=1210306&view=diff
==============================================================================
--- lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java (original)
+++ lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java Mon Dec  5 01:45:30 2011
@@ -94,7 +94,9 @@ final class SegmentCoreReaders {
       // Ask codec for its Fields
       fields = format.fieldsProducer(segmentReadState);
       assert fields != null;
-      // ask codec for its Norms
+      // ask codec for its Norms: 
+      // TODO: since we don't write any norms file if there are no norms,
+      // kinda jaky to assume the codec handles the case of no norms file at all gracefully?!
       norms = codec.normsFormat().normsReader(cfsDir, si, fieldInfos, context, dir);
       perDocProducer = codec.docValuesFormat().docsProducer(segmentReadState);
       success = true;

Modified: lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java?rev=1210306&r1=1210305&r2=1210306&view=diff
==============================================================================
--- lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java (original)
+++ lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java Mon Dec  5 01:45:30 2011
@@ -26,7 +26,6 @@ import org.apache.lucene.index.codecs.Se
 import org.apache.lucene.index.codecs.StoredFieldsFormat;
 import org.apache.lucene.index.codecs.TermVectorsFormat;
 import org.apache.lucene.index.codecs.lucene40.Lucene40DocValuesFormat;
-import org.apache.lucene.index.codecs.lucene40.Lucene40NormsFormat;
 
 /**
  * plain text index format.
@@ -43,7 +42,7 @@ public final class SimpleTextCodec exten
   // TODO: need a plain-text impl
   private final DocValuesFormat docValues = new Lucene40DocValuesFormat();
   // TODO: need a plain-text impl (using the above)
-  private final NormsFormat normsFormat = new Lucene40NormsFormat();
+  private final NormsFormat normsFormat = new SimpleTextNormsFormat();
   
   public SimpleTextCodec() {
     super("SimpleText");

Added: lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsFormat.java?rev=1210306&view=auto
==============================================================================
--- lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsFormat.java (added)
+++ lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsFormat.java Mon Dec  5 01:45:30 2011
@@ -0,0 +1,54 @@
+package org.apache.lucene.index.codecs.simpletext;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Set;
+
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.codecs.NormsFormat;
+import org.apache.lucene.index.codecs.NormsReader;
+import org.apache.lucene.index.codecs.NormsWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+
+/**
+ * plain-text norms format
+ * <p>
+ * <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
+ * @lucene.experimental
+ */
+public class SimpleTextNormsFormat extends NormsFormat {
+
+  @Override
+  public NormsReader normsReader(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context, Directory separateNormsDir) throws IOException {
+    return new SimpleTextNormsReader(dir, info, fields, context);
+  }
+
+  @Override
+  public NormsWriter normsWriter(SegmentWriteState state) throws IOException {
+    return new SimpleTextNormsWriter(state.directory, state.segmentName, state.context);
+  }
+
+  @Override
+  public void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
+    SimpleTextNormsReader.files(dir, info, files);
+  }
+}

Added: lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsReader.java?rev=1210306&view=auto
==============================================================================
--- lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsReader.java (added)
+++ lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsReader.java Mon Dec  5 01:45:30 2011
@@ -0,0 +1,106 @@
+package org.apache.lucene.index.codecs.simpletext;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.codecs.NormsReader;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.StringHelper;
+
+import static org.apache.lucene.index.codecs.simpletext.SimpleTextNormsWriter.*;
+
+/**
+ * Reads plain-text norms
+ * <p>
+ * <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
+ * @lucene.experimental
+ */
+public class SimpleTextNormsReader extends NormsReader {
+  private Map<String,byte[]> norms = new HashMap<String,byte[]>();
+  
+  public SimpleTextNormsReader(Directory directory, SegmentInfo si, FieldInfos fields, IOContext context) throws IOException {
+    if (fields.hasNorms()) {
+      readNorms(directory.openInput(IndexFileNames.segmentFileName(si.name, "", NORMS_EXTENSION), context), si.docCount);
+    }
+  }
+  
+  // we read in all the norms up front into a hashmap
+  private void readNorms(IndexInput in, int maxDoc) throws IOException {
+    BytesRef scratch = new BytesRef();
+    boolean success = false;
+    try {
+      SimpleTextUtil.readLine(in, scratch);
+      while (!scratch.equals(END)) {
+        assert StringHelper.startsWith(scratch, FIELD);
+        String fieldName = readString(FIELD.length, scratch);
+        byte bytes[] = new byte[maxDoc];
+        for (int i = 0; i < bytes.length; i++) {
+          SimpleTextUtil.readLine(in, scratch);
+          assert StringHelper.startsWith(scratch, DOC);
+          SimpleTextUtil.readLine(in, scratch);
+          assert StringHelper.startsWith(scratch, NORM);
+          bytes[i] = scratch.bytes[scratch.offset + NORM.length];
+        }
+        norms.put(fieldName, bytes);
+        SimpleTextUtil.readLine(in, scratch);
+        assert StringHelper.startsWith(scratch, FIELD) || scratch.equals(END);
+      }
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(in);
+      } else {
+        IOUtils.closeWhileHandlingException(in);
+      }
+    }
+  }
+  
+  @Override
+  public byte[] norms(String name) throws IOException {
+    return norms.get(name);
+  }
+  
+  @Override
+  public void close() throws IOException {
+    norms = null;
+  }
+  
+  static void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
+    // TODO: This is what SI always did... but we can do this cleaner?
+    // like first FI that has norms but doesn't have separate norms?
+    final String normsFileName = IndexFileNames.segmentFileName(info.name, "", SimpleTextNormsWriter.NORMS_EXTENSION);
+    if (dir.fileExists(normsFileName)) {
+      files.add(normsFileName);
+    }
+  }
+  
+  private String readString(int offset, BytesRef scratch) {
+    return new String(scratch.bytes, scratch.offset+offset, scratch.length-offset, IOUtils.CHARSET_UTF_8);
+  }
+}

Added: lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsWriter.java?rev=1210306&view=auto
==============================================================================
--- lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsWriter.java (added)
+++ lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsWriter.java Mon Dec  5 01:45:30 2011
@@ -0,0 +1,114 @@
+package org.apache.lucene.index.codecs.simpletext;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.codecs.NormsWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * Writes plain-text norms
+ * <p>
+ * <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
+ * @lucene.experimental
+ */
+public class SimpleTextNormsWriter extends NormsWriter {
+  private IndexOutput out;
+  private int docid = 0;
+    
+  /** Extension of norms file */
+  static final String NORMS_EXTENSION = "len";
+  
+  private final BytesRef scratch = new BytesRef();
+  
+  final static BytesRef END     = new BytesRef("END");
+  final static BytesRef FIELD   = new BytesRef("field ");
+  final static BytesRef DOC     = new BytesRef("  doc ");
+  final static BytesRef NORM    = new BytesRef("    norm ");
+  
+  public SimpleTextNormsWriter(Directory directory, String segment, IOContext context) throws IOException {
+    final String normsFileName = IndexFileNames.segmentFileName(segment, "", NORMS_EXTENSION);
+    out = directory.createOutput(normsFileName, context);
+  }
+
+  @Override
+  public void startField(FieldInfo info) throws IOException {
+    assert info.omitNorms == false;
+    docid = 0;
+    write(FIELD);
+    write(info.name);
+    newLine();
+  }
+    
+  @Override
+  public void writeNorm(byte norm) throws IOException {
+    write(DOC);
+    write(Integer.toString(docid));
+    newLine();
+    
+    write(NORM);
+    write(norm);
+    newLine();
+    docid++;
+  }
+    
+  @Override
+  public void finish(int numDocs) throws IOException {
+    if (docid != numDocs) {
+      throw new RuntimeException("mergeNorms produced an invalid result: docCount is " + numDocs
+          + " but only saw " + docid + " file=" + out.toString() + "; now aborting this merge to prevent index corruption");
+    }
+    write(END);
+    newLine();
+  }
+
+  @Override
+  public void close() throws IOException {
+    try {
+      IOUtils.close(out);
+    } finally {
+      out = null;
+    }
+  }
+  
+  private void write(String s) throws IOException {
+    SimpleTextUtil.write(out, s, scratch);
+  }
+  
+  private void write(BytesRef bytes) throws IOException {
+    SimpleTextUtil.write(out, bytes);
+  }
+  
+  private void write(byte b) throws IOException {
+    scratch.grow(1);
+    scratch.bytes[scratch.offset] = b;
+    scratch.length = 1;
+    SimpleTextUtil.write(out, scratch);
+  }
+  
+  private void newLine() throws IOException {
+    SimpleTextUtil.writeNewline(out);
+  }
+}

Modified: lucene/dev/branches/lucene3606/lucene/src/test/org/apache/lucene/index/TestOmitNorms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3606/lucene/src/test/org/apache/lucene/index/TestOmitNorms.java?rev=1210306&r1=1210305&r2=1210306&view=diff
==============================================================================
--- lucene/dev/branches/lucene3606/lucene/src/test/org/apache/lucene/index/TestOmitNorms.java (original)
+++ lucene/dev/branches/lucene3606/lucene/src/test/org/apache/lucene/index/TestOmitNorms.java Mon Dec  5 01:45:30 2011
@@ -180,7 +180,8 @@ public class TestOmitNorms extends Lucen
   private void assertNoNrm(Directory dir) throws Throwable {
     final String[] files = dir.listAll();
     for (int i = 0; i < files.length; i++) {
-      assertFalse(files[i].endsWith(".nrm"));
+      // TODO: this relies upon filenames
+      assertFalse(files[i].endsWith(".nrm") || files[i].endsWith(".len"));
     }
   }
 



Mime
View raw message