Return-Path: X-Original-To: apmail-lucene-commits-archive@www.apache.org Delivered-To: apmail-lucene-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id D1D8A9CE6 for ; Mon, 5 Dec 2011 01:45:57 +0000 (UTC) Received: (qmail 83735 invoked by uid 500); 5 Dec 2011 01:45:57 -0000 Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list commits@lucene.apache.org Received: (qmail 83720 invoked by uid 99); 5 Dec 2011 01:45:57 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 05 Dec 2011 01:45:57 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 05 Dec 2011 01:45:53 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id D5D5423888FD; Mon, 5 Dec 2011 01:45:30 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1210306 - in /lucene/dev/branches/lucene3606/lucene/src: java/org/apache/lucene/index/ java/org/apache/lucene/index/codecs/simpletext/ test/org/apache/lucene/index/ Date: Mon, 05 Dec 2011 01:45:30 -0000 To: commits@lucene.apache.org From: rmuir@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20111205014530.D5D5423888FD@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: rmuir Date: Mon Dec 5 01:45:30 2011 New Revision: 1210306 URL: http://svn.apache.org/viewvc?rev=1210306&view=rev Log: LUCENE-3606: SimpleText norms Added: lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsFormat.java (with props) 
lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsReader.java (with props) lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsWriter.java (with props) Modified: lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java lucene/dev/branches/lucene3606/lucene/src/test/org/apache/lucene/index/TestOmitNorms.java Modified: lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java?rev=1210306&r1=1210305&r2=1210306&view=diff ============================================================================== --- lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java (original) +++ lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java Mon Dec 5 01:45:30 2011 @@ -94,7 +94,9 @@ final class SegmentCoreReaders { // Ask codec for its Fields fields = format.fieldsProducer(segmentReadState); assert fields != null; - // ask codec for its Norms + // ask codec for its Norms: + // TODO: since we don't write any norms file if there are no norms, + // kinda janky to assume the codec handles the case of no norms file at all gracefully?! 
norms = codec.normsFormat().normsReader(cfsDir, si, fieldInfos, context, dir); perDocProducer = codec.docValuesFormat().docsProducer(segmentReadState); success = true; Modified: lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java?rev=1210306&r1=1210305&r2=1210306&view=diff ============================================================================== --- lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java (original) +++ lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java Mon Dec 5 01:45:30 2011 @@ -26,7 +26,6 @@ import org.apache.lucene.index.codecs.Se import org.apache.lucene.index.codecs.StoredFieldsFormat; import org.apache.lucene.index.codecs.TermVectorsFormat; import org.apache.lucene.index.codecs.lucene40.Lucene40DocValuesFormat; -import org.apache.lucene.index.codecs.lucene40.Lucene40NormsFormat; /** * plain text index format. 
@@ -43,7 +42,7 @@ public final class SimpleTextCodec exten // TODO: need a plain-text impl private final DocValuesFormat docValues = new Lucene40DocValuesFormat(); // TODO: need a plain-text impl (using the above) - private final NormsFormat normsFormat = new Lucene40NormsFormat(); + private final NormsFormat normsFormat = new SimpleTextNormsFormat(); public SimpleTextCodec() { super("SimpleText"); Added: lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsFormat.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsFormat.java?rev=1210306&view=auto ============================================================================== --- lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsFormat.java (added) +++ lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsFormat.java Mon Dec 5 01:45:30 2011 @@ -0,0 +1,54 @@ +package org.apache.lucene.index.codecs.simpletext; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.Set; + +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.codecs.NormsFormat; +import org.apache.lucene.index.codecs.NormsReader; +import org.apache.lucene.index.codecs.NormsWriter; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; + +/** + * plain-text norms format + *

+ * FOR RECREATIONAL USE ONLY + * @lucene.experimental + */ +public class SimpleTextNormsFormat extends NormsFormat { + + @Override + public NormsReader normsReader(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context, Directory separateNormsDir) throws IOException { + return new SimpleTextNormsReader(dir, info, fields, context); + } + + @Override + public NormsWriter normsWriter(SegmentWriteState state) throws IOException { + return new SimpleTextNormsWriter(state.directory, state.segmentName, state.context); + } + + @Override + public void files(Directory dir, SegmentInfo info, Set files) throws IOException { + SimpleTextNormsReader.files(dir, info, files); + } +} Added: lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsReader.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsReader.java?rev=1210306&view=auto ============================================================================== --- lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsReader.java (added) +++ lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsReader.java Mon Dec 5 01:45:30 2011 @@ -0,0 +1,106 @@ +package org.apache.lucene.index.codecs.simpletext; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.codecs.NormsReader; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.StringHelper; + +import static org.apache.lucene.index.codecs.simpletext.SimpleTextNormsWriter.*; + +/** + * Reads plain-text norms + *

+ * FOR RECREATIONAL USE ONLY + * @lucene.experimental + */ +public class SimpleTextNormsReader extends NormsReader { + private Map norms = new HashMap(); + + public SimpleTextNormsReader(Directory directory, SegmentInfo si, FieldInfos fields, IOContext context) throws IOException { + if (fields.hasNorms()) { + readNorms(directory.openInput(IndexFileNames.segmentFileName(si.name, "", NORMS_EXTENSION), context), si.docCount); + } + } + + // we read in all the norms up front into a hashmap + private void readNorms(IndexInput in, int maxDoc) throws IOException { + BytesRef scratch = new BytesRef(); + boolean success = false; + try { + SimpleTextUtil.readLine(in, scratch); + while (!scratch.equals(END)) { + assert StringHelper.startsWith(scratch, FIELD); + String fieldName = readString(FIELD.length, scratch); + byte bytes[] = new byte[maxDoc]; + for (int i = 0; i < bytes.length; i++) { + SimpleTextUtil.readLine(in, scratch); + assert StringHelper.startsWith(scratch, DOC); + SimpleTextUtil.readLine(in, scratch); + assert StringHelper.startsWith(scratch, NORM); + bytes[i] = scratch.bytes[scratch.offset + NORM.length]; + } + norms.put(fieldName, bytes); + SimpleTextUtil.readLine(in, scratch); + assert StringHelper.startsWith(scratch, FIELD) || scratch.equals(END); + } + success = true; + } finally { + if (success) { + IOUtils.close(in); + } else { + IOUtils.closeWhileHandlingException(in); + } + } + } + + @Override + public byte[] norms(String name) throws IOException { + return norms.get(name); + } + + @Override + public void close() throws IOException { + norms = null; + } + + static void files(Directory dir, SegmentInfo info, Set files) throws IOException { + // TODO: This is what SI always did... but we can do this cleaner? + // like first FI that has norms but doesn't have separate norms? 
+ final String normsFileName = IndexFileNames.segmentFileName(info.name, "", SimpleTextNormsWriter.NORMS_EXTENSION); + if (dir.fileExists(normsFileName)) { + files.add(normsFileName); + } + } + + private String readString(int offset, BytesRef scratch) { + return new String(scratch.bytes, scratch.offset+offset, scratch.length-offset, IOUtils.CHARSET_UTF_8); + } +} Added: lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsWriter.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsWriter.java?rev=1210306&view=auto ============================================================================== --- lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsWriter.java (added) +++ lucene/dev/branches/lucene3606/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextNormsWriter.java Mon Dec 5 01:45:30 2011 @@ -0,0 +1,114 @@ +package org.apache.lucene.index.codecs.simpletext; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.codecs.NormsWriter; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; + +/** + * Writes plain-text norms + *

+ * FOR RECREATIONAL USE ONLY + * @lucene.experimental + */ +public class SimpleTextNormsWriter extends NormsWriter { + private IndexOutput out; + private int docid = 0; + + /** Extension of norms file */ + static final String NORMS_EXTENSION = "len"; + + private final BytesRef scratch = new BytesRef(); + + final static BytesRef END = new BytesRef("END"); + final static BytesRef FIELD = new BytesRef("field "); + final static BytesRef DOC = new BytesRef(" doc "); + final static BytesRef NORM = new BytesRef(" norm "); + + public SimpleTextNormsWriter(Directory directory, String segment, IOContext context) throws IOException { + final String normsFileName = IndexFileNames.segmentFileName(segment, "", NORMS_EXTENSION); + out = directory.createOutput(normsFileName, context); + } + + @Override + public void startField(FieldInfo info) throws IOException { + assert info.omitNorms == false; + docid = 0; + write(FIELD); + write(info.name); + newLine(); + } + + @Override + public void writeNorm(byte norm) throws IOException { + write(DOC); + write(Integer.toString(docid)); + newLine(); + + write(NORM); + write(norm); + newLine(); + docid++; + } + + @Override + public void finish(int numDocs) throws IOException { + if (docid != numDocs) { + throw new RuntimeException("mergeNorms produced an invalid result: docCount is " + numDocs + + " but only saw " + docid + " file=" + out.toString() + "; now aborting this merge to prevent index corruption"); + } + write(END); + newLine(); + } + + @Override + public void close() throws IOException { + try { + IOUtils.close(out); + } finally { + out = null; + } + } + + private void write(String s) throws IOException { + SimpleTextUtil.write(out, s, scratch); + } + + private void write(BytesRef bytes) throws IOException { + SimpleTextUtil.write(out, bytes); + } + + private void write(byte b) throws IOException { + scratch.grow(1); + scratch.bytes[scratch.offset] = b; + scratch.length = 1; + SimpleTextUtil.write(out, scratch); + } + + private 
void newLine() throws IOException { + SimpleTextUtil.writeNewline(out); + } +} Modified: lucene/dev/branches/lucene3606/lucene/src/test/org/apache/lucene/index/TestOmitNorms.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3606/lucene/src/test/org/apache/lucene/index/TestOmitNorms.java?rev=1210306&r1=1210305&r2=1210306&view=diff ============================================================================== --- lucene/dev/branches/lucene3606/lucene/src/test/org/apache/lucene/index/TestOmitNorms.java (original) +++ lucene/dev/branches/lucene3606/lucene/src/test/org/apache/lucene/index/TestOmitNorms.java Mon Dec 5 01:45:30 2011 @@ -180,7 +180,8 @@ public class TestOmitNorms extends Lucen private void assertNoNrm(Directory dir) throws Throwable { final String[] files = dir.listAll(); for (int i = 0; i < files.length; i++) { - assertFalse(files[i].endsWith(".nrm")); + // TODO: this relies upon filenames + assertFalse(files[i].endsWith(".nrm") || files[i].endsWith(".len")); } }