lucene-java-commits mailing list archives

From mikemcc...@apache.org
Subject svn commit: r560378 - in /lucene/java/trunk/src: java/org/apache/lucene/index/ test/org/apache/lucene/index/
Date Fri, 27 Jul 2007 20:50:20 GMT
Author: mikemccand
Date: Fri Jul 27 13:50:19 2007
New Revision: 560378

URL: http://svn.apache.org/viewvc?view=rev&rev=560378
Log:
LUCENE-964: remove DocumentWriter

Modified:
    lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java
    lucene/java/trunk/src/test/org/apache/lucene/index/DocHelper.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestDoc.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestFieldsReader.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestMultiReader.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentMerger.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentReader.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentTermDocs.java

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java?view=diff&rev=560378&r1=560377&r2=560378
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java Fri Jul 27 13:50:19 2007
@@ -1,556 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.search.Similarity;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IndexOutput;
-
-import java.io.IOException;
-import java.io.PrintStream;
-import java.io.Reader;
-import java.io.StringReader;
-import java.util.Arrays;
-import java.util.BitSet;
-import java.util.Enumeration;
-import java.util.Hashtable;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-
-final class DocumentWriter {
-  private Analyzer analyzer;
-  private Directory directory;
-  private Similarity similarity;
-  private FieldInfos fieldInfos;
-  private int maxFieldLength;
-  private int termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL;
-  private PrintStream infoStream;
-
-  /** This ctor used by test code only.
-   *
-   * @param directory The directory to write the document information to
-   * @param analyzer The analyzer to use for the document
-   * @param similarity The Similarity function
-   * @param maxFieldLength The maximum number of tokens a field may have
-   */ 
-  DocumentWriter(Directory directory, Analyzer analyzer,
-                 Similarity similarity, int maxFieldLength) {
-    this.directory = directory;
-    this.analyzer = analyzer;
-    this.similarity = similarity;
-    this.maxFieldLength = maxFieldLength;
-  }
-
-  DocumentWriter(Directory directory, Analyzer analyzer, IndexWriter writer) {
-    this.directory = directory;
-    this.analyzer = analyzer;
-    this.similarity = writer.getSimilarity();
-    this.maxFieldLength = writer.getMaxFieldLength();
-    this.termIndexInterval = writer.getTermIndexInterval();
-  }
-
-  final void addDocument(String segment, Document doc)
-          throws CorruptIndexException, IOException {
-    // create field infos
-    fieldInfos = new FieldInfos();
-    fieldInfos.add(doc);
-    
-    // invert doc into postingTable
-    postingTable.clear();			  // clear postingTable
-    fieldLengths = new int[fieldInfos.size()];    // init fieldLengths
-    fieldPositions = new int[fieldInfos.size()];  // init fieldPositions
-    fieldOffsets = new int[fieldInfos.size()];    // init fieldOffsets
-    fieldStoresPayloads = new BitSet(fieldInfos.size());
-    
-    fieldBoosts = new float[fieldInfos.size()];	  // init fieldBoosts
-    Arrays.fill(fieldBoosts, doc.getBoost());
-
-    try {
-    
-      // Before we write the FieldInfos we invert the Document. The reason is that
-      // during invertion the TokenStreams of tokenized fields are being processed 
-      // and we might encounter tokens that have payloads associated with them. In 
-      // this case we have to update the FieldInfo of the particular field.
-      invertDocument(doc);
-    
-      // sort postingTable into an array
-      Posting[] postings = sortPostingTable();
-    
-      // write field infos 
-      fieldInfos.write(directory, segment + ".fnm");
-
-      // write field values
-      FieldsWriter fieldsWriter =
-        new FieldsWriter(directory, segment, fieldInfos);
-      try {
-        fieldsWriter.addDocument(doc);
-      } finally {
-        fieldsWriter.close();
-      }
-    
-      /*
-      for (int i = 0; i < postings.length; i++) {
-        Posting posting = postings[i];
-        System.out.print(posting.term);
-        System.out.print(" freq=" + posting.freq);
-        System.out.print(" pos=");
-        System.out.print(posting.positions[0]);
-        for (int j = 1; j < posting.freq; j++)
-	  System.out.print("," + posting.positions[j]);
-        System.out.println("");
-      }
-       */
-
-      // write postings
-      writePostings(postings, segment);
-
-      // write norms of indexed fields
-      writeNorms(segment);
-    } finally {
-      // close TokenStreams
-      IOException ex = null;
-      
-      Iterator it = openTokenStreams.iterator();
-      while (it.hasNext()) {
-        try {
-          ((TokenStream) it.next()).close();
-        } catch (IOException e) {
-          if (ex != null) {
-            ex = e;
-          }
-        }
-      }
-      openTokenStreams.clear();
-      
-      if (ex != null) {
-        throw ex;
-      }
-    }
-  }
-
-  // Keys are Terms, values are Postings.
-  // Used to buffer a document before it is written to the index.
-  private final Hashtable postingTable = new Hashtable();
-  private int[] fieldLengths;
-  private int[] fieldPositions;
-  private int[] fieldOffsets;
-  private float[] fieldBoosts;
-  
-  // If any of the tokens of a paticular field carry a payload
-  // then we enable payloads for that field. 
-  private BitSet fieldStoresPayloads;
-  
-  // Keep references of the token streams. We must close them after
-  // the postings are written to the segment.
-  private List openTokenStreams = new LinkedList();
-
-  // Tokenizes the fields of a document into Postings.
-  private final void invertDocument(Document doc)
-          throws IOException {
-    Iterator fieldIterator = doc.getFields().iterator();
-    while (fieldIterator.hasNext()) {
-      Fieldable field = (Fieldable) fieldIterator.next();
-      String fieldName = field.name();
-      int fieldNumber = fieldInfos.fieldNumber(fieldName);
-
-      int length = fieldLengths[fieldNumber];     // length of field
-      int position = fieldPositions[fieldNumber]; // position in field
-      if (length>0) position+=analyzer.getPositionIncrementGap(fieldName);
-      int offset = fieldOffsets[fieldNumber];       // offset field
-
-      if (field.isIndexed()) {
-        if (!field.isTokenized()) {		  // un-tokenized field
-          String stringValue = field.stringValue();
-          if(field.isStoreOffsetWithTermVector())
-            addPosition(fieldName, stringValue, position++, null, new TermVectorOffsetInfo(offset, offset + stringValue.length()));
-          else
-            addPosition(fieldName, stringValue, position++, null, null);
-          offset += stringValue.length();
-          length++;
-        } else 
-        { // tokenized field
-          TokenStream stream = field.tokenStreamValue();
-          
-          // the field does not have a TokenStream,
-          // so we have to obtain one from the analyzer
-          if (stream == null) {
-            Reader reader;			  // find or make Reader
-            if (field.readerValue() != null)
-              reader = field.readerValue();
-            else if (field.stringValue() != null)
-              reader = new StringReader(field.stringValue());
-            else
-              throw new IllegalArgumentException
-                      ("field must have either String or Reader value");
-  
-            // Tokenize field and add to postingTable
-            stream = analyzer.tokenStream(fieldName, reader);
-          }
-          
-          // remember this TokenStream, we must close it later
-          openTokenStreams.add(stream);
-          
-          // reset the TokenStream to the first token
-          stream.reset();
-          
-
-          Token lastToken = null;
-          for (Token t = stream.next(); t != null; t = stream.next()) {
-            position += (t.getPositionIncrement() - 1);
-              
-            Payload payload = t.getPayload();
-            if (payload != null) {
-              // enable payloads for this field
-              fieldStoresPayloads.set(fieldNumber);
-            }
-              
-            TermVectorOffsetInfo termVectorOffsetInfo;
-            if (field.isStoreOffsetWithTermVector()) {
-              termVectorOffsetInfo = new TermVectorOffsetInfo(offset + t.startOffset(), offset + t.endOffset());
-            } else {
-              termVectorOffsetInfo = null;
-            }
-            addPosition(fieldName, t.termText(), position++, payload, termVectorOffsetInfo);
-              
-            lastToken = t;
-            if (++length >= maxFieldLength) {
-              if (infoStream != null)
-                infoStream.println("maxFieldLength " +maxFieldLength+ " reached, ignoring following tokens");
-              break;
-            }
-          }
-            
-          if(lastToken != null)
-            offset += lastToken.endOffset() + 1;
-        }
-
-        fieldLengths[fieldNumber] = length;	  // save field length
-        fieldPositions[fieldNumber] = position;	  // save field position
-        fieldBoosts[fieldNumber] *= field.getBoost();
-        fieldOffsets[fieldNumber] = offset;
-      }
-    }
-    
-    // update fieldInfos for all fields that have one or more tokens with payloads
-    for (int i = fieldStoresPayloads.nextSetBit(0); i >= 0; i = fieldStoresPayloads.nextSetBit(i+1)) {
-    	fieldInfos.fieldInfo(i).storePayloads = true;
-    }
-  }
-
-  private final Term termBuffer = new Term("", ""); // avoid consing
-
-  private final void addPosition(String field, String text, int position, Payload payload, TermVectorOffsetInfo offset) {
-    termBuffer.set(field, text);
-    //System.out.println("Offset: " + offset);
-    Posting ti = (Posting) postingTable.get(termBuffer);
-    if (ti != null) {				  // word seen before
-      int freq = ti.freq;
-      if (ti.positions.length == freq) {	  // positions array is full
-        int[] newPositions = new int[freq * 2];	  // double size
-        int[] positions = ti.positions;
-        System.arraycopy(positions, 0, newPositions, 0, freq);
-        ti.positions = newPositions;
-        
-        if (ti.payloads != null) {
-          // the current field stores payloads
-          Payload[] newPayloads = new Payload[freq * 2];  // grow payloads array
-          Payload[] payloads = ti.payloads;
-          System.arraycopy(payloads, 0, newPayloads, 0, payloads.length);
-          ti.payloads = newPayloads;
-        }
-      }
-      ti.positions[freq] = position;		  // add new position
-
-      if (payload != null) {
-        if (ti.payloads == null) {
-          // lazily allocate payload array
-          ti.payloads = new Payload[ti.positions.length];
-        }
-        ti.payloads[freq] = payload;
-      }
-      
-      if (offset != null) {
-        if (ti.offsets.length == freq){
-          TermVectorOffsetInfo [] newOffsets = new TermVectorOffsetInfo[freq*2];
-          TermVectorOffsetInfo [] offsets = ti.offsets;
-          System.arraycopy(offsets, 0, newOffsets, 0, freq);
-          ti.offsets = newOffsets;
-        }
-        ti.offsets[freq] = offset;
-      }
-      ti.freq = freq + 1;			  // update frequency
-    } else {					  // word not seen before
-      Term term = new Term(field, text, false);
-      postingTable.put(term, new Posting(term, position, payload, offset));
-    }
-  }
-
-  private final Posting[] sortPostingTable() {
-    // copy postingTable into an array
-    Posting[] array = new Posting[postingTable.size()];
-    Enumeration postings = postingTable.elements();
-    for (int i = 0; postings.hasMoreElements(); i++)
-      array[i] = (Posting) postings.nextElement();
-
-    // sort the array
-    quickSort(array, 0, array.length - 1);
-
-    return array;
-  }
-
-  private static final void quickSort(Posting[] postings, int lo, int hi) {
-    if (lo >= hi)
-      return;
-
-    int mid = (lo + hi) >>> 1;
-
-    if (postings[lo].term.compareTo(postings[mid].term) > 0) {
-      Posting tmp = postings[lo];
-      postings[lo] = postings[mid];
-      postings[mid] = tmp;
-    }
-
-    if (postings[mid].term.compareTo(postings[hi].term) > 0) {
-      Posting tmp = postings[mid];
-      postings[mid] = postings[hi];
-      postings[hi] = tmp;
-
-      if (postings[lo].term.compareTo(postings[mid].term) > 0) {
-        Posting tmp2 = postings[lo];
-        postings[lo] = postings[mid];
-        postings[mid] = tmp2;
-      }
-    }
-
-    int left = lo + 1;
-    int right = hi - 1;
-
-    if (left >= right)
-      return;
-
-    Term partition = postings[mid].term;
-
-    for (; ;) {
-      while (postings[right].term.compareTo(partition) > 0)
-        --right;
-
-      while (left < right && postings[left].term.compareTo(partition) <= 0)
-        ++left;
-
-      if (left < right) {
-        Posting tmp = postings[left];
-        postings[left] = postings[right];
-        postings[right] = tmp;
-        --right;
-      } else {
-        break;
-      }
-    }
-
-    quickSort(postings, lo, left);
-    quickSort(postings, left + 1, hi);
-  }
-
-  private final void writePostings(Posting[] postings, String segment)
-          throws CorruptIndexException, IOException {
-    IndexOutput freq = null, prox = null;
-    TermInfosWriter tis = null;
-    TermVectorsWriter termVectorWriter = null;
-    try {
-      //open files for inverse index storage
-      freq = directory.createOutput(segment + ".frq");
-      prox = directory.createOutput(segment + ".prx");
-      tis = new TermInfosWriter(directory, segment, fieldInfos,
-                                termIndexInterval);
-      TermInfo ti = new TermInfo();
-      String currentField = null;
-      boolean currentFieldHasPayloads = false;
-      
-      for (int i = 0; i < postings.length; i++) {
-        Posting posting = postings[i];
-
-        // check to see if we switched to a new field
-        String termField = posting.term.field();
-        if (currentField != termField) {
-          // changing field - see if there is something to save
-          currentField = termField;
-          FieldInfo fi = fieldInfos.fieldInfo(currentField);
-          currentFieldHasPayloads = fi.storePayloads;
-          if (fi.storeTermVector) {
-            if (termVectorWriter == null) {
-              termVectorWriter =
-                new TermVectorsWriter(directory, segment, fieldInfos);
-              termVectorWriter.openDocument();
-            }
-            termVectorWriter.openField(currentField);
-
-          } else if (termVectorWriter != null) {
-            termVectorWriter.closeField();
-          }
-        }
-        
-        // add an entry to the dictionary with pointers to prox and freq files
-        ti.set(1, freq.getFilePointer(), prox.getFilePointer(), -1);
-        tis.add(posting.term, ti);
-
-        // add an entry to the freq file
-        int postingFreq = posting.freq;
-        if (postingFreq == 1)				  // optimize freq=1
-          freq.writeVInt(1);			  // set low bit of doc num.
-        else {
-          freq.writeVInt(0);			  // the document number
-          freq.writeVInt(postingFreq);			  // frequency in doc
-        }
-
-        int lastPosition = 0;			  // write positions
-        int[] positions = posting.positions;
-        Payload[] payloads = posting.payloads;
-        int lastPayloadLength = -1;
-        
-        
-        // The following encoding is being used for positions and payloads:
-        // Case 1: current field does not store payloads
-        //           Positions     -> <PositionDelta>^freq
-        //           PositionDelta -> VInt
-        //         The PositionDelta is the difference between the current
-        //         and the previous position
-        // Case 2: current field stores payloads
-        //           Positions     -> <PositionDelta, Payload>^freq
-        //           Payload       ->  <PayloadLength?, PayloadData>
-        //           PositionDelta -> VInt
-        //           PayloadLength -> VInt
-        //           PayloadData   -> byte^PayloadLength
-        //         In this case PositionDelta/2 is the difference between
-        //         the current and the previous position. If PositionDelta
-        //         is odd, then a PayloadLength encoded as VInt follows,
-        //         if PositionDelta is even, then it is assumed that the
-        //         length of the current Payload equals the length of the
-        //         previous Payload.        
-        for (int j = 0; j < postingFreq; j++) {		  // use delta-encoding
-          int position = positions[j];
-          int delta = position - lastPosition;
-          if (currentFieldHasPayloads) {
-            int payloadLength = 0;
-            Payload payload = null;
-            if (payloads != null) {
-              payload = payloads[j];
-              if (payload != null) {
-                payloadLength = payload.length;
-              }
-            }
-            if (payloadLength == lastPayloadLength) {
-            	// the length of the current payload equals the length
-            	// of the previous one. So we do not have to store the length
-            	// again and we only shift the position delta by one bit
-              prox.writeVInt(delta * 2);
-            } else {
-            	// the length of the current payload is different from the
-            	// previous one. We shift the position delta, set the lowest
-            	// bit and store the current payload length as VInt.
-              prox.writeVInt(delta * 2 + 1);
-              prox.writeVInt(payloadLength);
-              lastPayloadLength = payloadLength;
-            }
-            if (payloadLength > 0) {
-            	// write current payload
-              prox.writeBytes(payload.data, payload.offset, payload.length);
-            }
-          } else {
-          	// field does not store payloads, just write position delta as VInt
-            prox.writeVInt(delta);
-          }
-          lastPosition = position;
-        }
-        if (termVectorWriter != null && termVectorWriter.isFieldOpen()) {
-            termVectorWriter.addTerm(posting.term.text(), postingFreq, posting.positions, posting.offsets);
-        }
-      }
-      if (termVectorWriter != null)
-        termVectorWriter.closeDocument();
-    } finally {
-      // make an effort to close all streams we can but remember and re-throw
-      // the first exception encountered in this process
-      IOException keep = null;
-      if (freq != null) try { freq.close(); } catch (IOException e) { if (keep == null) keep = e; }
-      if (prox != null) try { prox.close(); } catch (IOException e) { if (keep == null) keep = e; }
-      if (tis  != null) try {  tis.close(); } catch (IOException e) { if (keep == null) keep = e; }
-      if (termVectorWriter  != null) try {  termVectorWriter.close(); } catch (IOException e) { if (keep == null) keep = e; }
-      if (keep != null) throw (IOException) keep.fillInStackTrace();
-    }
-  }
-
-  private final void writeNorms(String segment) throws IOException { 
-    for(int n = 0; n < fieldInfos.size(); n++){
-      FieldInfo fi = fieldInfos.fieldInfo(n);
-      if(fi.isIndexed && !fi.omitNorms){
-        float norm = fieldBoosts[n] * similarity.lengthNorm(fi.name, fieldLengths[n]);
-        IndexOutput norms = directory.createOutput(segment + ".f" + n);
-        try {
-          norms.writeByte(Similarity.encodeNorm(norm));
-        } finally {
-          norms.close();
-        }
-      }
-    }
-  }
-  
-  /** If non-null, a message will be printed to this if maxFieldLength is reached.
-   */
-  void setInfoStream(PrintStream infoStream) {
-    this.infoStream = infoStream;
-  }
-
-  int getNumFields() {
-    return fieldInfos.size();
-  }
-}
-
-final class Posting {				  // info about a Term in a doc
-  Term term;					  // the Term
-  int freq;					  // its frequency in doc
-  int[] positions;				  // positions it occurs at
-  Payload[] payloads; // the payloads of the terms
-  TermVectorOffsetInfo [] offsets;
-  
-
-  Posting(Term t, int position, Payload payload, TermVectorOffsetInfo offset) {
-    term = t;
-    freq = 1;
-    positions = new int[1];
-    positions[0] = position;
-    
-    if (payload != null) {
-      payloads = new Payload[1];
-      payloads[0] = payload;
-    } else 
-      payloads = null;    
-    
-
-    if(offset != null){
-      offsets = new TermVectorOffsetInfo[1];
-      offsets[0] = offset;
-    } else
-      offsets = null;
-  }
-}
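
The comment block inside the removed writePostings documents how the .prx stream interleaves position deltas with payloads (Case 1 / Case 2 above). The stand-alone sketch below only restates that encoding; the local writeVInt helper and the ByteArrayOutputStream are stand-ins for Lucene's IndexOutput, used so the example runs on its own, and are not part of this commit.

import java.io.ByteArrayOutputStream;

// Illustration of the prox encoding described in the removed writePostings.
// ByteArrayOutputStream and writeVInt stand in for Lucene's IndexOutput.
class ProxEncodingSketch {

  // VInt: seven data bits per byte, high bit set on every byte but the last.
  static void writeVInt(ByteArrayOutputStream out, int i) {
    while ((i & ~0x7F) != 0) {
      out.write((i & 0x7F) | 0x80);
      i >>>= 7;
    }
    out.write(i);
  }

  // Encodes the positions (and optional payloads) of one posting in one document.
  static byte[] encode(int[] positions, byte[][] payloads, boolean fieldStoresPayloads) {
    ByteArrayOutputStream prox = new ByteArrayOutputStream();
    int lastPosition = 0;
    int lastPayloadLength = -1;
    for (int j = 0; j < positions.length; j++) {
      int delta = positions[j] - lastPosition;      // positions are delta-encoded
      if (!fieldStoresPayloads) {
        writeVInt(prox, delta);                     // Case 1: plain PositionDelta
      } else {
        byte[] payload = (payloads == null) ? null : payloads[j];
        int payloadLength = (payload == null) ? 0 : payload.length;
        if (payloadLength == lastPayloadLength) {
          writeVInt(prox, delta * 2);               // even delta: reuse previous PayloadLength
        } else {
          writeVInt(prox, delta * 2 + 1);           // odd delta: PayloadLength follows
          writeVInt(prox, payloadLength);
          lastPayloadLength = payloadLength;
        }
        if (payloadLength > 0) {
          prox.write(payload, 0, payloadLength);    // raw payload bytes
        }
      }
      lastPosition = positions[j];
    }
    return prox.toByteArray();
  }

  public static void main(String[] args) {
    // Positions 3, 7, 12 and no payloads encode as the deltas 3, 4, 5, one byte each.
    System.out.println(encode(new int[] {3, 7, 12}, null, false).length);  // prints 3
  }
}

The odd/even trick is what lets Case 2 skip re-writing the payload length whenever consecutive payloads have the same size.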

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/DocHelper.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/DocHelper.java?view=diff&rev=560378&r1=560377&r2=560378
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/DocHelper.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/DocHelper.java Fri Jul 27 13:50:19 2007
@@ -207,55 +207,38 @@
   }                         
 
   /**
-   * Writes the document to the directory using a segment named "test"
+   * Writes the document to the directory using a segment
+   * named "test"; returns the SegmentInfo describing the new
+   * segment 
    * @param dir
    * @param doc
    * @throws IOException
    */ 
-  public static void writeDoc(Directory dir, Document doc) throws IOException
+  public static SegmentInfo writeDoc(Directory dir, Document doc) throws IOException
   {
-    writeDoc(dir, "test", doc);
+    return writeDoc(dir, new WhitespaceAnalyzer(), Similarity.getDefault(), doc);
   }
 
   /**
-   * Writes the document to the directory in the given segment
-   * @param dir
-   * @param segment
-   * @param doc
-   * @throws IOException
-   */ 
-  public static void writeDoc(Directory dir, String segment, Document doc) throws IOException
-  {
-    Similarity similarity = Similarity.getDefault();
-    writeDoc(dir, new WhitespaceAnalyzer(), similarity, segment, doc);
-  }
-
-  /**
-   * Writes the document to the directory segment named "test" using the specified analyzer and similarity
-   * @param dir
-   * @param analyzer
-   * @param similarity
-   * @param doc
-   * @throws IOException
-   */ 
-  public static void writeDoc(Directory dir, Analyzer analyzer, Similarity similarity, Document doc) throws IOException
-  {
-    writeDoc(dir, analyzer, similarity, "test", doc);
-  }
-
-  /**
-   * Writes the document to the directory segment using the analyzer and the similarity score
+   * Writes the document to the directory using the analyzer
+   * and the similarity score; returns the SegmentInfo
+   * describing the new segment
    * @param dir
    * @param analyzer
    * @param similarity
-   * @param segment
    * @param doc
    * @throws IOException
    */ 
-  public static void writeDoc(Directory dir, Analyzer analyzer, Similarity similarity, String segment, Document doc) throws IOException
+  public static SegmentInfo writeDoc(Directory dir, Analyzer analyzer, Similarity similarity, Document doc) throws IOException
   {
-    DocumentWriter writer = new DocumentWriter(dir, analyzer, similarity, 50);
-    writer.addDocument(segment, doc);
+    IndexWriter writer = new IndexWriter(dir, analyzer);
+    writer.setSimilarity(similarity);
+    //writer.setUseCompoundFile(false);
+    writer.addDocument(doc);
+    writer.flush();
+    SegmentInfo info = writer.segmentInfos.info(writer.segmentInfos.size()-1);
+    writer.close();
+    return info;
   }
 
   public static int numFields(Document doc) {
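
With DocumentWriter gone, the test changes below all follow one recipe, assembled here from the DocHelper.writeDoc and TestDoc.indexDoc hunks: add the document through IndexWriter, flush, and hand the SegmentInfo of the newest segment to SegmentReader. The sketch only assumes the constructor and flush() call used elsewhere in this commit, and it has to live in the org.apache.lucene.index package because writer.segmentInfos is package-private.

package org.apache.lucene.index;

import java.io.IOException;

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

// Condensed from the DocHelper/TestDoc hunks in this commit: write one document,
// flush it into its own segment, then open that segment directly.
class SingleSegmentSketch {
  static SegmentReader writeAndOpen(Document doc) throws IOException {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.addDocument(doc);
    writer.flush();                          // push the buffered document into a segment
    SegmentInfo info = writer.segmentInfos.info(writer.segmentInfos.size() - 1);
    writer.close();
    return SegmentReader.get(info);          // read the freshly written segment back
  }
}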

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestDoc.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestDoc.java?view=diff&rev=560378&r1=560377&r2=560378
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestDoc.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestDoc.java Fri Jul 27 13:50:19 2007
@@ -105,14 +105,16 @@
       StringWriter sw = new StringWriter();
       PrintWriter out = new PrintWriter(sw, true);
 
-      Directory directory = FSDirectory.getDirectory(indexDir, true);
-      directory.close();
+      Directory directory = FSDirectory.getDirectory(indexDir);
+      IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true);
 
-      SegmentInfo si1 = indexDoc("one", "test.txt");
+      SegmentInfo si1 = indexDoc(writer, "test.txt");
       printSegment(out, si1);
 
-      SegmentInfo si2 = indexDoc("two", "test2.txt");
+      SegmentInfo si2 = indexDoc(writer, "test2.txt");
       printSegment(out, si2);
+      writer.close();
+      directory.close();
 
       SegmentInfo siMerge = merge(si1, si2, "merge", false);
       printSegment(out, siMerge);
@@ -131,14 +133,16 @@
       sw = new StringWriter();
       out = new PrintWriter(sw, true);
 
-      directory = FSDirectory.getDirectory(indexDir, true);
-      directory.close();
+      directory = FSDirectory.getDirectory(indexDir);
+      writer = new IndexWriter(directory, new SimpleAnalyzer(), true);
 
-      si1 = indexDoc("one", "test.txt");
+      si1 = indexDoc(writer, "test.txt");
       printSegment(out, si1);
 
-      si2 = indexDoc("two", "test2.txt");
+      si2 = indexDoc(writer, "test2.txt");
       printSegment(out, si2);
+      writer.close();
+      directory.close();
 
       siMerge = merge(si1, si2, "merge", true);
       printSegment(out, siMerge);
@@ -157,21 +161,14 @@
    }
 
 
-   private SegmentInfo indexDoc(String segment, String fileName)
+   private SegmentInfo indexDoc(IndexWriter writer, String fileName)
    throws Exception
    {
-      Directory directory = FSDirectory.getDirectory(indexDir, false);
-      Analyzer analyzer = new SimpleAnalyzer();
-      DocumentWriter writer =
-         new DocumentWriter(directory, analyzer, Similarity.getDefault(), 1000);
-
       File file = new File(workDir, fileName);
       Document doc = FileDocument.Document(file);
-
-      writer.addDocument(segment, doc);
-
-      directory.close();
-      return new SegmentInfo(segment, 1, directory, false, false);
+      writer.addDocument(doc);
+      writer.flush();
+      return writer.segmentInfos.info(writer.segmentInfos.size()-1);
    }
 
 

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java?view=diff&rev=560378&r1=560377&r2=560378
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java Fri Jul 27 13:50:19 2007
@@ -32,6 +32,8 @@
 import java.io.Reader;
 import java.io.IOException;
 
+import java.util.Arrays;
+
 public class TestDocumentWriter extends TestCase {
   private RAMDirectory dir;
 
@@ -57,11 +59,13 @@
     DocHelper.setupDoc(testDoc);
     Analyzer analyzer = new WhitespaceAnalyzer();
     Similarity similarity = Similarity.getDefault();
-    DocumentWriter writer = new DocumentWriter(dir, analyzer, similarity, 50);
-    String segName = "test";
-    writer.addDocument(segName, testDoc);
+    IndexWriter writer = new IndexWriter(dir, analyzer, true);
+    writer.addDocument(testDoc);
+    writer.flush();
+    SegmentInfo info = writer.segmentInfos.info(writer.segmentInfos.size()-1);
+    writer.close();
     //After adding the document, we should be able to read it back in
-    SegmentReader reader = SegmentReader.get(new SegmentInfo(segName, 1, dir));
+    SegmentReader reader = SegmentReader.get(info);
     assertTrue(reader != null);
     Document doc = reader.document(0);
     assertTrue(doc != null);
@@ -89,14 +93,14 @@
     assertTrue(fields != null && fields.length == 1);
     assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_3_TEXT));
 
-    // test that the norm file is not present if omitNorms is true
+    // test that the norms are not present in the segment if
+    // omitNorms is true
     for (int i = 0; i < reader.fieldInfos.size(); i++) {
       FieldInfo fi = reader.fieldInfos.fieldInfo(i);
       if (fi.isIndexed) {
-        assertTrue(fi.omitNorms == !dir.fileExists(segName + ".f" + i));
+        assertTrue(fi.omitNorms == !reader.hasNorms(fi.name));
       }
     }
-
   }
 
   public void testPositionIncrementGap() throws IOException {
@@ -111,14 +115,17 @@
     };
 
     Similarity similarity = Similarity.getDefault();
-    DocumentWriter writer = new DocumentWriter(dir, analyzer, similarity, 50);
+    IndexWriter writer = new IndexWriter(dir, analyzer, true);
+
     Document doc = new Document();
     doc.add(new Field("repeated", "repeated one", Field.Store.YES, Field.Index.TOKENIZED));
     doc.add(new Field("repeated", "repeated two", Field.Store.YES, Field.Index.TOKENIZED));
 
-    String segName = "test";
-    writer.addDocument(segName, doc);
-    SegmentReader reader = SegmentReader.get(new SegmentInfo(segName, 1, dir));
+    writer.addDocument(doc);
+    writer.flush();
+    SegmentInfo info = writer.segmentInfos.info(writer.segmentInfos.size()-1);
+    writer.close();
+    SegmentReader reader = SegmentReader.get(info);
 
     TermPositions termPositions = reader.termPositions(new Term("repeated", "repeated"));
     assertTrue(termPositions.next());
@@ -130,7 +137,7 @@
   
   public void testPreAnalyzedField() throws IOException {
     Similarity similarity = Similarity.getDefault();
-    DocumentWriter writer = new DocumentWriter(dir, new SimpleAnalyzer(), similarity, 50);
+    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
     Document doc = new Document();
     
     doc.add(new Field("preanalyzed", new TokenStream() {
@@ -147,9 +154,11 @@
       
     }, TermVector.NO));
     
-    String segName = "test";
-    writer.addDocument(segName, doc);
-    SegmentReader reader = SegmentReader.get(new SegmentInfo(segName, 1, dir));
+    writer.addDocument(doc);
+    writer.flush();
+    SegmentInfo info = writer.segmentInfos.info(writer.segmentInfos.size()-1);
+    writer.close();
+    SegmentReader reader = SegmentReader.get(info);
 
     TermPositions termPositions = reader.termPositions(new Term("preanalyzed", "term1"));
     assertTrue(termPositions.next());

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestFieldsReader.java?view=diff&rev=560378&r1=560377&r2=560378
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestFieldsReader.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestFieldsReader.java Fri Jul 27 13:50:19 2007
@@ -35,6 +35,8 @@
   private Document testDoc = new Document();
   private FieldInfos fieldInfos = null;
 
+  private final static String TEST_SEGMENT_NAME = "_0";
+
   public TestFieldsReader(String s) {
     super(s);
   }
@@ -43,16 +45,16 @@
     fieldInfos = new FieldInfos();
     DocHelper.setupDoc(testDoc);
     fieldInfos.add(testDoc);
-    DocumentWriter writer = new DocumentWriter(dir, new WhitespaceAnalyzer(),
-            Similarity.getDefault(), 50);
-    assertTrue(writer != null);
-    writer.addDocument("test", testDoc);
+    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
+    writer.setUseCompoundFile(false);
+    writer.addDocument(testDoc);
+    writer.close();
   }
 
   public void test() throws IOException {
     assertTrue(dir != null);
     assertTrue(fieldInfos != null);
-    FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
+    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
     assertTrue(reader != null);
     assertTrue(reader.size() == 1);
     Document doc = reader.doc(0, null);
@@ -82,7 +84,7 @@
   public void testLazyFields() throws Exception {
     assertTrue(dir != null);
     assertTrue(fieldInfos != null);
-    FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
+    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
     assertTrue(reader != null);
     assertTrue(reader.size() == 1);
     Set loadFieldNames = new HashSet();
@@ -137,7 +139,7 @@
   public void testLazyFieldsAfterClose() throws Exception {
     assertTrue(dir != null);
     assertTrue(fieldInfos != null);
-    FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
+    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
     assertTrue(reader != null);
     assertTrue(reader.size() == 1);
     Set loadFieldNames = new HashSet();
@@ -167,7 +169,7 @@
   public void testLoadFirst() throws Exception {
     assertTrue(dir != null);
     assertTrue(fieldInfos != null);
-    FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
+    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
     assertTrue(reader != null);
     assertTrue(reader.size() == 1);
     LoadFirstFieldSelector fieldSelector = new LoadFirstFieldSelector();
@@ -200,10 +202,12 @@
     _TestUtil.rmDir(file);
     FSDirectory tmpDir = FSDirectory.getDirectory(file);
     assertTrue(tmpDir != null);
-    DocumentWriter writer = new DocumentWriter(tmpDir, new WhitespaceAnalyzer(),
-            Similarity.getDefault(), 50);
-    assertTrue(writer != null);
-    writer.addDocument("test", testDoc);
+
+    IndexWriter writer = new IndexWriter(tmpDir, new WhitespaceAnalyzer(), true);
+    writer.setUseCompoundFile(false);
+    writer.addDocument(testDoc);
+    writer.close();
+
     assertTrue(fieldInfos != null);
     FieldsReader reader;
     long lazyTime = 0;
@@ -214,7 +218,7 @@
     SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Collections.EMPTY_SET, lazyFieldNames);
 
     for (int i = 0; i < length; i++) {
-      reader = new FieldsReader(tmpDir, "test", fieldInfos);
+      reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos);
       assertTrue(reader != null);
       assertTrue(reader.size() == 1);
 
@@ -238,7 +242,7 @@
       doc = null;
       //Hmmm, are we still in cache???
       System.gc();
-      reader = new FieldsReader(tmpDir, "test", fieldInfos);
+      reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos);
       doc = reader.doc(0, fieldSelector);
       field = doc.getFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
       assertTrue("field is not lazy", field.isLazy() == true);
@@ -256,7 +260,7 @@
   }
   
   public void testLoadSize() throws IOException {
-    FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
+    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
     Document doc;
     
     doc = reader.doc(0, new FieldSelector(){

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestMultiReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestMultiReader.java?view=diff&rev=560378&r1=560377&r2=560378
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestMultiReader.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestMultiReader.java Fri Jul 27 13:50:19 2007
@@ -43,15 +43,20 @@
   protected void setUp() throws IOException {
     DocHelper.setupDoc(doc1);
     DocHelper.setupDoc(doc2);
-    DocHelper.writeDoc(dir, "seg-1", doc1);
-    DocHelper.writeDoc(dir, "seg-2", doc2);
+    SegmentInfo info1 = DocHelper.writeDoc(dir, doc1);
+    SegmentInfo info2 = DocHelper.writeDoc(dir, doc2);
     sis.write(dir);
-    reader1 = SegmentReader.get(new SegmentInfo("seg-1", 1, dir));
-    reader2 = SegmentReader.get(new SegmentInfo("seg-2", 1, dir));
+    openReaders();
+  }
+
+  private void openReaders() throws IOException {
+    sis.read(dir);
+    reader1 = SegmentReader.get(sis.info(0));
+    reader2 = SegmentReader.get(sis.info(1));
     readers[0] = reader1;
     readers[1] = reader2;
   }
-  
+
   public void test() {
     assertTrue(dir != null);
     assertTrue(reader1 != null);
@@ -88,6 +93,7 @@
     reader.commit();
     reader.close();
     sis.read(dir);
+    openReaders();
     reader = new MultiSegmentReader(dir, sis, false, readers);
     assertEquals( 2, reader.numDocs() );
 

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentMerger.java?view=diff&rev=560378&r1=560377&r2=560378
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentMerger.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentMerger.java Fri Jul 27 13:50:19 2007
@@ -32,12 +32,10 @@
   //First segment to be merged
   private Directory merge1Dir = new RAMDirectory();
   private Document doc1 = new Document();
-  private String merge1Segment = "test-1";
   private SegmentReader reader1 = null;
   //Second Segment to be merged
   private Directory merge2Dir = new RAMDirectory();
   private Document doc2 = new Document();
-  private String merge2Segment = "test-2";
   private SegmentReader reader2 = null;
   
 
@@ -47,11 +45,11 @@
 
   protected void setUp() throws IOException {
     DocHelper.setupDoc(doc1);
-    DocHelper.writeDoc(merge1Dir, merge1Segment, doc1);
+    SegmentInfo info1 = DocHelper.writeDoc(merge1Dir, doc1);
     DocHelper.setupDoc(doc2);
-    DocHelper.writeDoc(merge2Dir, merge2Segment, doc2);
-    reader1 = SegmentReader.get(new SegmentInfo(merge1Segment, 1, merge1Dir));
-    reader2 = SegmentReader.get(new SegmentInfo(merge2Segment, 1, merge2Dir));
+    SegmentInfo info2 = DocHelper.writeDoc(merge2Dir, doc2);
+    reader1 = SegmentReader.get(info1);
+    reader2 = SegmentReader.get(info2);
   }
 
   public void test() {

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentReader.java?view=diff&rev=560378&r1=560377&r2=560378
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentReader.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentReader.java Fri Jul 27 13:50:19 2007
@@ -41,8 +41,8 @@
   //TODO: Setup the reader w/ multiple documents
   protected void setUp() throws IOException {
     DocHelper.setupDoc(testDoc);
-    DocHelper.writeDoc(dir, testDoc);
-    reader = SegmentReader.get(new SegmentInfo("test", 1, dir));
+    SegmentInfo info = DocHelper.writeDoc(dir, testDoc);
+    reader = SegmentReader.get(info);
   }
 
   protected void tearDown() {
@@ -75,8 +75,8 @@
   public void testDelete() throws IOException {
     Document docToDelete = new Document();
     DocHelper.setupDoc(docToDelete);
-    DocHelper.writeDoc(dir, "seg-to-delete", docToDelete);
-    SegmentReader deleteReader = SegmentReader.get(new SegmentInfo("seg-to-delete", 1, dir));
+    SegmentInfo info = DocHelper.writeDoc(dir, docToDelete);
+    SegmentReader deleteReader = SegmentReader.get(info);
     assertTrue(deleteReader != null);
     assertTrue(deleteReader.numDocs() == 1);
     deleteReader.deleteDocument(0);

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentTermDocs.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentTermDocs.java?view=diff&rev=560378&r1=560377&r2=560378
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentTermDocs.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentTermDocs.java Fri Jul 27 13:50:19 2007
@@ -29,6 +29,7 @@
 public class TestSegmentTermDocs extends TestCase {
   private Document testDoc = new Document();
   private Directory dir = new RAMDirectory();
+  private SegmentInfo info;
 
   public TestSegmentTermDocs(String s) {
     super(s);
@@ -36,7 +37,7 @@
 
   protected void setUp() throws IOException {
     DocHelper.setupDoc(testDoc);
-    DocHelper.writeDoc(dir, testDoc);
+    info = DocHelper.writeDoc(dir, testDoc);
   }
 
 
@@ -50,7 +51,7 @@
   
   public void testTermDocs() throws IOException {
     //After adding the document, we should be able to read it back in
-    SegmentReader reader = SegmentReader.get(new SegmentInfo("test", 1, dir));
+    SegmentReader reader = SegmentReader.get(info);
     assertTrue(reader != null);
     SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
     assertTrue(segTermDocs != null);
@@ -68,7 +69,7 @@
   public void testBadSeek() throws IOException {
     {
       //After adding the document, we should be able to read it back in
-      SegmentReader reader = SegmentReader.get(new SegmentInfo("test", 1, dir));
+      SegmentReader reader = SegmentReader.get(info);
       assertTrue(reader != null);
       SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
       assertTrue(segTermDocs != null);
@@ -78,7 +79,7 @@
     }
     {
       //After adding the document, we should be able to read it back in
-      SegmentReader reader = SegmentReader.get(new SegmentInfo("test", 1, dir));
+      SegmentReader reader = SegmentReader.get(info);
       assertTrue(reader != null);
       SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
       assertTrue(segTermDocs != null);


