lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r1201371 - in /lucene/dev/branches/lucene2621: lucene/ lucene/contrib/instantiated/ lucene/src/java/org/apache/lucene/index/ lucene/src/java/org/apache/lucene/index/codecs/ solr/core/src/java/org/apache/solr/handler/admin/ solr/core/src/jav...
Date Sun, 13 Nov 2011 00:17:24 GMT
Author: mikemccand
Date: Sun Nov 13 00:17:24 2011
New Revision: 1201371

URL: http://svn.apache.org/viewvc?rev=1201371&view=rev
Log:
LUCENE-2621: flex TV api

Removed:
    lucene/dev/branches/lucene2621/lucene/contrib/instantiated/
Modified:
    lucene/dev/branches/lucene2621/lucene/build.xml
    lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/ParallelReader.java
    lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/TermVectorsWriter.java
    lucene/dev/branches/lucene2621/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
    lucene/dev/branches/lucene2621/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java

Modified: lucene/dev/branches/lucene2621/lucene/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/build.xml?rev=1201371&r1=1201370&r2=1201371&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/build.xml (original)
+++ lucene/dev/branches/lucene2621/lucene/build.xml Sun Nov 13 00:17:24 2011
@@ -252,7 +252,6 @@
 
           <packageset dir="contrib/demo/src/java"/>
           <packageset dir="contrib/highlighter/src/java"/>
-          <packageset dir="contrib/instantiated/src/java"/>
           <packageset dir="contrib/memory/src/java"/>
           <packageset dir="contrib/misc/src/java"/>
           <packageset dir="contrib/sandbox/src/java"/>
@@ -268,7 +267,6 @@
           <group title="contrib: Demo" packages="org.apache.lucene.demo*"/>
           <group title="contrib: ICU" packages="org.apache.lucene.collation*"/>
           <group title="contrib: Highlighter" packages="org.apache.lucene.search.highlight*:org.apache.lucene.search.vectorhighlight*"/>
-          <group title="contrib: Instantiated" packages="org.apache.lucene.store.instantiated*"/>
           <group title="contrib: Memory" packages="org.apache.lucene.index.memory*"/>
           <group title="contrib: Misc " packages="org.apache.lucene.misc*"/>
           <group title="contrib: Sandbox" packages="org.apache.lucene.sandbox*"/>

Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/ParallelReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/ParallelReader.java?rev=1201371&r1=1201370&r2=1201371&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/ParallelReader.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/ParallelReader.java Sun Nov 13 00:17:24 2011
@@ -132,7 +132,8 @@ public class ParallelReader extends Inde
       if (fieldToReader.get(field) == null) {
         fieldToReader.put(field, reader);
       }
-      this.fields.addField(field, reader);
+
+      this.fields.addField(field, MultiFields.getFields(reader).terms(field));
       this.perDocs.addField(field, reader);
     }
 
@@ -187,9 +188,8 @@ public class ParallelReader extends Inde
   private class ParallelFields extends Fields {
     final HashMap<String,Terms> fields = new HashMap<String,Terms>();
 
-    public void addField(String field, IndexReader r) throws IOException {
-      Fields multiFields = MultiFields.getFields(r);
-      fields.put(field, multiFields.terms(field));
+    public void addField(String fieldName, Terms terms) throws IOException {
+      fields.put(fieldName, terms);
     }
 
     @Override
@@ -370,8 +370,16 @@ public class ParallelReader extends Inde
   @Override
   public Fields getTermVectors(int docID) throws IOException {
     ensureOpen();
-    // nocommit hmmm
-    return null;
+    ParallelFields fields = new ParallelFields();
+    for (Map.Entry<String,IndexReader> ent : fieldToReader.entrySet()) {
+      String fieldName = ent.getKey();
+      Terms vector = ent.getValue().getTermVector(docID, fieldName);
+      if (vector != null) {
+        fields.addField(fieldName, vector);
+      }
+    }
+
+    return fields;
   }
 
   @Override

Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/TermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/TermVectorsWriter.java?rev=1201371&r1=1201370&r2=1201371&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/TermVectorsWriter.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/TermVectorsWriter.java Sun Nov 13 00:17:24 2011
@@ -28,6 +28,7 @@ import org.apache.lucene.index.FieldInfo
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.FieldsEnum;
 import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.util.Bits;
@@ -187,8 +188,11 @@ public abstract class TermVectorsWriter 
       // nocommit O(N^2) under here:
       // nocommit just cast to int right off....?  single
       // doc w/ > 2.1 B terms is surely crazy...?
-      // nocommit -- must null check here
-      final long numTerms = vectors.terms(fieldName).getUniqueTermCount();
+      final Terms terms = vectors.terms(fieldName);
+      if (terms == null) {
+        continue;
+      }
+      final long numTerms = terms.getUniqueTermCount();
 
       final boolean positions;
 

Modified: lucene/dev/branches/lucene2621/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java?rev=1201371&r1=1201370&r2=1201371&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java (original)
+++ lucene/dev/branches/lucene2621/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java Sun Nov 13 00:17:24 2011
@@ -29,27 +29,25 @@ import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FieldsEnum;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.FieldsEnum;
+import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.index.MultiFields;
-import org.apache.lucene.index.TermFreqVector;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.PriorityQueue;
-import org.apache.lucene.util.BytesRef;
 import org.apache.solr.analysis.CharFilterFactory;
 import org.apache.solr.analysis.TokenFilterFactory;
 import org.apache.solr.analysis.TokenizerChain;
@@ -58,9 +56,9 @@ import org.apache.solr.common.SolrExcept
 import org.apache.solr.common.luke.FieldFlag;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.Base64;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
-import org.apache.solr.common.util.Base64;
 import org.apache.solr.handler.RequestHandlerBase;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.response.SolrQueryResponse;
@@ -68,6 +66,8 @@ import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.SolrIndexSearcher;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * This handler exposes the internal lucene index.  It is inspired by and 
@@ -266,11 +266,14 @@ public class LukeRequestHandler extends 
       // If we have a term vector, return that
       if( field.fieldType().storeTermVectors() ) {
         try {
-          TermFreqVector v = reader.getTermFreqVector( docId, field.name() );
+          Terms v = reader.getTermVector( docId, field.name() );
           if( v != null ) {
             SimpleOrderedMap<Integer> tfv = new SimpleOrderedMap<Integer>();
-            for( int i=0; i<v.size(); i++ ) {
-              tfv.add( v.getTerms()[i].utf8ToChars(spare).toString(), v.getTermFrequencies()[i] );
+            final TermsEnum termsEnum = v.iterator();
+            BytesRef text;
+            while((text = termsEnum.next()) != null) {
+              final int freq = (int) termsEnum.totalTermFreq();
+              tfv.add( text.utf8ToChars(spare).toString(), freq );
             }
             f.add( "termVector", tfv );
           }

Modified: lucene/dev/branches/lucene2621/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java?rev=1201371&r1=1201370&r2=1201371&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java (original)
+++ lucene/dev/branches/lucene2621/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java Sun Nov 13 00:17:24 2011
@@ -8,13 +8,15 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FieldsEnum;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultiFields;
-import org.apache.lucene.index.StoredFieldVisitor;
 import org.apache.lucene.index.StoredFieldVisitor.Status;
-import org.apache.lucene.index.TermVectorMapper;
-import org.apache.lucene.index.TermVectorOffsetInfo;
+import org.apache.lucene.index.StoredFieldVisitor;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.IndexInput;
@@ -225,12 +227,9 @@ public class TermVectorComponent extends
       }
     };
 
-    TVMapper mapper = new TVMapper(reader);
-    mapper.fieldOptions = allFields; //this will only stay set if fieldOptions.isEmpty() (in other words, only if the user didn't set any fields)
     while (iter.hasNext()) {
       Integer docId = iter.next();
       NamedList<Object> docNL = new NamedList<Object>();
-      mapper.docNL = docNL;
       termVectors.add("doc-" + docId, docNL);
 
       if (keyField != null) {
@@ -245,12 +244,86 @@ public class TermVectorComponent extends
       }
       if (!fieldOptions.isEmpty()) {
         for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
-          mapper.fieldOptions = entry.getValue();
-          reader.getTermFreqVector(docId, entry.getKey(), mapper);
+          final String field = entry.getKey();
+          final Terms vector = reader.getTermVector(docId, field);
+          if (vector != null) {
+            mapOneVector(docNL, entry.getValue(), reader, docId, vector.iterator(), field);
+          }
         }
       } else {
-        //deal with all fields by using the allFieldMapper
-        reader.getTermFreqVector(docId, mapper);
+        // extract all fields
+        final Fields vectors = reader.getTermVectors(docId);
+        final FieldsEnum fieldsEnum = vectors.iterator();
+        String field;
+        while((field = fieldsEnum.next()) != null) {
+          mapOneVector(docNL, allFields, reader, docId, fieldsEnum.terms(), field);
+        }
+      }
+    }
+  }
+
+  private void mapOneVector(NamedList<Object> docNL, FieldOptions fieldOptions, IndexReader reader, int docID, TermsEnum termsEnum, String field) throws IOException {
+    NamedList<Object> fieldNL = new NamedList<Object>();
+    docNL.add(field, fieldNL);
+
+    BytesRef text;
+    DocsAndPositionsEnum dpEnum = null;
+    while((text = termsEnum.next()) != null) {
+      String term = text.utf8ToString();
+      NamedList<Object> termInfo = new NamedList<Object>();
+      fieldNL.add(term, termInfo);
+      final int freq = (int) termsEnum.totalTermFreq();
+      if (fieldOptions.termFreq == true) {
+        termInfo.add("tf", freq);
+      }
+
+      dpEnum = termsEnum.docsAndPositions(null, dpEnum);
+
+      boolean usePositions = false;
+      boolean useOffsets = false;
+      OffsetAttribute offsetAtt = null;
+      if (dpEnum != null) {
+        dpEnum.nextDoc();
+        usePositions = fieldOptions.positions;
+        if (fieldOptions.offsets && dpEnum.attributes().hasAttribute(OffsetAttribute.class)) {
+          useOffsets = true;
+          offsetAtt = dpEnum.attributes().getAttribute(OffsetAttribute.class);
+        }
+      }
+
+      NamedList<Number> theOffsets = null;
+      if (useOffsets) {
+        theOffsets = new NamedList<Number>();
+        termInfo.add("offsets", theOffsets);
+      }
+
+      NamedList<Integer> positionsNL = null;
+
+      if (usePositions || theOffsets != null) {
+        for (int i = 0; i < freq; i++) {
+          final int pos = dpEnum.nextPosition();
+          if (usePositions && pos >= 0) {
+            if (positionsNL == null) {
+              positionsNL = new NamedList<Integer>();
+              termInfo.add("positions", positionsNL);
+            }
+            positionsNL.add("position", pos);
+          }
+
+          if (theOffsets != null) {
+            theOffsets.add("start", offsetAtt.startOffset());
+            theOffsets.add("end", offsetAtt.endOffset());
+          }
+        }
+      }
+
+      if (fieldOptions.docFreq) {
+        termInfo.add("df", getDocFreq(reader, field, text));
+      }
+
+      if (fieldOptions.tfIdf) {
+        double tfIdfVal = ((double) freq) / getDocFreq(reader, field, text);
+        termInfo.add("tf-idf", tfIdfVal);
       }
     }
   }
@@ -310,90 +383,20 @@ public class TermVectorComponent extends
     return result;
   }
 
-  private static class TVMapper extends TermVectorMapper {
-    private IndexReader reader;
-    private NamedList<Object> docNL;
-
-    //needs to be set for each new field
-    FieldOptions fieldOptions;
-
-    //internal vars not passed in by construction
-    private boolean useOffsets, usePositions;
-    //private Map<String, Integer> idfCache;
-    private NamedList<Object> fieldNL;
-    private String field;
-
-
-    public TVMapper(IndexReader reader) {
-      this.reader = reader;
-    }
-
-    @Override
-    public void map(BytesRef term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
-      NamedList<Object> termInfo = new NamedList<Object>();
-      fieldNL.add(term.utf8ToString(), termInfo);
-      if (fieldOptions.termFreq == true) {
-        termInfo.add("tf", frequency);
-      }
-      if (useOffsets) {
-        NamedList<Number> theOffsets = new NamedList<Number>();
-        termInfo.add("offsets", theOffsets);
-        for (int i = 0; i < offsets.length; i++) {
-          TermVectorOffsetInfo offset = offsets[i];
-          theOffsets.add("start", offset.getStartOffset());
-          theOffsets.add("end", offset.getEndOffset());
+  private static int getDocFreq(IndexReader reader, String field, BytesRef term) {
+    int result = 1;
+    try {
+      Terms terms = MultiFields.getTerms(reader, field);
+      if (terms != null) {
+        TermsEnum termsEnum = terms.iterator();
+        if (termsEnum.seekExact(term, true)) {
+          result = termsEnum.docFreq();
         }
       }
-      if (usePositions) {
-        NamedList<Integer> positionsNL = new NamedList<Integer>();
-        for (int i = 0; i < positions.length; i++) {
-          positionsNL.add("position", positions[i]);
-        }
-        termInfo.add("positions", positionsNL);
-      }
-      if (fieldOptions.docFreq) {
-        termInfo.add("df", getDocFreq(term));
-      }
-      if (fieldOptions.tfIdf) {
-        double tfIdfVal = ((double) frequency) / getDocFreq(term);
-        termInfo.add("tf-idf", tfIdfVal);
-      }
-    }
-
-    private int getDocFreq(BytesRef term) {
-      int result = 1;
-      try {
-        Terms terms = MultiFields.getTerms(reader, field);
-        if (terms != null) {
-          TermsEnum termsEnum = terms.iterator();
-          if (termsEnum.seekExact(term, true)) {
-            result = termsEnum.docFreq();
-          }
-        }
-      } catch (IOException e) {
-        throw new RuntimeException(e);
-      }
-      return result;
-    }
-
-    @Override
-    public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) {
-      this.field = field;
-      useOffsets = storeOffsets && fieldOptions.offsets;
-      usePositions = storePositions && fieldOptions.positions;
-      fieldNL = new NamedList<Object>();
-      docNL.add(field, fieldNL);
-    }
-
-    @Override
-    public boolean isIgnoringPositions() {
-      return !fieldOptions.positions;  // if we are not interested in positions, then return true telling Lucene to skip loading them
-    }
-
-    @Override
-    public boolean isIgnoringOffsets() {
-      return !fieldOptions.offsets;  //  if we are not interested in offsets, then return true telling Lucene to skip loading them
+    } catch (IOException e) {
+      throw new RuntimeException(e);
     }
+    return result;
   }
 
   @Override



Mime
View raw message