Return-Path:
Delivered-To: apmail-lucene-java-commits-archive@www.apache.org
Received: (qmail 5816 invoked from network); 6 Apr 2010 19:20:16 -0000
Received: from unknown (HELO mail.apache.org) (140.211.11.3)
by 140.211.11.9 with SMTP; 6 Apr 2010 19:20:16 -0000
Received: (qmail 49240 invoked by uid 500); 6 Apr 2010 19:20:15 -0000
Delivered-To: apmail-lucene-java-commits-archive@lucene.apache.org
Received: (qmail 49210 invoked by uid 500); 6 Apr 2010 19:20:15 -0000
Mailing-List: contact java-commits-help@lucene.apache.org; run by ezmlm
Precedence: bulk
List-Help:
List-Unsubscribe:
List-Post:
List-Id:
Reply-To: java-dev@lucene.apache.org
Delivered-To: mailing list java-commits@lucene.apache.org
Received: (qmail 49203 invoked by uid 99); 6 Apr 2010 19:20:15 -0000
Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230)
by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 06 Apr 2010 19:20:15 +0000
X-ASF-Spam-Status: No, hits=-2000.0 required=10.0
tests=ALL_TRUSTED
X-Spam-Check-By: apache.org
Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4)
by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 06 Apr 2010 19:20:03 +0000
Received: by eris.apache.org (Postfix, from userid 65534)
id 8C13F23889DE; Tue, 6 Apr 2010 19:19:40 +0000 (UTC)
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Subject: svn commit: r931278 [3/10] - in /lucene/dev/trunk: lucene/
lucene/backwards/src/ lucene/backwards/src/java/org/apache/lucene/index/
lucene/backwards/src/java/org/apache/lucene/index/codecs/
lucene/backwards/src/java/org/apache/lucene/search/ lucene/bac...
Date: Tue, 06 Apr 2010 19:19:36 -0000
To: java-commits@lucene.apache.org
From: uschindler@apache.org
X-Mailer: svnmailer-1.0.8
Message-Id: <20100406191940.8C13F23889DE@eris.apache.org>
X-Virus-Checked: Checked by ClamAV on apache.org
Modified: lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java (original)
+++ lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java Tue Apr 6 19:19:27 2010
@@ -76,13 +76,9 @@ public class TestFieldNormModifier exten
writer.close();
}
- public void testMissingField() {
+ public void testMissingField() throws Exception {
FieldNormModifier fnm = new FieldNormModifier(store, s);
- try {
- fnm.reSetNorms("nobodyherebutuschickens");
- } catch (Exception e) {
- assertNull("caught something", e);
- }
+ fnm.reSetNorms("nobodyherebutuschickens");
}
public void testFieldWithNoNorm() throws Exception {
@@ -97,11 +93,7 @@ public class TestFieldNormModifier exten
r.close();
FieldNormModifier fnm = new FieldNormModifier(store, s);
- try {
- fnm.reSetNorms("nonorm");
- } catch (Exception e) {
- assertNull("caught something", e);
- }
+ fnm.reSetNorms("nonorm");
// nothing should have changed
r = IndexReader.open(store, false);
Modified: lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java (original)
+++ lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java Tue Apr 6 19:19:27 2010
@@ -18,10 +18,13 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util.Bits;
public class DuplicateFilter extends Filter
{
@@ -79,88 +82,87 @@ public class DuplicateFilter extends Fil
}
}
- private OpenBitSet correctBits(IndexReader reader) throws IOException
- {
-
- OpenBitSet bits=new OpenBitSet(reader.maxDoc()); //assume all are INvalid
- Term startTerm=new Term(fieldName);
- TermEnum te = reader.terms(startTerm);
- if(te!=null)
- {
- Term currTerm=te.term();
- while((currTerm!=null)&&(currTerm.field()==startTerm.field())) //term fieldnames are interned
- {
- int lastDoc=-1;
- //set non duplicates
- TermDocs td = reader.termDocs(currTerm);
- if(td.next())
- {
- if(keepMode==KM_USE_FIRST_OCCURRENCE)
- {
- bits.set(td.doc());
- }
- else
- {
- do
- {
- lastDoc=td.doc();
- }while(td.next());
- bits.set(lastDoc);
- }
- }
- if(!te.next())
- {
- break;
- }
- currTerm=te.term();
- }
- }
- return bits;
- }
+ private OpenBitSet correctBits(IndexReader reader) throws IOException {
+ OpenBitSet bits = new OpenBitSet(reader.maxDoc()); //assume all are INvalid
+ final Bits delDocs = MultiFields.getDeletedDocs(reader);
+ Terms terms = reader.fields().terms(fieldName);
+ if (terms != null) {
+ TermsEnum termsEnum = terms.iterator();
+ DocsEnum docs = null;
+ while(true) {
+ BytesRef currTerm = termsEnum.next();
+ if (currTerm == null) {
+ break;
+ } else {
+ docs = termsEnum.docs(delDocs, docs);
+ int doc = docs.nextDoc();
+ if (doc != docs.NO_MORE_DOCS) {
+ if (keepMode == KM_USE_FIRST_OCCURRENCE) {
+ bits.set(doc);
+ } else {
+ int lastDoc = doc;
+ while (true) {
+ lastDoc = doc;
+ doc = docs.nextDoc();
+ if (doc == docs.NO_MORE_DOCS) {
+ break;
+ }
+ }
+ bits.set(lastDoc);
+ }
+ }
+ }
+ }
+ }
+ return bits;
+ }
private OpenBitSet fastBits(IndexReader reader) throws IOException
- {
+ {
OpenBitSet bits=new OpenBitSet(reader.maxDoc());
- bits.set(0,reader.maxDoc()); //assume all are valid
- Term startTerm=new Term(fieldName);
- TermEnum te = reader.terms(startTerm);
- if(te!=null)
- {
- Term currTerm=te.term();
-
- while((currTerm!=null)&&(currTerm.field()==startTerm.field())) //term fieldnames are interned
- {
- if(te.docFreq()>1)
- {
- int lastDoc=-1;
- //unset potential duplicates
- TermDocs td = reader.termDocs(currTerm);
- td.next();
- if(keepMode==KM_USE_FIRST_OCCURRENCE)
- {
- td.next();
- }
- do
- {
- lastDoc=td.doc();
- bits.clear(lastDoc);
- }while(td.next());
- if(keepMode==KM_USE_LAST_OCCURRENCE)
- {
- //restore the last bit
- bits.set(lastDoc);
- }
- }
- if(!te.next())
- {
- break;
- }
- currTerm=te.term();
- }
- }
- return bits;
- }
+ bits.set(0,reader.maxDoc()); //assume all are valid
+ final Bits delDocs = MultiFields.getDeletedDocs(reader);
+ Terms terms = reader.fields().terms(fieldName);
+ if (terms != null) {
+ TermsEnum termsEnum = terms.iterator();
+ DocsEnum docs = null;
+ while(true) {
+ BytesRef currTerm = termsEnum.next();
+ if (currTerm == null) {
+ break;
+ } else {
+ if (termsEnum.docFreq() > 1) {
+ // unset potential duplicates
+ docs = termsEnum.docs(delDocs, docs);
+ int doc = docs.nextDoc();
+ if (doc != docs.NO_MORE_DOCS) {
+ if (keepMode == KM_USE_FIRST_OCCURRENCE) {
+ doc = docs.nextDoc();
+ }
+ }
+
+ int lastDoc = -1;
+ while (true) {
+ lastDoc = doc;
+ bits.clear(lastDoc);
+ doc = docs.nextDoc();
+ if (doc == docs.NO_MORE_DOCS) {
+ break;
+ }
+ }
+
+ if (keepMode==KM_USE_LAST_OCCURRENCE) {
+ // restore the last bit
+ bits.set(lastDoc);
+ }
+ }
+ }
+ }
+ }
+
+ return bits;
+ }
public String getFieldName()
{
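
For context: the rewritten correctBits/fastBits above follow the flex term/postings iteration pattern (MultiFields + TermsEnum + DocsEnum) that this commit migrates to, and they pass the previous DocsEnum back into termsEnum.docs() so it can be reused across terms. A minimal standalone sketch of that pattern, using only calls that appear in the hunks above; the class and method names of the sketch itself are hypothetical:

import java.io.IOException;

import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

public class FlexIterationSketch {
  public static void dumpPostings(IndexReader reader, String field) throws IOException {
    final Bits delDocs = MultiFields.getDeletedDocs(reader);  // postings enum skips deleted docs
    Terms terms = MultiFields.getTerms(reader, field);
    if (terms == null) {
      return;                                                 // field has no indexed terms
    }
    TermsEnum termsEnum = terms.iterator();
    DocsEnum docs = null;                                      // reused across terms, as in the patch
    BytesRef term;
    while ((term = termsEnum.next()) != null) {                // enumerate every term of the field
      docs = termsEnum.docs(delDocs, docs);                    // postings for the current term
      int doc;
      while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        System.out.println(term.utf8ToString() + " -> doc " + doc);
      }
    }
  }
}
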
Modified: lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java (original)
+++ lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java Tue Apr 6 19:19:27 2010
@@ -29,7 +29,7 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PriorityQueue;
/**
@@ -172,8 +172,8 @@ public class FuzzyLikeThisQuery extends
* Adds user input for "fuzzification"
* @param queryString The string which will be parsed by the analyzer and for which fuzzy variants will be parsed
* @param fieldName
- * @param minSimilarity The minimum similarity of the term variants (see FuzzyTermEnum)
- * @param prefixLength Length of required common prefix on variant terms (see FuzzyTermEnum)
+ * @param minSimilarity The minimum similarity of the term variants (see FuzzyTermsEnum)
+ * @param prefixLength Length of required common prefix on variant terms (see FuzzyTermsEnum)
*/
public void addTerms(String queryString, String fieldName,float minSimilarity, int prefixLength)
{
@@ -195,48 +195,44 @@ public class FuzzyLikeThisQuery extends
String term = termAtt.term();
if(!processedTerms.contains(term))
{
- processedTerms.add(term);
- ScoreTermQueue variantsQ=new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term
- float minScore=0;
- Term startTerm=internSavingTemplateTerm.createTerm(term);
- FuzzyTermEnum fe=new FuzzyTermEnum(reader,startTerm,f.minSimilarity,f.prefixLength);
- TermEnum origEnum = reader.terms(startTerm);
- int df=0;
- if(startTerm.equals(origEnum.term()))
- {
- df=origEnum.docFreq(); //store the df so all variants use same idf
- }
- int numVariants=0;
- int totalVariantDocFreqs=0;
- do
- {
- Term possibleMatch=fe.term();
- if(possibleMatch!=null)
- {
- numVariants++;
- totalVariantDocFreqs+=fe.docFreq();
- float score=fe.difference();
- if(variantsQ.size() < MAX_VARIANTS_PER_TERM || score > minScore){
- ScoreTerm st=new ScoreTerm(possibleMatch,score,startTerm);
- variantsQ.insertWithOverflow(st);
- minScore = variantsQ.top().score; // maintain minScore
- }
+ processedTerms.add(term);
+ ScoreTermQueue variantsQ=new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term
+ float minScore=0;
+ Term startTerm=internSavingTemplateTerm.createTerm(term);
+ FuzzyTermsEnum fe = new FuzzyTermsEnum(reader, startTerm, f.minSimilarity, f.prefixLength);
+ //store the df so all variants use same idf
+ int df = reader.docFreq(startTerm);
+ int numVariants=0;
+ int totalVariantDocFreqs=0;
+ BytesRef possibleMatch;
+ MultiTermQuery.BoostAttribute boostAtt =
+ fe.attributes().addAttribute(MultiTermQuery.BoostAttribute.class);
+ while ((possibleMatch = fe.next()) != null) {
+ if (possibleMatch!=null) {
+ numVariants++;
+ totalVariantDocFreqs+=fe.docFreq();
+ float score=boostAtt.getBoost();
+ if (variantsQ.size() < MAX_VARIANTS_PER_TERM || score > minScore){
+ ScoreTerm st=new ScoreTerm(new Term(startTerm.field(), possibleMatch.utf8ToString()),score,startTerm);
+ variantsQ.insertWithOverflow(st);
+ minScore = variantsQ.top().score; // maintain minScore
+ }
+ }
}
- }
- while(fe.next());
- if(numVariants>0)
- {
- int avgDf=totalVariantDocFreqs/numVariants;
- if(df==0)//no direct match we can use as df for all variants
+
+ if(numVariants>0)
+ {
+ int avgDf=totalVariantDocFreqs/numVariants;
+ if(df==0)//no direct match we can use as df for all variants
{
df=avgDf; //use avg df of all variants
}
- // take the top variants (scored by edit distance) and reset the score
- // to include an IDF factor then add to the global queue for ranking
- // overall top query terms
- int size = variantsQ.size();
- for(int i = 0; i < size; i++)
+ // take the top variants (scored by edit distance) and reset the score
+ // to include an IDF factor then add to the global queue for ranking
+ // overall top query terms
+ int size = variantsQ.size();
+ for(int i = 0; i < size; i++)
{
ScoreTerm st = variantsQ.pop();
st.score=(st.score*st.score)*sim.idf(df,corpusNumDocs);
Modified: lucene/dev/trunk/lucene/contrib/remote/src/test/org/apache/lucene/search/TestRemoteSort.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/remote/src/test/org/apache/lucene/search/TestRemoteSort.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/remote/src/test/org/apache/lucene/search/TestRemoteSort.java (original)
+++ lucene/dev/trunk/lucene/contrib/remote/src/test/org/apache/lucene/search/TestRemoteSort.java Tue Apr 6 19:19:27 2010
@@ -38,6 +38,7 @@ import org.apache.lucene.index.IndexWrit
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.Term;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
@@ -219,8 +220,8 @@ public class TestRemoteSort extends Luce
@Override
public void setNextReader(IndexReader reader, int docBase) throws IOException {
docValues = FieldCache.DEFAULT.getInts(reader, "parser", new FieldCache.IntParser() {
- public final int parseInt(final String val) {
- return (val.charAt(0)-'A') * 123456;
+ public final int parseInt(BytesRef termRef) {
+ return (termRef.utf8ToString().charAt(0)-'A') * 123456;
}
});
}
@@ -245,6 +246,29 @@ public class TestRemoteSort extends Luce
runMultiSorts(multi, true); // this runs on the full index
}
+ // test custom search when remote
+ /* rewrite with new API
+ public void testRemoteCustomSort() throws Exception {
+ Searchable searcher = getRemote();
+ MultiSearcher multi = new MultiSearcher (new Searchable[] { searcher });
+ sort.setSort (new SortField ("custom", SampleComparable.getComparatorSource()));
+ assertMatches (multi, queryX, sort, "CAIEG");
+ sort.setSort (new SortField ("custom", SampleComparable.getComparatorSource(), true));
+ assertMatches (multi, queryY, sort, "HJDBF");
+
+ assertSaneFieldCaches(getName() + " ComparatorSource");
+ FieldCache.DEFAULT.purgeAllCaches();
+
+ SortComparator custom = SampleComparable.getComparator();
+ sort.setSort (new SortField ("custom", custom));
+ assertMatches (multi, queryX, sort, "CAIEG");
+ sort.setSort (new SortField ("custom", custom, true));
+ assertMatches (multi, queryY, sort, "HJDBF");
+
+ assertSaneFieldCaches(getName() + " Comparator");
+ FieldCache.DEFAULT.purgeAllCaches();
+ }*/
+
// test that the relevancy scores are the same even if
// hits are sorted
public void testNormalizedScores() throws Exception {
@@ -294,7 +318,7 @@ public class TestRemoteSort extends Luce
assertSameValues (scoresY, getScores (remote.search (queryY, null, 1000, sort).scoreDocs, remote));
assertSameValues (scoresA, getScores (remote.search (queryA, null, 1000, sort).scoreDocs, remote));
- sort.setSort (new SortField("float", SortField.FLOAT), new SortField("string", SortField.STRING));
+ sort.setSort (new SortField("float", SortField.FLOAT));
assertSameValues (scoresX, getScores (remote.search (queryX, null, 1000, sort).scoreDocs, remote));
assertSameValues (scoresY, getScores (remote.search (queryY, null, 1000, sort).scoreDocs, remote));
assertSameValues (scoresA, getScores (remote.search (queryA, null, 1000, sort).scoreDocs, remote));
@@ -314,6 +338,10 @@ public class TestRemoteSort extends Luce
expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC";
assertMatches(multi, queryA, sort, expected);
+ sort.setSort(new SortField ("int", SortField.INT));
+ expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC";
+ assertMatches(multi, queryA, sort, expected);
+
sort.setSort(new SortField ("float", SortField.FLOAT), SortField.FIELD_DOC);
assertMatches(multi, queryA, sort, "GDHJCIEFAB");
Modified: lucene/dev/trunk/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java (original)
+++ lucene/dev/trunk/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java Tue Apr 6 19:19:27 2010
@@ -19,12 +19,15 @@ package org.apache.lucene.spatial.tier;
import java.io.IOException;
import java.util.List;
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.NumericUtils;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.OpenBitSet;
/**
@@ -44,22 +47,41 @@ public class CartesianShapeFilter extend
@Override
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
- final OpenBitSet bits = new OpenBitSet(reader.maxDoc());
- final TermDocs termDocs = reader.termDocs();
+ final Bits delDocs = MultiFields.getDeletedDocs(reader);
    final List<Double> area = shape.getArea();
- int sz = area.size();
+ final int sz = area.size();
- final Term term = new Term(fieldName);
// iterate through each boxid
- for (int i =0; i< sz; i++) {
- double boxId = area.get(i).doubleValue();
- termDocs.seek(term.createTerm(NumericUtils.doubleToPrefixCoded(boxId)));
- // iterate through all documents
- // which have this boxId
- while (termDocs.next()) {
- bits.fastSet(termDocs.doc());
+ final BytesRef bytesRef = new BytesRef(NumericUtils.BUF_SIZE_LONG);
+ if (sz == 1) {
+ double boxId = area.get(0).doubleValue();
+ NumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(boxId), 0, bytesRef);
+ return new DocIdSet() {
+ @Override
+ public DocIdSetIterator iterator() throws IOException {
+ return MultiFields.getTermDocsEnum(reader, delDocs, fieldName, bytesRef);
+ }
+
+ @Override
+ public boolean isCacheable() {
+ return false;
+ }
+ };
+ } else {
+ final OpenBitSet bits = new OpenBitSet(reader.maxDoc());
+ for (int i =0; i< sz; i++) {
+ double boxId = area.get(i).doubleValue();
+ NumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(boxId), 0, bytesRef);
+ final DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, delDocs, fieldName, bytesRef);
+ if (docsEnum == null) continue;
+ // iterate through all documents
+ // which have this boxId
+ int doc;
+ while ((doc = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ bits.fastSet(doc);
+ }
}
+ return bits;
}
- return bits;
}
}
Modified: lucene/dev/trunk/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java (original)
+++ lucene/dev/trunk/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java Tue Apr 6 19:19:27 2010
@@ -24,6 +24,7 @@ import java.util.Map;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
@@ -49,7 +50,6 @@ import org.apache.lucene.spatial.tier.pr
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.NumericUtils;
public class TestCartesian extends LuceneTestCase {
@@ -96,8 +96,8 @@ public class TestCartesian extends Lucen
doc.add(new Field("name", name,Field.Store.YES, Field.Index.ANALYZED));
// convert the lat / long to lucene fields
- doc.add(new Field(latField, NumericUtils.doubleToPrefixCoded(lat),Field.Store.YES, Field.Index.NOT_ANALYZED));
- doc.add(new Field(lngField, NumericUtils.doubleToPrefixCoded(lng),Field.Store.YES, Field.Index.NOT_ANALYZED));
+ doc.add(new NumericField(latField, Integer.MAX_VALUE, Field.Store.YES, true).setDoubleValue(lat));
+ doc.add(new NumericField(lngField, Integer.MAX_VALUE, Field.Store.YES, true).setDoubleValue(lng));
// add a default meta field to make searching all documents easy
doc.add(new Field("metafile", "doc",Field.Store.YES, Field.Index.ANALYZED));
@@ -105,10 +105,9 @@ public class TestCartesian extends Lucen
int ctpsize = ctps.size();
for (int i =0; i < ctpsize; i++){
CartesianTierPlotter ctp = ctps.get(i);
- doc.add(new Field(ctp.getTierFieldName(),
- NumericUtils.doubleToPrefixCoded(ctp.getTierBoxId(lat,lng)),
+ doc.add(new NumericField(ctp.getTierFieldName(), Integer.MAX_VALUE,
Field.Store.YES,
- Field.Index.NOT_ANALYZED_NO_NORMS));
+ true).setDoubleValue(ctp.getTierBoxId(lat,lng)));
doc.add(new Field(geoHashPrefix, GeoHashUtils.encode(lat,lng),
Field.Store.YES,
@@ -275,8 +274,8 @@ public class TestCartesian extends Lucen
Document d = searcher.doc(scoreDocs[i].doc);
String name = d.get("name");
- double rsLat = NumericUtils.prefixCodedToDouble(d.get(latField));
- double rsLng = NumericUtils.prefixCodedToDouble(d.get(lngField));
+ double rsLat = Double.parseDouble(d.get(latField));
+ double rsLng = Double.parseDouble(d.get(lngField));
Double geo_distance = distances.get(scoreDocs[i].doc);
double distance = DistanceUtils.getInstance().getDistanceMi(lat, lng, rsLat, rsLng);
@@ -369,8 +368,8 @@ public class TestCartesian extends Lucen
for(int i =0 ; i < results; i++){
Document d = searcher.doc(scoreDocs[i].doc);
String name = d.get("name");
- double rsLat = NumericUtils.prefixCodedToDouble(d.get(latField));
- double rsLng = NumericUtils.prefixCodedToDouble(d.get(lngField));
+ double rsLat = Double.parseDouble(d.get(latField));
+ double rsLng = Double.parseDouble(d.get(lngField));
Double geo_distance = distances.get(scoreDocs[i].doc);
double distance = DistanceUtils.getInstance().getDistanceMi(lat, lng, rsLat, rsLng);
@@ -464,8 +463,8 @@ public class TestCartesian extends Lucen
Document d = searcher.doc(scoreDocs[i].doc);
String name = d.get("name");
- double rsLat = NumericUtils.prefixCodedToDouble(d.get(latField));
- double rsLng = NumericUtils.prefixCodedToDouble(d.get(lngField));
+ double rsLat = Double.parseDouble(d.get(latField));
+ double rsLng = Double.parseDouble(d.get(lngField));
Double geo_distance = distances.get(scoreDocs[i].doc);
double distance = DistanceUtils.getInstance().getDistanceMi(lat, lng, rsLat, rsLng);
@@ -558,8 +557,8 @@ public class TestCartesian extends Lucen
Document d = searcher.doc(scoreDocs[i].doc);
String name = d.get("name");
- double rsLat = NumericUtils.prefixCodedToDouble(d.get(latField));
- double rsLng = NumericUtils.prefixCodedToDouble(d.get(lngField));
+ double rsLat = Double.parseDouble(d.get(latField));
+ double rsLng = Double.parseDouble(d.get(lngField));
Double geo_distance = distances.get(scoreDocs[i].doc);
double distance = DistanceUtils.getInstance().getDistanceMi(lat, lng, rsLat, rsLng);
Modified: lucene/dev/trunk/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java (original)
+++ lucene/dev/trunk/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java Tue Apr 6 19:19:27 2010
@@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
@@ -28,7 +29,6 @@ import org.apache.lucene.index.IndexRead
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.store.RAMDirectory;
public class TestDistance extends LuceneTestCase {
@@ -63,8 +63,8 @@ public class TestDistance extends Lucene
doc.add(new Field("name", name,Field.Store.YES, Field.Index.ANALYZED));
// convert the lat / long to lucene fields
- doc.add(new Field(latField, NumericUtils.doubleToPrefixCoded(lat),Field.Store.YES, Field.Index.NOT_ANALYZED));
- doc.add(new Field(lngField, NumericUtils.doubleToPrefixCoded(lng),Field.Store.YES, Field.Index.NOT_ANALYZED));
+ doc.add(new NumericField(latField, Integer.MAX_VALUE, Field.Store.YES, true).setDoubleValue(lat));
+ doc.add(new NumericField(lngField, Integer.MAX_VALUE,Field.Store.YES, true).setDoubleValue(lng));
// add a default meta field to make searching all documents easy
doc.add(new Field("metafile", "doc",Field.Store.YES, Field.Index.ANALYZED));
Modified: lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java (original)
+++ lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java Tue Apr 6 19:19:27 2010
@@ -21,8 +21,10 @@ import org.apache.lucene.index.IndexRead
import java.util.Iterator;
-import org.apache.lucene.index.TermEnum;
-import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.StringHelper;
import java.io.*;
@@ -52,55 +54,39 @@ public class LuceneDictionary implements
  final class LuceneIterator implements Iterator<String> {
- private TermEnum termEnum;
- private Term actualTerm;
- private boolean hasNextCalled;
+ private TermsEnum termsEnum;
+ private BytesRef pendingTerm;
LuceneIterator() {
try {
- termEnum = reader.terms(new Term(field));
+ final Terms terms = MultiFields.getTerms(reader, field);
+ if (terms != null) {
+ termsEnum = terms.iterator();
+ pendingTerm = termsEnum.next();
+ }
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public String next() {
- if (!hasNextCalled) {
- hasNext();
+ if (pendingTerm == null) {
+ return null;
}
- hasNextCalled = false;
+
+ String result = pendingTerm.utf8ToString();
try {
- termEnum.next();
+ pendingTerm = termsEnum.next();
} catch (IOException e) {
throw new RuntimeException(e);
}
- return (actualTerm != null) ? actualTerm.text() : null;
+ return result;
}
public boolean hasNext() {
- if (hasNextCalled) {
- return actualTerm != null;
- }
- hasNextCalled = true;
-
- actualTerm = termEnum.term();
-
- // if there are no words return false
- if (actualTerm == null) {
- return false;
- }
-
- String currentField = actualTerm.field();
-
- // if the next word doesn't have the same field return false
- if (currentField != field) {
- actualTerm = null;
- return false;
- }
-
- return true;
+ return pendingTerm != null;
}
public void remove() {
Modified: lucene/dev/trunk/lucene/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndPrefixQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndPrefixQuery.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndPrefixQuery.java (original)
+++ lucene/dev/trunk/lucene/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndPrefixQuery.java Tue Apr 6 19:19:27 2010
@@ -17,16 +17,21 @@ package org.apache.lucene.queryParser.su
*/
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiFields;
import java.io.IOException;
public class SrndPrefixQuery extends SimpleTerm {
+ private final BytesRef prefixRef;
public SrndPrefixQuery(String prefix, boolean quoted, char truncator) {
super(quoted);
this.prefix = prefix;
+ prefixRef = new BytesRef(prefix);
this.truncator = truncator;
}
@@ -53,20 +58,35 @@ public class SrndPrefixQuery extends Sim
MatchingTermVisitor mtv) throws IOException
{
/* inspired by PrefixQuery.rewrite(): */
- TermEnum enumerator = reader.terms(getLucenePrefixTerm(fieldName));
- try {
- do {
- Term term = enumerator.term();
- if ((term != null)
- && term.text().startsWith(getPrefix())
- && term.field().equals(fieldName)) {
- mtv.visitMatchingTerm(term);
+ Terms terms = MultiFields.getTerms(reader, fieldName);
+ if (terms != null) {
+ TermsEnum termsEnum = terms.iterator();
+
+ boolean skip = false;
+ TermsEnum.SeekStatus status = termsEnum.seek(new BytesRef(getPrefix()));
+ if (status == TermsEnum.SeekStatus.FOUND) {
+ mtv.visitMatchingTerm(getLucenePrefixTerm(fieldName));
+ } else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
+ if (termsEnum.term().startsWith(prefixRef)) {
+ mtv.visitMatchingTerm(new Term(fieldName, termsEnum.term().utf8ToString()));
} else {
- break;
+ skip = true;
+ }
+ } else {
+ // EOF
+ skip = true;
+ }
+
+ if (!skip) {
+ while(true) {
+ BytesRef text = termsEnum.next();
+ if (text != null && text.startsWith(prefixRef)) {
+ mtv.visitMatchingTerm(new Term(fieldName, text.utf8ToString()));
+ } else {
+ break;
+ }
}
- } while (enumerator.next());
- } finally {
- enumerator.close();
+ }
}
}
}
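
The seek-then-scan pattern above (seek to the prefix, branch on FOUND/NOT_FOUND, then walk forward while terms still start with the prefix) is the general flex replacement for the old TermEnum prefix loop. A minimal sketch using only calls that appear in this patch; the helper class and method names are hypothetical:

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

public class PrefixScanSketch {
  public static void visitPrefix(IndexReader reader, String field, String prefix) throws IOException {
    Terms terms = MultiFields.getTerms(reader, field);
    if (terms == null) {
      return;
    }
    TermsEnum termsEnum = terms.iterator();
    BytesRef prefixRef = new BytesRef(prefix);
    TermsEnum.SeekStatus status = termsEnum.seek(prefixRef);  // position at or after the prefix
    BytesRef text;
    if (status == TermsEnum.SeekStatus.FOUND) {
      text = prefixRef;                                       // the prefix itself is a term
    } else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
      text = termsEnum.term();                                 // first term sorting after the prefix
    } else {
      return;                                                  // enum exhausted
    }
    while (text != null && text.startsWith(prefixRef)) {       // terms are sorted: stop at first mismatch
      System.out.println(text.utf8ToString());
      text = termsEnum.next();
    }
  }
}
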
Modified: lucene/dev/trunk/lucene/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndTermQuery.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndTermQuery.java (original)
+++ lucene/dev/trunk/lucene/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndTermQuery.java Tue Apr 6 19:19:27 2010
@@ -20,7 +20,10 @@ import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.util.BytesRef;
public class SrndTermQuery extends SimpleTerm {
@@ -46,16 +49,14 @@ public class SrndTermQuery extends Simpl
MatchingTermVisitor mtv) throws IOException
{
/* check term presence in index here for symmetry with other SimpleTerm's */
- TermEnum enumerator = reader.terms(getLuceneTerm(fieldName));
- try {
- Term it= enumerator.term(); /* same or following index term */
- if ((it != null)
- && it.text().equals(getTermText())
- && it.field().equals(fieldName)) {
- mtv.visitMatchingTerm(it);
+ Terms terms = MultiFields.getTerms(reader, fieldName);
+ if (terms != null) {
+ TermsEnum termsEnum = terms.iterator();
+
+ TermsEnum.SeekStatus status = termsEnum.seek(new BytesRef(getTermText()));
+ if (status == TermsEnum.SeekStatus.FOUND) {
+ mtv.visitMatchingTerm(getLuceneTerm(fieldName));
}
- } finally {
- enumerator.close();
}
}
}
Modified: lucene/dev/trunk/lucene/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndTruncQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndTruncQuery.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndTruncQuery.java (original)
+++ lucene/dev/trunk/lucene/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndTruncQuery.java Tue Apr 6 19:19:27 2010
@@ -17,8 +17,11 @@ package org.apache.lucene.queryParser.su
*/
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiFields;
import java.io.IOException;
@@ -40,6 +43,7 @@ public class SrndTruncQuery extends Simp
private final char mask;
private String prefix;
+ private BytesRef prefixRef;
private Pattern pattern;
@@ -68,6 +72,7 @@ public class SrndTruncQuery extends Simp
i++;
}
prefix = truncated.substring(0, i);
+ prefixRef = new BytesRef(prefix);
StringBuilder re = new StringBuilder();
while (i < truncated.length()) {
@@ -84,26 +89,37 @@ public class SrndTruncQuery extends Simp
MatchingTermVisitor mtv) throws IOException
{
int prefixLength = prefix.length();
- TermEnum enumerator = reader.terms(new Term(fieldName, prefix));
- Matcher matcher = pattern.matcher("");
- try {
- do {
- Term term = enumerator.term();
- if (term != null) {
- String text = term.text();
- if ((! text.startsWith(prefix)) || (! term.field().equals(fieldName))) {
- break;
- } else {
- matcher.reset( text.substring(prefixLength));
+ Terms terms = MultiFields.getTerms(reader, fieldName);
+ if (terms != null) {
+ Matcher matcher = pattern.matcher("");
+ try {
+ TermsEnum termsEnum = terms.iterator();
+
+ TermsEnum.SeekStatus status = termsEnum.seek(prefixRef);
+ BytesRef text;
+ if (status == TermsEnum.SeekStatus.FOUND) {
+ text = prefixRef;
+ } else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
+ text = termsEnum.term();
+ } else {
+ text = null;
+ }
+
+ while(text != null) {
+ if (text != null && text.startsWith(prefixRef)) {
+ String textString = text.utf8ToString();
+ matcher.reset(textString.substring(prefixLength));
if (matcher.matches()) {
- mtv.visitMatchingTerm(term);
+ mtv.visitMatchingTerm(new Term(fieldName, textString));
}
+ } else {
+ break;
}
+ text = termsEnum.next();
}
- } while (enumerator.next());
- } finally {
- enumerator.close();
- matcher.reset();
+ } finally {
+ matcher.reset();
+ }
}
}
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java Tue Apr 6 19:19:27 2010
@@ -17,12 +17,17 @@ package org.apache.lucene.analysis;
* limitations under the License.
*/
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.document.NumericField; // for javadocs
import org.apache.lucene.search.NumericRangeQuery; // for javadocs
import org.apache.lucene.search.NumericRangeFilter; // for javadocs
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -91,6 +96,88 @@ public final class NumericTokenStream ex
/** The lower precision tokens gets this token type assigned. */
public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecNumeric";
+
+ /** Expert: Use this attribute to get the details of the currently generated token
+ * @lucene.experimental
+ * @since 3.1
+ */
+ public interface NumericTermAttribute extends Attribute {
+ /** Returns current shift value, undefined before first token */
+ int getShift();
+ /** Returns {@link NumericTokenStream}'s raw value as {@code long} */
+ long getRawValue();
+ /** Returns value size in bits (32 for {@code float}, {@code int}; 64 for {@code double}, {@code long}) */
+ int getValueSize();
+ }
+
+ private static final class NumericAttributeFactory extends AttributeFactory {
+ private final AttributeFactory delegate;
+ private NumericTokenStream ts = null;
+
+ NumericAttributeFactory(AttributeFactory delegate) {
+ this.delegate = delegate;
+ }
+
+ @Override
+ public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
+ if (attClass == NumericTermAttribute.class)
+ return new NumericTermAttributeImpl(ts);
+ if (attClass.isAssignableFrom(CharTermAttribute.class) || attClass.isAssignableFrom(TermAttribute.class))
+ throw new IllegalArgumentException("NumericTokenStream does not support CharTermAttribute/TermAttribute.");
+ return delegate.createAttributeInstance(attClass);
+ }
+ }
+
+ private static final class NumericTermAttributeImpl extends AttributeImpl implements NumericTermAttribute,TermToBytesRefAttribute {
+ private final NumericTokenStream ts;
+
+ public NumericTermAttributeImpl(NumericTokenStream ts) {
+ this.ts = ts;
+ }
+
+ public int toBytesRef(BytesRef bytes) {
+ try {
+ assert ts.valSize == 64 || ts.valSize == 32;
+ return (ts.valSize == 64) ?
+ NumericUtils.longToPrefixCoded(ts.value, ts.shift, bytes) :
+ NumericUtils.intToPrefixCoded((int) ts.value, ts.shift, bytes);
+ } catch (IllegalArgumentException iae) {
+ // return empty token before first
+ bytes.length = 0;
+ return 0;
+ }
+ }
+
+ public int getShift() { return ts.shift; }
+ public long getRawValue() { return ts.value; }
+ public int getValueSize() { return ts.valSize; }
+
+ @Override
+ public void clear() {
+ // this attribute has no contents to clear
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ return other == this;
+ }
+
+ @Override
+ public int hashCode() {
+ return System.identityHashCode(this);
+ }
+
+ @Override
+ public void copyTo(AttributeImpl target) {
+ // this attribute has no contents to copy
+ }
+
+ @Override
+ public Object clone() {
+ // cannot throw CloneNotSupportedException (checked)
+ throw new UnsupportedOperationException();
+ }
+ }
/**
* Creates a token stream for numeric values using the default precisionStep
@@ -107,23 +194,15 @@ public final class NumericTokenStream ex
* before using set a value using the various set???Value() methods.
*/
public NumericTokenStream(final int precisionStep) {
- super();
- this.precisionStep = precisionStep;
- if (precisionStep < 1)
- throw new IllegalArgumentException("precisionStep must be >=1");
- }
+ super(new NumericAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY));
+ // we must do this after the super call :(
+ ((NumericAttributeFactory) getAttributeFactory()).ts = this;
+ addAttribute(NumericTermAttribute.class);
- /**
- * Expert: Creates a token stream for numeric values with the specified
- * precisionStep using the given {@link AttributeSource}.
- * The stream is not yet initialized,
- * before using set a value using the various set???Value() methods.
- */
- public NumericTokenStream(AttributeSource source, final int precisionStep) {
- super(source);
this.precisionStep = precisionStep;
if (precisionStep < 1)
throw new IllegalArgumentException("precisionStep must be >=1");
+ shift = -precisionStep;
}
/**
@@ -134,10 +213,15 @@ public final class NumericTokenStream ex
* before using set a value using the various set???Value() methods.
*/
public NumericTokenStream(AttributeFactory factory, final int precisionStep) {
- super(factory);
+ super(new NumericAttributeFactory(factory));
+ // we must do this after the super call :(
+ ((NumericAttributeFactory) getAttributeFactory()).ts = this;
+ addAttribute(NumericTermAttribute.class);
+
this.precisionStep = precisionStep;
if (precisionStep < 1)
throw new IllegalArgumentException("precisionStep must be >=1");
+ shift = -precisionStep;
}
/**
@@ -149,7 +233,7 @@ public final class NumericTokenStream ex
public NumericTokenStream setLongValue(final long value) {
this.value = value;
valSize = 64;
- shift = 0;
+ shift = -precisionStep;
return this;
}
@@ -162,7 +246,7 @@ public final class NumericTokenStream ex
public NumericTokenStream setIntValue(final int value) {
this.value = value;
valSize = 32;
- shift = 0;
+ shift = -precisionStep;
return this;
}
@@ -175,7 +259,7 @@ public final class NumericTokenStream ex
public NumericTokenStream setDoubleValue(final double value) {
this.value = NumericUtils.doubleToSortableLong(value);
valSize = 64;
- shift = 0;
+ shift = -precisionStep;
return this;
}
@@ -188,7 +272,7 @@ public final class NumericTokenStream ex
public NumericTokenStream setFloatValue(final float value) {
this.value = NumericUtils.floatToSortableInt(value);
valSize = 32;
- shift = 0;
+ shift = -precisionStep;
return this;
}
@@ -196,37 +280,24 @@ public final class NumericTokenStream ex
public void reset() {
if (valSize == 0)
throw new IllegalStateException("call set???Value() before usage");
- shift = 0;
+ shift = -precisionStep;
}
@Override
public boolean incrementToken() {
if (valSize == 0)
throw new IllegalStateException("call set???Value() before usage");
- if (shift >= valSize)
+ shift += precisionStep;
+ if (shift >= valSize) {
+ // reset so the attribute still works after exhausted stream
+ shift -= precisionStep;
return false;
+ }
clearAttributes();
- final char[] buffer;
- switch (valSize) {
- case 64:
- buffer = termAtt.resizeTermBuffer(NumericUtils.BUF_SIZE_LONG);
- termAtt.setTermLength(NumericUtils.longToPrefixCoded(value, shift, buffer));
- break;
-
- case 32:
- buffer = termAtt.resizeTermBuffer(NumericUtils.BUF_SIZE_INT);
- termAtt.setTermLength(NumericUtils.intToPrefixCoded((int) value, shift, buffer));
- break;
-
- default:
- // should not happen
- throw new IllegalArgumentException("valSize must be 32 or 64");
- }
-
+ // the TermToBytesRefAttribute is directly accessing shift & value.
typeAtt.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC : TOKEN_TYPE_LOWER_PREC);
posIncrAtt.setPositionIncrement((shift == 0) ? 1 : 0);
- shift += precisionStep;
return true;
}
@@ -238,12 +309,11 @@ public final class NumericTokenStream ex
}
// members
- private final TermAttribute termAtt = addAttribute(TermAttribute.class);
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
- private int shift = 0, valSize = 0; // valSize==0 means not initialized
+ int shift, valSize = 0; // valSize==0 means not initialized
private final int precisionStep;
- private long value = 0L;
+ long value = 0L;
}
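
The new NumericTermAttribute exposes per-stream state (shift, raw value, value size) instead of a copied term buffer. A minimal sketch of consuming it, based only on the methods declared above; the value and precision step are arbitrary examples:

import java.io.IOException;

import org.apache.lucene.analysis.NumericTokenStream;

public class NumericStreamSketch {
  public static void dump(long value, int precisionStep) throws IOException {
    NumericTokenStream stream = new NumericTokenStream(precisionStep).setLongValue(value);
    // the stream adds this attribute itself in its constructor; addAttribute just returns it
    NumericTokenStream.NumericTermAttribute numAtt =
        stream.addAttribute(NumericTokenStream.NumericTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      // shift advances by precisionStep per token; raw value and bit size stay constant
      System.out.println("shift=" + numAtt.getShift()
          + " rawValue=" + numAtt.getRawValue() + " valueSize=" + numAtt.getValueSize());
    }
  }
}
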
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Token.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Token.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Token.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Token.java Tue Apr 6 19:19:27 2010
@@ -64,14 +64,14 @@ import org.apache.lucene.util.AttributeI
implementing the {@link TokenStream#incrementToken()} API.
Failing that, to create a new Token you should first use
one of the constructors that starts with null text. To load
- the token from a char[] use {@link #setTermBuffer(char[], int, int)}.
- To load from a String use {@link #setTermBuffer(String)} or {@link #setTermBuffer(String, int, int)}.
- Alternatively you can get the Token's termBuffer by calling either {@link #termBuffer()},
+ the token from a char[] use {@link #copyBuffer(char[], int, int)}.
+ To load from a String use {@link #setEmpty} followed by {@link #append(CharSequence)} or {@link #append(CharSequence, int, int)}.
+ Alternatively you can get the Token's termBuffer by calling either {@link #buffer()},
if you know that your text is shorter than the capacity of the termBuffer
- or {@link #resizeTermBuffer(int)}, if there is any possibility
+ or {@link #resizeBuffer(int)}, if there is any possibility
that you may need to grow the buffer. Fill in the characters of your term into this
buffer, with {@link String#getChars(int, int, char[], int)} if loading from a string,
- or with {@link System#arraycopy(Object, int, Object, int, int)}, and finally call {@link #setTermLength(int)} to
+ or with {@link System#arraycopy(Object, int, Object, int, int)}, and finally call {@link #setLength(int)} to
set the length of the term text. See LUCENE-969
for details.
@@ -100,7 +100,7 @@ import org.apache.lucene.util.AttributeI
Copying from one one Token to another (type is reset to {@link #DEFAULT_TYPE} if not specified):
- return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]);
+ return reusableToken.reinit(source.buffer(), 0, source.length(), source.startOffset(), source.endOffset()[, source.type()]);
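
To make the reuse recipe in the updated javadoc concrete, a minimal sketch that uses only methods the javadoc above references; the helper class itself is hypothetical:

import org.apache.lucene.analysis.Token;

public class TokenReuseSketch {
  /** One-call reuse, mirroring the javadoc snippet above (type falls back to DEFAULT_TYPE). */
  public static Token copyFrom(Token reusableToken, Token source) {
    return reusableToken.reinit(source.buffer(), 0, source.length(),
                                source.startOffset(), source.endOffset());
  }

  /** Step-by-step reuse from a char[], as the javadoc describes. */
  public static Token fromChars(Token reusableToken, char[] text, int offset, int length,
                                int startOffset, int endOffset) {
    reusableToken.clear();                           // reset all attributes
    reusableToken.copyBuffer(text, offset, length);  // load the term text from the char[]
    reusableToken.setStartOffset(startOffset);
    reusableToken.setEndOffset(endOffset);
    return reusableToken;
  }
}
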
@@ -115,6 +115,7 @@ import org.apache.lucene.util.AttributeI
@see org.apache.lucene.index.Payload
*/
+// TODO: change superclass to CharTermAttribute in 4.0!
public class Token extends TermAttributeImpl
implements TypeAttribute, PositionIncrementAttribute,
FlagsAttribute, OffsetAttribute, PayloadAttribute {
@@ -172,7 +173,7 @@ public class Token extends TermAttribute
* @param end end offset
*/
public Token(String text, int start, int end) {
- setTermBuffer(text);
+ append(text);
startOffset = start;
endOffset = end;
}
@@ -187,7 +188,7 @@ public class Token extends TermAttribute
* @param typ token type
*/
public Token(String text, int start, int end, String typ) {
- setTermBuffer(text);
+ append(text);
startOffset = start;
endOffset = end;
type = typ;
@@ -204,7 +205,7 @@ public class Token extends TermAttribute
* @param flags token type bits
*/
public Token(String text, int start, int end, int flags) {
- setTermBuffer(text);
+ append(text);
startOffset = start;
endOffset = end;
this.flags = flags;
@@ -221,7 +222,7 @@ public class Token extends TermAttribute
* @param end
*/
public Token(char[] startTermBuffer, int termBufferOffset, int termBufferLength, int start, int end) {
- setTermBuffer(startTermBuffer, termBufferOffset, termBufferLength);
+ copyBuffer(startTermBuffer, termBufferOffset, termBufferLength);
startOffset = start;
endOffset = end;
}
@@ -270,7 +271,7 @@ public class Token extends TermAttribute
corresponding to this token in the source text.
Note that the difference between endOffset() and startOffset() may not be
- equal to {@link #termLength}, as the term text may have been altered by a
+ equal to {@link #length}, as the term text may have been altered by a
stemmer or some other filter. */
public final int startOffset() {
return startOffset;
@@ -351,7 +352,7 @@ public class Token extends TermAttribute
@Override
public String toString() {
final StringBuilder sb = new StringBuilder();
- sb.append('(').append(term()).append(',')
+ sb.append('(').append(super.toString()).append(',')
.append(startOffset).append(',').append(endOffset);
if (!"word".equals(type))
sb.append(",type=").append(type);
@@ -387,7 +388,7 @@ public class Token extends TermAttribute
/** Makes a clone, but replaces the term buffer &
* start/end offset in the process. This is more
* efficient than doing a full clone (and then calling
- * setTermBuffer) because it saves a wasted copy of the old
+ * {@link #copyBuffer}) because it saves a wasted copy of the old
* termBuffer. */
public Token clone(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
final Token t = new Token(newTermBuffer, newTermOffset, newTermLength, newStartOffset, newEndOffset);
@@ -442,16 +443,16 @@ public class Token extends TermAttribute
}
/** Shorthand for calling {@link #clear},
- * {@link #setTermBuffer(char[], int, int)},
+ * {@link #copyBuffer(char[], int, int)},
* {@link #setStartOffset},
* {@link #setEndOffset},
* {@link #setType}
* @return this Token instance */
public Token reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType) {
clearNoTermBuffer();
+ copyBuffer(newTermBuffer, newTermOffset, newTermLength);
payload = null;
positionIncrement = 1;
- setTermBuffer(newTermBuffer, newTermOffset, newTermLength);
startOffset = newStartOffset;
endOffset = newEndOffset;
type = newType;
@@ -459,14 +460,14 @@ public class Token extends TermAttribute
}
/** Shorthand for calling {@link #clear},
- * {@link #setTermBuffer(char[], int, int)},
+ * {@link #copyBuffer(char[], int, int)},
* {@link #setStartOffset},
* {@link #setEndOffset}
* {@link #setType} on Token.DEFAULT_TYPE
* @return this Token instance */
public Token reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
clearNoTermBuffer();
- setTermBuffer(newTermBuffer, newTermOffset, newTermLength);
+ copyBuffer(newTermBuffer, newTermOffset, newTermLength);
startOffset = newStartOffset;
endOffset = newEndOffset;
type = DEFAULT_TYPE;
@@ -474,14 +475,14 @@ public class Token extends TermAttribute
}
/** Shorthand for calling {@link #clear},
- * {@link #setTermBuffer(String)},
+ * {@link #append(CharSequence)},
* {@link #setStartOffset},
* {@link #setEndOffset}
* {@link #setType}
* @return this Token instance */
public Token reinit(String newTerm, int newStartOffset, int newEndOffset, String newType) {
- clearNoTermBuffer();
- setTermBuffer(newTerm);
+ clear();
+ append(newTerm);
startOffset = newStartOffset;
endOffset = newEndOffset;
type = newType;
@@ -489,14 +490,14 @@ public class Token extends TermAttribute
}
/** Shorthand for calling {@link #clear},
- * {@link #setTermBuffer(String, int, int)},
+ * {@link #append(CharSequence, int, int)},
* {@link #setStartOffset},
* {@link #setEndOffset}
* {@link #setType}
* @return this Token instance */
public Token reinit(String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType) {
- clearNoTermBuffer();
- setTermBuffer(newTerm, newTermOffset, newTermLength);
+ clear();
+ append(newTerm, newTermOffset, newTermOffset + newTermLength);
startOffset = newStartOffset;
endOffset = newEndOffset;
type = newType;
@@ -504,14 +505,14 @@ public class Token extends TermAttribute
}
/** Shorthand for calling {@link #clear},
- * {@link #setTermBuffer(String)},
+ * {@link #append(CharSequence)},
* {@link #setStartOffset},
* {@link #setEndOffset}
* {@link #setType} on Token.DEFAULT_TYPE
* @return this Token instance */
public Token reinit(String newTerm, int newStartOffset, int newEndOffset) {
- clearNoTermBuffer();
- setTermBuffer(newTerm);
+ clear();
+ append(newTerm);
startOffset = newStartOffset;
endOffset = newEndOffset;
type = DEFAULT_TYPE;
@@ -519,14 +520,14 @@ public class Token extends TermAttribute
}
/** Shorthand for calling {@link #clear},
- * {@link #setTermBuffer(String, int, int)},
+ * {@link #append(CharSequence, int, int)},
* {@link #setStartOffset},
* {@link #setEndOffset}
* {@link #setType} on Token.DEFAULT_TYPE
* @return this Token instance */
public Token reinit(String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
- clearNoTermBuffer();
- setTermBuffer(newTerm, newTermOffset, newTermLength);
+ clear();
+ append(newTerm, newTermOffset, newTermOffset + newTermLength);
startOffset = newStartOffset;
endOffset = newEndOffset;
type = DEFAULT_TYPE;
@@ -538,7 +539,7 @@ public class Token extends TermAttribute
* @param prototype
*/
public void reinit(Token prototype) {
- setTermBuffer(prototype.termBuffer(), 0, prototype.termLength());
+ copyBuffer(prototype.buffer(), 0, prototype.length());
positionIncrement = prototype.positionIncrement;
flags = prototype.flags;
startOffset = prototype.startOffset;
@@ -553,7 +554,7 @@ public class Token extends TermAttribute
* @param newTerm
*/
public void reinit(Token prototype, String newTerm) {
- setTermBuffer(newTerm);
+ setEmpty().append(newTerm);
positionIncrement = prototype.positionIncrement;
flags = prototype.flags;
startOffset = prototype.startOffset;
@@ -570,7 +571,7 @@ public class Token extends TermAttribute
* @param length
*/
public void reinit(Token prototype, char[] newTermBuffer, int offset, int length) {
- setTermBuffer(newTermBuffer, offset, length);
+ copyBuffer(newTermBuffer, offset, length);
positionIncrement = prototype.positionIncrement;
flags = prototype.flags;
startOffset = prototype.startOffset;
Propchange: lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Tokenizer.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Apr 6 19:19:27 2010
@@ -1,2 +1,3 @@
-/lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/analysis/Tokenizer.java:896850,909334
-/lucene/java/trunk/src/java/org/apache/lucene/analysis/Tokenizer.java:924483-925561
+/lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/Tokenizer.java:824912-931101
+/lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/analysis/Tokenizer.java:909334
+/lucene/java/trunk/src/java/org/apache/lucene/analysis/Tokenizer.java:924483-924731,924781,925176-925462
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/TermAttribute.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/TermAttribute.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/TermAttribute.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/TermAttribute.java Tue Apr 6 19:19:27 2010
@@ -21,7 +21,9 @@ import org.apache.lucene.util.Attribute;
/**
* The term text of a Token.
+ * @deprecated Use {@link CharTermAttribute} instead.
*/
+@Deprecated
public interface TermAttribute extends Attribute {
/** Returns the Token's term text.
*
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/TermAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/TermAttributeImpl.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/TermAttributeImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/TermAttributeImpl.java Tue Apr 6 19:19:27 2010
@@ -17,211 +17,11 @@ package org.apache.lucene.analysis.token
* limitations under the License.
*/
-import java.io.Serializable;
-
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.AttributeImpl;
-import org.apache.lucene.util.RamUsageEstimator;
-
/**
* The term text of a Token.
+ * @deprecated This class is only available for AttributeSource
+ * to be able to load an old TermAttribute implementation class.
*/
-public class TermAttributeImpl extends AttributeImpl implements TermAttribute, Cloneable, Serializable {
- private static int MIN_BUFFER_SIZE = 10;
-
- private char[] termBuffer;
- private int termLength;
-
- /** Returns the Token's term text.
- *
- * This method has a performance penalty
- * because the text is stored internally in a char[]. If
- * possible, use {@link #termBuffer()} and {@link
- * #termLength()} directly instead. If you really need a
- * String, use this method, which is nothing more than
- * a convenience call to new String(token.termBuffer(), 0, token.termLength())
- */
- public String term() {
- initTermBuffer();
- return new String(termBuffer, 0, termLength);
- }
-
- /** Copies the contents of buffer, starting at offset for
- * length characters, into the termBuffer array.
- * @param buffer the buffer to copy
- * @param offset the index in the buffer of the first character to copy
- * @param length the number of characters to copy
- */
- public void setTermBuffer(char[] buffer, int offset, int length) {
- growTermBuffer(length);
- System.arraycopy(buffer, offset, termBuffer, 0, length);
- termLength = length;
- }
-
- /** Copies the contents of buffer into the termBuffer array.
- * @param buffer the buffer to copy
- */
- public void setTermBuffer(String buffer) {
- int length = buffer.length();
- growTermBuffer(length);
- buffer.getChars(0, length, termBuffer, 0);
- termLength = length;
- }
-
- /** Copies the contents of buffer, starting at offset and continuing
- * for length characters, into the termBuffer array.
- * @param buffer the buffer to copy
- * @param offset the index in the buffer of the first character to copy
- * @param length the number of characters to copy
- */
- public void setTermBuffer(String buffer, int offset, int length) {
- assert offset <= buffer.length();
- assert offset + length <= buffer.length();
- growTermBuffer(length);
- buffer.getChars(offset, offset + length, termBuffer, 0);
- termLength = length;
- }
-
- /** Returns the internal termBuffer character array which
- * you can then directly alter. If the array is too
- * small for your token, use {@link
- * #resizeTermBuffer(int)} to increase it. After
- * altering the buffer be sure to call {@link
- * #setTermLength} to record the number of valid
- * characters that were placed into the termBuffer. */
- public char[] termBuffer() {
- initTermBuffer();
- return termBuffer;
- }
-
- /** Grows the termBuffer to at least size newSize, preserving the
- * existing content. Note: If the next operation is to change
- * the contents of the term buffer use
- * {@link #setTermBuffer(char[], int, int)},
- * {@link #setTermBuffer(String)}, or
- * {@link #setTermBuffer(String, int, int)}
- * to optimally combine the resize with the setting of the termBuffer.
- * @param newSize minimum size of the new termBuffer
- * @return newly created termBuffer with length >= newSize
- */
- public char[] resizeTermBuffer(int newSize) {
- if (termBuffer == null) {
- // The buffer is always at least MIN_BUFFER_SIZE
- termBuffer = new char[ArrayUtil.oversize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize, RamUsageEstimator.NUM_BYTES_CHAR)];
- } else {
- if(termBuffer.length < newSize){
- // Not big enough; create a new array with slight
- // over allocation and preserve content
- final char[] newCharBuffer = new char[ArrayUtil.oversize(newSize, RamUsageEstimator.NUM_BYTES_CHAR)];
- System.arraycopy(termBuffer, 0, newCharBuffer, 0, termBuffer.length);
- termBuffer = newCharBuffer;
- }
- }
- return termBuffer;
- }
-
-
- /** Allocates a buffer char[] of at least newSize, without preserving the existing content.
- * its always used in places that set the content
- * @param newSize minimum size of the buffer
- */
- private void growTermBuffer(int newSize) {
- if (termBuffer == null) {
- // The buffer is always at least MIN_BUFFER_SIZE
- termBuffer = new char[ArrayUtil.oversize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize, RamUsageEstimator.NUM_BYTES_CHAR)];
- } else {
- if(termBuffer.length < newSize){
- // Not big enough; create a new array with slight
- // over allocation:
- termBuffer = new char[ArrayUtil.oversize(newSize, RamUsageEstimator.NUM_BYTES_CHAR)];
- }
- }
- }
-
- private void initTermBuffer() {
- if (termBuffer == null) {
- termBuffer = new char[ArrayUtil.oversize(MIN_BUFFER_SIZE, RamUsageEstimator.NUM_BYTES_CHAR)];
- termLength = 0;
- }
- }
-
- /** Return number of valid characters (length of the term)
- * in the termBuffer array. */
- public int termLength() {
- return termLength;
- }
-
- /** Set number of valid characters (length of the term) in
- * the termBuffer array. Use this to truncate the termBuffer
- * or to synchronize with external manipulation of the termBuffer.
- * Note: to grow the size of the array,
- * use {@link #resizeTermBuffer(int)} first.
- * @param length the truncated length
- */
- public void setTermLength(int length) {
- initTermBuffer();
- if (length > termBuffer.length)
- throw new IllegalArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.length + ")");
- termLength = length;
- }
-
- @Override
- public int hashCode() {
- initTermBuffer();
- int code = termLength;
- code = code * 31 + ArrayUtil.hashCode(termBuffer, 0, termLength);
- return code;
- }
-
- @Override
- public void clear() {
- termLength = 0;
- }
-
- @Override
- public Object clone() {
- TermAttributeImpl t = (TermAttributeImpl)super.clone();
- // Do a deep clone
- if (termBuffer != null) {
- t.termBuffer = termBuffer.clone();
- }
- return t;
- }
-
- @Override
- public boolean equals(Object other) {
- if (other == this) {
- return true;
- }
-
- if (other instanceof TermAttributeImpl) {
- initTermBuffer();
- TermAttributeImpl o = ((TermAttributeImpl) other);
- o.initTermBuffer();
-
- if (termLength != o.termLength)
- return false;
-    for(int i=0;i
> DocumentsWriter.BYTE_BLOCK_SHIFT];
+ int pos = textStart & DocumentsWriter.BYTE_BLOCK_MASK;
+ if ((bytes[pos] & 0x80) == 0) {
+ // length is 1 byte
+ term.length = bytes[pos];
+ term.offset = pos+1;
+ } else {
+ // length is 2 bytes
+ term.length = (bytes[pos]&0x7f) + ((bytes[pos+1]&0xff)<<7);
+ term.offset = pos+2;
+ }
+ assert term.length >= 0;
+ return term;
+ }
}
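
The deref logic above reads a 1-or-2-byte length prefix stored in front of the term bytes: lengths below 128 take a single byte, larger lengths keep the low 7 bits in the first byte (high bit set) and the remaining bits in the second byte. A standalone sketch of that encoding and its matching decoder (mine, not part of the commit):

public class LengthPrefix {
  static int writeLength(byte[] bytes, int pos, int length) {
    if (length < 0x80) {
      bytes[pos] = (byte) length;               // single-byte length
      return pos + 1;                           // data starts here
    } else {
      bytes[pos] = (byte) (0x80 | (length & 0x7f)); // low 7 bits, high bit set
      bytes[pos + 1] = (byte) (length >>> 7);       // remaining bits
      return pos + 2;
    }
  }

  static int[] readLength(byte[] bytes, int pos) {     // returns {length, dataOffset}
    if ((bytes[pos] & 0x80) == 0) {
      return new int[] { bytes[pos], pos + 1 };
    } else {
      return new int[] { (bytes[pos] & 0x7f) + ((bytes[pos + 1] & 0xff) << 7), pos + 2 };
    }
  }

  public static void main(String[] args) {
    byte[] buf = new byte[4];
    writeLength(buf, 0, 300);
    int[] lenAndOffset = readLength(buf, 0);
    System.out.println(lenAndOffset[0] + " @ " + lenAndOffset[1]);  // prints: 300 @ 2
  }
}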
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/ByteSliceReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/ByteSliceReader.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/ByteSliceReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/ByteSliceReader.java Tue Apr 6 19:19:27 2010
@@ -17,16 +17,17 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
import java.io.IOException;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+
/* IndexInput that knows how to read the byte slices written
* by Posting and PostingVector. We read the bytes in
* each slice until we hit the end of that slice at which
* point we read the forwarding address of the next slice
* and then jump to it.*/
-final class ByteSliceReader extends IndexInput {
+final class ByteSliceReader extends DataInput {
ByteBlockPool pool;
int bufferUpto;
byte[] buffer;
@@ -75,7 +76,7 @@ final class ByteSliceReader extends Inde
return buffer[upto++];
}
- public long writeTo(IndexOutput out) throws IOException {
+ public long writeTo(DataOutput out) throws IOException {
long size = 0;
while(true) {
if (limit + bufferOffset == endIndex) {
@@ -136,14 +137,4 @@ final class ByteSliceReader extends Inde
}
}
}
-
- @Override
- public long getFilePointer() {throw new RuntimeException("not implemented");}
- @Override
- public long length() {throw new RuntimeException("not implemented");}
- @Override
- public void seek(long pos) {throw new RuntimeException("not implemented");}
- @Override
- public void close() {throw new RuntimeException("not implemented");}
-}
-
+}
\ No newline at end of file
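
Extending DataInput instead of IndexInput is what lets the unimplemented getFilePointer()/length()/seek()/close() stubs above be dropped. A hypothetical minimal DataInput over a plain byte[] (my sketch, not part of the commit; it assumes DataInput's abstract methods are readByte() and readBytes(byte[], int, int), as the overrides in this file suggest):

import org.apache.lucene.store.DataInput;

final class SimpleByteArrayInput extends DataInput {
  private final byte[] bytes;
  private int pos;

  SimpleByteArrayInput(byte[] bytes) {
    this.bytes = bytes;
  }

  @Override
  public byte readByte() {
    return bytes[pos++];               // sequential read, no seeking needed
  }

  @Override
  public void readBytes(byte[] b, int offset, int len) {
    System.arraycopy(bytes, pos, b, offset, len);
    pos += len;
  }
}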
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java Tue Apr 6 19:19:27 2010
@@ -1,5 +1,7 @@
package org.apache.lucene.index;
+import org.apache.lucene.store.DataOutput;
+
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -24,7 +26,7 @@ package org.apache.lucene.index;
* posting list for many terms in RAM.
*/
-final class ByteSliceWriter {
+final class ByteSliceWriter extends DataOutput {
private byte[] slice;
private int upto;
@@ -38,7 +40,7 @@ final class ByteSliceWriter {
/**
* Set up the writer to write at address.
- */
+ */
public void init(int address) {
slice = pool.buffers[address >> DocumentsWriter.BYTE_BLOCK_SHIFT];
assert slice != null;
@@ -48,6 +50,7 @@ final class ByteSliceWriter {
}
/** Write byte into byte slice stream */
+ @Override
public void writeByte(byte b) {
assert slice != null;
if (slice[upto] != 0) {
@@ -60,6 +63,7 @@ final class ByteSliceWriter {
assert upto != slice.length;
}
+ @Override
public void writeBytes(final byte[] b, int offset, final int len) {
final int offsetEnd = offset + len;
while(offset < offsetEnd) {
@@ -78,12 +82,4 @@ final class ByteSliceWriter {
public int getAddress() {
return upto + (offset0 & DocumentsWriter.BYTE_BLOCK_NOT_MASK);
}
-
- public void writeVInt(int i) {
- while ((i & ~0x7F) != 0) {
- writeByte((byte)((i & 0x7f) | 0x80));
- i >>>= 7;
- }
- writeByte((byte) i);
- }
-}
+}
\ No newline at end of file
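
The writeVInt removed above is now inherited from DataOutput; the encoding itself (7 data bits per byte, high bit set on every byte except the last) is unchanged. A standalone sketch of that variable-length int format (mine, not from the commit):

public class VIntSketch {
  static int writeVInt(byte[] out, int pos, int i) {
    while ((i & ~0x7F) != 0) {                 // more than 7 bits remain
      out[pos++] = (byte) ((i & 0x7f) | 0x80); // low 7 bits, continuation bit set
      i >>>= 7;
    }
    out[pos++] = (byte) i;                     // final byte, high bit clear
    return pos;
  }

  public static void main(String[] args) {
    byte[] buf = new byte[5];
    int end = writeVInt(buf, 0, 130);          // 130 -> 0x82 0x01
    System.out.println(end);                   // prints 2 (two bytes used)
  }
}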
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java Tue Apr 6 19:19:27 2010
@@ -22,6 +22,9 @@ import org.apache.lucene.store.Directory
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.document.AbstractField; // for javadocs
import org.apache.lucene.document.Document;
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
import java.text.NumberFormat;
import java.io.PrintStream;
@@ -122,6 +125,9 @@ public class CheckIndex {
/** Name of the segment. */
public String name;
+ /** Name of codec used to read this segment. */
+ public String codec;
+
/** Document count (does not take deletions into account). */
public int docCount;
@@ -263,26 +269,6 @@ public class CheckIndex {
infoStream.println(msg);
}
- private static class MySegmentTermDocs extends SegmentTermDocs {
-
- int delCount;
-
- MySegmentTermDocs(SegmentReader p) {
- super(p);
- }
-
- @Override
- public void seek(Term term) throws IOException {
- super.seek(term);
- delCount = 0;
- }
-
- @Override
- protected void skippingDoc() throws IOException {
- delCount++;
- }
- }
-
/** Returns a {@link Status} instance detailing
* the state of the index.
*
@@ -296,6 +282,10 @@ public class CheckIndex {
return checkIndex(null);
}
+ protected Status checkIndex(List onlySegments) throws IOException {
+ return checkIndex(onlySegments, CodecProvider.getDefault());
+ }
+
/** Returns a {@link Status} instance detailing
* the state of the index.
*
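
External callers keep using the public entry point, which now delegates to the codec-aware overload with CodecProvider.getDefault(). A hedged usage sketch (not from this commit; it assumes the existing CheckIndex(Directory) constructor, setInfoStream(PrintStream), and the Status.clean field):

import java.io.File;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class CheckIndexSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(new File(args[0]));
    CheckIndex checker = new CheckIndex(dir);
    checker.setInfoStream(System.out);
    CheckIndex.Status status = checker.checkIndex();  // uses the default CodecProvider
    System.out.println(status.clean ? "index is clean" : "index has problems");
    dir.close();
  }
}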
@@ -308,13 +298,13 @@ public class CheckIndex {
* WARNING: make sure
* you only call this when the index is not opened by any
* writer. */
- public Status checkIndex(List onlySegments) throws IOException {
+ protected Status checkIndex(List onlySegments, CodecProvider codecs) throws IOException {
NumberFormat nf = NumberFormat.getInstance();
SegmentInfos sis = new SegmentInfos();
Status result = new Status();
result.dir = dir;
try {
- sis.read(dir);
+ sis.read(dir, codecs);
} catch (Throwable t) {
msg("ERROR: could not read any segments file in directory");
result.missingSegments = true;
@@ -371,6 +361,8 @@ public class CheckIndex {
sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
+ else if (format == SegmentInfos.FORMAT_FLEX_POSTINGS)
+ sFormat = "FORMAT_FLEX_POSTINGS [Lucene 3.1]";
else if (format < SegmentInfos.CURRENT_FORMAT) {
sFormat = "int=" + format + " [newer version of Lucene than this tool]";
skip = true;
@@ -429,6 +421,9 @@ public class CheckIndex {
SegmentReader reader = null;
try {
+ final String codec = info.getCodec().name;
+ msg(" codec=" + codec);
+ segInfoStat.codec = codec;
msg(" compound=" + info.getUseCompoundFile());
segInfoStat.compound = info.getUseCompoundFile();
msg(" hasProx=" + info.getHasProx());
@@ -452,6 +447,7 @@ public class CheckIndex {
msg(" docStoreIsCompoundFile=" + info.getDocStoreIsCompoundFile());
segInfoStat.docStoreCompoundFile = info.getDocStoreIsCompoundFile();
}
+
final String delFileName = info.getDelFileName();
if (delFileName == null){
msg(" no deletions");
@@ -503,7 +499,7 @@ public class CheckIndex {
segInfoStat.fieldNormStatus = testFieldNorms(fieldNames, reader);
// Test the Term Index
- segInfoStat.termIndexStatus = testTermIndex(info, reader);
+ segInfoStat.termIndexStatus = testTermIndex(reader);
// Test Stored Fields
segInfoStat.storedFieldStatus = testStoredFields(info, reader, nf);
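
The rewritten testTermIndex in the next hunk walks the new flex postings API: fields -> terms -> docs, reusing the DocsEnum across terms. A stripped-down sketch of that iteration pattern (mine, not part of the commit; it assumes IndexReader exposes fields() and getDeletedDocs() the same way the SegmentReader here does, and that BytesRef.utf8ToString() is available):

import java.io.IOException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

public class FlexIterationSketch {
  static void dumpPostings(IndexReader reader) throws IOException {
    Fields fields = reader.fields();
    if (fields == null) {
      return;                                   // no fields/terms in this reader
    }
    FieldsEnum fieldsEnum = fields.iterator();
    String field;
    while ((field = fieldsEnum.next()) != null) {
      TermsEnum terms = fieldsEnum.terms();
      DocsEnum docs = null;
      BytesRef term;
      while ((term = terms.next()) != null) {
        docs = terms.docs(reader.getDeletedDocs(), docs);   // reuse the enum
        int doc;
        while ((doc = docs.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
          System.out.println(field + ":" + term.utf8ToString()
              + " -> doc " + doc + " freq " + docs.freq());
        }
      }
    }
  }
}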
@@ -586,69 +582,129 @@ public class CheckIndex {
/**
* Test the term index.
*/
- private Status.TermIndexStatus testTermIndex(SegmentInfo info, SegmentReader reader) {
+ private Status.TermIndexStatus testTermIndex(SegmentReader reader) {
final Status.TermIndexStatus status = new Status.TermIndexStatus();
+ final int maxDoc = reader.maxDoc();
+ final Bits delDocs = reader.getDeletedDocs();
+
try {
+
if (infoStream != null) {
infoStream.print(" test: terms, freq, prox...");
}
- final TermEnum termEnum = reader.terms();
- final TermPositions termPositions = reader.termPositions();
+ final Fields fields = reader.fields();
+ if (fields == null) {
+ msg("OK [no fields/terms]");
+ return status;
+ }
+
+ final FieldsEnum fieldsEnum = fields.iterator();
+ while(true) {
+ final String field = fieldsEnum.next();
+ if (field == null) {
+ break;
+ }
+
+ final TermsEnum terms = fieldsEnum.terms();
+
+ DocsEnum docs = null;
+ DocsAndPositionsEnum postings = null;
- // Used only to count up # deleted docs for this term
- final MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);
+ boolean hasOrd = true;
+ final long termCountStart = status.termCount;
- final int maxDoc = reader.maxDoc();
+ while(true) {
+
+ final BytesRef term = terms.next();
+ if (term == null) {
+ break;
+ }
+
+ final int docFreq = terms.docFreq();
+ status.totFreq += docFreq;
+
+ docs = terms.docs(delDocs, docs);
+ postings = terms.docsAndPositions(delDocs, postings);
+
+ if (hasOrd) {
+ long ord = -1;
+ try {
+ ord = terms.ord();
+ } catch (UnsupportedOperationException uoe) {
+ hasOrd = false;
+ }
+
+ if (hasOrd) {
+ final long ordExpected = status.termCount - termCountStart;
+ if (ord != ordExpected) {
+ throw new RuntimeException("ord mismatch: TermsEnum has ord=" + ord + " vs actual=" + ordExpected);
+ }
+ }
+ }
- while (termEnum.next()) {
- status.termCount++;
- final Term term = termEnum.term();
- final int docFreq = termEnum.docFreq();
- termPositions.seek(term);
- int lastDoc = -1;
- int freq0 = 0;
- status.totFreq += docFreq;
- while (termPositions.next()) {
- freq0++;
- final int doc = termPositions.doc();
- final int freq = termPositions.freq();
- if (doc <= lastDoc)
- throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
- if (doc >= maxDoc)
- throw new RuntimeException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
-
- lastDoc = doc;
- if (freq <= 0)
- throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
+ status.termCount++;
+
+ final DocsEnum docs2;
+ if (postings != null) {
+ docs2 = postings;
+ } else {
+ docs2 = docs;
+ }
+
+ int lastDoc = -1;
+ while(true) {
+ final int doc = docs2.nextDoc();
+ if (doc == DocsEnum.NO_MORE_DOCS) {
+ break;
+ }
+ final int freq = docs2.freq();
+ status.totPos += freq;
+
+ if (doc <= lastDoc) {
+ throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
+ }
+ if (doc >= maxDoc) {
+ throw new RuntimeException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
+ }
+
+ lastDoc = doc;
+ if (freq <= 0) {
+ throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
+ }
- int lastPos = -1;
- status.totPos += freq;
- for(int j=0;j