From: uschindler@apache.org
To: java-commits@lucene.apache.org
Reply-To: java-dev@lucene.apache.org
Subject: svn commit: r931278 [3/10] - in /lucene/dev/trunk: lucene/ lucene/backwards/src/ lucene/backwards/src/java/org/apache/lucene/index/ lucene/backwards/src/java/org/apache/lucene/index/codecs/ lucene/backwards/src/java/org/apache/lucene/search/ lucene/bac...
Date: Tue, 06 Apr 2010 19:19:36 -0000
Message-Id: <20100406191940.8C13F23889DE@eris.apache.org>

Modified: lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java?rev=931278&r1=931277&r2=931278&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java (original)
+++ lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java Tue Apr 6 19:19:27 2010
@@ -76,13 +76,9 @@ public class TestFieldNormModifier exten
     writer.close();
   }
 
-  public void testMissingField() {
+  public void testMissingField() throws Exception {
     FieldNormModifier fnm = new FieldNormModifier(store, s);
-    try {
-      fnm.reSetNorms("nobodyherebutuschickens");
-    } catch (Exception e) {
-      assertNull("caught something", e);
-    }
+    fnm.reSetNorms("nobodyherebutuschickens");
   }
 
   public void testFieldWithNoNorm() throws Exception {
@@ -97,11 +93,7 @@ public class TestFieldNormModifier exten
     r.close();
 
     FieldNormModifier fnm = new FieldNormModifier(store, s);
-    try {
-      fnm.reSetNorms("nonorm");
-    } catch (Exception e) {
-      assertNull("caught something", e);
-    }
+    fnm.reSetNorms("nonorm");
 
     // nothing should have changed
     r = IndexReader.open(store, false);

Modified: lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java?rev=931278&r1=931277&r2=931278&view=diff
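The DuplicateFilter rewrite below is the template for most of this commit: per-field TermEnum/TermDocs loops become Terms/TermsEnum/DocsEnum iteration over BytesRef terms, with deleted docs passed in as a Bits instance. A minimal sketch of that idiom, assuming the flex API on trunk (reader and fieldName are placeholder inputs, not part of the patch):

    import java.io.IOException;
    import org.apache.lucene.index.DocsEnum;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.MultiFields;
    import org.apache.lucene.index.Terms;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.util.Bits;
    import org.apache.lucene.util.BytesRef;

    final class FlexIterationSketch {
      static void forEachPosting(IndexReader reader, String fieldName) throws IOException {
        Terms terms = MultiFields.getTerms(reader, fieldName); // null if the field has no terms
        if (terms == null) return;
        TermsEnum termsEnum = terms.iterator();
        Bits delDocs = MultiFields.getDeletedDocs(reader);     // lets the docs enum skip deletions
        DocsEnum docs = null;                                  // reused across terms
        BytesRef term;
        while ((term = termsEnum.next()) != null) {            // null term ends the enumeration
          docs = termsEnum.docs(delDocs, docs);
          int doc;
          while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            // process (term, doc) here
          }
        }
      }
    }

Note the field-name interning trick of the old code ("term fieldnames are interned") disappears: a TermsEnum is already scoped to one field, so there is no cross-field boundary to detect.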
============================================================================== --- lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java (original) +++ lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java Tue Apr 6 19:19:27 2010 @@ -18,10 +18,13 @@ package org.apache.lucene.search; import java.io.IOException; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.index.TermEnum; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.util.OpenBitSet; +import org.apache.lucene.util.Bits; public class DuplicateFilter extends Filter { @@ -79,88 +82,87 @@ public class DuplicateFilter extends Fil } } - private OpenBitSet correctBits(IndexReader reader) throws IOException - { - - OpenBitSet bits=new OpenBitSet(reader.maxDoc()); //assume all are INvalid - Term startTerm=new Term(fieldName); - TermEnum te = reader.terms(startTerm); - if(te!=null) - { - Term currTerm=te.term(); - while((currTerm!=null)&&(currTerm.field()==startTerm.field())) //term fieldnames are interned - { - int lastDoc=-1; - //set non duplicates - TermDocs td = reader.termDocs(currTerm); - if(td.next()) - { - if(keepMode==KM_USE_FIRST_OCCURRENCE) - { - bits.set(td.doc()); - } - else - { - do - { - lastDoc=td.doc(); - }while(td.next()); - bits.set(lastDoc); - } - } - if(!te.next()) - { - break; - } - currTerm=te.term(); - } - } - return bits; - } + private OpenBitSet correctBits(IndexReader reader) throws IOException { + OpenBitSet bits = new OpenBitSet(reader.maxDoc()); //assume all are INvalid + final Bits delDocs = MultiFields.getDeletedDocs(reader); + Terms terms = reader.fields().terms(fieldName); + if (terms != null) { + TermsEnum termsEnum = terms.iterator(); + DocsEnum docs = null; + while(true) { + BytesRef currTerm = termsEnum.next(); + if (currTerm == null) { + break; + } else { + docs = termsEnum.docs(delDocs, docs); + int doc = docs.nextDoc(); + if (doc != docs.NO_MORE_DOCS) { + if (keepMode == KM_USE_FIRST_OCCURRENCE) { + bits.set(doc); + } else { + int lastDoc = doc; + while (true) { + lastDoc = doc; + doc = docs.nextDoc(); + if (doc == docs.NO_MORE_DOCS) { + break; + } + } + bits.set(lastDoc); + } + } + } + } + } + return bits; + } private OpenBitSet fastBits(IndexReader reader) throws IOException - { + { OpenBitSet bits=new OpenBitSet(reader.maxDoc()); - bits.set(0,reader.maxDoc()); //assume all are valid - Term startTerm=new Term(fieldName); - TermEnum te = reader.terms(startTerm); - if(te!=null) - { - Term currTerm=te.term(); - - while((currTerm!=null)&&(currTerm.field()==startTerm.field())) //term fieldnames are interned - { - if(te.docFreq()>1) - { - int lastDoc=-1; - //unset potential duplicates - TermDocs td = reader.termDocs(currTerm); - td.next(); - if(keepMode==KM_USE_FIRST_OCCURRENCE) - { - td.next(); - } - do - { - lastDoc=td.doc(); - bits.clear(lastDoc); - }while(td.next()); - if(keepMode==KM_USE_LAST_OCCURRENCE) - { - //restore the last bit - bits.set(lastDoc); - } - } - if(!te.next()) - { - break; - } - currTerm=te.term(); - } - } - return bits; - } + bits.set(0,reader.maxDoc()); //assume all are valid + final Bits delDocs = MultiFields.getDeletedDocs(reader); + Terms terms = reader.fields().terms(fieldName); + if (terms != null) { + TermsEnum 
termsEnum = terms.iterator(); + DocsEnum docs = null; + while(true) { + BytesRef currTerm = termsEnum.next(); + if (currTerm == null) { + break; + } else { + if (termsEnum.docFreq() > 1) { + // unset potential duplicates + docs = termsEnum.docs(delDocs, docs); + int doc = docs.nextDoc(); + if (doc != docs.NO_MORE_DOCS) { + if (keepMode == KM_USE_FIRST_OCCURRENCE) { + doc = docs.nextDoc(); + } + } + + int lastDoc = -1; + while (true) { + lastDoc = doc; + bits.clear(lastDoc); + doc = docs.nextDoc(); + if (doc == docs.NO_MORE_DOCS) { + break; + } + } + + if (keepMode==KM_USE_LAST_OCCURRENCE) { + // restore the last bit + bits.set(lastDoc); + } + } + } + } + } + + return bits; + } public String getFieldName() { Modified: lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java?rev=931278&r1=931277&r2=931278&view=diff ============================================================================== --- lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java (original) +++ lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java Tue Apr 6 19:19:27 2010 @@ -29,7 +29,7 @@ import org.apache.lucene.analysis.TokenS import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.PriorityQueue; /** @@ -172,8 +172,8 @@ public class FuzzyLikeThisQuery extends * Adds user input for "fuzzification" * @param queryString The string which will be parsed by the analyzer and for which fuzzy variants will be parsed * @param fieldName - * @param minSimilarity The minimum similarity of the term variants (see FuzzyTermEnum) - * @param prefixLength Length of required common prefix on variant terms (see FuzzyTermEnum) + * @param minSimilarity The minimum similarity of the term variants (see FuzzyTermsEnum) + * @param prefixLength Length of required common prefix on variant terms (see FuzzyTermsEnum) */ public void addTerms(String queryString, String fieldName,float minSimilarity, int prefixLength) { @@ -195,48 +195,44 @@ public class FuzzyLikeThisQuery extends String term = termAtt.term(); if(!processedTerms.contains(term)) { - processedTerms.add(term); - ScoreTermQueue variantsQ=new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term - float minScore=0; - Term startTerm=internSavingTemplateTerm.createTerm(term); - FuzzyTermEnum fe=new FuzzyTermEnum(reader,startTerm,f.minSimilarity,f.prefixLength); - TermEnum origEnum = reader.terms(startTerm); - int df=0; - if(startTerm.equals(origEnum.term())) - { - df=origEnum.docFreq(); //store the df so all variants use same idf - } - int numVariants=0; - int totalVariantDocFreqs=0; - do - { - Term possibleMatch=fe.term(); - if(possibleMatch!=null) - { - numVariants++; - totalVariantDocFreqs+=fe.docFreq(); - float score=fe.difference(); - if(variantsQ.size() < MAX_VARIANTS_PER_TERM || score > minScore){ - ScoreTerm st=new ScoreTerm(possibleMatch,score,startTerm); - variantsQ.insertWithOverflow(st); - minScore = variantsQ.top().score; // maintain minScore - } + processedTerms.add(term); + ScoreTermQueue variantsQ=new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term + float 
minScore=0; + Term startTerm=internSavingTemplateTerm.createTerm(term); + FuzzyTermsEnum fe = new FuzzyTermsEnum(reader, startTerm, f.minSimilarity, f.prefixLength); + //store the df so all variants use same idf + int df = reader.docFreq(startTerm); + int numVariants=0; + int totalVariantDocFreqs=0; + BytesRef possibleMatch; + MultiTermQuery.BoostAttribute boostAtt = + fe.attributes().addAttribute(MultiTermQuery.BoostAttribute.class); + while ((possibleMatch = fe.next()) != null) { + if (possibleMatch!=null) { + numVariants++; + totalVariantDocFreqs+=fe.docFreq(); + float score=boostAtt.getBoost(); + if (variantsQ.size() < MAX_VARIANTS_PER_TERM || score > minScore){ + ScoreTerm st=new ScoreTerm(new Term(startTerm.field(), possibleMatch.utf8ToString()),score,startTerm); + variantsQ.insertWithOverflow(st); + minScore = variantsQ.top().score; // maintain minScore + } + } } - } - while(fe.next()); - if(numVariants>0) - { - int avgDf=totalVariantDocFreqs/numVariants; - if(df==0)//no direct match we can use as df for all variants + + if(numVariants>0) + { + int avgDf=totalVariantDocFreqs/numVariants; + if(df==0)//no direct match we can use as df for all variants { df=avgDf; //use avg df of all variants } - // take the top variants (scored by edit distance) and reset the score - // to include an IDF factor then add to the global queue for ranking - // overall top query terms - int size = variantsQ.size(); - for(int i = 0; i < size; i++) + // take the top variants (scored by edit distance) and reset the score + // to include an IDF factor then add to the global queue for ranking + // overall top query terms + int size = variantsQ.size(); + for(int i = 0; i < size; i++) { ScoreTerm st = variantsQ.pop(); st.score=(st.score*st.score)*sim.idf(df,corpusNumDocs); Modified: lucene/dev/trunk/lucene/contrib/remote/src/test/org/apache/lucene/search/TestRemoteSort.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/remote/src/test/org/apache/lucene/search/TestRemoteSort.java?rev=931278&r1=931277&r2=931278&view=diff ============================================================================== --- lucene/dev/trunk/lucene/contrib/remote/src/test/org/apache/lucene/search/TestRemoteSort.java (original) +++ lucene/dev/trunk/lucene/contrib/remote/src/test/org/apache/lucene/search/TestRemoteSort.java Tue Apr 6 19:19:27 2010 @@ -38,6 +38,7 @@ import org.apache.lucene.index.IndexWrit import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LogMergePolicy; import org.apache.lucene.index.Term; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; @@ -219,8 +220,8 @@ public class TestRemoteSort extends Luce @Override public void setNextReader(IndexReader reader, int docBase) throws IOException { docValues = FieldCache.DEFAULT.getInts(reader, "parser", new FieldCache.IntParser() { - public final int parseInt(final String val) { - return (val.charAt(0)-'A') * 123456; + public final int parseInt(BytesRef termRef) { + return (termRef.utf8ToString().charAt(0)-'A') * 123456; } }); } @@ -245,6 +246,29 @@ public class TestRemoteSort extends Luce runMultiSorts(multi, true); // this runs on the full index } + // test custom search when remote + /* rewrite with new API + public void testRemoteCustomSort() throws Exception { + Searchable searcher = getRemote(); + MultiSearcher multi = new MultiSearcher (new Searchable[] { searcher }); + sort.setSort (new SortField 
("custom", SampleComparable.getComparatorSource())); + assertMatches (multi, queryX, sort, "CAIEG"); + sort.setSort (new SortField ("custom", SampleComparable.getComparatorSource(), true)); + assertMatches (multi, queryY, sort, "HJDBF"); + + assertSaneFieldCaches(getName() + " ComparatorSource"); + FieldCache.DEFAULT.purgeAllCaches(); + + SortComparator custom = SampleComparable.getComparator(); + sort.setSort (new SortField ("custom", custom)); + assertMatches (multi, queryX, sort, "CAIEG"); + sort.setSort (new SortField ("custom", custom, true)); + assertMatches (multi, queryY, sort, "HJDBF"); + + assertSaneFieldCaches(getName() + " Comparator"); + FieldCache.DEFAULT.purgeAllCaches(); + }*/ + // test that the relevancy scores are the same even if // hits are sorted public void testNormalizedScores() throws Exception { @@ -294,7 +318,7 @@ public class TestRemoteSort extends Luce assertSameValues (scoresY, getScores (remote.search (queryY, null, 1000, sort).scoreDocs, remote)); assertSameValues (scoresA, getScores (remote.search (queryA, null, 1000, sort).scoreDocs, remote)); - sort.setSort (new SortField("float", SortField.FLOAT), new SortField("string", SortField.STRING)); + sort.setSort (new SortField("float", SortField.FLOAT)); assertSameValues (scoresX, getScores (remote.search (queryX, null, 1000, sort).scoreDocs, remote)); assertSameValues (scoresY, getScores (remote.search (queryY, null, 1000, sort).scoreDocs, remote)); assertSameValues (scoresA, getScores (remote.search (queryA, null, 1000, sort).scoreDocs, remote)); @@ -314,6 +338,10 @@ public class TestRemoteSort extends Luce expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC"; assertMatches(multi, queryA, sort, expected); + sort.setSort(new SortField ("int", SortField.INT)); + expected = isFull ? 
"IDHFGJABEC" : "IDHFGJAEBC"; + assertMatches(multi, queryA, sort, expected); + sort.setSort(new SortField ("float", SortField.FLOAT), SortField.FIELD_DOC); assertMatches(multi, queryA, sort, "GDHJCIEFAB"); Modified: lucene/dev/trunk/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java?rev=931278&r1=931277&r2=931278&view=diff ============================================================================== --- lucene/dev/trunk/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java (original) +++ lucene/dev/trunk/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java Tue Apr 6 19:19:27 2010 @@ -19,12 +19,15 @@ package org.apache.lucene.spatial.tier; import java.io.IOException; import java.util.List; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.search.Filter; import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.NumericUtils; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.OpenBitSet; /** @@ -44,22 +47,41 @@ public class CartesianShapeFilter extend @Override public DocIdSet getDocIdSet(final IndexReader reader) throws IOException { - final OpenBitSet bits = new OpenBitSet(reader.maxDoc()); - final TermDocs termDocs = reader.termDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); final List area = shape.getArea(); - int sz = area.size(); + final int sz = area.size(); - final Term term = new Term(fieldName); // iterate through each boxid - for (int i =0; i< sz; i++) { - double boxId = area.get(i).doubleValue(); - termDocs.seek(term.createTerm(NumericUtils.doubleToPrefixCoded(boxId))); - // iterate through all documents - // which have this boxId - while (termDocs.next()) { - bits.fastSet(termDocs.doc()); + final BytesRef bytesRef = new BytesRef(NumericUtils.BUF_SIZE_LONG); + if (sz == 1) { + double boxId = area.get(0).doubleValue(); + NumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(boxId), 0, bytesRef); + return new DocIdSet() { + @Override + public DocIdSetIterator iterator() throws IOException { + return MultiFields.getTermDocsEnum(reader, delDocs, fieldName, bytesRef); + } + + @Override + public boolean isCacheable() { + return false; + } + }; + } else { + final OpenBitSet bits = new OpenBitSet(reader.maxDoc()); + for (int i =0; i< sz; i++) { + double boxId = area.get(i).doubleValue(); + NumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(boxId), 0, bytesRef); + final DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, delDocs, fieldName, bytesRef); + if (docsEnum == null) continue; + // iterate through all documents + // which have this boxId + int doc; + while ((doc = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + bits.fastSet(doc); + } } + return bits; } - return bits; } } Modified: lucene/dev/trunk/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java?rev=931278&r1=931277&r2=931278&view=diff 
============================================================================== --- lucene/dev/trunk/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java (original) +++ lucene/dev/trunk/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java Tue Apr 6 19:19:27 2010 @@ -24,6 +24,7 @@ import java.util.Map; import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig; @@ -49,7 +50,6 @@ import org.apache.lucene.spatial.tier.pr import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.NumericUtils; public class TestCartesian extends LuceneTestCase { @@ -96,8 +96,8 @@ public class TestCartesian extends Lucen doc.add(new Field("name", name,Field.Store.YES, Field.Index.ANALYZED)); // convert the lat / long to lucene fields - doc.add(new Field(latField, NumericUtils.doubleToPrefixCoded(lat),Field.Store.YES, Field.Index.NOT_ANALYZED)); - doc.add(new Field(lngField, NumericUtils.doubleToPrefixCoded(lng),Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(new NumericField(latField, Integer.MAX_VALUE, Field.Store.YES, true).setDoubleValue(lat)); + doc.add(new NumericField(lngField, Integer.MAX_VALUE, Field.Store.YES, true).setDoubleValue(lng)); // add a default meta field to make searching all documents easy doc.add(new Field("metafile", "doc",Field.Store.YES, Field.Index.ANALYZED)); @@ -105,10 +105,9 @@ public class TestCartesian extends Lucen int ctpsize = ctps.size(); for (int i =0; i < ctpsize; i++){ CartesianTierPlotter ctp = ctps.get(i); - doc.add(new Field(ctp.getTierFieldName(), - NumericUtils.doubleToPrefixCoded(ctp.getTierBoxId(lat,lng)), + doc.add(new NumericField(ctp.getTierFieldName(), Integer.MAX_VALUE, Field.Store.YES, - Field.Index.NOT_ANALYZED_NO_NORMS)); + true).setDoubleValue(ctp.getTierBoxId(lat,lng))); doc.add(new Field(geoHashPrefix, GeoHashUtils.encode(lat,lng), Field.Store.YES, @@ -275,8 +274,8 @@ public class TestCartesian extends Lucen Document d = searcher.doc(scoreDocs[i].doc); String name = d.get("name"); - double rsLat = NumericUtils.prefixCodedToDouble(d.get(latField)); - double rsLng = NumericUtils.prefixCodedToDouble(d.get(lngField)); + double rsLat = Double.parseDouble(d.get(latField)); + double rsLng = Double.parseDouble(d.get(lngField)); Double geo_distance = distances.get(scoreDocs[i].doc); double distance = DistanceUtils.getInstance().getDistanceMi(lat, lng, rsLat, rsLng); @@ -369,8 +368,8 @@ public class TestCartesian extends Lucen for(int i =0 ; i < results; i++){ Document d = searcher.doc(scoreDocs[i].doc); String name = d.get("name"); - double rsLat = NumericUtils.prefixCodedToDouble(d.get(latField)); - double rsLng = NumericUtils.prefixCodedToDouble(d.get(lngField)); + double rsLat = Double.parseDouble(d.get(latField)); + double rsLng = Double.parseDouble(d.get(lngField)); Double geo_distance = distances.get(scoreDocs[i].doc); double distance = DistanceUtils.getInstance().getDistanceMi(lat, lng, rsLat, rsLng); @@ -464,8 +463,8 @@ public class TestCartesian extends Lucen Document d = searcher.doc(scoreDocs[i].doc); String name = d.get("name"); - double rsLat = NumericUtils.prefixCodedToDouble(d.get(latField)); - double rsLng = 
NumericUtils.prefixCodedToDouble(d.get(lngField)); + double rsLat = Double.parseDouble(d.get(latField)); + double rsLng = Double.parseDouble(d.get(lngField)); Double geo_distance = distances.get(scoreDocs[i].doc); double distance = DistanceUtils.getInstance().getDistanceMi(lat, lng, rsLat, rsLng); @@ -558,8 +557,8 @@ public class TestCartesian extends Lucen Document d = searcher.doc(scoreDocs[i].doc); String name = d.get("name"); - double rsLat = NumericUtils.prefixCodedToDouble(d.get(latField)); - double rsLng = NumericUtils.prefixCodedToDouble(d.get(lngField)); + double rsLat = Double.parseDouble(d.get(latField)); + double rsLng = Double.parseDouble(d.get(lngField)); Double geo_distance = distances.get(scoreDocs[i].doc); double distance = DistanceUtils.getInstance().getDistanceMi(lat, lng, rsLat, rsLng); Modified: lucene/dev/trunk/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java?rev=931278&r1=931277&r2=931278&view=diff ============================================================================== --- lucene/dev/trunk/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java (original) +++ lucene/dev/trunk/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java Tue Apr 6 19:19:27 2010 @@ -21,6 +21,7 @@ import java.io.IOException; import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; @@ -28,7 +29,6 @@ import org.apache.lucene.index.IndexRead import org.apache.lucene.search.QueryWrapperFilter; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.NumericUtils; import org.apache.lucene.store.RAMDirectory; public class TestDistance extends LuceneTestCase { @@ -63,8 +63,8 @@ public class TestDistance extends Lucene doc.add(new Field("name", name,Field.Store.YES, Field.Index.ANALYZED)); // convert the lat / long to lucene fields - doc.add(new Field(latField, NumericUtils.doubleToPrefixCoded(lat),Field.Store.YES, Field.Index.NOT_ANALYZED)); - doc.add(new Field(lngField, NumericUtils.doubleToPrefixCoded(lng),Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(new NumericField(latField, Integer.MAX_VALUE, Field.Store.YES, true).setDoubleValue(lat)); + doc.add(new NumericField(lngField, Integer.MAX_VALUE,Field.Store.YES, true).setDoubleValue(lng)); // add a default meta field to make searching all documents easy doc.add(new Field("metafile", "doc",Field.Store.YES, Field.Index.ANALYZED)); Modified: lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java?rev=931278&r1=931277&r2=931278&view=diff ============================================================================== --- lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java (original) +++ lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java Tue Apr 6 19:19:27 2010 @@ -21,8 +21,10 @@ import 
org.apache.lucene.index.IndexRead import java.util.Iterator; -import org.apache.lucene.index.TermEnum; -import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.util.StringHelper; import java.io.*; @@ -52,55 +54,39 @@ public class LuceneDictionary implements final class LuceneIterator implements Iterator { - private TermEnum termEnum; - private Term actualTerm; - private boolean hasNextCalled; + private TermsEnum termsEnum; + private BytesRef pendingTerm; LuceneIterator() { try { - termEnum = reader.terms(new Term(field)); + final Terms terms = MultiFields.getTerms(reader, field); + if (terms != null) { + termsEnum = terms.iterator(); + pendingTerm = termsEnum.next(); + } } catch (IOException e) { throw new RuntimeException(e); } } public String next() { - if (!hasNextCalled) { - hasNext(); + if (pendingTerm == null) { + return null; } - hasNextCalled = false; + + String result = pendingTerm.utf8ToString(); try { - termEnum.next(); + pendingTerm = termsEnum.next(); } catch (IOException e) { throw new RuntimeException(e); } - return (actualTerm != null) ? actualTerm.text() : null; + return result; } public boolean hasNext() { - if (hasNextCalled) { - return actualTerm != null; - } - hasNextCalled = true; - - actualTerm = termEnum.term(); - - // if there are no words return false - if (actualTerm == null) { - return false; - } - - String currentField = actualTerm.field(); - - // if the next word doesn't have the same field return false - if (currentField != field) { - actualTerm = null; - return false; - } - - return true; + return pendingTerm != null; } public void remove() { Modified: lucene/dev/trunk/lucene/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndPrefixQuery.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndPrefixQuery.java?rev=931278&r1=931277&r2=931278&view=diff ============================================================================== --- lucene/dev/trunk/lucene/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndPrefixQuery.java (original) +++ lucene/dev/trunk/lucene/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndPrefixQuery.java Tue Apr 6 19:19:27 2010 @@ -17,16 +17,21 @@ package org.apache.lucene.queryParser.su */ import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.Terms; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiFields; import java.io.IOException; public class SrndPrefixQuery extends SimpleTerm { + private final BytesRef prefixRef; public SrndPrefixQuery(String prefix, boolean quoted, char truncator) { super(quoted); this.prefix = prefix; + prefixRef = new BytesRef(prefix); this.truncator = truncator; } @@ -53,20 +58,35 @@ public class SrndPrefixQuery extends Sim MatchingTermVisitor mtv) throws IOException { /* inspired by PrefixQuery.rewrite(): */ - TermEnum enumerator = reader.terms(getLucenePrefixTerm(fieldName)); - try { - do { - Term term = enumerator.term(); - if ((term != null) - && term.text().startsWith(getPrefix()) - && term.field().equals(fieldName)) { - mtv.visitMatchingTerm(term); + Terms terms = MultiFields.getTerms(reader, fieldName); + if 
(terms != null) { + TermsEnum termsEnum = terms.iterator(); + + boolean skip = false; + TermsEnum.SeekStatus status = termsEnum.seek(new BytesRef(getPrefix())); + if (status == TermsEnum.SeekStatus.FOUND) { + mtv.visitMatchingTerm(getLucenePrefixTerm(fieldName)); + } else if (status == TermsEnum.SeekStatus.NOT_FOUND) { + if (termsEnum.term().startsWith(prefixRef)) { + mtv.visitMatchingTerm(new Term(fieldName, termsEnum.term().utf8ToString())); } else { - break; + skip = true; + } + } else { + // EOF + skip = true; + } + + if (!skip) { + while(true) { + BytesRef text = termsEnum.next(); + if (text != null && text.startsWith(prefixRef)) { + mtv.visitMatchingTerm(new Term(fieldName, text.utf8ToString())); + } else { + break; + } } - } while (enumerator.next()); - } finally { - enumerator.close(); + } } } } Modified: lucene/dev/trunk/lucene/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndTermQuery.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndTermQuery.java?rev=931278&r1=931277&r2=931278&view=diff ============================================================================== --- lucene/dev/trunk/lucene/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndTermQuery.java (original) +++ lucene/dev/trunk/lucene/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndTermQuery.java Tue Apr 6 19:19:27 2010 @@ -20,7 +20,10 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.util.BytesRef; public class SrndTermQuery extends SimpleTerm { @@ -46,16 +49,14 @@ public class SrndTermQuery extends Simpl MatchingTermVisitor mtv) throws IOException { /* check term presence in index here for symmetry with other SimpleTerm's */ - TermEnum enumerator = reader.terms(getLuceneTerm(fieldName)); - try { - Term it= enumerator.term(); /* same or following index term */ - if ((it != null) - && it.text().equals(getTermText()) - && it.field().equals(fieldName)) { - mtv.visitMatchingTerm(it); + Terms terms = MultiFields.getTerms(reader, fieldName); + if (terms != null) { + TermsEnum termsEnum = terms.iterator(); + + TermsEnum.SeekStatus status = termsEnum.seek(new BytesRef(getTermText())); + if (status == TermsEnum.SeekStatus.FOUND) { + mtv.visitMatchingTerm(getLuceneTerm(fieldName)); } - } finally { - enumerator.close(); } } } Modified: lucene/dev/trunk/lucene/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndTruncQuery.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndTruncQuery.java?rev=931278&r1=931277&r2=931278&view=diff ============================================================================== --- lucene/dev/trunk/lucene/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndTruncQuery.java (original) +++ lucene/dev/trunk/lucene/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndTruncQuery.java Tue Apr 6 19:19:27 2010 @@ -17,8 +17,11 @@ package org.apache.lucene.queryParser.su */ import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.Terms; +import 
org.apache.lucene.util.BytesRef; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiFields; import java.io.IOException; @@ -40,6 +43,7 @@ public class SrndTruncQuery extends Simp private final char mask; private String prefix; + private BytesRef prefixRef; private Pattern pattern; @@ -68,6 +72,7 @@ public class SrndTruncQuery extends Simp i++; } prefix = truncated.substring(0, i); + prefixRef = new BytesRef(prefix); StringBuilder re = new StringBuilder(); while (i < truncated.length()) { @@ -84,26 +89,37 @@ public class SrndTruncQuery extends Simp MatchingTermVisitor mtv) throws IOException { int prefixLength = prefix.length(); - TermEnum enumerator = reader.terms(new Term(fieldName, prefix)); - Matcher matcher = pattern.matcher(""); - try { - do { - Term term = enumerator.term(); - if (term != null) { - String text = term.text(); - if ((! text.startsWith(prefix)) || (! term.field().equals(fieldName))) { - break; - } else { - matcher.reset( text.substring(prefixLength)); + Terms terms = MultiFields.getTerms(reader, fieldName); + if (terms != null) { + Matcher matcher = pattern.matcher(""); + try { + TermsEnum termsEnum = terms.iterator(); + + TermsEnum.SeekStatus status = termsEnum.seek(prefixRef); + BytesRef text; + if (status == TermsEnum.SeekStatus.FOUND) { + text = prefixRef; + } else if (status == TermsEnum.SeekStatus.NOT_FOUND) { + text = termsEnum.term(); + } else { + text = null; + } + + while(text != null) { + if (text != null && text.startsWith(prefixRef)) { + String textString = text.utf8ToString(); + matcher.reset(textString.substring(prefixLength)); if (matcher.matches()) { - mtv.visitMatchingTerm(term); + mtv.visitMatchingTerm(new Term(fieldName, textString)); } + } else { + break; } + text = termsEnum.next(); } - } while (enumerator.next()); - } finally { - enumerator.close(); - matcher.reset(); + } finally { + matcher.reset(); + } } } } Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java?rev=931278&r1=931277&r2=931278&view=diff ============================================================================== --- lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java (original) +++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java Tue Apr 6 19:19:27 2010 @@ -17,12 +17,17 @@ package org.apache.lucene.analysis; * limitations under the License. */ +import org.apache.lucene.util.Attribute; +import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.document.NumericField; // for javadocs import org.apache.lucene.search.NumericRangeQuery; // for javadocs import org.apache.lucene.search.NumericRangeFilter; // for javadocs +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; @@ -91,6 +96,88 @@ public final class NumericTokenStream ex /** The lower precision tokens gets this token type assigned. 
*/ public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecNumeric"; + + /** Expert: Use this attribute to get the details of the currently generated token + * @lucene.experimental + * @since 3.1 + */ + public interface NumericTermAttribute extends Attribute { + /** Returns current shift value, undefined before first token */ + int getShift(); + /** Returns {@link NumericTokenStream}'s raw value as {@code long} */ + long getRawValue(); + /** Returns value size in bits (32 for {@code float}, {@code int}; 64 for {@code double}, {@code long}) */ + int getValueSize(); + } + + private static final class NumericAttributeFactory extends AttributeFactory { + private final AttributeFactory delegate; + private NumericTokenStream ts = null; + + NumericAttributeFactory(AttributeFactory delegate) { + this.delegate = delegate; + } + + @Override + public AttributeImpl createAttributeInstance(Class attClass) { + if (attClass == NumericTermAttribute.class) + return new NumericTermAttributeImpl(ts); + if (attClass.isAssignableFrom(CharTermAttribute.class) || attClass.isAssignableFrom(TermAttribute.class)) + throw new IllegalArgumentException("NumericTokenStream does not support CharTermAttribute/TermAttribute."); + return delegate.createAttributeInstance(attClass); + } + } + + private static final class NumericTermAttributeImpl extends AttributeImpl implements NumericTermAttribute,TermToBytesRefAttribute { + private final NumericTokenStream ts; + + public NumericTermAttributeImpl(NumericTokenStream ts) { + this.ts = ts; + } + + public int toBytesRef(BytesRef bytes) { + try { + assert ts.valSize == 64 || ts.valSize == 32; + return (ts.valSize == 64) ? + NumericUtils.longToPrefixCoded(ts.value, ts.shift, bytes) : + NumericUtils.intToPrefixCoded((int) ts.value, ts.shift, bytes); + } catch (IllegalArgumentException iae) { + // return empty token before first + bytes.length = 0; + return 0; + } + } + + public int getShift() { return ts.shift; } + public long getRawValue() { return ts.value; } + public int getValueSize() { return ts.valSize; } + + @Override + public void clear() { + // this attribute has no contents to clear + } + + @Override + public boolean equals(Object other) { + return other == this; + } + + @Override + public int hashCode() { + return System.identityHashCode(this); + } + + @Override + public void copyTo(AttributeImpl target) { + // this attribute has no contents to copy + } + + @Override + public Object clone() { + // cannot throw CloneNotSupportedException (checked) + throw new UnsupportedOperationException(); + } + } /** * Creates a token stream for numeric values using the default precisionStep @@ -107,23 +194,15 @@ public final class NumericTokenStream ex * before using set a value using the various set???Value() methods. */ public NumericTokenStream(final int precisionStep) { - super(); - this.precisionStep = precisionStep; - if (precisionStep < 1) - throw new IllegalArgumentException("precisionStep must be >=1"); - } + super(new NumericAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY)); + // we must do this after the super call :( + ((NumericAttributeFactory) getAttributeFactory()).ts = this; + addAttribute(NumericTermAttribute.class); - /** - * Expert: Creates a token stream for numeric values with the specified - * precisionStep using the given {@link AttributeSource}. - * The stream is not yet initialized, - * before using set a value using the various set???Value() methods. 
- */ - public NumericTokenStream(AttributeSource source, final int precisionStep) { - super(source); this.precisionStep = precisionStep; if (precisionStep < 1) throw new IllegalArgumentException("precisionStep must be >=1"); + shift = -precisionStep; } /** @@ -134,10 +213,15 @@ public final class NumericTokenStream ex * before using set a value using the various set???Value() methods. */ public NumericTokenStream(AttributeFactory factory, final int precisionStep) { - super(factory); + super(new NumericAttributeFactory(factory)); + // we must do this after the super call :( + ((NumericAttributeFactory) getAttributeFactory()).ts = this; + addAttribute(NumericTermAttribute.class); + this.precisionStep = precisionStep; if (precisionStep < 1) throw new IllegalArgumentException("precisionStep must be >=1"); + shift = -precisionStep; } /** @@ -149,7 +233,7 @@ public final class NumericTokenStream ex public NumericTokenStream setLongValue(final long value) { this.value = value; valSize = 64; - shift = 0; + shift = -precisionStep; return this; } @@ -162,7 +246,7 @@ public final class NumericTokenStream ex public NumericTokenStream setIntValue(final int value) { this.value = value; valSize = 32; - shift = 0; + shift = -precisionStep; return this; } @@ -175,7 +259,7 @@ public final class NumericTokenStream ex public NumericTokenStream setDoubleValue(final double value) { this.value = NumericUtils.doubleToSortableLong(value); valSize = 64; - shift = 0; + shift = -precisionStep; return this; } @@ -188,7 +272,7 @@ public final class NumericTokenStream ex public NumericTokenStream setFloatValue(final float value) { this.value = NumericUtils.floatToSortableInt(value); valSize = 32; - shift = 0; + shift = -precisionStep; return this; } @@ -196,37 +280,24 @@ public final class NumericTokenStream ex public void reset() { if (valSize == 0) throw new IllegalStateException("call set???Value() before usage"); - shift = 0; + shift = -precisionStep; } @Override public boolean incrementToken() { if (valSize == 0) throw new IllegalStateException("call set???Value() before usage"); - if (shift >= valSize) + shift += precisionStep; + if (shift >= valSize) { + // reset so the attribute still works after exhausted stream + shift -= precisionStep; return false; + } clearAttributes(); - final char[] buffer; - switch (valSize) { - case 64: - buffer = termAtt.resizeTermBuffer(NumericUtils.BUF_SIZE_LONG); - termAtt.setTermLength(NumericUtils.longToPrefixCoded(value, shift, buffer)); - break; - - case 32: - buffer = termAtt.resizeTermBuffer(NumericUtils.BUF_SIZE_INT); - termAtt.setTermLength(NumericUtils.intToPrefixCoded((int) value, shift, buffer)); - break; - - default: - // should not happen - throw new IllegalArgumentException("valSize must be 32 or 64"); - } - + // the TermToBytesRefAttribute is directly accessing shift & value. typeAtt.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC : TOKEN_TYPE_LOWER_PREC); posIncrAtt.setPositionIncrement((shift == 0) ? 
1 : 0); - shift += precisionStep; return true; } @@ -238,12 +309,11 @@ public final class NumericTokenStream ex } // members - private final TermAttribute termAtt = addAttribute(TermAttribute.class); private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); - private int shift = 0, valSize = 0; // valSize==0 means not initialized + int shift, valSize = 0; // valSize==0 means not initialized private final int precisionStep; - private long value = 0L; + long value = 0L; } Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Token.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Token.java?rev=931278&r1=931277&r2=931278&view=diff ============================================================================== --- lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Token.java (original) +++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Token.java Tue Apr 6 19:19:27 2010 @@ -64,14 +64,14 @@ import org.apache.lucene.util.AttributeI implementing the {@link TokenStream#incrementToken()} API. Failing that, to create a new Token you should first use one of the constructors that starts with null text. To load - the token from a char[] use {@link #setTermBuffer(char[], int, int)}. - To load from a String use {@link #setTermBuffer(String)} or {@link #setTermBuffer(String, int, int)}. - Alternatively you can get the Token's termBuffer by calling either {@link #termBuffer()}, + the token from a char[] use {@link #copyBuffer(char[], int, int)}. + To load from a String use {@link #setEmpty} followed by {@link #append(CharSequence)} or {@link #append(CharSequence, int, int)}. + Alternatively you can get the Token's termBuffer by calling either {@link #buffer()}, if you know that your text is shorter than the capacity of the termBuffer - or {@link #resizeTermBuffer(int)}, if there is any possibility + or {@link #resizeBuffer(int)}, if there is any possibility that you may need to grow the buffer. Fill in the characters of your term into this buffer, with {@link String#getChars(int, int, char[], int)} if loading from a string, - or with {@link System#arraycopy(Object, int, Object, int, int)}, and finally call {@link #setTermLength(int)} to + or with {@link System#arraycopy(Object, int, Object, int, int)}, and finally call {@link #setLength(int)} to set the length of the term text. See LUCENE-969 for details.
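In code form, the three loading idioms the revised javadoc above describes look roughly like this (a sketch; chars and n are placeholder inputs):

    import org.apache.lucene.analysis.Token;

    final class TokenLoadSketch {
      static Token load(char[] chars, int n) {
        Token t = new Token();
        t.copyBuffer(chars, 0, n);              // 1) load from a char[]
        t.setEmpty().append("some text");       // 2) load from a String/CharSequence
        char[] buf = t.resizeBuffer(n);         // 3) fill the buffer directly...
        System.arraycopy(chars, 0, buf, 0, n);
        t.setLength(n);                         //    ...then record the valid length
        return t;
      }
    }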

@@ -100,7 +100,7 @@ import org.apache.lucene.util.AttributeI
  • Copying from one one Token to another (type is reset to {@link #DEFAULT_TYPE} if not specified):
    -    return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]);
    +    return reusableToken.reinit(source.buffer(), 0, source.length(), source.startOffset(), source.endOffset()[, source.type()]);
       
  • @@ -115,6 +115,7 @@ import org.apache.lucene.util.AttributeI @see org.apache.lucene.index.Payload */ +// TODO: change superclass to CharTermAttribute in 4.0! public class Token extends TermAttributeImpl implements TypeAttribute, PositionIncrementAttribute, FlagsAttribute, OffsetAttribute, PayloadAttribute { @@ -172,7 +173,7 @@ public class Token extends TermAttribute * @param end end offset */ public Token(String text, int start, int end) { - setTermBuffer(text); + append(text); startOffset = start; endOffset = end; } @@ -187,7 +188,7 @@ public class Token extends TermAttribute * @param typ token type */ public Token(String text, int start, int end, String typ) { - setTermBuffer(text); + append(text); startOffset = start; endOffset = end; type = typ; @@ -204,7 +205,7 @@ public class Token extends TermAttribute * @param flags token type bits */ public Token(String text, int start, int end, int flags) { - setTermBuffer(text); + append(text); startOffset = start; endOffset = end; this.flags = flags; @@ -221,7 +222,7 @@ public class Token extends TermAttribute * @param end */ public Token(char[] startTermBuffer, int termBufferOffset, int termBufferLength, int start, int end) { - setTermBuffer(startTermBuffer, termBufferOffset, termBufferLength); + copyBuffer(startTermBuffer, termBufferOffset, termBufferLength); startOffset = start; endOffset = end; } @@ -270,7 +271,7 @@ public class Token extends TermAttribute corresponding to this token in the source text. Note that the difference between endOffset() and startOffset() may not be - equal to {@link #termLength}, as the term text may have been altered by a + equal to {@link #length}, as the term text may have been altered by a stemmer or some other filter. */ public final int startOffset() { return startOffset; @@ -351,7 +352,7 @@ public class Token extends TermAttribute @Override public String toString() { final StringBuilder sb = new StringBuilder(); - sb.append('(').append(term()).append(',') + sb.append('(').append(super.toString()).append(',') .append(startOffset).append(',').append(endOffset); if (!"word".equals(type)) sb.append(",type=").append(type); @@ -387,7 +388,7 @@ public class Token extends TermAttribute /** Makes a clone, but replaces the term buffer & * start/end offset in the process. This is more * efficient than doing a full clone (and then calling - * setTermBuffer) because it saves a wasted copy of the old + * {@link #copyBuffer}) because it saves a wasted copy of the old * termBuffer. 
*/ public Token clone(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) { final Token t = new Token(newTermBuffer, newTermOffset, newTermLength, newStartOffset, newEndOffset); @@ -442,16 +443,16 @@ public class Token extends TermAttribute } /** Shorthand for calling {@link #clear}, - * {@link #setTermBuffer(char[], int, int)}, + * {@link #copyBuffer(char[], int, int)}, * {@link #setStartOffset}, * {@link #setEndOffset}, * {@link #setType} * @return this Token instance */ public Token reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType) { clearNoTermBuffer(); + copyBuffer(newTermBuffer, newTermOffset, newTermLength); payload = null; positionIncrement = 1; - setTermBuffer(newTermBuffer, newTermOffset, newTermLength); startOffset = newStartOffset; endOffset = newEndOffset; type = newType; @@ -459,14 +460,14 @@ public class Token extends TermAttribute } /** Shorthand for calling {@link #clear}, - * {@link #setTermBuffer(char[], int, int)}, + * {@link #copyBuffer(char[], int, int)}, * {@link #setStartOffset}, * {@link #setEndOffset} * {@link #setType} on Token.DEFAULT_TYPE * @return this Token instance */ public Token reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) { clearNoTermBuffer(); - setTermBuffer(newTermBuffer, newTermOffset, newTermLength); + copyBuffer(newTermBuffer, newTermOffset, newTermLength); startOffset = newStartOffset; endOffset = newEndOffset; type = DEFAULT_TYPE; @@ -474,14 +475,14 @@ public class Token extends TermAttribute } /** Shorthand for calling {@link #clear}, - * {@link #setTermBuffer(String)}, + * {@link #append(CharSequence)}, * {@link #setStartOffset}, * {@link #setEndOffset} * {@link #setType} * @return this Token instance */ public Token reinit(String newTerm, int newStartOffset, int newEndOffset, String newType) { - clearNoTermBuffer(); - setTermBuffer(newTerm); + clear(); + append(newTerm); startOffset = newStartOffset; endOffset = newEndOffset; type = newType; @@ -489,14 +490,14 @@ public class Token extends TermAttribute } /** Shorthand for calling {@link #clear}, - * {@link #setTermBuffer(String, int, int)}, + * {@link #append(CharSequence, int, int)}, * {@link #setStartOffset}, * {@link #setEndOffset} * {@link #setType} * @return this Token instance */ public Token reinit(String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType) { - clearNoTermBuffer(); - setTermBuffer(newTerm, newTermOffset, newTermLength); + clear(); + append(newTerm, newTermOffset, newTermOffset + newTermLength); startOffset = newStartOffset; endOffset = newEndOffset; type = newType; @@ -504,14 +505,14 @@ public class Token extends TermAttribute } /** Shorthand for calling {@link #clear}, - * {@link #setTermBuffer(String)}, + * {@link #append(CharSequence)}, * {@link #setStartOffset}, * {@link #setEndOffset} * {@link #setType} on Token.DEFAULT_TYPE * @return this Token instance */ public Token reinit(String newTerm, int newStartOffset, int newEndOffset) { - clearNoTermBuffer(); - setTermBuffer(newTerm); + clear(); + append(newTerm); startOffset = newStartOffset; endOffset = newEndOffset; type = DEFAULT_TYPE; @@ -519,14 +520,14 @@ public class Token extends TermAttribute } /** Shorthand for calling {@link #clear}, - * {@link #setTermBuffer(String, int, int)}, + * {@link #append(CharSequence, int, int)}, * {@link #setStartOffset}, * {@link #setEndOffset} * {@link 
#setType} on Token.DEFAULT_TYPE * @return this Token instance */ public Token reinit(String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) { - clearNoTermBuffer(); - setTermBuffer(newTerm, newTermOffset, newTermLength); + clear(); + append(newTerm, newTermOffset, newTermOffset + newTermLength); startOffset = newStartOffset; endOffset = newEndOffset; type = DEFAULT_TYPE; @@ -538,7 +539,7 @@ public class Token extends TermAttribute * @param prototype */ public void reinit(Token prototype) { - setTermBuffer(prototype.termBuffer(), 0, prototype.termLength()); + copyBuffer(prototype.buffer(), 0, prototype.length()); positionIncrement = prototype.positionIncrement; flags = prototype.flags; startOffset = prototype.startOffset; @@ -553,7 +554,7 @@ public class Token extends TermAttribute * @param newTerm */ public void reinit(Token prototype, String newTerm) { - setTermBuffer(newTerm); + setEmpty().append(newTerm); positionIncrement = prototype.positionIncrement; flags = prototype.flags; startOffset = prototype.startOffset; @@ -570,7 +571,7 @@ public class Token extends TermAttribute * @param length */ public void reinit(Token prototype, char[] newTermBuffer, int offset, int length) { - setTermBuffer(newTermBuffer, offset, length); + copyBuffer(newTermBuffer, offset, length); positionIncrement = prototype.positionIncrement; flags = prototype.flags; startOffset = prototype.startOffset; Propchange: lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Tokenizer.java ------------------------------------------------------------------------------ --- svn:mergeinfo (original) +++ svn:mergeinfo Tue Apr 6 19:19:27 2010 @@ -1,2 +1,3 @@ -/lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/analysis/Tokenizer.java:896850,909334 -/lucene/java/trunk/src/java/org/apache/lucene/analysis/Tokenizer.java:924483-925561 +/lucene/java/branches/flex_1458/src/java/org/apache/lucene/analysis/Tokenizer.java:824912-931101 +/lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/analysis/Tokenizer.java:909334 +/lucene/java/trunk/src/java/org/apache/lucene/analysis/Tokenizer.java:924483-924731,924781,925176-925462 Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/TermAttribute.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/TermAttribute.java?rev=931278&r1=931277&r2=931278&view=diff ============================================================================== --- lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/TermAttribute.java (original) +++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/TermAttribute.java Tue Apr 6 19:19:27 2010 @@ -21,7 +21,9 @@ import org.apache.lucene.util.Attribute; /** * The term text of a Token. + * @deprecated Use {@link CharTermAttribute} instead. */ +@Deprecated public interface TermAttribute extends Attribute { /** Returns the Token's term text. 
* Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/TermAttributeImpl.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/TermAttributeImpl.java?rev=931278&r1=931277&r2=931278&view=diff ============================================================================== --- lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/TermAttributeImpl.java (original) +++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/tokenattributes/TermAttributeImpl.java Tue Apr 6 19:19:27 2010 @@ -17,211 +17,11 @@ package org.apache.lucene.analysis.token * limitations under the License. */ -import java.io.Serializable; - -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.AttributeImpl; -import org.apache.lucene.util.RamUsageEstimator; - /** * The term text of a Token. + * @deprecated This class is only available for AttributeSource + * to be able to load an old TermAttribute implementation class. */ -public class TermAttributeImpl extends AttributeImpl implements TermAttribute, Cloneable, Serializable { - private static int MIN_BUFFER_SIZE = 10; - - private char[] termBuffer; - private int termLength; - - /** Returns the Token's term text. - * - * This method has a performance penalty - * because the text is stored internally in a char[]. If - * possible, use {@link #termBuffer()} and {@link - * #termLength()} directly instead. If you really need a - * String, use this method, which is nothing more than - * a convenience call to new String(token.termBuffer(), 0, token.termLength()) - */ - public String term() { - initTermBuffer(); - return new String(termBuffer, 0, termLength); - } - - /** Copies the contents of buffer, starting at offset for - * length characters, into the termBuffer array. - * @param buffer the buffer to copy - * @param offset the index in the buffer of the first character to copy - * @param length the number of characters to copy - */ - public void setTermBuffer(char[] buffer, int offset, int length) { - growTermBuffer(length); - System.arraycopy(buffer, offset, termBuffer, 0, length); - termLength = length; - } - - /** Copies the contents of buffer into the termBuffer array. - * @param buffer the buffer to copy - */ - public void setTermBuffer(String buffer) { - int length = buffer.length(); - growTermBuffer(length); - buffer.getChars(0, length, termBuffer, 0); - termLength = length; - } - - /** Copies the contents of buffer, starting at offset and continuing - * for length characters, into the termBuffer array. - * @param buffer the buffer to copy - * @param offset the index in the buffer of the first character to copy - * @param length the number of characters to copy - */ - public void setTermBuffer(String buffer, int offset, int length) { - assert offset <= buffer.length(); - assert offset + length <= buffer.length(); - growTermBuffer(length); - buffer.getChars(offset, offset + length, termBuffer, 0); - termLength = length; - } - - /** Returns the internal termBuffer character array which - * you can then directly alter. If the array is too - * small for your token, use {@link - * #resizeTermBuffer(int)} to increase it. After - * altering the buffer be sure to call {@link - * #setTermLength} to record the number of valid - * characters that were placed into the termBuffer. 
*/ - public char[] termBuffer() { - initTermBuffer(); - return termBuffer; - } - - /** Grows the termBuffer to at least size newSize, preserving the - * existing content. Note: If the next operation is to change - * the contents of the term buffer use - * {@link #setTermBuffer(char[], int, int)}, - * {@link #setTermBuffer(String)}, or - * {@link #setTermBuffer(String, int, int)} - * to optimally combine the resize with the setting of the termBuffer. - * @param newSize minimum size of the new termBuffer - * @return newly created termBuffer with length >= newSize - */ - public char[] resizeTermBuffer(int newSize) { - if (termBuffer == null) { - // The buffer is always at least MIN_BUFFER_SIZE - termBuffer = new char[ArrayUtil.oversize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize, RamUsageEstimator.NUM_BYTES_CHAR)]; - } else { - if(termBuffer.length < newSize){ - // Not big enough; create a new array with slight - // over allocation and preserve content - final char[] newCharBuffer = new char[ArrayUtil.oversize(newSize, RamUsageEstimator.NUM_BYTES_CHAR)]; - System.arraycopy(termBuffer, 0, newCharBuffer, 0, termBuffer.length); - termBuffer = newCharBuffer; - } - } - return termBuffer; - } - - - /** Allocates a buffer char[] of at least newSize, without preserving the existing content. - * its always used in places that set the content - * @param newSize minimum size of the buffer - */ - private void growTermBuffer(int newSize) { - if (termBuffer == null) { - // The buffer is always at least MIN_BUFFER_SIZE - termBuffer = new char[ArrayUtil.oversize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize, RamUsageEstimator.NUM_BYTES_CHAR)]; - } else { - if(termBuffer.length < newSize){ - // Not big enough; create a new array with slight - // over allocation: - termBuffer = new char[ArrayUtil.oversize(newSize, RamUsageEstimator.NUM_BYTES_CHAR)]; - } - } - } - - private void initTermBuffer() { - if (termBuffer == null) { - termBuffer = new char[ArrayUtil.oversize(MIN_BUFFER_SIZE, RamUsageEstimator.NUM_BYTES_CHAR)]; - termLength = 0; - } - } - - /** Return number of valid characters (length of the term) - * in the termBuffer array. */ - public int termLength() { - return termLength; - } - - /** Set number of valid characters (length of the term) in - * the termBuffer array. Use this to truncate the termBuffer - * or to synchronize with external manipulation of the termBuffer. - * Note: to grow the size of the array, - * use {@link #resizeTermBuffer(int)} first. 
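The resizeTermBuffer/growTermBuffer pair being deleted here implements amortized array growth: ArrayUtil.oversize over-allocates past the requested size so that a long run of appends reallocates only O(log n) times. A self-contained sketch of the same idea, with illustrative names and a simplified oversize heuristic in place of ArrayUtil:

// Simplified sketch of the growth strategy above; the real code delegates
// the size computation to ArrayUtil.oversize.
final class GrowingCharBuffer {
  private char[] buf = new char[10]; // mirrors MIN_BUFFER_SIZE
  private int len = 0;

  void append(char c) {
    if (len == buf.length) {
      // Over-allocate (here: +12.5% plus a constant) so that n appends
      // copy O(n) chars in total rather than O(n^2).
      char[] newBuf = new char[buf.length + (buf.length >> 3) + 3];
      System.arraycopy(buf, 0, newBuf, 0, len); // preserve existing content
      buf = newBuf;
    }
    buf[len++] = c;
  }

  int length() { return len; }
}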
- * @param length the truncated length */ - public void setTermLength(int length) { - initTermBuffer(); - if (length > termBuffer.length) - throw new IllegalArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.length + ")"); - termLength = length; - } - - @Override - public int hashCode() { - initTermBuffer(); - int code = termLength; - code = code * 31 + ArrayUtil.hashCode(termBuffer, 0, termLength); - return code; - } - - @Override - public void clear() { - termLength = 0; - } - - @Override - public Object clone() { - TermAttributeImpl t = (TermAttributeImpl)super.clone(); - // Do a deep clone - if (termBuffer != null) { - t.termBuffer = termBuffer.clone(); - } - return t; - } - - @Override - public boolean equals(Object other) { - if (other == this) { - return true; - } - - if (other instanceof TermAttributeImpl) { - initTermBuffer(); - TermAttributeImpl o = ((TermAttributeImpl) other); - o.initTermBuffer(); - - if (termLength != o.termLength) - return false; - for(int i=0;i<termLength;i++) { - if (termBuffer[i] != o.termBuffer[i]) { - return false; - } - } - return true; - } - - return false; - } [...] Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java?rev=931278&r1=931277&r2=931278&view=diff ============================================================================== --- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java (original) +++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java Tue Apr 6 19:19:27 2010 @@ [...] @@ slice = pool.buffers[address >> DocumentsWriter.BYTE_BLOCK_SHIFT]; assert slice != null; @@ -48,6 +50,7 @@ final class ByteSliceWriter { } /** Write byte into byte slice stream */ + @Override public void writeByte(byte b) { assert slice != null; if (slice[upto] != 0) { @@ -60,6 +63,7 @@ assert upto != slice.length; } + @Override public void writeBytes(final byte[] b, int offset, final int len) { final int offsetEnd = offset + len; while(offset < offsetEnd) { @@ -78,12 +82,4 @@ public int getAddress() { return upto + (offset0 & DocumentsWriter.BYTE_BLOCK_NOT_MASK); } - - public void writeVInt(int i) { - while ((i & ~0x7F) != 0) { - writeByte((byte)((i & 0x7f) | 0x80)); - i >>>= 7; - } - writeByte((byte) i); - } -} +} \ No newline at end of file
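The writeVInt method removed above encodes an int in one to five bytes: seven payload bits per byte, with the high bit set while more bytes follow, so small values cost a single byte. Its deletion, together with the new @Override annotations on writeByte and writeBytes, suggests the logic now lives in a shared base class that this excerpt does not show. A standalone sketch of the encoding itself, with hypothetical names:

// Standalone sketch of the VInt encoding, matching the deleted method's logic.
final class VIntSketch {
  /** Writes i into out starting at pos and returns the next free position. */
  static int writeVInt(int i, byte[] out, int pos) {
    while ((i & ~0x7F) != 0) {                 // more than 7 bits remain
      out[pos++] = (byte) ((i & 0x7F) | 0x80); // 7 payload bits + continuation bit
      i >>>= 7;                                // unsigned shift handles negative ints
    }
    out[pos++] = (byte) i;                     // final byte, high bit clear
    return pos;
  }
}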
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java?rev=931278&r1=931277&r2=931278&view=diff ============================================================================== --- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java (original) +++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java Tue Apr 6 19:19:27 2010 @@ -22,6 +22,9 @@ import org.apache.lucene.store.Directory import org.apache.lucene.store.IndexInput; import org.apache.lucene.document.AbstractField; // for javadocs import org.apache.lucene.document.Document; +import org.apache.lucene.index.codecs.CodecProvider; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; import java.text.NumberFormat; import java.io.PrintStream; @@ -122,6 +125,9 @@ public class CheckIndex { /** Name of the segment. */ public String name; + /** Name of codec used to read this segment. */ + public String codec; + /** Document count (does not take deletions into account). */ public int docCount; @@ -263,26 +269,6 @@ public class CheckIndex { infoStream.println(msg); } - private static class MySegmentTermDocs extends SegmentTermDocs { - - int delCount; - - MySegmentTermDocs(SegmentReader p) { - super(p); - } - - @Override - public void seek(Term term) throws IOException { - super.seek(term); - delCount = 0; - } - - @Override - protected void skippingDoc() throws IOException { - delCount++; - } - } - /** Returns a {@link Status} instance detailing * the state of the index. * @@ -296,6 +282,10 @@ public class CheckIndex { return checkIndex(null); } + protected Status checkIndex(List<String> onlySegments) throws IOException { + return checkIndex(onlySegments, CodecProvider.getDefault()); + } + /** Returns a {@link Status} instance detailing * the state of the index. * * @@ -308,13 +298,13 @@ * <p><b>WARNING</b>: make sure * you only call this when the index is not opened by any * writer. */ - public Status checkIndex(List<String> onlySegments) throws IOException { + protected Status checkIndex(List<String> onlySegments, CodecProvider codecs) throws IOException { NumberFormat nf = NumberFormat.getInstance(); SegmentInfos sis = new SegmentInfos(); Status result = new Status(); result.dir = dir; try { - sis.read(dir); + sis.read(dir, codecs); } catch (Throwable t) { msg("ERROR: could not read any segments file in directory"); result.missingSegments = true; @@ -371,6 +361,8 @@ sFormat = "FORMAT_USER_DATA [Lucene 2.9]"; else if (format == SegmentInfos.FORMAT_DIAGNOSTICS) sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]"; + else if (format == SegmentInfos.FORMAT_FLEX_POSTINGS) + sFormat = "FORMAT_FLEX_POSTINGS [Lucene 3.1]"; else if (format < SegmentInfos.CURRENT_FORMAT) { sFormat = "int=" + format + " [newer version of Lucene than this tool]"; skip = true; @@ -429,6 +421,9 @@ SegmentReader reader = null; try { + final String codec = info.getCodec().name; + msg(" codec=" + codec); + segInfoStat.codec = codec; msg(" compound=" + info.getUseCompoundFile()); segInfoStat.compound = info.getUseCompoundFile(); msg(" hasProx=" + info.getHasProx()); @@ -452,6 +447,7 @@ msg(" docStoreIsCompoundFile=" + info.getDocStoreIsCompoundFile()); segInfoStat.docStoreCompoundFile = info.getDocStoreIsCompoundFile(); } + final String delFileName = info.getDelFileName(); if (delFileName == null){ msg(" no deletions"); @@ -503,7 +499,7 @@ segInfoStat.fieldNormStatus = testFieldNorms(fieldNames, reader); // Test the Term Index - segInfoStat.termIndexStatus = testTermIndex(info, reader); + segInfoStat.termIndexStatus = testTermIndex(reader); // Test Stored Fields segInfoStat.storedFieldStatus = testStoredFields(info, reader, nf);
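The checkIndex overloads above thread a CodecProvider down into SegmentInfos.read(dir, codecs), so segments written by non-default codecs can still be opened; callers using the defaults see no change, since the public entry point bottoms out in CodecProvider.getDefault(). A hedged usage sketch; the class name and argument handling are illustrative, not part of this commit:

import java.io.File;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

// Sketch: run CheckIndex over an index directory. The public no-argument
// checkIndex() ends in checkIndex(onlySegments, CodecProvider.getDefault()),
// per the delegation added in the hunk above.
public class CheckIndexSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(new File(args[0]));
    CheckIndex checker = new CheckIndex(dir);
    CheckIndex.Status status = checker.checkIndex(); // checks all segments
    System.out.println(status.clean ? "OK" : "index has problems");
    dir.close();
  }
}

@@ -586,69 +582,129 @@ /** * Test the term index.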
*/ - private Status.TermIndexStatus testTermIndex(SegmentInfo info, SegmentReader reader) { + private Status.TermIndexStatus testTermIndex(SegmentReader reader) { final Status.TermIndexStatus status = new Status.TermIndexStatus(); + final int maxDoc = reader.maxDoc(); + final Bits delDocs = reader.getDeletedDocs(); + try { + if (infoStream != null) { infoStream.print(" test: terms, freq, prox..."); } - final TermEnum termEnum = reader.terms(); - final TermPositions termPositions = reader.termPositions(); + final Fields fields = reader.fields(); + if (fields == null) { + msg("OK [no fields/terms]"); + return status; + } + + final FieldsEnum fieldsEnum = fields.iterator(); + while(true) { + final String field = fieldsEnum.next(); + if (field == null) { + break; + } + + final TermsEnum terms = fieldsEnum.terms(); + + DocsEnum docs = null; + DocsAndPositionsEnum postings = null; - // Used only to count up # deleted docs for this term - final MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader); + boolean hasOrd = true; + final long termCountStart = status.termCount; - final int maxDoc = reader.maxDoc(); + while(true) { + + final BytesRef term = terms.next(); + if (term == null) { + break; + } + + final int docFreq = terms.docFreq(); + status.totFreq += docFreq; + + docs = terms.docs(delDocs, docs); + postings = terms.docsAndPositions(delDocs, postings); + + if (hasOrd) { + long ord = -1; + try { + ord = terms.ord(); + } catch (UnsupportedOperationException uoe) { + hasOrd = false; + } + + if (hasOrd) { + final long ordExpected = status.termCount - termCountStart; + if (ord != ordExpected) { + throw new RuntimeException("ord mismatch: TermsEnum has ord=" + ord + " vs actual=" + ordExpected); + } + } + } - while (termEnum.next()) { - status.termCount++; - final Term term = termEnum.term(); - final int docFreq = termEnum.docFreq(); - termPositions.seek(term); - int lastDoc = -1; - int freq0 = 0; - status.totFreq += docFreq; - while (termPositions.next()) { - freq0++; - final int doc = termPositions.doc(); - final int freq = termPositions.freq(); - if (doc <= lastDoc) - throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc); - if (doc >= maxDoc) - throw new RuntimeException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc); - - lastDoc = doc; - if (freq <= 0) - throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds"); + status.termCount++; + + final DocsEnum docs2; + if (postings != null) { + docs2 = postings; + } else { + docs2 = docs; + } + + int lastDoc = -1; + while(true) { + final int doc = docs2.nextDoc(); + if (doc == DocsEnum.NO_MORE_DOCS) { + break; + } + final int freq = docs2.freq(); + status.totPos += freq; + + if (doc <= lastDoc) { + throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc); + } + if (doc >= maxDoc) { + throw new RuntimeException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc); + } + + lastDoc = doc; + if (freq <= 0) { + throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds"); + } - int lastPos = -1; - status.totPos += freq; - for(int j=0;j
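The rewritten testTermIndex above walks the flex API chain, Fields to FieldsEnum to TermsEnum to DocsEnum, with BytesRef-valued terms and deletions filtered through a Bits instance, in place of the old TermEnum/TermPositions pair. The same loop shape, condensed into a standalone sketch; class and method names are mine, while the calls mirror those visible in the hunk:

import java.io.IOException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

final class PostingsWalker {
  /** Visits every live posting of every term in every field. */
  static void walk(IndexReader reader) throws IOException {
    final Bits delDocs = reader.getDeletedDocs(); // segment-level reader, as in CheckIndex
    final Fields fields = reader.fields();
    if (fields == null) {
      return; // index has no postings at all
    }
    final FieldsEnum fieldsEnum = fields.iterator();
    String field;
    while ((field = fieldsEnum.next()) != null) { // one TermsEnum per field
      final TermsEnum terms = fieldsEnum.terms();
      DocsEnum docs = null; // passed back in so the enum can be reused
      BytesRef term;
      while ((term = terms.next()) != null) {
        docs = terms.docs(delDocs, docs);
        int doc;
        while ((doc = docs.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
          // doc contains term in this field and is not deleted
        }
      }
    }
  }
}

Passing the previous DocsEnum back into terms.docs() is the same allocation-avoidance idiom the patch itself uses.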