lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [17/72] [abbrv] [partial] lucenenet git commit: Lucene.Net.Tests: Removed \core directory and put its contents in root directory
Date Sun, 26 Feb 2017 23:37:05 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestTermVectorsReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestTermVectorsReader.cs b/src/Lucene.Net.Tests/Index/TestTermVectorsReader.cs
new file mode 100644
index 0000000..8426151
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestTermVectorsReader.cs
@@ -0,0 +1,477 @@
+using Lucene.Net.Analysis.TokenAttributes;
+using System;
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    using Lucene.Net.Analysis;
+    using NUnit.Framework;
+    using System.IO;
+    using BytesRef = Lucene.Net.Util.BytesRef;
+    using Codec = Lucene.Net.Codecs.Codec;
+    using Directory = Lucene.Net.Store.Directory;
+    using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+    using Document = Documents.Document;
+    using Field = Field;
+    using FieldType = FieldType;
+    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+    using TermVectorsReader = Lucene.Net.Codecs.TermVectorsReader;
+    using TestUtil = Lucene.Net.Util.TestUtil;
+    using TextField = TextField;
+
+    [TestFixture]
+    public class TestTermVectorsReader : LuceneTestCase
+    {
+        public TestTermVectorsReader()
+        {
+            // Arrays are sized from TestTerms.Length / TERM_FREQ, so they are
+            // allocated here after the field initializers have run.
+            InitializeInstanceFields();
+        }
+
+        private void InitializeInstanceFields()
+        {
+            // One positions array per test term; TERM_FREQ occurrences of each term in total.
+            Positions = new int[TestTerms.Length][];
+            Tokens = new TestToken[TestTerms.Length * TERM_FREQ];
+        }
+
+        //Must be lexicographically sorted, will do in setup, versus trying to maintain here
+        private string[] TestFields = new string[] { "f1", "f2", "f3", "f4" };
+
+        private bool[] TestFieldsStorePos = new bool[] { true, false, true, false };
+        private bool[] TestFieldsStoreOff = new bool[] { true, false, false, true };
+        private string[] TestTerms = new string[] { "this", "is", "a", "test" };
+        private int[][] Positions;
+        private Directory Dir;
+        private SegmentCommitInfo Seg;
+        private FieldInfos FieldInfos = new FieldInfos(new FieldInfo[0]);
+        private static int TERM_FREQ = 3;
+
+        // A single synthetic token (text, position, offsets) fed to the index by
+        // MyTokenizer. Ordered by position so the token stream can be sorted before use.
+        internal class TestToken : IComparable<TestToken>
+        {
+            private readonly TestTermVectorsReader OuterInstance;
+
+            public TestToken(TestTermVectorsReader outerInstance)
+            {
+                this.OuterInstance = outerInstance;
+            }
+
+            internal string Text;
+            internal int Pos;
+            internal int StartOffset;
+            internal int EndOffset;
+
+            public virtual int CompareTo(TestToken other)
+            {
+                // Sort tokens by ascending position only.
+                return Pos - other.Pos;
+            }
+        }
+
+        internal TestToken[] Tokens;
+
+        [SetUp]
+        public override void SetUp()
+        {
+            base.SetUp();
+            /*
+            for (int i = 0; i < testFields.Length; i++) {
+              fieldInfos.Add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]);
+            }
+            */
+
+            // Build a token stream in which every term appears TERM_FREQ times
+            // with strictly increasing positions, then sort tokens by position.
+            Array.Sort(TestTerms);
+            int tokenUpto = 0;
+            for (int i = 0; i < TestTerms.Length; i++)
+            {
+                Positions[i] = new int[TERM_FREQ];
+                // first position must be 0
+                for (int j = 0; j < TERM_FREQ; j++)
+                {
+                    // positions are always sorted in increasing order
+                    // NOTE(review): a new Random(1) is constructed on every iteration, so
+                    // NextDouble() always returns the same value and the positions are fully
+                    // deterministic. Upstream Lucene draws from the shared test random here —
+                    // confirm whether the fixed seed per call is intentional.
+                    Positions[i][j] = (int)(j * 10 + new Random(1).NextDouble() * 10);
+                    TestToken token = Tokens[tokenUpto++] = new TestToken(this);
+                    token.Text = TestTerms[i];
+                    token.Pos = Positions[i][j];
+                    token.StartOffset = j * 10;
+                    token.EndOffset = j * 10 + TestTerms[i].Length;
+                }
+            }
+            Array.Sort(Tokens);
+
+            // Index into a fresh directory; compound files and merging are tuned off/down
+            // so the segment layout stays predictable for the reader tests.
+            Dir = NewDirectory();
+            IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MyAnalyzer(this)).SetMaxBufferedDocs(-1).SetMergePolicy(NewLogMergePolicy(false, 10)).SetUseCompoundFile(false));
+
+            Document doc = new Document();
+            for (int i = 0; i < TestFields.Length; i++)
+            {
+                // Per-field vector flags follow TestFieldsStorePos/TestFieldsStoreOff:
+                // f1 = positions+offsets, f2 = vectors only, f3 = positions, f4 = offsets.
+                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
+                if (TestFieldsStorePos[i] && TestFieldsStoreOff[i])
+                {
+                    customType.StoreTermVectors = true;
+                    customType.StoreTermVectorPositions = true;
+                    customType.StoreTermVectorOffsets = true;
+                }
+                else if (TestFieldsStorePos[i] && !TestFieldsStoreOff[i])
+                {
+                    customType.StoreTermVectors = true;
+                    customType.StoreTermVectorPositions = true;
+                }
+                else if (!TestFieldsStorePos[i] && TestFieldsStoreOff[i])
+                {
+                    customType.StoreTermVectors = true;
+                    customType.StoreTermVectorOffsets = true;
+                }
+                else
+                {
+                    customType.StoreTermVectors = true;
+                }
+                // Field value is empty: MyAnalyzer/MyTokenizer ignores the reader and
+                // emits the prebuilt Tokens array instead.
+                doc.Add(new Field(TestFields[i], "", customType));
+            }
+
+            //Create 5 documents for testing, they all have the same
+            //terms
+            for (int j = 0; j < 5; j++)
+            {
+                writer.AddDocument(doc);
+            }
+            writer.Commit();
+            Seg = writer.NewestSegment();
+            writer.Dispose();
+
+            FieldInfos = SegmentReader.ReadFieldInfos(Seg);
+        }
+
+        [TearDown]
+        public override void TearDown()
+        {
+            // Release the directory created in SetUp before base teardown runs.
+            Dir.Dispose();
+            base.TearDown();
+        }
+
+        // Tokenizer that ignores its input reader and replays the outer test's
+        // prebuilt Tokens array, restoring each token's text, offsets, and position.
+        private class MyTokenizer : Tokenizer
+        {
+            private readonly TestTermVectorsReader OuterInstance;
+
+            internal int TokenUpto;
+
+            internal readonly ICharTermAttribute TermAtt;
+            internal readonly IPositionIncrementAttribute PosIncrAtt;
+            internal readonly IOffsetAttribute OffsetAtt;
+
+            public MyTokenizer(TestTermVectorsReader outerInstance, TextReader reader)
+                : base(reader)
+            {
+                this.OuterInstance = outerInstance;
+                TermAtt = AddAttribute<ICharTermAttribute>();
+                PosIncrAtt = AddAttribute<IPositionIncrementAttribute>();
+                OffsetAtt = AddAttribute<IOffsetAttribute>();
+            }
+
+            public sealed override bool IncrementToken()
+            {
+                if (TokenUpto >= OuterInstance.Tokens.Length)
+                {
+                    return false;
+                }
+                else
+                {
+                    TestToken testToken = OuterInstance.Tokens[TokenUpto++];
+                    ClearAttributes();
+                    TermAtt.Append(testToken.Text);
+                    OffsetAtt.SetOffset(testToken.StartOffset, testToken.EndOffset);
+                    if (TokenUpto > 1)
+                    {
+                        // Position increment is the delta from the previous token's position.
+                        PosIncrAtt.PositionIncrement = testToken.Pos - OuterInstance.Tokens[TokenUpto - 2].Pos;
+                    }
+                    else
+                    {
+                        // First token: increment of pos+1 places it at absolute position Pos.
+                        PosIncrAtt.PositionIncrement = testToken.Pos + 1;
+                    }
+                    return true;
+                }
+            }
+
+            public override void Reset()
+            {
+                base.Reset();
+                // Restart replay from the first prebuilt token.
+                this.TokenUpto = 0;
+            }
+        }
+
+        // Analyzer wrapper that produces a MyTokenizer for every field, so all
+        // indexed fields share the same synthetic token stream.
+        private class MyAnalyzer : Analyzer
+        {
+            private readonly TestTermVectorsReader OuterInstance;
+
+            public MyAnalyzer(TestTermVectorsReader outerInstance)
+            {
+                this.OuterInstance = outerInstance;
+            }
+
+            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+            {
+                return new TokenStreamComponents(new MyTokenizer(OuterInstance, reader));
+            }
+        }
+
+        [Test]
+        public virtual void Test()
+        {
+            //Check to see the files were created properly in setup
+            DirectoryReader reader = DirectoryReader.Open(Dir);
+            foreach (AtomicReaderContext ctx in reader.Leaves)
+            {
+                // Every segment written by SetUp must report stored term vectors.
+                SegmentReader sr = (SegmentReader)ctx.Reader;
+                Assert.IsTrue(sr.FieldInfos.HasVectors);
+            }
+            reader.Dispose();
+        }
+
+        [Test]
+        public virtual void TestReader()
+        {
+            // Read the term vectors back through the codec's TermVectorsReader and
+            // verify each of the 5 docs exposes the sorted test terms for field f1.
+            TermVectorsReader reader = Codec.Default.TermVectorsFormat.VectorsReader(Dir, Seg.Info, FieldInfos, NewIOContext(Random()));
+            for (int j = 0; j < 5; j++)
+            {
+                Terms vector = reader.Get(j).GetTerms(TestFields[0]);
+                Assert.IsNotNull(vector);
+                Assert.AreEqual(TestTerms.Length, vector.Count);
+                TermsEnum termsEnum = vector.GetIterator(null);
+                for (int i = 0; i < TestTerms.Length; i++)
+                {
+                    BytesRef text = termsEnum.Next();
+                    Assert.IsNotNull(text);
+                    string term = text.Utf8ToString();
+                    //System.out.println("Term: " + term);
+                    Assert.AreEqual(TestTerms[i], term);
+                }
+                // Enum must be exhausted after exactly TestTerms.Length terms.
+                Assert.IsNull(termsEnum.Next());
+            }
+            reader.Dispose();
+        }
+
+        [Test]
+        public virtual void TestDocsEnum()
+        {
+            // Verify that a DocsEnum obtained from a term-vector TermsEnum yields
+            // exactly one document (the vector's own doc) per term.
+            TermVectorsReader reader = Codec.Default.TermVectorsFormat.VectorsReader(Dir, Seg.Info, FieldInfos, NewIOContext(Random()));
+            for (int j = 0; j < 5; j++)
+            {
+                Terms vector = reader.Get(j).GetTerms(TestFields[0]);
+                Assert.IsNotNull(vector);
+                Assert.AreEqual(TestTerms.Length, vector.Count);
+                TermsEnum termsEnum = vector.GetIterator(null);
+                DocsEnum docsEnum = null;
+                for (int i = 0; i < TestTerms.Length; i++)
+                {
+                    BytesRef text = termsEnum.Next();
+                    Assert.IsNotNull(text);
+                    string term = text.Utf8ToString();
+                    //System.out.println("Term: " + term);
+                    Assert.AreEqual(TestTerms[i], term);
+
+                    // Reuse docsEnum across terms, as the production API allows.
+                    docsEnum = TestUtil.Docs(Random(), termsEnum, null, docsEnum, DocsEnum.FLAG_NONE);
+                    Assert.IsNotNull(docsEnum);
+                    int doc = docsEnum.DocID;
+                    // Unpositioned enum starts at -1, then advances to the single doc.
+                    Assert.AreEqual(-1, doc);
+                    Assert.IsTrue(docsEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+                    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc());
+                }
+                Assert.IsNull(termsEnum.Next());
+            }
+            reader.Dispose();
+        }
+
+        [Test]
+        public virtual void TestPositionReader()
+        {
+            // f1 stores positions+offsets: check both are returned; f2 stores neither:
+            // check DocsAndPositions is unavailable there.
+            TermVectorsReader reader = Codec.Default.TermVectorsFormat.VectorsReader(Dir, Seg.Info, FieldInfos, NewIOContext(Random()));
+            //BytesRef[] terms; // LUCENENET NOTE: Not used in Lucene
+            Terms vector = reader.Get(0).GetTerms(TestFields[0]);
+            Assert.IsNotNull(vector);
+            Assert.AreEqual(TestTerms.Length, vector.Count);
+            TermsEnum termsEnum = vector.GetIterator(null);
+            DocsAndPositionsEnum dpEnum = null;
+            for (int i = 0; i < TestTerms.Length; i++)
+            {
+                BytesRef text = termsEnum.Next();
+                Assert.IsNotNull(text);
+                string term = text.Utf8ToString();
+                //System.out.println("Term: " + term);
+                Assert.AreEqual(TestTerms[i], term);
+
+                // First pass: positions only.
+                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
+                Assert.IsNotNull(dpEnum);
+                int doc = dpEnum.DocID;
+                Assert.AreEqual(-1, doc);
+                Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+                // NOTE(review): arguments look swapped — NUnit's Assert.AreEqual takes
+                // (expected, actual), and Positions[i].Length is the expected value here.
+                Assert.AreEqual(dpEnum.Freq, Positions[i].Length);
+                for (int j = 0; j < Positions[i].Length; j++)
+                {
+                    Assert.AreEqual(Positions[i][j], dpEnum.NextPosition());
+                }
+                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
+
+                // Second pass over the same term: positions plus start/end offsets.
+                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
+                doc = dpEnum.DocID;
+                Assert.AreEqual(-1, doc);
+                Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+                // NOTE(review): this null check comes after dpEnum has already been
+                // dereferenced above, so it can never fail usefully.
+                Assert.IsNotNull(dpEnum);
+                Assert.AreEqual(dpEnum.Freq, Positions[i].Length);
+                for (int j = 0; j < Positions[i].Length; j++)
+                {
+                    Assert.AreEqual(Positions[i][j], dpEnum.NextPosition());
+                    Assert.AreEqual(j * 10, dpEnum.StartOffset);
+                    Assert.AreEqual(j * 10 + TestTerms[i].Length, dpEnum.EndOffset);
+                }
+                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
+            }
+
+            Terms freqVector = reader.Get(0).GetTerms(TestFields[1]); //no pos, no offset
+            Assert.IsNotNull(freqVector);
+            Assert.AreEqual(TestTerms.Length, freqVector.Count);
+            termsEnum = freqVector.GetIterator(null);
+            Assert.IsNotNull(termsEnum);
+            for (int i = 0; i < TestTerms.Length; i++)
+            {
+                BytesRef text = termsEnum.Next();
+                Assert.IsNotNull(text);
+                string term = text.Utf8ToString();
+                //System.out.println("Term: " + term);
+                Assert.AreEqual(TestTerms[i], term);
+                Assert.IsNotNull(termsEnum.Docs(null, null));
+                Assert.IsNull(termsEnum.DocsAndPositions(null, null)); // no pos
+            }
+            reader.Dispose();
+        }
+
+        [Test]
+        public virtual void TestOffsetReader()
+        {
+            // Same data as TestPositionReader, focused on start/end offsets for f1.
+            TermVectorsReader reader = Codec.Default.TermVectorsFormat.VectorsReader(Dir, Seg.Info, FieldInfos, NewIOContext(Random()));
+            Terms vector = reader.Get(0).GetTerms(TestFields[0]);
+            Assert.IsNotNull(vector);
+            TermsEnum termsEnum = vector.GetIterator(null);
+            Assert.IsNotNull(termsEnum);
+            Assert.AreEqual(TestTerms.Length, vector.Count);
+            DocsAndPositionsEnum dpEnum = null;
+            for (int i = 0; i < TestTerms.Length; i++)
+            {
+                BytesRef text = termsEnum.Next();
+                Assert.IsNotNull(text);
+                string term = text.Utf8ToString();
+                Assert.AreEqual(TestTerms[i], term);
+
+                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
+                Assert.IsNotNull(dpEnum);
+                Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+                // NOTE(review): expected/actual appear swapped for NUnit's (expected, actual) order.
+                Assert.AreEqual(dpEnum.Freq, Positions[i].Length);
+                for (int j = 0; j < Positions[i].Length; j++)
+                {
+                    Assert.AreEqual(Positions[i][j], dpEnum.NextPosition());
+                }
+                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
+
+                // Re-pull the enum and verify offsets match the j*10 layout used in SetUp.
+                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
+                Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+                // NOTE(review): null check after the dereference above — cannot fail usefully.
+                Assert.IsNotNull(dpEnum);
+                Assert.AreEqual(dpEnum.Freq, Positions[i].Length);
+                for (int j = 0; j < Positions[i].Length; j++)
+                {
+                    Assert.AreEqual(Positions[i][j], dpEnum.NextPosition());
+                    Assert.AreEqual(j * 10, dpEnum.StartOffset);
+                    Assert.AreEqual(j * 10 + TestTerms[i].Length, dpEnum.EndOffset);
+                }
+                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
+            }
+            reader.Dispose();
+        }
+
+        [Test]
+        public virtual void TestIllegalIndexableField()
+        {
+            // Each invalid FieldType combination below must be rejected by the writer
+            // with an ArgumentException carrying the exact documented message.
+            Directory dir = NewDirectory();
+            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+            // Case 1: payloads require positions.
+            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+            ft.StoreTermVectors = true;
+            ft.StoreTermVectorPayloads = true;
+            Document doc = new Document();
+            doc.Add(new Field("field", "value", ft));
+            try
+            {
+                w.AddDocument(doc);
+                Assert.Fail("did not hit exception");
+            }
+            catch (System.ArgumentException iae)
+            {
+                // Expected
+                Assert.AreEqual("cannot index term vector payloads without term vector positions (field=\"field\")", iae.Message);
+            }
+
+            // Case 2: offsets require term vectors to be enabled.
+            ft = new FieldType(TextField.TYPE_NOT_STORED);
+            ft.StoreTermVectors = false;
+            ft.StoreTermVectorOffsets = true;
+            doc = new Document();
+            doc.Add(new Field("field", "value", ft));
+            try
+            {
+                w.AddDocument(doc);
+                Assert.Fail("did not hit exception");
+            }
+            catch (System.ArgumentException iae)
+            {
+                // Expected
+                Assert.AreEqual("cannot index term vector offsets when term vectors are not indexed (field=\"field\")", iae.Message);
+            }
+
+            // Case 3: positions require term vectors to be enabled.
+            ft = new FieldType(TextField.TYPE_NOT_STORED);
+            ft.StoreTermVectors = false;
+            ft.StoreTermVectorPositions = true;
+            doc = new Document();
+            doc.Add(new Field("field", "value", ft));
+            try
+            {
+                w.AddDocument(doc);
+                Assert.Fail("did not hit exception");
+            }
+            catch (System.ArgumentException iae)
+            {
+                // Expected
+                Assert.AreEqual("cannot index term vector positions when term vectors are not indexed (field=\"field\")", iae.Message);
+            }
+
+            // Case 4: payloads require term vectors to be enabled.
+            ft = new FieldType(TextField.TYPE_NOT_STORED);
+            ft.StoreTermVectors = false;
+            ft.StoreTermVectorPayloads = true;
+            doc = new Document();
+            doc.Add(new Field("field", "value", ft));
+            try
+            {
+                w.AddDocument(doc);
+                Assert.Fail("did not hit exception");
+            }
+            catch (System.ArgumentException iae)
+            {
+                // Expected
+                Assert.AreEqual("cannot index term vector payloads when term vectors are not indexed (field=\"field\")", iae.Message);
+            }
+
+            w.Dispose();
+
+            dir.Dispose();
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestTermVectorsWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestTermVectorsWriter.cs b/src/Lucene.Net.Tests/Index/TestTermVectorsWriter.cs
new file mode 100644
index 0000000..355249e
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestTermVectorsWriter.cs
@@ -0,0 +1,601 @@
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+    using NUnit.Framework;
+    using System.IO;
+
+    /*
+         * Licensed to the Apache Software Foundation (ASF) under one or more
+         * contributor license agreements.  See the NOTICE file distributed with
+         * this work for additional information regarding copyright ownership.
+         * The ASF licenses this file to You under the Apache License, Version 2.0
+         * (the "License"); you may not use this file except in compliance with
+         * the License.  You may obtain a copy of the License at
+         *
+         *     http://www.apache.org/licenses/LICENSE-2.0
+         *
+         * Unless required by applicable law or agreed to in writing, software
+         * distributed under the License is distributed on an "AS IS" BASIS,
+         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+         * See the License for the specific language governing permissions and
+         * limitations under the License.
+         */
+
+    using Analyzer = Lucene.Net.Analysis.Analyzer;
+    using BytesRef = Lucene.Net.Util.BytesRef;
+    using CachingTokenFilter = Lucene.Net.Analysis.CachingTokenFilter;
+    using Directory = Lucene.Net.Store.Directory;
+    using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+    using Document = Documents.Document;
+    using Field = Field;
+    using FieldType = FieldType;
+    using IOUtils = Lucene.Net.Util.IOUtils;
+    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+    using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
+    using MockDirectoryWrapper = Lucene.Net.Store.MockDirectoryWrapper;
+    using MockTokenFilter = Lucene.Net.Analysis.MockTokenFilter;
+    using MockTokenizer = Lucene.Net.Analysis.MockTokenizer;
+    using RAMDirectory = Lucene.Net.Store.RAMDirectory;
+    using StringField = StringField;
+    using TextField = TextField;
+    using TokenStream = Lucene.Net.Analysis.TokenStream;
+
+    /// <summary>
+    /// tests for writing term vectors </summary>
+    [TestFixture]
+    public class TestTermVectorsWriter : LuceneTestCase
+    {
+        // LUCENE-1442
+        // Adding the same Field instance multiple times (plus an empty-valued field with
+        // the same name) must accumulate offsets correctly rather than double-counting.
+        [Test]
+        public virtual void TestDoubleOffsetCounting()
+        {
+            Directory dir = NewDirectory();
+            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
+            Document doc = new Document();
+            FieldType customType = new FieldType(StringField.TYPE_NOT_STORED);
+            customType.StoreTermVectors = true;
+            customType.StoreTermVectorPositions = true;
+            customType.StoreTermVectorOffsets = true;
+            Field f = NewField("field", "abcd", customType);
+            doc.Add(f);
+            doc.Add(f);
+            Field f2 = NewField("field", "", customType);
+            doc.Add(f2);
+            doc.Add(f);
+            w.AddDocument(doc);
+            w.Dispose();
+
+            IndexReader r = DirectoryReader.Open(dir);
+            Terms vector = r.GetTermVectors(0).GetTerms("field");
+            Assert.IsNotNull(vector);
+            TermsEnum termsEnum = vector.GetIterator(null);
+            Assert.IsNotNull(termsEnum.Next());
+            Assert.AreEqual("", termsEnum.Term.Utf8ToString());
+
+            // Token "" occurred once
+            Assert.AreEqual(1, termsEnum.TotalTermFreq);
+
+            DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
+            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+            dpEnum.NextPosition();
+            // Empty value contributes a zero-length token after the two "abcd" instances.
+            Assert.AreEqual(8, dpEnum.StartOffset);
+            Assert.AreEqual(8, dpEnum.EndOffset);
+            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
+
+            // Token "abcd" occurred three times
+            Assert.AreEqual(new BytesRef("abcd"), termsEnum.Next());
+            dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
+            Assert.AreEqual(3, termsEnum.TotalTermFreq);
+
+            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+            dpEnum.NextPosition();
+            Assert.AreEqual(0, dpEnum.StartOffset);
+            Assert.AreEqual(4, dpEnum.EndOffset);
+
+            dpEnum.NextPosition();
+            Assert.AreEqual(4, dpEnum.StartOffset);
+            Assert.AreEqual(8, dpEnum.EndOffset);
+
+            dpEnum.NextPosition();
+            Assert.AreEqual(8, dpEnum.StartOffset);
+            Assert.AreEqual(12, dpEnum.EndOffset);
+
+            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
+            Assert.IsNull(termsEnum.Next());
+            r.Dispose();
+            dir.Dispose();
+        }
+
+        // LUCENE-1442
+        // Same-name field added twice with an analyzed TextField: the second
+        // occurrence's offsets continue past the first (5..9), not restart at 0.
+        [Test]
+        public virtual void TestDoubleOffsetCounting2()
+        {
+            Directory dir = NewDirectory();
+            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
+            Document doc = new Document();
+            FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
+            customType.StoreTermVectors = true;
+            customType.StoreTermVectorPositions = true;
+            customType.StoreTermVectorOffsets = true;
+            Field f = NewField("field", "abcd", customType);
+            doc.Add(f);
+            doc.Add(f);
+            w.AddDocument(doc);
+            w.Dispose();
+
+            IndexReader r = DirectoryReader.Open(dir);
+            TermsEnum termsEnum = r.GetTermVectors(0).GetTerms("field").GetIterator(null);
+            Assert.IsNotNull(termsEnum.Next());
+            DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
+            Assert.AreEqual(2, termsEnum.TotalTermFreq);
+
+            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+            dpEnum.NextPosition();
+            Assert.AreEqual(0, dpEnum.StartOffset);
+            Assert.AreEqual(4, dpEnum.EndOffset);
+
+            dpEnum.NextPosition();
+            Assert.AreEqual(5, dpEnum.StartOffset);
+            Assert.AreEqual(9, dpEnum.EndOffset);
+            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
+
+            r.Dispose();
+            dir.Dispose();
+        }
+
+        // LUCENE-1448
+        // Trailing whitespace in the field value ("abcd   ") must be counted into
+        // the offset of the second field instance (8..12, not 5..9).
+        [Test]
+        public virtual void TestEndOffsetPositionCharAnalyzer()
+        {
+            Directory dir = NewDirectory();
+            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
+            Document doc = new Document();
+            FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
+            customType.StoreTermVectors = true;
+            customType.StoreTermVectorPositions = true;
+            customType.StoreTermVectorOffsets = true;
+            Field f = NewField("field", "abcd   ", customType);
+            doc.Add(f);
+            doc.Add(f);
+            w.AddDocument(doc);
+            w.Dispose();
+
+            IndexReader r = DirectoryReader.Open(dir);
+            TermsEnum termsEnum = r.GetTermVectors(0).GetTerms("field").GetIterator(null);
+            Assert.IsNotNull(termsEnum.Next());
+            DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
+            Assert.AreEqual(2, termsEnum.TotalTermFreq);
+
+            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+            dpEnum.NextPosition();
+            Assert.AreEqual(0, dpEnum.StartOffset);
+            Assert.AreEqual(4, dpEnum.EndOffset);
+
+            dpEnum.NextPosition();
+            Assert.AreEqual(8, dpEnum.StartOffset);
+            Assert.AreEqual(12, dpEnum.EndOffset);
+            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
+
+            r.Dispose();
+            dir.Dispose();
+        }
+
+        // LUCENE-1448
+        // Offsets must remain correct when the token stream is wrapped in a
+        // CachingTokenFilter and the same cached stream feeds both field instances.
+        [Test]
+        public virtual void TestEndOffsetPositionWithCachingTokenFilter()
+        {
+            Directory dir = NewDirectory();
+            Analyzer analyzer = new MockAnalyzer(Random());
+            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+            Document doc = new Document();
+            IOException priorException = null;
+            TokenStream stream = analyzer.TokenStream("field", new StringReader("abcd   "));
+            try
+            {
+                stream.Reset(); // TODO: weird to reset before wrapping with CachingTokenFilter... correct?
+                TokenStream cachedStream = new CachingTokenFilter(stream);
+                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
+                customType.StoreTermVectors = true;
+                customType.StoreTermVectorPositions = true;
+                customType.StoreTermVectorOffsets = true;
+                Field f = new Field("field", cachedStream, customType);
+                doc.Add(f);
+                doc.Add(f);
+                w.AddDocument(doc);
+            }
+            catch (IOException e)
+            {
+                priorException = e;
+            }
+            finally
+            {
+                // Close the underlying stream even if AddDocument threw, re-raising
+                // any captured exception afterwards.
+                IOUtils.CloseWhileHandlingException(priorException, stream);
+            }
+            w.Dispose();
+
+            IndexReader r = DirectoryReader.Open(dir);
+            TermsEnum termsEnum = r.GetTermVectors(0).GetTerms("field").GetIterator(null);
+            Assert.IsNotNull(termsEnum.Next());
+            DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
+            Assert.AreEqual(2, termsEnum.TotalTermFreq);
+
+            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+            dpEnum.NextPosition();
+            Assert.AreEqual(0, dpEnum.StartOffset);
+            Assert.AreEqual(4, dpEnum.EndOffset);
+
+            dpEnum.NextPosition();
+            Assert.AreEqual(8, dpEnum.StartOffset);
+            Assert.AreEqual(12, dpEnum.EndOffset);
+            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
+
+            r.Dispose();
+            dir.Dispose();
+        }
+
+        // LUCENE-1448
+        // A stop word ("the") removed by the filter must still advance offsets, so
+        // the second field instance's "abcd" starts at 9 rather than 5.
+        [Test]
+        public virtual void TestEndOffsetPositionStopFilter()
+        {
+            Directory dir = NewDirectory();
+            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)));
+            Document doc = new Document();
+            FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
+            customType.StoreTermVectors = true;
+            customType.StoreTermVectorPositions = true;
+            customType.StoreTermVectorOffsets = true;
+            Field f = NewField("field", "abcd the", customType);
+            doc.Add(f);
+            doc.Add(f);
+            w.AddDocument(doc);
+            w.Dispose();
+
+            IndexReader r = DirectoryReader.Open(dir);
+            TermsEnum termsEnum = r.GetTermVectors(0).GetTerms("field").GetIterator(null);
+            Assert.IsNotNull(termsEnum.Next());
+            DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
+            Assert.AreEqual(2, termsEnum.TotalTermFreq);
+
+            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+            dpEnum.NextPosition();
+            Assert.AreEqual(0, dpEnum.StartOffset);
+            Assert.AreEqual(4, dpEnum.EndOffset);
+
+            dpEnum.NextPosition();
+            Assert.AreEqual(9, dpEnum.StartOffset);
+            Assert.AreEqual(13, dpEnum.EndOffset);
+            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());
+
+            r.Dispose();
+            dir.Dispose();
+        }
+
        // LUCENE-1448
        [Test]
        public virtual void TestEndOffsetPositionStandard()
        {
            // Verifies that term-vector offsets account for the trailing
            // whitespace of an earlier instance of a multi-valued field.
            Directory dir = NewDirectory();
            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            Document doc = new Document();
            // Term vectors with positions + offsets; field value itself not stored.
            FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
            customType.StoreTermVectors = true;
            customType.StoreTermVectorPositions = true;
            customType.StoreTermVectorOffsets = true;
            // First value ends with two spaces; the second value's offsets must
            // still be shifted past them.
            Field f = NewField("field", "abcd the  ", customType);
            Field f2 = NewField("field", "crunch man", customType);
            doc.Add(f);
            doc.Add(f2);
            w.AddDocument(doc);
            w.Dispose();

            IndexReader r = DirectoryReader.Open(dir);
            TermsEnum termsEnum = r.GetTermVectors(0).GetTerms("field").GetIterator(null);
            Assert.IsNotNull(termsEnum.Next());
            DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);

            // First term ("abcd") from the first field value.
            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            dpEnum.NextPosition();
            Assert.AreEqual(0, dpEnum.StartOffset);
            Assert.AreEqual(4, dpEnum.EndOffset);

            // Next term ("crunch"): starts after the full 10-char first value
            // plus, presumably, the analyzer's offset gap of 1 — the assertions
            // below pin the expected arithmetic.
            Assert.IsNotNull(termsEnum.Next());
            dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            dpEnum.NextPosition();
            Assert.AreEqual(11, dpEnum.StartOffset);
            Assert.AreEqual(17, dpEnum.EndOffset);

            // Final term ("man").
            Assert.IsNotNull(termsEnum.Next());
            dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            dpEnum.NextPosition();
            Assert.AreEqual(18, dpEnum.StartOffset);
            Assert.AreEqual(21, dpEnum.EndOffset);

            r.Dispose();
            dir.Dispose();
        }
+
+        // LUCENE-1448
+        [Test]
+        public virtual void TestEndOffsetPositionStandardEmptyField()
+        {
+            Directory dir = NewDirectory();
+            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
+            Document doc = new Document();
+            FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
+            customType.StoreTermVectors = true;
+            customType.StoreTermVectorPositions = true;
+            customType.StoreTermVectorOffsets = true;
+            Field f = NewField("field", "", customType);
+            Field f2 = NewField("field", "crunch man", customType);
+            doc.Add(f);
+            doc.Add(f2);
+            w.AddDocument(doc);
+            w.Dispose();
+
+            IndexReader r = DirectoryReader.Open(dir);
+            TermsEnum termsEnum = r.GetTermVectors(0).GetTerms("field").GetIterator(null);
+            Assert.IsNotNull(termsEnum.Next());
+            DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
+
+            Assert.AreEqual(1, (int)termsEnum.TotalTermFreq);
+            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+            dpEnum.NextPosition();
+            Assert.AreEqual(1, dpEnum.StartOffset);
+            Assert.AreEqual(7, dpEnum.EndOffset);
+
+            Assert.IsNotNull(termsEnum.Next());
+            dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
+            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+            dpEnum.NextPosition();
+            Assert.AreEqual(8, dpEnum.StartOffset);
+            Assert.AreEqual(11, dpEnum.EndOffset);
+
+            r.Dispose();
+            dir.Dispose();
+        }
+
        // LUCENE-1448
        [Test]
        public virtual void TestEndOffsetPositionStandardEmptyField2()
        {
            // An empty field instance sandwiched between two non-empty ones must
            // still advance offset bookkeeping correctly.
            Directory dir = NewDirectory();
            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            Document doc = new Document();
            FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
            customType.StoreTermVectors = true;
            customType.StoreTermVectorPositions = true;
            customType.StoreTermVectorOffsets = true;

            Field f = NewField("field", "abcd", customType);
            doc.Add(f);
            // Empty middle instance — contributes no tokens.
            doc.Add(NewField("field", "", customType));

            Field f2 = NewField("field", "crunch", customType);
            doc.Add(f2);

            w.AddDocument(doc);
            w.Dispose();

            IndexReader r = DirectoryReader.Open(dir);
            TermsEnum termsEnum = r.GetTermVectors(0).GetTerms("field").GetIterator(null);
            Assert.IsNotNull(termsEnum.Next());
            DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);

            // "abcd" from the first instance.
            Assert.AreEqual(1, (int)termsEnum.TotalTermFreq);
            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            dpEnum.NextPosition();
            Assert.AreEqual(0, dpEnum.StartOffset);
            Assert.AreEqual(4, dpEnum.EndOffset);

            // "crunch" from the third instance: offsets shifted past "abcd" and
            // the (empty) second instance.
            Assert.IsNotNull(termsEnum.Next());
            dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            dpEnum.NextPosition();
            Assert.AreEqual(6, dpEnum.StartOffset);
            Assert.AreEqual(12, dpEnum.EndOffset);

            r.Dispose();
            dir.Dispose();
        }
+
        // LUCENE-1168
        [Test]
        public virtual void TestTermVectorCorruption()
        {
            // Exercises flushing/merging when docs WITHOUT term vectors are mixed
            // with a doc that has them; every read below must succeed cleanly.
            Directory dir = NewDirectory();
            // Two iterations so the second pass runs against a directory that
            // already contains the merged result of the first.
            for (int iter = 0; iter < 2; iter++)
            {
                // Tiny doc buffer + serial merges + doc-count merge policy make
                // the flush/merge pattern deterministic.
                IndexWriter writer = new IndexWriter(dir, ((IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(new LogDocMergePolicy()));

                Document document = new Document();
                // Stored-only field type: no indexing, no term vectors.
                FieldType customType = new FieldType();
                customType.IsStored = true;

                Field storedField = NewField("stored", "stored", customType);
                document.Add(storedField);
                // First two docs carry no term vectors.
                writer.AddDocument(document);
                writer.AddDocument(document);

                // Third doc adds a field with term vectors + positions + offsets.
                document = new Document();
                document.Add(storedField);
                FieldType customType2 = new FieldType(StringField.TYPE_NOT_STORED);
                customType2.StoreTermVectors = true;
                customType2.StoreTermVectorPositions = true;
                customType2.StoreTermVectorOffsets = true;
                Field termVectorField = NewField("termVector", "termVector", customType2);

                document.Add(termVectorField);
                writer.AddDocument(document);
                writer.ForceMerge(1);
                writer.Dispose();

                // Reading every stored document and term vector must not throw.
                IndexReader reader = DirectoryReader.Open(dir);
                for (int i = 0; i < reader.NumDocs; i++)
                {
                    reader.Document(i);
                    reader.GetTermVectors(i);
                }
                reader.Dispose();

                writer = new IndexWriter(dir, ((IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(new LogDocMergePolicy()));

                // Also exercise AddIndexes from a RAM copy of this same index.
                Directory[] indexDirs = new Directory[] { new MockDirectoryWrapper(Random(), new RAMDirectory(dir, NewIOContext(Random()))) };
                writer.AddIndexes(indexDirs);
                writer.ForceMerge(1);
                writer.Dispose();
            }
            dir.Dispose();
        }
+
+        // LUCENE-1168
+        [Test]
+        public virtual void TestTermVectorCorruption2()
+        {
+            Directory dir = NewDirectory();
+            for (int iter = 0; iter < 2; iter++)
+            {
+                IndexWriter writer = new IndexWriter(dir, ((IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(new LogDocMergePolicy()));
+
+                Document document = new Document();
+
+                FieldType customType = new FieldType();
+                customType.IsStored = true;
+
+                Field storedField = NewField("stored", "stored", customType);
+                document.Add(storedField);
+                writer.AddDocument(document);
+                writer.AddDocument(document);
+
+                document = new Document();
+                document.Add(storedField);
+                FieldType customType2 = new FieldType(StringField.TYPE_NOT_STORED);
+                customType2.StoreTermVectors = true;
+                customType2.StoreTermVectorPositions = true;
+                customType2.StoreTermVectorOffsets = true;
+                Field termVectorField = NewField("termVector", "termVector", customType2);
+                document.Add(termVectorField);
+                writer.AddDocument(document);
+                writer.ForceMerge(1);
+                writer.Dispose();
+
+                IndexReader reader = DirectoryReader.Open(dir);
+                Assert.IsNull(reader.GetTermVectors(0));
+                Assert.IsNull(reader.GetTermVectors(1));
+                Assert.IsNotNull(reader.GetTermVectors(2));
+                reader.Dispose();
+            }
+            dir.Dispose();
+        }
+
        // LUCENE-1168
        [Test]
        public virtual void TestTermVectorCorruption3()
        {
            // All docs here DO carry term vectors; this variant merges segments
            // produced by two separate writer sessions.
            Directory dir = NewDirectory();
            IndexWriter writer = new IndexWriter(dir, ((IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(new LogDocMergePolicy()));

            Document document = new Document();
            // Stored-only field.
            FieldType customType = new FieldType();
            customType.IsStored = true;

            Field storedField = NewField("stored", "stored", customType);
            document.Add(storedField);
            // Field with term vectors + positions + offsets.
            FieldType customType2 = new FieldType(StringField.TYPE_NOT_STORED);
            customType2.StoreTermVectors = true;
            customType2.StoreTermVectorPositions = true;
            customType2.StoreTermVectorOffsets = true;
            Field termVectorField = NewField("termVector", "termVector", customType2);
            document.Add(termVectorField);
            // First writer session: 10 identical docs.
            for (int i = 0; i < 10; i++)
            {
                writer.AddDocument(document);
            }
            writer.Dispose();

            // Second session appends 6 more docs, then merges everything down.
            writer = new IndexWriter(dir, ((IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(new LogDocMergePolicy()));
            for (int i = 0; i < 6; i++)
            {
                writer.AddDocument(document);
            }

            writer.ForceMerge(1);
            writer.Dispose();

            // Reading vectors and stored fields of the first 10 docs must not throw.
            IndexReader reader = DirectoryReader.Open(dir);
            for (int i = 0; i < 10; i++)
            {
                reader.GetTermVectors(i);
                reader.Document(i);
            }
            reader.Dispose();
            dir.Dispose();
        }
+
+        // LUCENE-1008
+        [Test]
+        public virtual void TestNoTermVectorAfterTermVector()
+        {
+            Directory dir = NewDirectory();
+            IndexWriter iw = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
+            Document document = new Document();
+            FieldType customType2 = new FieldType(StringField.TYPE_NOT_STORED);
+            customType2.StoreTermVectors = true;
+            customType2.StoreTermVectorPositions = true;
+            customType2.StoreTermVectorOffsets = true;
+            document.Add(NewField("tvtest", "a b c", customType2));
+            iw.AddDocument(document);
+            document = new Document();
+            document.Add(NewTextField("tvtest", "x y z", Field.Store.NO));
+            iw.AddDocument(document);
+            // Make first segment
+            iw.Commit();
+
+            FieldType customType = new FieldType(StringField.TYPE_NOT_STORED);
+            customType.StoreTermVectors = true;
+            document.Add(NewField("tvtest", "a b c", customType));
+            iw.AddDocument(document);
+            // Make 2nd segment
+            iw.Commit();
+
+            iw.ForceMerge(1);
+            iw.Dispose();
+            dir.Dispose();
+        }
+
        // LUCENE-1010
        [Test]
        public virtual void TestNoTermVectorAfterTermVectorMerge()
        {
            // A segment with a term-vector doc followed by one without is merged;
            // adding another term-vector doc afterwards must still work.
            Directory dir = NewDirectory();
            IndexWriter iw = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            Document document = new Document();
            FieldType customType = new FieldType(StringField.TYPE_NOT_STORED);
            customType.StoreTermVectors = true;
            document.Add(NewField("tvtest", "a b c", customType));
            iw.AddDocument(document);
            iw.Commit();

            // Second doc: same field, no term vectors.
            document = new Document();
            document.Add(NewTextField("tvtest", "x y z", Field.Store.NO));
            iw.AddDocument(document);
            // Make first segment
            iw.Commit();

            iw.ForceMerge(1);

            // NOTE: reuses `document`, so this doc carries the "x y z" field
            // plus the new term-vector field.
            FieldType customType2 = new FieldType(StringField.TYPE_NOT_STORED);
            customType2.StoreTermVectors = true;
            document.Add(NewField("tvtest", "a b c", customType2));
            iw.AddDocument(document);
            // Make 2nd segment
            iw.Commit();
            iw.ForceMerge(1);

            iw.Dispose();
            dir.Dispose();
        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestTermdocPerf.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestTermdocPerf.cs b/src/Lucene.Net.Tests/Index/TestTermdocPerf.cs
new file mode 100644
index 0000000..c76b4ee
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestTermdocPerf.cs
@@ -0,0 +1,176 @@
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Attributes;
+using Lucene.Net.Documents;
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Index
+{
+    using System.IO;
+
+    /*
+        /// Copyright 2006 The Apache Software Foundation
+        ///
+        /// Licensed under the Apache License, Version 2.0 (the "License");
+        /// you may not use this file except in compliance with the License.
+        /// You may obtain a copy of the License at
+        ///
+        ///     http://www.apache.org/licenses/LICENSE-2.0
+        ///
+        /// Unless required by applicable law or agreed to in writing, software
+        /// distributed under the License is distributed on an "AS IS" BASIS,
+        /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+        /// See the License for the specific language governing permissions and
+        /// limitations under the License.
+        */
+
+    using Analyzer = Lucene.Net.Analysis.Analyzer;
+    using BytesRef = Lucene.Net.Util.BytesRef;
+    using CharTermAttribute = Lucene.Net.Analysis.TokenAttributes.CharTermAttribute;
+    using Directory = Lucene.Net.Store.Directory;
+    using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+    using Document = Documents.Document;
+    using Field = Field;
+    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+    using TestUtil = Lucene.Net.Util.TestUtil;
+    using Tokenizer = Lucene.Net.Analysis.Tokenizer;
+
+    internal class RepeatingTokenizer : Tokenizer
+    {
+        private readonly Random Random;
+        private readonly float PercentDocs;
+        private readonly int MaxTF;
+        private int Num;
+        internal ICharTermAttribute TermAtt;
+        internal string Value;
+
+        public RepeatingTokenizer(TextReader reader, string val, Random random, float percentDocs, int maxTF)
+            : base(reader)
+        {
+            this.Value = val;
+            this.Random = random;
+            this.PercentDocs = percentDocs;
+            this.MaxTF = maxTF;
+            this.TermAtt = AddAttribute<ICharTermAttribute>();
+        }
+
+        public sealed override bool IncrementToken()
+        {
+            Num--;
+            if (Num >= 0)
+            {
+                ClearAttributes();
+                TermAtt.Append(Value);
+                return true;
+            }
+            return false;
+        }
+
+        public override void Reset()
+        {
+            base.Reset();
+            if (Random.NextDouble() < PercentDocs)
+            {
+                Num = Random.Next(MaxTF) + 1;
+            }
+            else
+            {
+                Num = 0;
+            }
+        }
+    }
+
+    [TestFixture]
+    public class TestTermdocPerf : LuceneTestCase
+    {
+        internal virtual void AddDocs(Random random, Directory dir, int ndocs, string field, string val, int maxTF, float percentDocs)
+        {
+            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(random, val, maxTF, percentDocs);
+
+            Document doc = new Document();
+
+            doc.Add(NewStringField(field, val, Field.Store.NO));
+            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.CREATE).SetMaxBufferedDocs(100).SetMergePolicy(NewLogMergePolicy(100)));
+
+            for (int i = 0; i < ndocs; i++)
+            {
+                writer.AddDocument(doc);
+            }
+
+            writer.ForceMerge(1);
+            writer.Dispose();
+        }
+
+        private class AnalyzerAnonymousInnerClassHelper : Analyzer
+        {
+            private Random Random;
+            private string Val;
+            private int MaxTF;
+            private float PercentDocs;
+
+            public AnalyzerAnonymousInnerClassHelper(Random random, string val, int maxTF, float percentDocs)
+            {
+                this.Random = random;
+                this.Val = val;
+                this.MaxTF = maxTF;
+                this.PercentDocs = percentDocs;
+            }
+
+            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+            {
+                return new TokenStreamComponents(new RepeatingTokenizer(reader, Val, Random, PercentDocs, MaxTF));
+            }
+        }
+
+        public virtual int DoTest(int iter, int ndocs, int maxTF, float percentDocs)
+        {
+            Directory dir = NewDirectory();
+
+            long start = Environment.TickCount;
+            AddDocs(Random(), dir, ndocs, "foo", "val", maxTF, percentDocs);
+            long end = Environment.TickCount;
+            if (VERBOSE)
+            {
+                Console.WriteLine("milliseconds for creation of " + ndocs + " docs = " + (end - start));
+            }
+
+            IndexReader reader = DirectoryReader.Open(dir);
+
+            TermsEnum tenum = MultiFields.GetTerms(reader, "foo").GetIterator(null);
+
+            start = Environment.TickCount;
+
+            int ret = 0;
+            DocsEnum tdocs = null;
+            Random random = new Random(Random().Next());
+            for (int i = 0; i < iter; i++)
+            {
+                tenum.SeekCeil(new BytesRef("val"));
+                tdocs = TestUtil.Docs(random, tenum, MultiFields.GetLiveDocs(reader), tdocs, DocsEnum.FLAG_NONE);
+                while (tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
+                {
+                    ret += tdocs.DocID;
+                }
+            }
+
+            end = Environment.TickCount;
+            if (VERBOSE)
+            {
+                Console.WriteLine("milliseconds for " + iter + " TermDocs iteration: " + (end - start));
+            }
+
+            return ret;
+        }
+
+#if !NETSTANDARD
+        // LUCENENET: There is no Timeout on NUnit for .NET Core.
+        [Timeout(120000)]
+#endif
+        [Test, LongRunningTest, HasTimeout]
+        public virtual void TestTermDocPerf()
+        {
+            // performance test for 10% of documents containing a term
+            DoTest(100000, 10000, 3, .1f);
+        }
+    }
+}
\ No newline at end of file


Mime
View raw message