lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From aro...@apache.org
Subject svn commit: r411501 [11/30] - in /incubator/lucene.net/trunk/C#/src: ./ Demo/DeleteFiles/ Demo/DemoLib/ Demo/DemoLib/HTML/ Demo/IndexFiles/ Demo/IndexHtml/ Demo/SearchFiles/ Lucene.Net/ Lucene.Net/Analysis/ Lucene.Net/Analysis/Standard/ Lucene.Net/Docu...
Date Sun, 04 Jun 2006 02:41:25 GMT
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentReader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentReader.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentReader.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentReader.cs Sat Jun  3 19:41:13 2006
@@ -13,21 +13,22 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
 using Document = Lucene.Net.Documents.Document;
+using Field = Lucene.Net.Documents.Field;
+using DefaultSimilarity = Lucene.Net.Search.DefaultSimilarity;
 using Directory = Lucene.Net.Store.Directory;
-using InputStream = Lucene.Net.Store.InputStream;
-using OutputStream = Lucene.Net.Store.OutputStream;
+using IndexInput = Lucene.Net.Store.IndexInput;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
 using BitVector = Lucene.Net.Util.BitVector;
+
 namespace Lucene.Net.Index
 {
 	
-	/// <summary> FIXME: Describe class <code>SegmentReader</code> here.
-	/// 
-	/// </summary>
-	/// <version>  $Id: SegmentReader.java,v 1.23 2004/07/10 06:19:01 otis Exp $
+	/// <version>  $Id: SegmentReader.java 329523 2005-10-30 05:37:11Z yonik $
 	/// </version>
-	sealed public class SegmentReader : IndexReader
+	public class SegmentReader : IndexReader
 	{
 		private System.String segment;
 		
@@ -35,19 +36,25 @@
 		private FieldsReader fieldsReader;
 		
 		internal TermInfosReader tis;
-		internal TermVectorsReader termVectorsReader;
+		internal TermVectorsReader termVectorsReaderOrig = null;
+		internal System.LocalDataStoreSlot termVectorsLocal = System.Threading.Thread.AllocateDataSlot();
 		
 		internal BitVector deletedDocs = null;
 		private bool deletedDocsDirty = false;
 		private bool normsDirty = false;
 		private bool undeleteAll = false;
 		
-		internal InputStream freqStream;
-		internal InputStream proxStream;
+		internal IndexInput freqStream;
+		internal IndexInput proxStream;
 		
 		// Compound File Reader when based on a compound file segment
-		internal CompoundFileReader cfsReader;
+		internal CompoundFileReader cfsReader = null;
 		
+        public FieldInfos FieldInfos
+        {
+            get {   return fieldInfos;  }
+        }
+
 		private class Norm
 		{
 			private void  InitBlock(SegmentReader enclosingInstance)
@@ -63,22 +70,22 @@
 				}
 				
 			}
-			public Norm(SegmentReader enclosingInstance, InputStream in_Renamed, int number)
+			public Norm(SegmentReader enclosingInstance, IndexInput in_Renamed, int number)
 			{
 				InitBlock(enclosingInstance);
 				this.in_Renamed = in_Renamed;
 				this.number = number;
 			}
 			
-			public InputStream in_Renamed;  // private -> public
-			public byte[] bytes;           // private -> public
-			public bool dirty;              // private -> public
-			public int number;              // private -> public
+			public IndexInput in_Renamed;
+			public byte[] bytes;
+			public bool dirty;
+			public int number;
 			
-			public void  ReWrite()          // private -> public
+			public void  ReWrite()
 			{
 				// NOTE: norms are re-written in regular directory, not cfs
-				OutputStream out_Renamed = Enclosing_Instance.Directory().CreateFile(Enclosing_Instance.segment + ".tmp");
+				IndexOutput out_Renamed = Enclosing_Instance.Directory().CreateOutput(Enclosing_Instance.segment + ".tmp");
 				try
 				{
 					out_Renamed.WriteBytes(bytes, Enclosing_Instance.MaxDoc());
@@ -87,7 +94,14 @@
 				{
 					out_Renamed.Close();
 				}
-				System.String fileName = Enclosing_Instance.segment + ".f" + number;
+				System.String fileName;
+				if (Enclosing_Instance.cfsReader == null)
+					fileName = Enclosing_Instance.segment + ".f" + number;
+				else
+				{
+					// use a different file name if we have compound format
+					fileName = Enclosing_Instance.segment + ".s" + number;
+				}
 				Enclosing_Instance.Directory().RenameFile(Enclosing_Instance.segment + ".tmp", fileName);
 				this.dirty = false;
 			}
@@ -95,14 +109,37 @@
 		
 		private System.Collections.Hashtable norms = System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable());
 		
-		public /*internal*/ SegmentReader(SegmentInfos sis, SegmentInfo si, bool closeDir) : base(si.dir, sis, closeDir)
+		/// <summary>The class which implements SegmentReader. </summary>
+		private static System.Type IMPL;
+		
+		public SegmentReader() : base(null)
+		{
+		}
+		
+		public static SegmentReader Get(SegmentInfo si)
+		{
+			return Get(si.dir, si, null, false, false);
+		}
+		
+		public static SegmentReader Get(SegmentInfos sis, SegmentInfo si, bool closeDir)
 		{
-			Initialize(si);
+			return Get(si.dir, si, sis, closeDir, true);
 		}
 		
-		public /*internal*/ SegmentReader(SegmentInfo si) : base(si.dir)
+		public static SegmentReader Get(Directory dir, SegmentInfo si, SegmentInfos sis, bool closeDir, bool ownDir)
 		{
-			Initialize(si);
+			SegmentReader instance;
+			try
+			{
+				instance = (SegmentReader) System.Activator.CreateInstance(IMPL);
+			}
+			catch (System.Exception e)
+			{
+				throw new System.SystemException("cannot load SegmentReader class: " + e);
+			}
+			instance.Init(dir, sis, closeDir, ownDir);
+			instance.Initialize(si);
+			return instance;
 		}
 		
 		private void  Initialize(SegmentInfo si)
@@ -129,22 +166,28 @@
 			
 			// make sure that all index files have been read or are kept open
 			// so that if an index update removes them we'll still have them
-			freqStream = cfsDir.OpenFile(segment + ".frq");
-			proxStream = cfsDir.OpenFile(segment + ".prx");
+			freqStream = cfsDir.OpenInput(segment + ".frq");
+			proxStream = cfsDir.OpenInput(segment + ".prx");
 			OpenNorms(cfsDir);
 			
 			if (fieldInfos.HasVectors())
 			{
 				// open term vector files only as needed
-				termVectorsReader = new TermVectorsReader(cfsDir, segment, fieldInfos);
+				termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
 			}
 		}
 		
+		~SegmentReader()
+		{
+			// patch for pre-1.4.2 JVMs, whose ThreadLocals leak
+			System.Threading.Thread.SetData(termVectorsLocal, null);
+		}
+		
 		protected internal override void  DoCommit()
 		{
 			if (deletedDocsDirty)
 			{
-				// re-write deleted 
+				// re-write deleted
 				deletedDocs.Write(Directory(), segment + ".tmp");
 				Directory().RenameFile(segment + ".tmp", segment + ".del");
 			}
@@ -154,7 +197,7 @@
 			}
 			if (normsDirty)
 			{
-				// re-write norms 
+				// re-write norms
 				System.Collections.IEnumerator values = norms.Values.GetEnumerator();
 				while (values.MoveNext())
 				{
@@ -181,8 +224,9 @@
 				proxStream.Close();
 			
 			CloseNorms();
-			if (termVectorsReader != null)
-				termVectorsReader.Close();
+			
+			if (termVectorsReaderOrig != null)
+				termVectorsReaderOrig.Close();
 			
 			if (cfsReader != null)
 				cfsReader.Close();
@@ -207,9 +251,9 @@
 		internal static bool HasSeparateNorms(SegmentInfo si)
 		{
 			System.String[] result = si.dir.List();
-			System.String pattern = si.name + ".f";
+			System.String pattern = si.name + ".s";
 			int patternLength = pattern.Length;
-			for (int i = 0; i < 0; i++)
+			for (int i = 0; i < result.Length; i++)
 			{
 				if (result[i].StartsWith(pattern) && System.Char.IsDigit(result[i][patternLength]))
 					return true;
@@ -233,14 +277,13 @@
 			undeleteAll = true;
 		}
 		
-		internal System.Collections.ArrayList Files()
+		internal virtual System.Collections.ArrayList Files()
 		{
 			System.Collections.ArrayList files = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(16));
-			System.String[] ext = new System.String[]{"cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", "tvx", "tvd", "tvf", "tvp"};
 			
-			for (int i = 0; i < ext.Length; i++)
+			for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.Length; i++)
 			{
-				System.String name = segment + "." + ext[i];
+				System.String name = segment + "." + IndexFileNames.INDEX_EXTENSIONS[i];
 				if (Directory().FileExists(name))
 					files.Add(name);
 			}
@@ -248,8 +291,16 @@
 			for (int i = 0; i < fieldInfos.Size(); i++)
 			{
 				FieldInfo fi = fieldInfos.FieldInfo(i);
-				if (fi.isIndexed)
-					files.Add(segment + ".f" + i);
+				if (fi.isIndexed && !fi.omitNorms)
+				{
+					System.String name;
+					if (cfsReader == null)
+						name = segment + ".f" + i;
+					else
+						name = segment + ".s" + i;
+					if (Directory().FileExists(name))
+						files.Add(name);
+				}
 			}
 			return files;
 		}
@@ -314,11 +365,13 @@
 			return fieldsReader.Size();
 		}
 		
-		/// <seealso cref="IndexReader#GetFieldNames()">
+		/// <seealso cref="IndexReader.GetFieldNames()">
 		/// </seealso>
+		/// <deprecated>  Replaced by {@link #GetFieldNames (IndexReader.FieldOption fldOption)}
+		/// </deprecated>
 		public override System.Collections.ICollection GetFieldNames()
 		{
-			// maintain a unique set of Field names
+			// maintain a unique set of field names
 			System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
 			for (int i = 0; i < fieldInfos.Size(); i++)
 			{
@@ -328,11 +381,13 @@
 			return fieldSet;
 		}
 		
-		/// <seealso cref="IndexReader#GetFieldNames(boolean)">
+		/// <seealso cref="IndexReader.GetFieldNames(bool)">
 		/// </seealso>
+		/// <deprecated>  Replaced by {@link #GetFieldNames (IndexReader.FieldOption fldOption)}
+		/// </deprecated>
 		public override System.Collections.ICollection GetFieldNames(bool indexed)
 		{
-			// maintain a unique set of Field names
+			// maintain a unique set of field names
 			System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
 			for (int i = 0; i < fieldInfos.Size(); i++)
 			{
@@ -343,20 +398,57 @@
 			return fieldSet;
 		}
 		
-		/// <summary> </summary>
-		/// <param name="storedTermVector">if true, returns only Indexed fields that have term vector info, 
-		/// else only indexed fields without term vector info 
-		/// </param>
-		/// <returns> Collection of Strings indicating the names of the fields
-		/// </returns>
-		public override System.Collections.ICollection GetIndexedFieldNames(bool storedTermVector)
+		/// <seealso cref="IndexReader.GetIndexedFieldNames(Field.TermVector tvSpec)">
+		/// </seealso>
+		/// <deprecated>  Replaced by {@link #GetFieldNames (IndexReader.FieldOption fldOption)}
+		/// </deprecated>
+		public override System.Collections.ICollection GetIndexedFieldNames(Field.TermVector tvSpec)
 		{
-			// maintain a unique set of Field names
+			bool storedTermVector;
+			bool storePositionWithTermVector;
+			bool storeOffsetWithTermVector;
+			
+			if (tvSpec == Field.TermVector.NO)
+			{
+				storedTermVector = false;
+				storePositionWithTermVector = false;
+				storeOffsetWithTermVector = false;
+			}
+			else if (tvSpec == Field.TermVector.YES)
+			{
+				storedTermVector = true;
+				storePositionWithTermVector = false;
+				storeOffsetWithTermVector = false;
+			}
+			else if (tvSpec == Field.TermVector.WITH_POSITIONS)
+			{
+				storedTermVector = true;
+				storePositionWithTermVector = true;
+				storeOffsetWithTermVector = false;
+			}
+			else if (tvSpec == Field.TermVector.WITH_OFFSETS)
+			{                                                                           
+				storedTermVector = true;
+				storePositionWithTermVector = false;
+				storeOffsetWithTermVector = true;
+			}
+			else if (tvSpec == Field.TermVector.WITH_POSITIONS_OFFSETS)
+			{
+				storedTermVector = true;
+				storePositionWithTermVector = true;
+				storeOffsetWithTermVector = true;
+			}
+			else
+			{
+				throw new System.ArgumentException("unknown termVector parameter " + tvSpec);
+			}
+			
+			// maintain a unique set of field names
 			System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
-			for (int ii = 0; ii < fieldInfos.Size(); ii++)
+			for (int i = 0; i < fieldInfos.Size(); i++)
 			{
-				FieldInfo fi = fieldInfos.FieldInfo(ii);
-				if (fi.isIndexed == true && fi.storeTermVector == storedTermVector)
+				FieldInfo fi = fieldInfos.FieldInfo(i);
+				if (fi.isIndexed && fi.storeTermVector == storedTermVector && fi.storePositionWithTermVector == storePositionWithTermVector && fi.storeOffsetWithTermVector == storeOffsetWithTermVector)
 				{
 					fieldSet.Add(fi.name, fi.name);
 				}
@@ -364,14 +456,93 @@
 			return fieldSet;
 		}
 		
-		public override byte[] Norms(System.String field)
+		/// <seealso cref="IndexReader.GetFieldNames(IndexReader.FieldOption fldOption)">
+		/// </seealso>
+		public override System.Collections.ICollection GetFieldNames(IndexReader.FieldOption fieldOption)
+		{
+			System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
+			for (int i = 0; i < fieldInfos.Size(); i++)
+			{
+				FieldInfo fi = fieldInfos.FieldInfo(i);
+				if (fieldOption == IndexReader.FieldOption.ALL)
+				{
+					fieldSet.Add(fi.name, fi.name);
+				}
+				else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED)
+				{
+					fieldSet.Add(fi.name, fi.name);
+				}
+				else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED)
+				{
+					fieldSet.Add(fi.name, fi.name);
+				}
+				else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR)
+				{
+					fieldSet.Add(fi.name, fi.name);
+				}
+				else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR)
+				{
+					fieldSet.Add(fi.name, fi.name);
+				}
+				else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR)
+				{
+					fieldSet.Add(fi.name, fi.name);
+				}
+				else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION)
+				{
+					fieldSet.Add(fi.name, fi.name);
+				}
+				else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET)
+				{
+					fieldSet.Add(fi.name, fi.name);
+				}
+				else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET)
+				{
+					fieldSet.Add(fi.name, fi.name);
+				}
+			}
+			return fieldSet;
+		}
+		
+		
+		public override bool HasNorms(System.String field)
+		{
+			lock (this)
+			{
+				return norms.ContainsKey(field);
+			}
+		}
+		
+		/// <summary>Builds a norms array of <c>size</c> entries, each set to the encoded norm for 1.0f.</summary>
+		internal static byte[] CreateFakeNorms(int size)
+		{
+			byte[] ones = new byte[size];
+			byte val = DefaultSimilarity.EncodeNorm(1.0f);
+			// Fill the array that is returned (not a scratch copy), so callers
+			// receive the encoded 1.0f norm for every document rather than zeros.
+			for (int index = 0; index < ones.Length; index++)
+				ones[index] = val;
+
+			return ones;
+		}
+		
+		private byte[] ones;
+		private byte[] FakeNorms()
+		{
+			if (ones == null)
+				ones = CreateFakeNorms(MaxDoc());
+			return ones;
+		}
+		
+		// can return null if norms aren't stored
+		protected internal virtual byte[] GetNorms(System.String field)
 		{
 			lock (this)
 			{
 				Norm norm = (Norm) norms[field];
 				if (norm == null)
-				// not an indexed Field
-					return null;
+					return null; // not indexed, or norms not stored
+				
 				if (norm.bytes == null)
 				{
 					// value not yet read
@@ -383,11 +554,23 @@
 			}
 		}
 		
+		// returns fake norms if norms aren't available
+		public override byte[] Norms(System.String field)
+		{
+			lock (this)
+			{
+				byte[] bytes = GetNorms(field);
+				if (bytes == null)
+					bytes = FakeNorms();
+				return bytes;
+			}
+		}
+		
 		protected internal override void  DoSetNorm(int doc, System.String field, byte value_Renamed)
 		{
 			Norm norm = (Norm) norms[field];
 			if (norm == null)
-			// not an indexed Field
+			// not an indexed field
 				return ;
 			norm.dirty = true; // mark it dirty
 			normsDirty = true;
@@ -403,7 +586,10 @@
 				
 				Norm norm = (Norm) norms[field];
 				if (norm == null)
-					return ; // use zeros in array
+				{
+					Array.Copy(FakeNorms(), 0, bytes, offset, MaxDoc());
+					return ;
+				}
 				
 				if (norm.bytes != null)
 				{
@@ -412,7 +598,7 @@
 					return ;
 				}
 				
-				InputStream normStream = (InputStream) norm.in_Renamed.Clone();
+				IndexInput normStream = (IndexInput) norm.in_Renamed.Clone();
 				try
 				{
 					// read from disk
@@ -426,17 +612,23 @@
 			}
 		}
 		
+		
 		private void  OpenNorms(Directory cfsDir)
 		{
 			for (int i = 0; i < fieldInfos.Size(); i++)
 			{
 				FieldInfo fi = fieldInfos.FieldInfo(i);
-				if (fi.isIndexed)
+				if (fi.isIndexed && !fi.omitNorms)
 				{
-					System.String fileName = segment + ".f" + fi.number;
-					// look first for re-written file, then in compound format
-					Directory d = Directory().FileExists(fileName)?Directory():cfsDir;
-					norms[fi.name] = new Norm(this, d.OpenFile(fileName), fi.number);
+					// look first if there are separate norms in compound format
+					System.String fileName = segment + ".s" + fi.number;
+					Directory d = Directory();
+					if (!d.FileExists(fileName))
+					{
+						fileName = segment + ".f" + fi.number;
+						d = cfsDir;
+					}
+					norms[fi.name] = new Norm(this, d.OpenInput(fileName), fi.number);
 				}
 			}
 		}
@@ -454,16 +646,35 @@
 			}
 		}
 		
-		/// <summary>Return a term frequency vector for the specified document and Field. The
+		/// <summary> Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.</summary>
+		/// <returns> TermVectorsReader
+		/// </returns>
+		private TermVectorsReader GetTermVectorsReader()
+		{
+			TermVectorsReader tvReader = (TermVectorsReader) System.Threading.Thread.GetData(termVectorsLocal);
+			if (tvReader == null)
+			{
+				tvReader = (TermVectorsReader) termVectorsReaderOrig.Clone();
+				System.Threading.Thread.SetData(termVectorsLocal, tvReader);
+			}
+			return tvReader;
+		}
+		
+		/// <summary>Return a term frequency vector for the specified document and field. The
 		/// vector returned contains term numbers and frequencies for all terms in
-		/// the specified Field of this document, if the Field had storeTermVector
+		/// the specified field of this document, if the field had storeTermVector
 		/// flag set.  If the flag was not set, the method returns null.
 		/// </summary>
+		/// <throws>  IOException </throws>
 		public override TermFreqVector GetTermFreqVector(int docNumber, System.String field)
 		{
-			// Check if this Field is invalid or has no stored term vector
+			// Check if this field is invalid or has no stored term vector
 			FieldInfo fi = fieldInfos.FieldInfo(field);
-			if (fi == null || !fi.storeTermVector)
+			if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
+				return null;
+			
+			TermVectorsReader termVectorsReader = GetTermVectorsReader();
+			if (termVectorsReader == null)
 				return null;
 			
 			return termVectorsReader.Get(docNumber, field);
@@ -471,17 +682,48 @@
 		
 		
 		/// <summary>Return an array of term frequency vectors for the specified document.
-		/// The array contains a vector for each vectorized Field in the document.
+		/// The array contains a vector for each vectorized field in the document.
 		/// Each vector vector contains term numbers and frequencies for all terms
-		/// in a given vectorized Field.
+		/// in a given vectorized field.
 		/// If no such fields existed, the method returns null.
 		/// </summary>
+		/// <throws>  IOException </throws>
 		public override TermFreqVector[] GetTermFreqVectors(int docNumber)
 		{
+			if (termVectorsReaderOrig == null)
+				return null;
+			
+			TermVectorsReader termVectorsReader = GetTermVectorsReader();
 			if (termVectorsReader == null)
 				return null;
 			
 			return termVectorsReader.Get(docNumber);
+		}
+
+        static SegmentReader()
+		{
+			{
+				try
+				{
+                    System.String name = SupportClass.AppSettings.Get("Lucene.Net.SegmentReader.class", typeof(SegmentReader).FullName);
+					IMPL = System.Type.GetType(name);
+				}
+				catch (System.Security.SecurityException)
+				{
+					try
+					{
+						IMPL = System.Type.GetType(typeof(SegmentReader).FullName);
+					}
+					catch (System.Exception e)
+					{
+						throw new System.SystemException("cannot load default SegmentReader class: " + e);
+					}
+				}
+                catch (System.Exception e)
+                {
+                    throw new System.SystemException("cannot load SegmentReader class: " + e);
+                }
+            }
 		}
 	}
 }

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermDocs.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentTermDocs.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermDocs.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermDocs.cs Sat Jun  3 19:41:13 2006
@@ -13,36 +13,38 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
-using InputStream = Lucene.Net.Store.InputStream;
+using IndexInput = Lucene.Net.Store.IndexInput;
 using BitVector = Lucene.Net.Util.BitVector;
+
 namespace Lucene.Net.Index
 {
 	
 	public class SegmentTermDocs : TermDocs
 	{
 		protected internal SegmentReader parent;
-		private InputStream freqStream;
-		private int count;
-		private int df;
-		private BitVector deletedDocs;
+		protected internal IndexInput freqStream;
+		protected internal int count;
+		protected internal int df;
+		protected internal BitVector deletedDocs;
 		internal int doc = 0;
 		internal int freq;
 		
 		private int skipInterval;
 		private int numSkips;
 		private int skipCount;
-		private InputStream skipStream;
+		private IndexInput skipStream;
 		private int skipDoc;
 		private long freqPointer;
 		private long proxPointer;
 		private long skipPointer;
 		private bool haveSkipped;
 		
-		public /*internal*/ SegmentTermDocs(SegmentReader parent)
+		public SegmentTermDocs(SegmentReader parent)
 		{
 			this.parent = parent;
-			this.freqStream = (InputStream) parent.freqStream.Clone();
+			this.freqStream = (IndexInput) parent.freqStream.Clone();
 			this.deletedDocs = parent.deletedDocs;
 			this.skipInterval = parent.tis.GetSkipInterval();
 		}
@@ -177,7 +179,7 @@
 				// optimized case
 				
 				if (skipStream == null)
-					skipStream = (InputStream) freqStream.Clone(); // lazily clone
+					skipStream = (IndexInput) freqStream.Clone(); // lazily clone
 				
 				if (!haveSkipped)
 				{

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermEnum.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentTermEnum.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermEnum.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermEnum.cs Sat Jun  3 19:41:13 2006
@@ -13,19 +13,24 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
-using InputStream = Lucene.Net.Store.InputStream;
+using IndexInput = Lucene.Net.Store.IndexInput;
+
 namespace Lucene.Net.Index
 {
 	
-	sealed public class SegmentTermEnum:TermEnum, System.ICloneable
+	public sealed class SegmentTermEnum : TermEnum, System.ICloneable
 	{
-		private InputStream input;
+		private IndexInput input;
 		internal FieldInfos fieldInfos;
 		internal long size;
 		internal long position = - 1;
 		
-		private Term term = new Term("", "");
+		private TermBuffer termBuffer = new TermBuffer();
+		private TermBuffer prevBuffer = new TermBuffer();
+		private TermBuffer scratch; // used for scanning
+		
 		private TermInfo termInfo = new TermInfo();
 		
 		private int format;
@@ -34,11 +39,8 @@
 		internal int indexInterval;
 		internal int skipInterval;
 		private int formatM1SkipInterval;
-		internal Term prev;
-		
-		private char[] buffer = new char[]{};
 		
-		internal SegmentTermEnum(InputStream i, FieldInfos fis, bool isi)
+		internal SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi)
 		{
 			input = i;
 			fieldInfos = fis;
@@ -96,10 +98,12 @@
 			{
 			}
 			
-			clone.input = (InputStream) input.Clone();
+			clone.input = (IndexInput) input.Clone();
 			clone.termInfo = new TermInfo(termInfo);
-			if (term != null)
-				clone.GrowBuffer(term.text.Length);
+			
+			clone.termBuffer = (TermBuffer) termBuffer.Clone();
+			clone.prevBuffer = (TermBuffer) prevBuffer.Clone();
+			clone.scratch = null;
 			
 			return clone;
 		}
@@ -108,10 +112,9 @@
 		{
 			input.Seek(pointer);
 			position = p;
-			term = t;
-			prev = null;
+			termBuffer.Set(t);
+			prevBuffer.Reset();
 			termInfo.Set(ti);
-			GrowBuffer(term.text.Length); // copy term text into buffer
 		}
 		
 		/// <summary>Increments the enumeration to the next element.  True if one exists.</summary>
@@ -119,12 +122,12 @@
 		{
 			if (position++ >= size - 1)
 			{
-				term = null;
+				termBuffer.Reset();
 				return false;
 			}
 			
-			prev = term;
-			term = ReadTerm();
+			prevBuffer.Set(termBuffer);
+			termBuffer.Read(input, fieldInfos);
 			
 			termInfo.docFreq = input.ReadVInt(); // read doc freq
 			termInfo.freqPointer += input.ReadVLong(); // read freq pointer
@@ -154,24 +157,15 @@
 			return true;
 		}
 		
-		private Term ReadTerm()
-		{
-			int start = input.ReadVInt();
-			int length = input.ReadVInt();
-			int totalLength = start + length;
-			if (buffer.Length < totalLength)
-				GrowBuffer(totalLength);
-			
-			input.ReadChars(buffer, start, length);
-			return new Term(fieldInfos.FieldName(input.ReadVInt()), new System.String(buffer, 0, totalLength), false);
-		}
-		
-		private void  GrowBuffer(int length)
+		/// <summary>Optimized scan, without allocating new terms. </summary>
+		internal void  ScanTo(Term term)
 		{
-			buffer = new char[length];
-			for (int i = 0; i < term.text.Length; i++)
-			// copy contents
-				buffer[i] = term.text[i];
+			if (scratch == null)
+				scratch = new TermBuffer();
+			scratch.Set(term);
+			while (scratch.CompareTo(termBuffer) > 0 && Next())
+			{
+			}
 		}
 		
 		/// <summary>Returns the current Term in the enumeration.
@@ -179,13 +173,19 @@
 		/// </summary>
 		public override Term Term()
 		{
-			return term;
+			return termBuffer.ToTerm();
+		}
+		
+		/// <summary>Returns the previous Term enumerated. Initially null.</summary>
+		internal Term Prev()
+		{
+			return prevBuffer.ToTerm();
 		}
 		
 		/// <summary>Returns the current TermInfo in the enumeration.
 		/// Initially invalid, valid after next() called for the first time.
 		/// </summary>
-		public /*internal*/ TermInfo TermInfo()
+		internal TermInfo TermInfo()
 		{
 			return new TermInfo(termInfo);
 		}

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermPositionVector.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentTermPositionVector.cs?rev=411501&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermPositionVector.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermPositionVector.cs Sat Jun  3 19:41:13 2006
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2004 The Apache Software Foundation
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+	
+	public class SegmentTermPositionVector : SegmentTermVector, TermPositionVector
+	{
+		protected internal int[][] positions;
+		protected internal TermVectorOffsetInfo[][] offsets;
+		public static readonly int[] EMPTY_TERM_POS = new int[0];
+		
+		public SegmentTermPositionVector(System.String field, System.String[] terms, int[] termFreqs, int[][] positions, TermVectorOffsetInfo[][] offsets):base(field, terms, termFreqs)
+		{
+			this.offsets = offsets;
+			this.positions = positions;
+		}
+		
+		/// <summary> Returns an array of TermVectorOffsetInfo in which the term is found.
+		/// 
+		/// </summary>
+		/// <param name="index">The position in the array to get the offsets from
+		/// </param>
+		/// <returns> An array of TermVectorOffsetInfo objects, the empty list if index is out of range, or null if no offsets are stored
+		/// </returns>
+		/// <seealso cref="Lucene.Net.Analysis.Token">
+		/// </seealso>
+		public virtual TermVectorOffsetInfo[] GetOffsets(int index)
+		{
+			TermVectorOffsetInfo[] result = TermVectorOffsetInfo.EMPTY_OFFSET_INFO;
+			if (offsets == null)
+				return null;
+			if (index >= 0 && index < offsets.Length)
+			{
+				result = offsets[index];
+			}
+			return result;
+		}
+		
+		/// <summary> Returns an array of positions in which the term is found.
+		/// Terms are identified by the index at which its number appears in the
+		/// term String array obtained from the <code>IndexOf</code> method.
+		/// </summary>
+		public virtual int[] GetTermPositions(int index)
+		{
+			int[] result = EMPTY_TERM_POS;
+			if (positions == null)
+				return null;
+			if (index >= 0 && index < positions.Length)
+			{
+				result = positions[index];
+			}
+			
+			return result;
+		}
+	}
+}
\ No newline at end of file

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermPositions.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentTermPositions.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermPositions.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermPositions.cs Sat Jun  3 19:41:13 2006
@@ -13,20 +13,22 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
-using InputStream = Lucene.Net.Store.InputStream;
+using IndexInput = Lucene.Net.Store.IndexInput;
+
 namespace Lucene.Net.Index
 {
 	
 	sealed class SegmentTermPositions : SegmentTermDocs, TermPositions
 	{
-		private InputStream proxStream;
+		private IndexInput proxStream;
 		private int proxCount;
 		private int position;
 		
-		internal SegmentTermPositions(SegmentReader p):base(p)
+		internal SegmentTermPositions(SegmentReader p) : base(p)
 		{
-			this.proxStream = (InputStream) parent.proxStream.Clone();
+			this.proxStream = (IndexInput) parent.proxStream.Clone();
 		}
 		
 		internal override void  Seek(TermInfo ti)
@@ -78,7 +80,7 @@
 		}
 		
 		
-		/// <summary>Called by base.SkipTo(). </summary>
+		/// <summary>Called by super.skipTo(). </summary>
 		protected internal override void  SkipProx(long proxPointer)
 		{
 			proxStream.Seek(proxPointer);

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermVector.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentTermVector.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermVector.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermVector.cs Sat Jun  3 19:41:13 2006
@@ -13,12 +13,14 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
+
 namespace Lucene.Net.Index
 {
 	
 	
-	class SegmentTermVector : TermFreqVector
+	public class SegmentTermVector : TermFreqVector
 	{
 		private System.String field;
 		private System.String[] terms;
@@ -32,7 +34,7 @@
 		}
 		
 		/// <summary> </summary>
-		/// <returns> The number of the Field this vector is associated with
+		/// <returns> The number of the field this vector is associated with
 		/// </returns>
 		public virtual System.String GetField()
 		{
@@ -44,13 +46,17 @@
 			System.Text.StringBuilder sb = new System.Text.StringBuilder();
 			sb.Append('{');
 			sb.Append(field).Append(": ");
-			for (int i = 0; i < terms.Length; i++)
+			if (terms != null)
 			{
-				if (i > 0)
-					sb.Append(", ");
-				sb.Append(terms[i]).Append('/').Append(termFreqs[i]);
+				for (int i = 0; i < terms.Length; i++)
+				{
+					if (i > 0)
+						sb.Append(", ");
+					sb.Append(terms[i]).Append('/').Append(termFreqs[i]);
+				}
 			}
 			sb.Append('}');
+			
 			return sb.ToString();
 		}
 		
@@ -71,6 +77,8 @@
 		
 		public virtual int IndexOf(System.String termText)
 		{
+			if (terms == null)
+				return - 1;
 			int res = System.Array.BinarySearch(terms, termText);
 			return res >= 0?res:- 1;
 		}
@@ -86,7 +94,7 @@
 			
 			for (int i = 0; i < len; i++)
 			{
-				res[i] = IndexOf(termNumbers[i]);
+				res[i] = IndexOf(termNumbers[start + i]);
 			}
 			return res;
 		}

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/Term.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/Term.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/Term.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/Term.cs Sat Jun  3 19:41:13 2006
@@ -13,32 +13,36 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
+
 namespace Lucene.Net.Index
 {
-    /// <summary>A Term represents a word from text.  This is the unit of search.  It is
-    /// composed of two elements, the text of the word, as a string, and the name of
-    /// the Field that the text occured in, an interned string.
-    /// Note that terms may represent more than words from text fields, but also
-    /// things like dates, email addresses, urls, etc.  
-    /// </summary>
-    [Serializable]
+	
+	/// <summary>A Term represents a word from text.  This is the unit of search.  It is
+	/// composed of two elements, the text of the word, as a string, and the name of
+	/// the field that the text occurred in, an interned string.
+	/// Note that terms may represent more than words from text fields, but also
+	/// things like dates, email addresses, urls, etc.  
+	/// </summary>
+	
+	[Serializable]
 	public sealed class Term : System.IComparable
 	{
 		internal System.String field;
 		public /*internal*/ System.String text;
 		
-		/// <summary>Constructs a Term with the given Field and text. </summary>
+		/// <summary>Constructs a Term with the given field and text. </summary>
 		public Term(System.String fld, System.String txt) : this(fld, txt, true)
 		{
 		}
 		internal Term(System.String fld, System.String txt, bool intern)
 		{
-			field = intern ? String.Intern(fld) : fld; // Field names are interned
+			field = intern ? String.Intern(fld) : fld; // field names are interned
 			text = txt; // unless already known to be
 		}
 		
-		/// <summary>Returns the Field of this term, an interned string.   The Field indicates
+		/// <summary>Returns the field of this term, an interned string.   The field indicates
 		/// the part of a document which this term came from. 
 		/// </summary>
 		public System.String Field()
@@ -55,10 +59,22 @@
 			return text;
 		}
 		
+		/// <summary> Optimized construction of new Terms by reusing same field as this Term
+		/// - avoids field.intern() overhead 
+		/// </summary>
+		/// <param name="text">The text of the new term (field is implicitly same as this Term instance)
+		/// </param>
+		/// <returns> A new Term
+		/// </returns>
+		public Term CreateTerm(System.String text)
+		{
+			return new Term(field, text, false);
+		}
+		
 		/// <summary>Compares two terms, returning true iff they have the same
-		/// Field and text. 
+		/// field and text. 
 		/// </summary>
-		public  override bool Equals(System.Object o)
+		public override bool Equals(System.Object o)
 		{
 			if (o == null)
 				return false;
@@ -66,7 +82,7 @@
 			return field == other.field && text.Equals(other.text);
 		}
 		
-		/// <summary>Combines the hashCode() of the Field and the text. </summary>
+		/// <summary>Combines the hashCode() of the field and the text. </summary>
 		public override int GetHashCode()
 		{
 			return field.GetHashCode() + text.GetHashCode();
@@ -77,10 +93,10 @@
 			return CompareTo((Term) other);
 		}
 		
-		/// <summary>Compares two terms, returning an integer which is less than zero iff this
-		/// term belongs after the argument, equal zero iff this term is equal to the
-		/// argument, and greater than zero iff this term belongs after the argument.
-		/// The ordering of terms is first by Field, then by text.
+		/// <summary>Compares two terms, returning a negative integer if this
+		/// term belongs before the argument, zero if this term is equal to the
+		/// argument, and a positive integer if this term belongs after the argument.
+		/// The ordering of terms is first by field, then by text.
 		/// </summary>
 		public int CompareTo(Term other)
 		{
@@ -91,7 +107,7 @@
 				return String.CompareOrdinal(field, other.field);
 		}
 		
-		/// <summary>Resets the Field and text of a Term. </summary>
+		/// <summary>Resets the field and text of a Term. </summary>
 		internal void  Set(System.String fld, System.String txt)
 		{
 			field = fld;
@@ -105,12 +121,13 @@
 		
 		private void  ReadObject(System.IO.BinaryReader in_Renamed)
 		{
-			// This function is private and is never been called, so this may not be a port issue.
-			// in_Renamed.defaultReadObject();    >>    'java.io.ObjectInputStream.defaultReadObject()'     // {{Aroush-1.4.3}}
+			// This function is private and is never been called, so this may not be a port issue.          // {{Aroush-1.4.3}}
+            // 'java.io.ObjectInputStream.defaultReadObject' was not converted                              // {{Aroush-1.4.3}}
+			// in_Renamed.defaultReadObject();                                                              // {{Aroush-1.4.3}}
 			field = String.Intern(field);
 		}
 		
-        // {{Aroush-1.4.3: Or is this what we want (vs. the above)?!!
+        // {{Aroush-1.4.3: or is this method is what we want (vs. the above)?!!
         private void GetObjectData(System.Runtime.Serialization.SerializationInfo info, System.Runtime.Serialization.StreamingContext context)
         {
             info.AddValue("field", field);

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermBuffer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermBuffer.cs?rev=411501&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermBuffer.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermBuffer.cs Sat Jun  3 19:41:13 2006
@@ -0,0 +1,163 @@
+/*
+ * Copyright 2004 The Apache Software Foundation
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using IndexInput = Lucene.Net.Store.IndexInput;
+
+namespace Lucene.Net.Index
+{
+	
+	/// <summary>Mutable, reusable holder for a term: a field name plus the term
+	/// text kept in a growable char buffer.  The <code>Term</code> built from the
+	/// buffer is cached until the contents change.
+	/// </summary>
+	sealed class TermBuffer : System.ICloneable
+	{
+		private static readonly char[] NO_CHARS = new char[0];
+		
+		private System.String field;
+		private char[] text = NO_CHARS;
+		private int textLength;
+		private Term term; // cached
+		
+		/// <summary>Compares two buffers, first by field, then by text.</summary>
+		/// <returns> A negative value, zero or a positive value as this buffer sorts
+		/// before, equal to or after <code>other</code>.
+		/// </returns>
+		public int CompareTo(TermBuffer other)
+		{
+			if ((System.Object) field == (System.Object) other.field)
+				// field names are expected to be interned, so reference equality suffices
+				return CompareChars(text, textLength, other.text, other.textLength);
+			else
+				return String.CompareOrdinal(field, other.field);
+		}
+		
+		/// <summary>Compares the first <code>len1</code> chars of <code>v1</code> with the
+		/// first <code>len2</code> chars of <code>v2</code> by char value; when one is a
+		/// prefix of the other, the shorter one sorts first.
+		/// </summary>
+		private static int CompareChars(char[] v1, int len1, char[] v2, int len2)
+		{
+			int end = System.Math.Min(len1, len2);
+			for (int k = 0; k < end; k++)
+			{
+				char c1 = v1[k];
+				char c2 = v2[k];
+				if (c1 != c2)
+				{
+					return c1 - c2;
+				}
+			}
+			return len1 - len2;
+		}
+		
+		/// <summary>Sets the logical text length, growing the buffer when needed.
+		/// The chars that were valid before the call are kept.
+		/// </summary>
+		private void  SetTextLength(int newLength)
+		{
+			if (text.Length < newLength)
+			{
+				char[] newText = new char[newLength];
+				Array.Copy(text, 0, newText, 0, textLength);
+				text = newText;
+			}
+			textLength = newLength;
+		}
+		
+		/// <summary>Reads the next term from <code>input</code>: a prefix length, a
+		/// suffix length, the suffix chars and a field number.  The first
+		/// <code>start</code> chars already in the buffer are kept as the prefix.
+		/// </summary>
+		public void  Read(IndexInput input, FieldInfos fieldInfos)
+		{
+			this.term = null; // invalidate cache
+			int start = input.ReadVInt();
+			int length = input.ReadVInt();
+			int totalLength = start + length;
+			SetTextLength(totalLength);
+			input.ReadChars(this.text, start, length);
+			this.field = fieldInfos.FieldName(input.ReadVInt());
+		}
+		
+		/// <summary>Copies the field and text of <code>term</code> into this buffer and
+		/// caches <code>term</code>; a null argument resets the buffer.
+		/// </summary>
+		public void  Set(Term term)
+		{
+			if (term == null)
+			{
+				Reset();
+				return ;
+			}
+			
+			// copy text into the buffer
+			System.String sourceString = term.Text();
+			SetTextLength(sourceString.Length);
+			sourceString.CopyTo(0, text, 0, sourceString.Length);
+			
+			this.field = term.Field();
+			this.term = term;
+		}
+		
+		/// <summary>Makes this buffer a copy of <code>other</code>, including its cached term.</summary>
+		public void  Set(TermBuffer other)
+		{
+			SetTextLength(other.textLength);
+			Array.Copy(other.text, 0, text, 0, textLength);
+			
+			this.field = other.field;
+			this.term = other.term;
+		}
+		
+		/// <summary>Marks the buffer as unset; the char array is kept for reuse.</summary>
+		public void  Reset()
+		{
+			this.field = null;
+			this.textLength = 0;
+			this.term = null;
+		}
+		
+		/// <summary>Returns the buffer contents as a <code>Term</code>, or null when the
+		/// buffer is unset.  The result is cached until the buffer changes.
+		/// </summary>
+		public Term ToTerm()
+		{
+			if (field == null)
+				// unset
+				return null;
+			
+			if (term == null)
+				term = new Term(field, new System.String(text, 0, textLength), false);
+			
+			return term;
+		}
+		
+		/// <summary>Returns a copy with its own char buffer.</summary>
+		public System.Object Clone()
+		{
+			// MemberwiseClone copies field, textLength and the cached term; only the
+			// char buffer must be duplicated so that the two instances do not share it.
+			TermBuffer clone = (TermBuffer) base.MemberwiseClone();
+			
+			clone.text = new char[text.Length];
+			Array.Copy(text, 0, clone.text, 0, textLength);
+			
+			return clone;
+		}
+	}
+}
\ No newline at end of file

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermDocs.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermDocs.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermDocs.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermDocs.cs Sat Jun  3 19:41:13 2006
@@ -13,7 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
+
 namespace Lucene.Net.Index
 {
 	
@@ -23,7 +25,7 @@
 	/// the number of times the term occurred in each document.  <p> The pairs are
 	/// ordered by document number.
 	/// </summary>
-	/// <seealso cref="IndexReader#termDocs">
+	/// <seealso cref="IndexReader.TermDocs()">
 	/// </seealso>
 	
 	public interface TermDocs

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermEnum.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermEnum.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermEnum.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermEnum.cs Sat Jun  3 19:41:13 2006
@@ -13,7 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
+
 namespace Lucene.Net.Index
 {
 	

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermFreqVector.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermFreqVector.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermFreqVector.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermFreqVector.cs Sat Jun  3 19:41:13 2006
@@ -13,17 +13,19 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
+
 namespace Lucene.Net.Index
 {
 	
 	/// <summary>Provides access to stored term vector of 
-	/// a document Field.
+	/// a document field.
 	/// </summary>
 	public interface TermFreqVector
 	{
 		/// <summary> </summary>
-		/// <returns> The Field this vector is associated with.
+		/// <returns> The field this vector is associated with.
 		/// 
 		/// </returns>
 		System.String GetField();
@@ -40,7 +42,7 @@
 		/// <summary>Array of term frequencies. Locations of the array correspond one to one
 		/// to the terms in the array obtained from <code>getTerms</code>
 		/// method. Each location in the array contains the number of times this
-		/// term occurs in the document or the document Field.
+		/// term occurs in the document or the document field.
 		/// </summary>
 		int[] GetTermFrequencies();
 		

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermInfo.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermInfo.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermInfo.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermInfo.cs Sat Jun  3 19:41:13 2006
@@ -13,11 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
+
 namespace Lucene.Net.Index
 {
-    /// <summary>A TermInfo is the record of information stored for a term.</summary>
-    sealed public class TermInfo
+	
+	/// <summary>A TermInfo is the record of information stored for a term.</summary>
+	
+	public sealed class TermInfo
 	{
 		/// <summary>The number of documents which contain the term. </summary>
 		public /*internal*/ int docFreq = 0;

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermInfosReader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermInfosReader.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermInfosReader.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermInfosReader.cs Sat Jun  3 19:41:13 2006
@@ -13,8 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
 using Directory = Lucene.Net.Store.Directory;
+
 namespace Lucene.Net.Index
 {
 	
@@ -23,7 +25,7 @@
 	/// set.  
 	/// </summary>
 	
-	sealed public class TermInfosReader
+	public sealed class TermInfosReader
 	{
 		private Directory directory;
 		private System.String segment;
@@ -33,15 +35,34 @@
 		private SegmentTermEnum origEnum;
 		private long size;
 		
+		private Term[] indexTerms = null;
+		private TermInfo[] indexInfos;
+		private long[] indexPointers;
+		
+		private SegmentTermEnum indexEnum;
+		
 		public /*internal*/ TermInfosReader(Directory dir, System.String seg, FieldInfos fis)
 		{
 			directory = dir;
 			segment = seg;
 			fieldInfos = fis;
 			
-			origEnum = new SegmentTermEnum(directory.OpenFile(segment + ".tis"), fieldInfos, false);
+			origEnum = new SegmentTermEnum(directory.OpenInput(segment + ".tis"), fieldInfos, false);
 			size = origEnum.size;
-			ReadIndex();
+			
+			indexEnum = new SegmentTermEnum(directory.OpenInput(segment + ".tii"), fieldInfos, true);
+		}
+		
+		~TermInfosReader()
+		{
+            try
+            {
+                // patch for pre-1.4.2 JVMs, whose ThreadLocals leak
+                System.Threading.Thread.SetData(enumerators, null);     // {{Aroush-1.9}} is this required for .NET ?!
+            }
+            catch (Exception)
+            {
+            }
 		}
 		
 		public int GetSkipInterval()
@@ -53,6 +74,8 @@
 		{
 			if (origEnum != null)
 				origEnum.Close();
+			if (indexEnum != null)
+				indexEnum.Close();
 		}
 		
 		/// <summary>Returns the number of term/value pairs in the set. </summary>
@@ -72,31 +95,33 @@
 			return termEnum;
 		}
 		
-		internal Term[] indexTerms = null;
-		internal TermInfo[] indexInfos;
-		internal long[] indexPointers;
-		
-		private void  ReadIndex()
+		private void  EnsureIndexIsRead()
 		{
-			SegmentTermEnum indexEnum = new SegmentTermEnum(directory.OpenFile(segment + ".tii"), fieldInfos, true);
-			try
+			lock (this)
 			{
-				int indexSize = (int) indexEnum.size;
-				
-				indexTerms = new Term[indexSize];
-				indexInfos = new TermInfo[indexSize];
-				indexPointers = new long[indexSize];
-				
-				for (int i = 0; indexEnum.Next(); i++)
+				if (indexTerms != null)
+				// index already read
+					return ; // do nothing
+				try
 				{
-					indexTerms[i] = indexEnum.Term();
-					indexInfos[i] = indexEnum.TermInfo();
-					indexPointers[i] = indexEnum.indexPointer;
+					int indexSize = (int) indexEnum.size; // otherwise read index
+					
+					indexTerms = new Term[indexSize];
+					indexInfos = new TermInfo[indexSize];
+					indexPointers = new long[indexSize];
+					
+					for (int i = 0; indexEnum.Next(); i++)
+					{
+						indexTerms[i] = indexEnum.Term();
+						indexInfos[i] = indexEnum.TermInfo();
+						indexPointers[i] = indexEnum.indexPointer;
+					}
+				}
+				finally
+				{
+					indexEnum.Close();
+					indexEnum = null;
 				}
-			}
-			finally
-			{
-				indexEnum.Close();
 			}
 		}
 		
@@ -131,9 +156,11 @@
 			if (size == 0)
 				return null;
 			
+			EnsureIndexIsRead();
+			
 			// optimize sequential access: first try scanning cached enum w/o seeking
 			SegmentTermEnum enumerator = GetEnum();
-			if (enumerator.Term() != null && ((enumerator.prev != null && term.CompareTo(enumerator.prev) > 0) || term.CompareTo(enumerator.Term()) >= 0))
+			if (enumerator.Term() != null && ((enumerator.Prev() != null && term.CompareTo(enumerator.Prev()) > 0) || term.CompareTo(enumerator.Term()) >= 0))
 			{
 				int enumOffset = (int) (enumerator.position / enumerator.indexInterval) + 1;
 				if (indexTerms.Length == enumOffset || term.CompareTo(indexTerms[enumOffset]) < 0)
@@ -149,9 +176,7 @@
 		private TermInfo ScanEnum(Term term)
 		{
 			SegmentTermEnum enumerator = GetEnum();
-			while (term.CompareTo(enumerator.Term()) > 0 && enumerator.Next())
-			{
-			}
+			enumerator.ScanTo(term);
 			if (enumerator.Term() != null && term.CompareTo(enumerator.Term()) == 0)
 				return enumerator.TermInfo();
 			else
@@ -188,6 +213,7 @@
 			if (size == 0)
 				return - 1;
 			
+			EnsureIndexIsRead();
 			int indexOffset = GetIndexOffset(term);
 			SeekEnum(indexOffset);
 			

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermInfosWriter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermInfosWriter.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermInfosWriter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermInfosWriter.cs Sat Jun  3 19:41:13 2006
@@ -13,10 +13,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
 using Directory = Lucene.Net.Store.Directory;
-using OutputStream = Lucene.Net.Store.OutputStream;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
 using StringHelper = Lucene.Net.Util.StringHelper;
+
 namespace Lucene.Net.Index
 {
 	
@@ -24,13 +26,13 @@
 	/// Directory.  A TermInfos can be written once, in order.  
 	/// </summary>
 	
-	sealed public class TermInfosWriter
+	public sealed class TermInfosWriter
 	{
 		/// <summary>The file format version, a negative number. </summary>
 		public const int FORMAT = - 2;
 		
 		private FieldInfos fieldInfos;
-		private OutputStream output;
+		private IndexOutput output;
 		private Term lastTerm = new Term("", "");
 		private TermInfo lastTi = new TermInfo();
 		private long size = 0;
@@ -63,23 +65,24 @@
 		
 		private TermInfosWriter other = null;
 		
-		public /*internal*/ TermInfosWriter(Directory directory, System.String segment, FieldInfos fis)
+		public /*internal*/ TermInfosWriter(Directory directory, System.String segment, FieldInfos fis, int interval)
 		{
-			Initialize(directory, segment, fis, false);
-			other = new TermInfosWriter(directory, segment, fis, true);
+			Initialize(directory, segment, fis, interval, false);
+			other = new TermInfosWriter(directory, segment, fis, interval, true);
 			other.other = this;
 		}
 		
-		private TermInfosWriter(Directory directory, System.String segment, FieldInfos fis, bool isIndex)
+		private TermInfosWriter(Directory directory, System.String segment, FieldInfos fis, int interval, bool isIndex)
 		{
-			Initialize(directory, segment, fis, isIndex);
+			Initialize(directory, segment, fis, interval, isIndex);
 		}
 		
-		private void  Initialize(Directory directory, System.String segment, FieldInfos fis, bool isi)
+		private void  Initialize(Directory directory, System.String segment, FieldInfos fis, int interval, bool isi)
 		{
+			indexInterval = interval;
 			fieldInfos = fis;
 			isIndex = isi;
-			output = directory.CreateFile(segment + (isIndex?".tii":".tis"));
+			output = directory.CreateOutput(segment + (isIndex ? ".tii" : ".tis"));
 			output.WriteInt(FORMAT); // write format
 			output.WriteLong(0); // leave space for size
 			output.WriteInt(indexInterval); // write indexInterval
@@ -131,7 +134,7 @@
 			output.WriteVInt(length); // write delta length
 			output.WriteChars(term.text, start, length); // write delta chars
 			
-			output.WriteVInt(fieldInfos.FieldNumber(term.field)); // write Field num
+			output.WriteVInt(fieldInfos.FieldNumber(term.field)); // write field num
 			
 			lastTerm = term;
 		}

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermPositionVector.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermPositionVector.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermPositionVector.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermPositionVector.cs Sat Jun  3 19:41:13 2006
@@ -13,20 +13,37 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
+
 namespace Lucene.Net.Index
 {
 	
 	/// <summary>Extends <code>TermFreqVector</code> to provide additional information about
-	/// positions in which each of the terms is found.
+	/// positions in which each of the terms is found. A TermPositionVector does not
+	/// necessarily contain both positions and offsets, but at least one of these arrays exists.
 	/// </summary>
-	public interface TermPositionVector:TermFreqVector
+	public interface TermPositionVector : TermFreqVector
 	{
 		
 		/// <summary>Returns an array of positions in which the term is found.
 		/// Terms are identified by the index at which its number appears in the
-		/// term number array obtained from <code>getTermNumbers</code> method.
+		/// term String array obtained from the <code>indexOf</code> method.
+		/// May return null if positions have not been stored.
 		/// </summary>
 		int[] GetTermPositions(int index);
+		
+		/// <summary> Returns an array of TermVectorOffsetInfo in which the term is found.
+		/// May return null if offsets have not been stored.
+		/// 
+		/// </summary>
+		/// <seealso cref="Lucene.Net.Analysis.Token">
+		/// 
+		/// </seealso>
+		/// <param name="index">The position in the array to get the offsets from
+		/// </param>
+		/// <returns> An array of TermVectorOffsetInfo objects (possibly empty), or null if offsets have not been stored
+		/// </returns>
+		TermVectorOffsetInfo[] GetOffsets(int index);
 	}
 }

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermPositions.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermPositions.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermPositions.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermPositions.cs Sat Jun  3 19:41:13 2006
@@ -13,7 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
+
 namespace Lucene.Net.Index
 {
 	
@@ -23,10 +25,10 @@
 	/// positions of each occurrence of a term in a document.
 	/// 
 	/// </summary>
-	/// <seealso cref="IndexReader#termPositions">
+	/// <seealso cref="IndexReader.TermPositions()">
 	/// </seealso>
 	
-	public interface TermPositions:TermDocs
+	public interface TermPositions : TermDocs
 	{
 		/// <summary>Returns next position in the current document.  It is an error to call
 		/// this more than {@link #Freq()} times

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorOffsetInfo.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermVectorOffsetInfo.cs?rev=411501&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorOffsetInfo.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorOffsetInfo.cs Sat Jun  3 19:41:13 2006
@@ -0,0 +1,99 @@
+/*
+ * Copyright 2004 The Apache Software Foundation
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+	
+	/// <summary>Mutable pair of a start offset and an end offset.  Two instances
+	/// are equal when both offsets are equal.
+	/// </summary>
+	public class TermVectorOffsetInfo
+	{
+		/// <summary>Shared zero-length array.</summary>
+		public static readonly TermVectorOffsetInfo[] EMPTY_OFFSET_INFO = new TermVectorOffsetInfo[0];
+		private int startOffset;
+		private int endOffset;
+		
+		/// <summary>Creates an instance with both offsets set to 0.</summary>
+		public TermVectorOffsetInfo()
+		{
+		}
+		
+		/// <summary>Creates an instance with the given offsets.</summary>
+		public TermVectorOffsetInfo(int startOffset, int endOffset)
+		{
+			this.endOffset = endOffset;
+			this.startOffset = startOffset;
+		}
+		
+		/// <summary>Returns the end offset.</summary>
+		public virtual int GetEndOffset()
+		{
+			return endOffset;
+		}
+		
+		/// <summary>Sets the end offset.</summary>
+		public virtual void  SetEndOffset(int endOffset)
+		{
+			this.endOffset = endOffset;
+		}
+		
+		/// <summary>Returns the start offset.</summary>
+		public virtual int GetStartOffset()
+		{
+			return startOffset;
+		}
+		
+		/// <summary>Sets the start offset.</summary>
+		public virtual void  SetStartOffset(int startOffset)
+		{
+			this.startOffset = startOffset;
+		}
+		
+		/// <summary>Returns true when <code>o</code> is a TermVectorOffsetInfo with the
+		/// same start and end offsets.
+		/// </summary>
+		public override bool Equals(System.Object o)
+		{
+			if (this == o)
+				return true;
+			if (!(o is TermVectorOffsetInfo))
+				return false;
+			
+			TermVectorOffsetInfo termVectorOffsetInfo = (TermVectorOffsetInfo) o;
+			
+			if (endOffset != termVectorOffsetInfo.endOffset)
+				return false;
+			if (startOffset != termVectorOffsetInfo.startOffset)
+				return false;
+			
+			return true;
+		}
+		
+		/// <summary>Combines both offsets into a hash code consistent with Equals.</summary>
+		public override int GetHashCode()
+		{
+			// The multiplication relies on silent int wrap-around; "unchecked" keeps
+			// that behavior even when the assembly is compiled with overflow checks.
+			unchecked
+			{
+				return 29 * startOffset + endOffset;
+			}
+		}
+	}
+}
\ No newline at end of file

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsReader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermVectorsReader.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsReader.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsReader.cs Sat Jun  3 19:41:13 2006
@@ -13,59 +13,92 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
 using Directory = Lucene.Net.Store.Directory;
-using InputStream = Lucene.Net.Store.InputStream;
+using IndexInput = Lucene.Net.Store.IndexInput;
+
 namespace Lucene.Net.Index
 {
 	
-	/// <summary>TODO: relax synchro!</summary>
-	public class TermVectorsReader
+	/// <version>  $Id: TermVectorsReader.java 170226 2005-05-15 15:04:39Z bmesser $
+	/// </version>
+	public class TermVectorsReader : System.ICloneable
 	{
 		private FieldInfos fieldInfos;
 		
-		private InputStream tvx;
-		private InputStream tvd;
-		private InputStream tvf;
+		private IndexInput tvx;
+		private IndexInput tvd;
+		private IndexInput tvf;
 		private int size;
 		
+		private int tvdFormat;
+		private int tvfFormat;
+		
 		public /*internal*/ TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos)
 		{
 			if (d.FileExists(segment + TermVectorsWriter.TVX_EXTENSION))
 			{
-				tvx = d.OpenFile(segment + TermVectorsWriter.TVX_EXTENSION);
+				tvx = d.OpenInput(segment + TermVectorsWriter.TVX_EXTENSION);
 				CheckValidFormat(tvx);
-				tvd = d.OpenFile(segment + TermVectorsWriter.TVD_EXTENSION);
-				CheckValidFormat(tvd);
-				tvf = d.OpenFile(segment + TermVectorsWriter.TVF_EXTENSION);
-				CheckValidFormat(tvf);
+				tvd = d.OpenInput(segment + TermVectorsWriter.TVD_EXTENSION);
+				tvdFormat = CheckValidFormat(tvd);
+				tvf = d.OpenInput(segment + TermVectorsWriter.TVF_EXTENSION);
+				tvfFormat = CheckValidFormat(tvf);
 				size = (int) tvx.Length() / 8;
 			}
 			
 			this.fieldInfos = fieldInfos;
 		}
 		
-		private void  CheckValidFormat(InputStream in_Renamed)
+		private int CheckValidFormat(IndexInput in_Renamed)
 		{
 			int format = in_Renamed.ReadInt();
 			if (format > TermVectorsWriter.FORMAT_VERSION)
 			{
 				throw new System.IO.IOException("Incompatible format version: " + format + " expected " + TermVectorsWriter.FORMAT_VERSION + " or less");
 			}
+			return format;
 		}
 		
 		internal virtual void  Close()
 		{
-			lock (this)
-			{
-				// why don't we trap the exception and at least make sure that
-				// all streams that we can close are closed?
-				if (tvx != null)
+			// make all effort to close up. Keep the first exception
+			// and throw it as a new one.
+			System.IO.IOException keep = null;
+			if (tvx != null)
+				try
+				{
 					tvx.Close();
-				if (tvd != null)
+				}
+				catch (System.IO.IOException e)
+				{
+					if (keep == null)
+						keep = e;
+				}
+			if (tvd != null)
+				try
+				{
 					tvd.Close();
-				if (tvf != null)
+				}
+				catch (System.IO.IOException e)
+				{
+					if (keep == null)
+						keep = e;
+				}
+			if (tvf != null)
+				try
+				{
 					tvf.Close();
+				}
+				catch (System.IO.IOException e)
+				{
+					if (keep == null)
+						keep = e;
+				}
+			if (keep != null)
+			{
+				throw new System.IO.IOException(keep.StackTrace);
 			}
 		}
 		
@@ -77,130 +110,125 @@
 			return size;
 		}
 		
-		/// <summary> Retrieve the term vector for the given document and Field</summary>
+		/// <summary> Retrieve the term vector for the given document and field</summary>
 		/// <param name="docNum">The document number to retrieve the vector for
 		/// </param>
-		/// <param name="Field">The Field within the document to retrieve
+		/// <param name="field">The field within the document to retrieve
 		/// </param>
-		/// <returns> The TermFreqVector for the document and Field or null
+		/// <returns> The TermFreqVector for the document and field or null if there is no termVector for this field.
 		/// </returns>
+		/// <throws>  IOException if there is an error reading the term vector files </throws>
 		public /*internal*/ virtual TermFreqVector Get(int docNum, System.String field)
 		{
-			lock (this)
+			// Check if no term vectors are available for this segment at all
+			int fieldNumber = fieldInfos.FieldNumber(field);
+			TermFreqVector result = null;
+			if (tvx != null)
 			{
-				// Check if no term vectors are available for this segment at all
-				int fieldNumber = fieldInfos.FieldNumber(field);
-				TermFreqVector result = null;
-				if (tvx != null)
+				//We need to account for the FORMAT_SIZE when seeking in the tvx
+				//We don't need to do this in other seeks because we already have the
+				// file pointer
+				//that was written in another file
+				tvx.Seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
+				//System.out.println("TVX Pointer: " + tvx.getFilePointer());
+				long position = tvx.ReadLong();
+				
+				tvd.Seek(position);
+				int fieldCount = tvd.ReadVInt();
+				//System.out.println("Num Fields: " + fieldCount);
+				// There are only a few fields per document. We opt for a full scan
+				// rather than requiring that they be ordered. We need to read through
+				// all of the fields anyway to get to the tvf pointers.
+				int number = 0;
+				int found = - 1;
+				for (int i = 0; i < fieldCount; i++)
 				{
-					try
-					{
-						//We need to account for the FORMAT_SIZE at when seeking in the tvx
-						//We don't need to do this in other seeks because we already have the file pointer
-						//that was written in another file
-						tvx.Seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
-						//System.out.println("TVX Pointer: " + tvx.getFilePointer());
-						long position = tvx.ReadLong();
-						
-						tvd.Seek(position);
-						int fieldCount = tvd.ReadVInt();
-						//System.out.println("Num Fields: " + fieldCount);
-						// There are only a few fields per document. We opt for a full scan
-						// rather then requiring that they be ordered. We need to read through
-						// all of the fields anyway to get to the tvf pointers.
-						int number = 0;
-						int found = - 1;
-						for (int i = 0; i < fieldCount; i++)
-						{
-							number += tvd.ReadVInt();
-							if (number == fieldNumber)
-								found = i;
-						}
-						
-						// This Field, although valid in the segment, was not found in this document
-						if (found != - 1)
-						{
-							// Compute position in the tvf file
-							position = 0;
-							for (int i = 0; i <= found; i++)
-							{
-								position += tvd.ReadVLong();
-							}
-							result = ReadTermVector(field, position);
-						}
-						else
-						{
-							//System.out.println("Field not found");
-						}
-					}
-					catch (System.Exception e)
-					{
-						//System.Console.Out.WriteLine(e.StackTrace);
-					}
+					if (tvdFormat == TermVectorsWriter.FORMAT_VERSION)
+						number = tvd.ReadVInt();
+					else
+						number += tvd.ReadVInt();
+					
+					if (number == fieldNumber)
+						found = i;
+				}
+				
+				// This field, although valid in the segment, was not found in this
+				// document
+				if (found != - 1)
+				{
+					// Compute position in the tvf file
+					position = 0;
+					for (int i = 0; i <= found; i++)
+						position += tvd.ReadVLong();
+					
+					result = ReadTermVector(field, position);
 				}
 				else
 				{
-					System.Console.Out.WriteLine("No tvx file");
+					//System.out.println("Field not found");
 				}
-				return result;
 			}
+			else
+			{
+				//System.out.println("No tvx file");
+			}
+			return result;
 		}
 		
-		
-		/// <summary>Return all term vectors stored for this document or null if the could not be read in. </summary>
-		internal virtual TermFreqVector[] Get(int docNum)
+		/// <summary> Return all term vectors stored for this document or null if they could not be read in.
+		/// 
+		/// </summary>
+		/// <param name="docNum">The document number to retrieve the vector for
+		/// </param>
+		/// <returns> All term frequency vectors
+		/// </returns>
+		/// <throws>  IOException if there is an error reading the term vector files  </throws>
+		public /*internal*/ virtual TermFreqVector[] Get(int docNum)
 		{
-			lock (this)
+			TermFreqVector[] result = null;
+			// Check if no term vectors are available for this segment at all
+			if (tvx != null)
 			{
-				TermFreqVector[] result = null;
-				// Check if no term vectors are available for this segment at all
-				if (tvx != null)
+				//We need to offset by FORMAT_SIZE when seeking in the tvx
+				tvx.Seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
+				long position = tvx.ReadLong();
+				
+				tvd.Seek(position);
+				int fieldCount = tvd.ReadVInt();
+				
+				// Only read vectors if at least one field is vectorized for this document
+				if (fieldCount != 0)
 				{
-					try
+					int number = 0;
+					System.String[] fields = new System.String[fieldCount];
+					
+					for (int i = 0; i < fieldCount; i++)
 					{
-						//We need to offset by
-						tvx.Seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
-						long position = tvx.ReadLong();
-						
-						tvd.Seek(position);
-						int fieldCount = tvd.ReadVInt();
+						if (tvdFormat == TermVectorsWriter.FORMAT_VERSION)
+							number = tvd.ReadVInt();
+						else
+							number += tvd.ReadVInt();
 						
-						// No fields are vectorized for this document
-						if (fieldCount != 0)
-						{
-							int number = 0;
-							System.String[] fields = new System.String[fieldCount];
-							
-							for (int i = 0; i < fieldCount; i++)
-							{
-								number += tvd.ReadVInt();
-								fields[i] = fieldInfos.FieldName(number);
-							}
-							
-							// Compute position in the tvf file
-							position = 0;
-							long[] tvfPointers = new long[fieldCount];
-							for (int i = 0; i < fieldCount; i++)
-							{
-								position += tvd.ReadVLong();
-								tvfPointers[i] = position;
-							}
-							
-							result = ReadTermVectors(fields, tvfPointers);
-						}
+						fields[i] = fieldInfos.FieldName(number);
 					}
-					catch (System.IO.IOException e)
+					
+					// Compute position in the tvf file
+					position = 0;
+					long[] tvfPointers = new long[fieldCount];
+					for (int i = 0; i < fieldCount; i++)
 					{
-                        Console.Error.Write(e.StackTrace);
-                        Console.Error.Flush();
-                    }
-				}
-				else
-				{
-					System.Console.Out.WriteLine("No tvx file");
+						position += tvd.ReadVLong();
+						tvfPointers[i] = position;
+					}
+					
+					result = ReadTermVectors(fields, tvfPointers);
 				}
-				return result;
 			}
+			else
+			{
+				//System.out.println("No tvx file");
+			}
+			return result;
 		}
 		
 		
@@ -215,7 +243,7 @@
 		}
 		
 		/// <summary> </summary>
-		/// <param name="fieldNum">The Field to read in
+		/// <param name="field">The field to read in
 		/// </param>
 		/// <param name="tvfPointer">The pointer within the tvf file where we should start reading
 		/// </param>
@@ -231,21 +259,43 @@
 			
 			int numTerms = tvf.ReadVInt();
 			//System.out.println("Num Terms: " + numTerms);
-			// If no terms - return a constant empty termvector
+			// If no terms - return a constant empty termvector. However, this should never occur!
 			if (numTerms == 0)
 				return new SegmentTermVector(field, null, null);
 			
-			int length = numTerms + tvf.ReadVInt();
+			bool storePositions;
+			bool storeOffsets;
 			
-			System.String[] terms = new System.String[numTerms];
+			if (tvfFormat == TermVectorsWriter.FORMAT_VERSION)
+			{
+				byte bits = tvf.ReadByte();
+				storePositions = (bits & TermVectorsWriter.STORE_POSITIONS_WITH_TERMVECTOR) != 0;
+				storeOffsets = (bits & TermVectorsWriter.STORE_OFFSET_WITH_TERMVECTOR) != 0;
+			}
+			else
+			{
+				tvf.ReadVInt();
+				storePositions = false;
+				storeOffsets = false;
+			}
 			
+			System.String[] terms = new System.String[numTerms];
 			int[] termFreqs = new int[numTerms];
 			
+			//  we may not need these, but declare them
+			int[][] positions = null;
+			TermVectorOffsetInfo[][] offsets = null;
+			if (storePositions)
+				positions = new int[numTerms][];
+			if (storeOffsets)
+				offsets = new TermVectorOffsetInfo[numTerms][];
+			
 			int start = 0;
 			int deltaLength = 0;
 			int totalLength = 0;
-			char[] buffer = new char[]{};
-			System.String previousString = "";
+			char[] buffer = new char[10]; // init the buffer with a length of 10 character
+			char[] previousBuffer = new char[]{};
+			
 			for (int i = 0; i < numTerms; i++)
 			{
 				start = tvf.ReadVInt();
@@ -253,18 +303,81 @@
 				totalLength = start + deltaLength;
 				if (buffer.Length < totalLength)
 				{
+					// increase buffer
+					buffer = null; // give a hint to garbage collector
 					buffer = new char[totalLength];
-					for (int j = 0; j < previousString.Length; j++)
-					// copy contents
-						buffer[j] = previousString[j];
+					
+					if (start > 0)
+				    	// just copy if necessary
+						Array.Copy(previousBuffer, 0, buffer, 0, start);
 				}
+				
 				tvf.ReadChars(buffer, start, deltaLength);
 				terms[i] = new System.String(buffer, 0, totalLength);
-				previousString = terms[i];
-				termFreqs[i] = tvf.ReadVInt();
+				previousBuffer = buffer;
+				int freq = tvf.ReadVInt();
+				termFreqs[i] = freq;
+				
+				if (storePositions)
+				{
+					//read in the positions
+					int[] pos = new int[freq];
+					positions[i] = pos;
+					int prevPosition = 0;
+					for (int j = 0; j < freq; j++)
+					{
+						pos[j] = prevPosition + tvf.ReadVInt();
+						prevPosition = pos[j];
+					}
+				}
+				
+				if (storeOffsets)
+				{
+					TermVectorOffsetInfo[] offs = new TermVectorOffsetInfo[freq];
+					offsets[i] = offs;
+					int prevOffset = 0;
+					for (int j = 0; j < freq; j++)
+					{
+						int startOffset = prevOffset + tvf.ReadVInt();
+						int endOffset = startOffset + tvf.ReadVInt();
+						offs[j] = new TermVectorOffsetInfo(startOffset, endOffset);
+						prevOffset = endOffset;
+					}
+				}
+			}
+			
+			SegmentTermVector tv;
+			if (storePositions || storeOffsets)
+			{
+				tv = new SegmentTermPositionVector(field, terms, termFreqs, positions, offsets);
+			}
+			else
+			{
+				tv = new SegmentTermVector(field, terms, termFreqs);
 			}
-			SegmentTermVector tv = new SegmentTermVector(field, terms, termFreqs);
 			return tv;
+		}
+		
+		public virtual System.Object Clone()
+		{
+			
+			if (tvx == null || tvd == null || tvf == null)
+				return null;
+			
+			TermVectorsReader clone = null;
+			try
+			{
+				clone = (TermVectorsReader) base.MemberwiseClone();
+			}
+			catch (System.Exception)
+			{
+			}
+			
+			clone.tvx = (IndexInput) tvx.Clone();
+			clone.tvd = (IndexInput) tvd.Clone();
+			clone.tvf = (IndexInput) tvf.Clone();
+			
+			return clone;
 		}
 	}
 }



Mime
View raw message