lucenenet-commits mailing list archives

From aro...@apache.org
Subject svn commit: r671404 [9/10] - /incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/
Date Wed, 25 Jun 2008 02:52:24 GMT
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentReader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentReader.cs?rev=671404&r1=671403&r2=671404&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentReader.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentReader.cs Tue Jun 24 19:52:22 2008
@@ -16,23 +16,26 @@
  */
 
 using System;
+
 using Document = Lucene.Net.Documents.Document;
 using FieldSelector = Lucene.Net.Documents.FieldSelector;
-using DefaultSimilarity = Lucene.Net.Search.DefaultSimilarity;
+using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput;
 using Directory = Lucene.Net.Store.Directory;
 using IndexInput = Lucene.Net.Store.IndexInput;
 using IndexOutput = Lucene.Net.Store.IndexOutput;
 using BitVector = Lucene.Net.Util.BitVector;
+using DefaultSimilarity = Lucene.Net.Search.DefaultSimilarity;
 
 namespace Lucene.Net.Index
 {
 	
-	/// <version>  $Id: SegmentReader.java 496851 2007-01-16 20:24:52Z mikemccand $
+	/// <version>  $Id: SegmentReader.java 603061 2007-12-10 21:49:41Z gsingers $
 	/// </version>
-	public class SegmentReader : IndexReader
+	public class SegmentReader : DirectoryIndexReader
 	{
 		private System.String segment;
 		private SegmentInfo si;
+		private int readBufferSize;
 		
 		internal FieldInfos fieldInfos;
 		private FieldsReader fieldsReader;
@@ -53,20 +56,17 @@
 		internal IndexInput freqStream;
 		internal IndexInput proxStream;
 		
+		// optionally used for the .nrm file shared by multiple norms
+		private IndexInput singleNormStream;
+		
 		// Compound File Reader when based on a compound file segment
 		internal CompoundFileReader cfsReader = null;
+		internal CompoundFileReader storeCFSReader = null;
+		
+		// indicates the SegmentReader with which the resources are being shared,
+		// in case this is a re-opened reader
+		private SegmentReader referencedSegmentReader = null;
 		
-		public FieldInfos FieldInfos
-		{
-			get {   return fieldInfos;  }
-		}
-
-        public IndexInput ProxStream
-        {
-            get {   return proxStream;  }
-            set {   proxStream = value; }
-        }
-
 		private class Norm
 		{
 			private void  InitBlock(SegmentReader enclosingInstance)
@@ -82,12 +82,39 @@
 				}
 				
 			}
-			public Norm(SegmentReader enclosingInstance, IndexInput in_Renamed, int number, long normSeek)
+			internal volatile int refCount;
+			internal bool useSingleNormStream;
+			
+			public virtual void  IncRef()
+			{
+				lock (this)
+				{
+					System.Diagnostics.Debug.Assert(refCount > 0);
+					refCount++;
+				}
+			}
+			
+			public virtual void  DecRef()
+			{
+				lock (this)
+				{
+					System.Diagnostics.Debug.Assert(refCount > 0);
+					if (refCount == 1)
+					{
+						Close();
+					}
+					refCount--;
+				}
+			}
+			
+			public Norm(SegmentReader enclosingInstance, IndexInput in_Renamed, bool useSingleNormStream, int number, long normSeek)
 			{
 				InitBlock(enclosingInstance);
+				refCount = 1;
 				this.in_Renamed = in_Renamed;
 				this.number = number;
 				this.normSeek = normSeek;
+				this.useSingleNormStream = useSingleNormStream;
 			}
 			
 			internal IndexInput in_Renamed;
@@ -100,16 +127,6 @@
 			internal void  ReWrite(SegmentInfo si)
 			{
 				// NOTE: norms are re-written in regular directory, not cfs
-				
-				System.String oldFileName = si.GetNormFileName(this.number);
-				if (oldFileName != null && !oldFileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION))
-				{
-					// Mark this file for deletion.  Note that we don't
-					// actually try to delete it until the new segments files is
-					// successfully written:
-					Enclosing_Instance.deleter.AddPendingFile(oldFileName);
-				}
-				
 				si.AdvanceNormGen(this.number);
 				IndexOutput out_Renamed = Enclosing_Instance.Directory().CreateOutput(si.GetNormFileName(this.number));
 				try
@@ -122,28 +139,126 @@
 				}
 				this.dirty = false;
 			}
+			
+			/// <summary>Closes the underlying IndexInput for this norm.
+			/// It is still valid to access all other norm properties after close is called.
+			/// </summary>
+			/// <throws>  IOException </throws>
+			internal void  Close()
+			{
+				lock (this)
+				{
+					if (in_Renamed != null && !useSingleNormStream)
+					{
+						in_Renamed.Close();
+					}
+					in_Renamed = null;
+				}
+			}
 		}
 		
-		private System.Collections.Hashtable norms = System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable());
+		/// <summary> Increments the RC of this reader, as well as
+		/// of all norms this reader is using
+		/// </summary>
+		protected internal override void  IncRef()
+		{
+			lock (this)
+			{
+				base.IncRef();
+				System.Collections.IEnumerator it = norms.Values.GetEnumerator();
+				while (it.MoveNext())
+				{
+					Norm norm = (Norm) it.Current;
+					norm.IncRef();
+				}
+			}
+		}
+		
+		/// <summary> only increments the RC of this reader, not of
+		/// the norms. This is important whenever a reopen()
+		/// creates a new SegmentReader that doesn't share
+		/// the norms with this one 
+		/// </summary>
+		private void  IncRefReaderNotNorms()
+		{
+			lock (this)
+			{
+				base.IncRef();
+			}
+		}
+		
+		protected internal override void  DecRef()
+		{
+			lock (this)
+			{
+				base.DecRef();
+				System.Collections.IEnumerator it = norms.Values.GetEnumerator();
+				while (it.MoveNext())
+				{
+					Norm norm = (Norm) it.Current;
+					norm.DecRef();
+				}
+			}
+		}
+		
+		private void  DecRefReaderNotNorms()
+		{
+			lock (this)
+			{
+				base.DecRef();
+			}
+		}
+		
+		internal System.Collections.IDictionary norms = new System.Collections.Hashtable();
 		
 		/// <summary>The class which implements SegmentReader. </summary>
 		private static System.Type IMPL;
 		
-		public SegmentReader() : base(null)
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  IOException if there is a low-level IO error </throws>
+		public static SegmentReader Get(SegmentInfo si)
 		{
+			return Get(si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE, true);
 		}
 		
-		public static SegmentReader Get(SegmentInfo si)
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  IOException if there is a low-level IO error </throws>
+		internal static SegmentReader Get(SegmentInfo si, bool doOpenStores)
+		{
+			return Get(si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE, doOpenStores);
+		}
+		
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  IOException if there is a low-level IO error </throws>
+		public static SegmentReader Get(SegmentInfo si, int readBufferSize)
+		{
+			return Get(si.dir, si, null, false, false, readBufferSize, true);
+		}
+		
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  IOException if there is a low-level IO error </throws>
+		internal static SegmentReader Get(SegmentInfo si, int readBufferSize, bool doOpenStores)
 		{
-			return Get(si.dir, si, null, false, false);
+			return Get(si.dir, si, null, false, false, readBufferSize, doOpenStores);
 		}
 		
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  IOException if there is a low-level IO error </throws>
 		public static SegmentReader Get(SegmentInfos sis, SegmentInfo si, bool closeDir)
 		{
-			return Get(si.dir, si, sis, closeDir, true);
+			return Get(si.dir, si, sis, closeDir, true, BufferedIndexInput.BUFFER_SIZE, true);
 		}
 		
-		public static SegmentReader Get(Directory dir, SegmentInfo si, SegmentInfos sis, bool closeDir, bool ownDir)
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  IOException if there is a low-level IO error </throws>
+		public static SegmentReader Get(Directory dir, SegmentInfo si, SegmentInfos sis, bool closeDir, bool ownDir, int readBufferSize)
+		{
+			return Get(dir, si, sis, closeDir, ownDir, readBufferSize, true);
+		}
+		
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  IOException if there is a low-level IO error </throws>
+		public static SegmentReader Get(Directory dir, SegmentInfo si, SegmentInfos sis, bool closeDir, bool ownDir, int readBufferSize, bool doOpenStores)
 		{
 			SegmentReader instance;
 			try
@@ -152,17 +267,18 @@
 			}
 			catch (System.Exception e)
 			{
-				throw new System.SystemException("cannot load SegmentReader class: " + e, e);
+				throw new System.Exception("cannot load SegmentReader class: " + e, e);
 			}
-			instance.Init(dir, sis, closeDir, ownDir);
-			instance.Initialize(si);
+			instance.Init(dir, sis, closeDir);
+			instance.Initialize(si, readBufferSize, doOpenStores);
 			return instance;
 		}
 		
-		private void  Initialize(SegmentInfo si)
+		private void  Initialize(SegmentInfo si, int readBufferSize, bool doOpenStores)
 		{
 			segment = si.name;
 			this.si = si;
+			this.readBufferSize = readBufferSize;
 			
 			bool success = false;
 			
@@ -172,44 +288,74 @@
 				Directory cfsDir = Directory();
 				if (si.GetUseCompoundFile())
 				{
-					cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
+					cfsReader = new CompoundFileReader(Directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
 					cfsDir = cfsReader;
 				}
 				
-				// No compound file exists - use the multi-file format
-				fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
-				fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);
+				Directory storeDir;
 				
-				// Verify two sources of "maxDoc" agree:
-				if (fieldsReader.Size() != si.docCount)
+				if (doOpenStores)
 				{
-					throw new System.SystemException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
+					if (si.GetDocStoreOffset() != - 1)
+					{
+						if (si.GetDocStoreIsCompoundFile())
+						{
+							storeCFSReader = new CompoundFileReader(Directory(), si.GetDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
+							storeDir = storeCFSReader;
+						}
+						else
+						{
+							storeDir = Directory();
+						}
+					}
+					else
+					{
+						storeDir = cfsDir;
+					}
 				}
+				else
+					storeDir = null;
 				
-				tis = new TermInfosReader(cfsDir, segment, fieldInfos);
+				// No compound file exists - use the multi-file format
+				fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
+				
+				System.String fieldsSegment;
 				
-				// NOTE: the bitvector is stored using the regular directory, not cfs
-				if (HasDeletions(si))
+				if (si.GetDocStoreOffset() != - 1)
+					fieldsSegment = si.GetDocStoreSegment();
+				else
+					fieldsSegment = segment;
+				
+				if (doOpenStores)
 				{
-					deletedDocs = new BitVector(Directory(), si.GetDelFileName());
+					fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
 					
-					// Verify # deletes does not exceed maxDoc for this segment:
-					if (deletedDocs.Count() > MaxDoc())
+					// Verify two sources of "maxDoc" agree:
+					if (si.GetDocStoreOffset() == - 1 && fieldsReader.Size() != si.docCount)
 					{
-						throw new System.SystemException("number of deletes (" + deletedDocs.Count() + ") exceeds max doc (" + MaxDoc() + ") for segment " + si.name);
+						throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.Size() + " but segmentInfo shows " + si.docCount);
 					}
 				}
 				
+				tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);
+				
+				LoadDeletedDocs();
+				
 				// make sure that all index files have been read or are kept open
 				// so that if an index update removes them we'll still have them
-				freqStream = cfsDir.OpenInput(segment + ".frq");
-				proxStream = cfsDir.OpenInput(segment + ".prx");
-				OpenNorms(cfsDir);
+				freqStream = cfsDir.OpenInput(segment + ".frq", readBufferSize);
+				proxStream = cfsDir.OpenInput(segment + ".prx", readBufferSize);
+				OpenNorms(cfsDir, readBufferSize);
 				
-				if (fieldInfos.HasVectors())
+				if (doOpenStores && fieldInfos.HasVectors())
 				{
 					// open term vector files only as needed
-					termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
+					System.String vectorsSegment;
+					if (si.GetDocStoreOffset() != - 1)
+						vectorsSegment = si.GetDocStoreSegment();
+					else
+						vectorsSegment = segment;
+					termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
 				}
 				success = true;
 			}
@@ -228,20 +374,228 @@
 			}
 		}
 		
-		protected internal override void  DoCommit()
+		private void  LoadDeletedDocs()
 		{
-			if (deletedDocsDirty)
+			// NOTE: the bitvector is stored using the regular directory, not cfs
+			if (HasDeletions(si))
 			{
-				// re-write deleted
-				System.String oldDelFileName = si.GetDelFileName();
-				if (oldDelFileName != null)
+				deletedDocs = new BitVector(Directory(), si.GetDelFileName());
+				
+				// Verify # deletes does not exceed maxDoc for this segment:
+				if (deletedDocs.Count() > MaxDoc())
+				{
+					throw new CorruptIndexException("number of deletes (" + deletedDocs.Count() + ") exceeds max doc (" + MaxDoc() + ") for segment " + si.name);
+				}
+			}
+		}
+		
+		protected internal override DirectoryIndexReader DoReopen(SegmentInfos infos)
+		{
+			lock (this)
+			{
+				DirectoryIndexReader newReader;
+				
+				if (infos.Count == 1)
+				{
+					SegmentInfo si = infos.Info(0);
+					if (segment.Equals(si.name) && si.GetUseCompoundFile() == this.si.GetUseCompoundFile())
+					{
+						newReader = ReopenSegment(si);
+					}
+					else
+					{
+						// segment not referenced anymore, reopen not possible
+						// or segment format changed
+						newReader = SegmentReader.Get(infos, infos.Info(0), false);
+					}
+				}
+				else
 				{
-					// Mark this file for deletion.  Note that we don't
-					// actually try to delete it until the new segments files is
-					// successfully written:
-					deleter.AddPendingFile(oldDelFileName);
+					return new MultiSegmentReader(directory, infos, closeDirectory, new SegmentReader[]{this}, null, null);
 				}
 				
+				return newReader;
+			}
+		}
+		
+		internal virtual SegmentReader ReopenSegment(SegmentInfo si)
+		{
+			lock (this)
+			{
+				bool deletionsUpToDate = (this.si.HasDeletions() == si.HasDeletions()) && (!si.HasDeletions() || this.si.GetDelFileName().Equals(si.GetDelFileName()));
+				bool normsUpToDate = true;
+				
+				
+				bool[] fieldNormsChanged = new bool[fieldInfos.Size()];
+				if (normsUpToDate)
+				{
+					for (int i = 0; i < fieldInfos.Size(); i++)
+					{
+						if (!this.si.GetNormFileName(i).Equals(si.GetNormFileName(i)))
+						{
+							normsUpToDate = false;
+							fieldNormsChanged[i] = true;
+						}
+					}
+				}
+				
+				if (normsUpToDate && deletionsUpToDate)
+				{
+					return this;
+				}
+				
+				
+				// clone reader
+				SegmentReader clone = new SegmentReader();
+				bool success = false;
+				try
+				{
+					clone.directory = directory;
+					clone.si = si;
+					clone.segment = segment;
+					clone.readBufferSize = readBufferSize;
+					clone.cfsReader = cfsReader;
+					clone.storeCFSReader = storeCFSReader;
+					
+					clone.fieldInfos = fieldInfos;
+					clone.tis = tis;
+					clone.freqStream = freqStream;
+					clone.proxStream = proxStream;
+					clone.termVectorsReaderOrig = termVectorsReaderOrig;
+					
+					
+					// we have to open a new FieldsReader, because it is not thread-safe
+					// and can thus not be shared among multiple SegmentReaders
+					// TODO: Change this in case FieldsReader becomes thread-safe in the future
+					System.String fieldsSegment;
+					
+					Directory storeDir = Directory();
+					
+					if (si.GetDocStoreOffset() != - 1)
+					{
+						fieldsSegment = si.GetDocStoreSegment();
+						if (storeCFSReader != null)
+						{
+							storeDir = storeCFSReader;
+						}
+					}
+					else
+					{
+						fieldsSegment = segment;
+						if (cfsReader != null)
+						{
+							storeDir = cfsReader;
+						}
+					}
+					
+					if (fieldsReader != null)
+					{
+						clone.fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize, si.GetDocStoreOffset(), si.docCount);
+					}
+					
+					
+					if (!deletionsUpToDate)
+					{
+						// load deleted docs
+						clone.deletedDocs = null;
+						clone.LoadDeletedDocs();
+					}
+					else
+					{
+						clone.deletedDocs = this.deletedDocs;
+					}
+					
+					clone.norms = new System.Collections.Hashtable();
+					if (!normsUpToDate)
+					{
+						// load norms
+						for (int i = 0; i < fieldNormsChanged.Length; i++)
+						{
+							// copy unchanged norms to the cloned reader and incRef those norms
+							if (!fieldNormsChanged[i])
+							{
+								System.String curField = fieldInfos.FieldInfo(i).name;
+								Norm norm = (Norm) this.norms[curField];
+								norm.IncRef();
+								clone.norms[curField] = norm;
+							}
+						}
+						
+						clone.OpenNorms(si.GetUseCompoundFile() ? cfsReader : Directory(), readBufferSize);
+					}
+					else
+					{
+						System.Collections.IEnumerator it = norms.Keys.GetEnumerator();
+						while (it.MoveNext())
+						{
+							System.String field = (System.String) it.Current;
+							Norm norm = (Norm) norms[field];
+							norm.IncRef();
+							clone.norms[field] = norm;
+						}
+					}
+					
+					if (clone.singleNormStream == null)
+					{
+						for (int i = 0; i < fieldInfos.Size(); i++)
+						{
+							FieldInfo fi = fieldInfos.FieldInfo(i);
+							if (fi.isIndexed && !fi.omitNorms)
+							{
+								Directory d = si.GetUseCompoundFile() ? cfsReader : Directory();
+								System.String fileName = si.GetNormFileName(fi.number);
+								if (si.HasSeparateNorms(fi.number))
+								{
+									continue;
+								}
+								
+								if (fileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION))
+								{
+									clone.singleNormStream = d.OpenInput(fileName, readBufferSize);
+									break;
+								}
+							}
+						}
+					}
+					
+					success = true;
+				}
+				finally
+				{
+					if (this.referencedSegmentReader != null)
+					{
+						// this reader shares resources with another SegmentReader,
+						// so we increment the other reader's refCount. We don't
+						// increment the refCount of the norms because we did
+						// that already for the shared norms
+						clone.referencedSegmentReader = this.referencedSegmentReader;
+						referencedSegmentReader.IncRefReaderNotNorms();
+					}
+					else
+					{
+						// this reader wasn't reopened, so we increment this
+						// reader's refCount
+						clone.referencedSegmentReader = this;
+						IncRefReaderNotNorms();
+					}
+					
+					if (!success)
+					{
+						// An exception occurred during reopen, we have to decRef the norms
+						// that we incRef'ed already and close singleNormStream and FieldsReader
+						clone.DecRef();
+					}
+				}
+				
+				return clone;
+			}
+		}
+		
+		protected internal override void  CommitChanges()
+		{
+			if (deletedDocsDirty)
+			{
+				// re-write deleted
 				si.AdvanceDelGen();
 				
 				// We can write directly to the actual name (vs to a
@@ -251,24 +605,16 @@
 			}
 			if (undeleteAll && si.HasDeletions())
 			{
-				System.String oldDelFileName = si.GetDelFileName();
-				if (oldDelFileName != null)
-				{
-					// Mark this file for deletion.  Note that we don't
-					// actually try to delete it until the new segments files is
-					// successfully written:
-					deleter.AddPendingFile(oldDelFileName);
-				}
 				si.ClearDelGen();
 			}
 			if (normsDirty)
 			{
 				// re-write norms
 				si.SetNumFields(fieldInfos.Size());
-				System.Collections.IEnumerator values = norms.Values.GetEnumerator();
-				while (values.MoveNext())
+				System.Collections.IEnumerator it = norms.Values.GetEnumerator();
+				while (it.MoveNext())
 				{
-					Norm norm = (Norm) values.Current;
+					Norm norm = (Norm) it.Current;
 					if (norm.dirty)
 					{
 						norm.ReWrite(si);
@@ -280,38 +626,75 @@
 			undeleteAll = false;
 		}
 		
+		internal virtual FieldsReader GetFieldsReader()
+		{
+			return fieldsReader;
+		}
+		
 		protected internal override void  DoClose()
 		{
-			if (fieldsReader != null)
-			{
-				fieldsReader.Close();
-			}
-			if (tis != null)
+			bool hasReferencedReader = (referencedSegmentReader != null);
+			
+			if (hasReferencedReader)
 			{
-				tis.Close();
+				referencedSegmentReader.DecRefReaderNotNorms();
+				referencedSegmentReader = null;
 			}
 			
-			if (freqStream != null)
-				freqStream.Close();
-			if (proxStream != null)
-				proxStream.Close();
+			deletedDocs = null;
 			
-			CloseNorms();
+			// close the single norms stream
+			if (singleNormStream != null)
+			{
+				// we can close this stream, even if the norms
+				// are shared, because every reader has its own
+				// singleNormStream
+				singleNormStream.Close();
+				singleNormStream = null;
+			}
 			
-			if (termVectorsReaderOrig != null)
-				termVectorsReaderOrig.Close();
+			// re-opened SegmentReaders have their own instance of FieldsReader
+			if (fieldsReader != null)
+			{
+				fieldsReader.Close();
+			}
 			
-			if (cfsReader != null)
-				cfsReader.Close();
+			if (!hasReferencedReader)
+			{
+				// close everything, nothing is shared anymore with other readers
+				if (tis != null)
+				{
+					tis.Close();
+				}
+				
+				if (freqStream != null)
+					freqStream.Close();
+				if (proxStream != null)
+					proxStream.Close();
+				
+				if (termVectorsReaderOrig != null)
+					termVectorsReaderOrig.Close();
+				
+				if (cfsReader != null)
+					cfsReader.Close();
+				
+				if (storeCFSReader != null)
+					storeCFSReader.Close();
+				
+				// maybe close directory
+				base.DoClose();
+			}
 		}
 		
 		internal static bool HasDeletions(SegmentInfo si)
 		{
+			// Don't call ensureOpen() here (it could affect performance)
 			return si.HasDeletions();
 		}
 		
 		public override bool HasDeletions()
 		{
+			// Don't call ensureOpen() here (it could affect performance)
 			return deletedDocs != null;
 		}
 		
@@ -343,63 +726,33 @@
 		
 		internal virtual System.Collections.ArrayList Files()
 		{
-			System.Collections.ArrayList files = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(16));
-			
-			if (si.GetUseCompoundFile())
-			{
-				System.String name = segment + ".cfs";
-				if (Directory().FileExists(name))
-				{
-					files.Add(name);
-				}
-			}
-			else
-			{
-				for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.Length; i++)
-				{
-					System.String name = segment + "." + IndexFileNames.INDEX_EXTENSIONS[i];
-					if (Directory().FileExists(name))
-						files.Add(name);
-				}
-			}
-			
-			if (si.HasDeletions())
-			{
-				files.Add(si.GetDelFileName());
-			}
-			
-			bool addedNrm = false;
-			for (int i = 0; i < fieldInfos.Size(); i++)
-			{
-				System.String name = si.GetNormFileName(i);
-				if (name != null && Directory().FileExists(name))
-				{
-					if (name.EndsWith("." + IndexFileNames.NORMS_EXTENSION))
-					{
-						if (addedNrm)
-							continue; // add .nrm just once
-						addedNrm = true;
-					}
-					files.Add(name);
-				}
-			}
-			return files;
+			return System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(si.Files()));
 		}
 		
 		public override TermEnum Terms()
 		{
+			EnsureOpen();
 			return tis.Terms();
 		}
 		
 		public override TermEnum Terms(Term t)
 		{
+			EnsureOpen();
 			return tis.Terms(t);
 		}
 		
+		internal virtual FieldInfos GetFieldInfos()
+		{
+			return fieldInfos;
+		}
+		
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  IOException if there is a low-level IO error </throws>
 		public override Document Document(int n, FieldSelector fieldSelector)
 		{
 			lock (this)
 			{
+				EnsureOpen();
 				if (IsDeleted(n))
 					throw new System.ArgumentException("attempt to access a deleted document");
 				return fieldsReader.Doc(n, fieldSelector);
@@ -416,16 +769,19 @@
 		
 		public override TermDocs TermDocs()
 		{
+			EnsureOpen();
 			return new SegmentTermDocs(this);
 		}
 		
 		public override TermPositions TermPositions()
 		{
+			EnsureOpen();
 			return new SegmentTermPositions(this);
 		}
 		
 		public override int DocFreq(Term t)
 		{
+			EnsureOpen();
 			TermInfo ti = tis.Get(t);
 			if (ti != null)
 				return ti.docFreq;
@@ -435,6 +791,7 @@
 		
 		public override int NumDocs()
 		{
+			// Don't call ensureOpen() here (it could affect performance)
 			int n = MaxDoc();
 			if (deletedDocs != null)
 				n -= deletedDocs.Count();
@@ -443,53 +800,69 @@
 		
 		public override int MaxDoc()
 		{
+			// Don't call ensureOpen() here (it could affect performance)
 			return si.docCount;
 		}
 		
-		/// <seealso cref="fldOption)">
+		public override void  SetTermInfosIndexDivisor(int indexDivisor)
+		{
+			tis.SetIndexDivisor(indexDivisor);
+		}
+		
+		public override int GetTermInfosIndexDivisor()
+		{
+			return tis.GetIndexDivisor();
+		}
+		
+		/// <seealso cref="IndexReader.GetFieldNames(IndexReader.FieldOption fldOption)">
 		/// </seealso>
 		public override System.Collections.ICollection GetFieldNames(IndexReader.FieldOption fieldOption)
 		{
-			
-			System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
+			EnsureOpen();
+
+            System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
 			for (int i = 0; i < fieldInfos.Size(); i++)
 			{
 				FieldInfo fi = fieldInfos.FieldInfo(i);
 				if (fieldOption == IndexReader.FieldOption.ALL)
 				{
-					fieldSet.Add(fi.name, fi.name);
+                    fieldSet.Add(fi.name, fi.name);
 				}
 				else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED)
 				{
-					fieldSet.Add(fi.name, fi.name);
+                    fieldSet.Add(fi.name, fi.name);
+				}
+				else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS)
+				{
+                    fieldSet.Add(fi.name, fi.name);
 				}
 				else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED)
 				{
-					fieldSet.Add(fi.name, fi.name);
+                    fieldSet.Add(fi.name, fi.name);
 				}
 				else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR)
 				{
-					fieldSet.Add(fi.name, fi.name);
+                    fieldSet.Add(fi.name, fi.name);
 				}
 				else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR)
 				{
-					fieldSet.Add(fi.name, fi.name);
+                    fieldSet.Add(fi.name, fi.name);
 				}
 				else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR)
 				{
-					fieldSet.Add(fi.name, fi.name);
+                    fieldSet.Add(fi.name, fi.name);
 				}
 				else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION)
 				{
-					fieldSet.Add(fi.name, fi.name);
+                    fieldSet.Add(fi.name, fi.name);
 				}
 				else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET)
 				{
-					fieldSet.Add(fi.name, fi.name);
+                    fieldSet.Add(fi.name, fi.name);
 				}
 				else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET)
 				{
-					fieldSet.Add(fi.name, fi.name);
+                    fieldSet.Add(fi.name, fi.name);
 				}
 			}
 			return fieldSet;
@@ -500,7 +873,8 @@
 		{
 			lock (this)
 			{
-				return norms.ContainsKey(field);
+				EnsureOpen();
+				return norms.Contains(field);
 			}
 		}
 		
@@ -529,14 +903,20 @@
 				Norm norm = (Norm) norms[field];
 				if (norm == null)
 					return null; // not indexed, or norms not stored
-				if (norm.bytes == null)
+				lock (norm)
 				{
-					// value not yet read
-					byte[] bytes = new byte[MaxDoc()];
-					Norms(field, bytes, 0);
-					norm.bytes = bytes; // cache it
+					if (norm.bytes == null)
+					{
+						// value not yet read
+						byte[] bytes = new byte[MaxDoc()];
+						Norms(field, bytes, 0);
+						norm.bytes = bytes; // cache it
+						// it's OK to close the underlying IndexInput as we have cached the
+						// norms and will never read them again.
+						norm.Close();
+					}
+					return norm.bytes;
 				}
-				return norm.bytes;
 			}
 		}
 		
@@ -545,6 +925,7 @@
 		{
 			lock (this)
 			{
+				EnsureOpen();
 				byte[] bytes = GetNorms(field);
 				if (bytes == null)
 					bytes = FakeNorms();
@@ -556,8 +937,9 @@
 		{
 			Norm norm = (Norm) norms[field];
 			if (norm == null)
-				// not an indexed field
+			// not an indexed field
 				return ;
+			
 			norm.dirty = true; // mark it dirty
 			normsDirty = true;
 			
@@ -570,6 +952,7 @@
 			lock (this)
 			{
 				
+				EnsureOpen();
 				Norm norm = (Norm) norms[field];
 				if (norm == null)
 				{
@@ -577,35 +960,46 @@
 					return ;
 				}
 				
-				if (norm.bytes != null)
+				lock (norm)
 				{
-					// can copy from cache
-					Array.Copy(norm.bytes, 0, bytes, offset, MaxDoc());
-					return ;
-				}
-				
-				IndexInput normStream = (IndexInput) norm.in_Renamed.Clone();
-				try
-				{
-					// read from disk
+					if (norm.bytes != null)
+					{
+						// can copy from cache
+						Array.Copy(norm.bytes, 0, bytes, offset, MaxDoc());
+						return ;
+					}
+					
+					// Read from disk.  norm.in may be shared across multiple norms and
+					// should only be used in a synchronized context.
+					IndexInput normStream;
+					if (norm.useSingleNormStream)
+					{
+						normStream = singleNormStream;
+					}
+					else
+					{
+						normStream = norm.in_Renamed;
+					}
 					normStream.Seek(norm.normSeek);
 					normStream.ReadBytes(bytes, offset, MaxDoc());
 				}
-				finally
-				{
-					normStream.Close();
-				}
 			}
 		}
 		
 		
-		private void  OpenNorms(Directory cfsDir)
+		private void  OpenNorms(Directory cfsDir, int readBufferSize)
 		{
 			long nextNormSeek = SegmentMerger.NORMS_HEADER.Length; //skip header (header unused for now)
 			int maxDoc = MaxDoc();
 			for (int i = 0; i < fieldInfos.Size(); i++)
 			{
 				FieldInfo fi = fieldInfos.FieldInfo(i);
+				if (norms.Contains(fi.name))
+				{
+					// in case this SegmentReader is being re-opened, we might be able to
+					// reuse some norm instances and skip loading them here
+					continue;
+				}
 				if (fi.isIndexed && !fi.omitNorms)
 				{
 					Directory d = Directory();
@@ -614,24 +1008,60 @@
 					{
 						d = cfsDir;
 					}
-					long normSeek = (fileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION)?nextNormSeek:0);
-					norms[fi.name] = new Norm(this, d.OpenInput(fileName), fi.number, normSeek);
+					
+					// singleNormFile means multiple norms share this file
+					bool singleNormFile = fileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION);
+					IndexInput normInput = null;
+					long normSeek;
+					
+					if (singleNormFile)
+					{
+						normSeek = nextNormSeek;
+						if (singleNormStream == null)
+						{
+							singleNormStream = d.OpenInput(fileName, readBufferSize);
+						}
+						// All norms in the .nrm file can share a single IndexInput since
+						// they are only used in a synchronized context.
+						// If this were to change in the future, a clone could be done here.
+						normInput = singleNormStream;
+					}
+					else
+					{
+						normSeek = 0;
+						normInput = d.OpenInput(fileName);
+					}
+					
+					norms[fi.name] = new Norm(this, normInput, singleNormFile, fi.number, normSeek);
 					nextNormSeek += maxDoc; // increment also if some norms are separate
 				}
 			}
 		}
 		
-		private void  CloseNorms()
+		// for testing only
+		internal virtual bool NormsClosed()
 		{
-			lock (norms.SyncRoot)
+			if (singleNormStream != null)
+			{
+				return false;
+			}
+			System.Collections.IEnumerator it = norms.Values.GetEnumerator();
+			while (it.MoveNext())
 			{
-				System.Collections.IEnumerator enumerator = norms.Values.GetEnumerator();
-				while (enumerator.MoveNext())
+				Norm norm = (Norm) it.Current;
+				if (norm.refCount > 0)
 				{
-					Norm norm = (Norm) enumerator.Current;
-					norm.in_Renamed.Close();
+					return false;
 				}
 			}
+			return true;
+		}
+		
+		// for testing only
+		internal virtual bool NormsClosed(System.String field)
+		{
+			Norm norm = (Norm) norms[field];
+			return norm.refCount == 0;
 		}
 		
 		/// <summary> Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.</summary>
@@ -657,6 +1087,7 @@
 		public override TermFreqVector GetTermFreqVector(int docNumber, System.String field)
 		{
 			// Check if this field is invalid or has no stored term vector
+			EnsureOpen();
 			FieldInfo fi = fieldInfos.FieldInfo(field);
 			if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
 				return null;
@@ -669,6 +1100,37 @@
 		}
 		
 		
+		public override void  GetTermFreqVector(int docNumber, System.String field, TermVectorMapper mapper)
+		{
+			EnsureOpen();
+			FieldInfo fi = fieldInfos.FieldInfo(field);
+			if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
+				return ;
+			
+			TermVectorsReader termVectorsReader = GetTermVectorsReader();
+			if (termVectorsReader == null)
+			{
+				return ;
+			}
+			
+			
+			termVectorsReader.Get(docNumber, field, mapper);
+		}
+		
+		
+		public override void  GetTermFreqVector(int docNumber, TermVectorMapper mapper)
+		{
+			EnsureOpen();
+			if (termVectorsReaderOrig == null)
+				return ;
+			
+			TermVectorsReader termVectorsReader = GetTermVectorsReader();
+			if (termVectorsReader == null)
+				return ;
+			
+			termVectorsReader.Get(docNumber, mapper);
+		}
+		
 		/// <summary>Return an array of term frequency vectors for the specified document.
 		/// The array contains a vector for each vectorized field in the document.
 		/// Each vector contains term numbers and frequencies for all terms
@@ -678,6 +1140,7 @@
 		/// <throws>  IOException </throws>
 		public override TermFreqVector[] GetTermFreqVectors(int docNumber)
 		{
+			EnsureOpen();
 			if (termVectorsReaderOrig == null)
 				return null;
 			
@@ -688,12 +1151,24 @@
 			return termVectorsReader.Get(docNumber);
 		}
 		
+		/// <summary>Returns the field infos of this segment </summary>
+		internal virtual FieldInfos FieldInfos()
+		{
+			return fieldInfos;
+		}
+		
 		/// <summary> Return the name of the segment this reader is reading.</summary>
 		internal virtual System.String GetSegmentName()
 		{
 			return segment;
 		}
 		
+		/// <summary> Return the SegmentInfo of the segment this reader is reading.</summary>
+		internal virtual SegmentInfo GetSegmentInfo()
+		{
+			return si;
+		}
+		
 		internal virtual void  SetSegmentInfo(SegmentInfo info)
 		{
 			si = info;
@@ -705,10 +1180,10 @@
 			rollbackDeletedDocsDirty = deletedDocsDirty;
 			rollbackNormsDirty = normsDirty;
 			rollbackUndeleteAll = undeleteAll;
-			System.Collections.IEnumerator values = norms.Values.GetEnumerator();
-			while (values.MoveNext())
+			System.Collections.IEnumerator it = norms.Values.GetEnumerator();
+			while (it.MoveNext())
 			{
-				Norm norm = (Norm) values.Current;
+				Norm norm = (Norm) it.Current;
 				norm.rollbackDirty = norm.dirty;
 			}
 		}
@@ -719,10 +1194,10 @@
 			deletedDocsDirty = rollbackDeletedDocsDirty;
 			normsDirty = rollbackNormsDirty;
 			undeleteAll = rollbackUndeleteAll;
-			System.Collections.IEnumerator values = norms.Values.GetEnumerator();
-			while (values.MoveNext())
+			System.Collections.IEnumerator it = norms.Values.GetEnumerator();
+			while (it.MoveNext())
 			{
-				Norm norm = (Norm) values.Current;
+				Norm norm = (Norm) it.Current;
 				norm.dirty = norm.rollbackDirty;
 			}
 		}
@@ -734,20 +1209,20 @@
 					System.String name = SupportClass.AppSettings.Get("Lucene.Net.SegmentReader.class", typeof(SegmentReader).FullName);
 					IMPL = System.Type.GetType(name);
 				}
-                catch (System.Security.SecurityException se)
-                {
-                    try
-                    {
-                        IMPL = System.Type.GetType(typeof(SegmentReader).FullName);
-                    }
-                    catch (System.Exception e)
-                    {
-                        throw new System.SystemException("cannot load default SegmentReader class: " + e, e);
-                    }
-                }
-                catch (System.Exception e)
+				catch (System.Security.SecurityException se)
+				{
+					try
+					{
+						IMPL = System.Type.GetType(typeof(SegmentReader).FullName);
+					}
+					catch (System.Exception e)
+					{
+						throw new System.Exception("cannot load default SegmentReader class: " + e, e);
+					}
+				}
+				catch (System.Exception e)
 				{
-					throw new System.SystemException("cannot load SegmentReader class: " + e, e);
+					throw new System.Exception("cannot load SegmentReader class: " + e, e);
 				}
 			}
 		}

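A note on the reopen machinery above: SegmentReader now extends DirectoryIndexReader, and each Norm carries its own reference count, so ReopenSegment() can hand unchanged norms to the cloned reader instead of reloading them from disk. The sketch below is an illustration only (not part of the patch), mirroring the IncRef/DecRef discipline with a simplified stand-in for the private Norm class:

using System;

// Simplified stand-in for SegmentReader.Norm: the creating reader owns the
// first reference, a re-opened reader IncRef()s each norm it shares, and
// the last DecRef() closes the underlying stream exactly once.
class SharedNorm
{
    private int refCount = 1;
    private IDisposable input;   // stands in for the norm's IndexInput

    public SharedNorm(IDisposable input) { this.input = input; }

    public void IncRef()
    {
        lock (this)
        {
            System.Diagnostics.Debug.Assert(refCount > 0);
            refCount++;
        }
    }

    public void DecRef()
    {
        lock (this)
        {
            System.Diagnostics.Debug.Assert(refCount > 0);
            if (refCount == 1)   // last reference: release the stream
            {
                if (input != null) input.Dispose();
                input = null;
            }
            refCount--;
        }
    }
}

In the patch itself the reader-level IncRef()/DecRef() overrides fan out to every Norm in the norms table, which is what lets a reopened reader and its source share one set of open norm files safely.
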
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermDocs.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentTermDocs.cs?rev=671404&r1=671403&r2=671404&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermDocs.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermDocs.cs Tue Jun 24 19:52:22 2008
@@ -16,8 +16,9 @@
  */
 
 using System;
-using BitVector = Lucene.Net.Util.BitVector;
+
 using IndexInput = Lucene.Net.Store.IndexInput;
+using BitVector = Lucene.Net.Util.BitVector;
 
 namespace Lucene.Net.Index
 {
@@ -33,47 +34,60 @@
 		internal int freq;
 		
 		private int skipInterval;
-		private int numSkips;
-		private int skipCount;
-		private IndexInput skipStream;
-		private int skipDoc;
-		private long freqPointer;
-		private long proxPointer;
+		private int maxSkipLevels;
+		private DefaultSkipListReader skipListReader;
+		
+		private long freqBasePointer;
+		private long proxBasePointer;
+		
 		private long skipPointer;
 		private bool haveSkipped;
 		
+		protected internal bool currentFieldStoresPayloads;
+		
 		public SegmentTermDocs(SegmentReader parent)
 		{
 			this.parent = parent;
 			this.freqStream = (IndexInput) parent.freqStream.Clone();
 			this.deletedDocs = parent.deletedDocs;
 			this.skipInterval = parent.tis.GetSkipInterval();
+			this.maxSkipLevels = parent.tis.GetMaxSkipLevels();
 		}
 		
 		public virtual void  Seek(Term term)
 		{
 			TermInfo ti = parent.tis.Get(term);
-			Seek(ti);
+			Seek(ti, term);
 		}
 		
 		public virtual void  Seek(TermEnum termEnum)
 		{
 			TermInfo ti;
+			Term term;
 			
 			// use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs
 			if (termEnum is SegmentTermEnum && ((SegmentTermEnum) termEnum).fieldInfos == parent.fieldInfos)
-			// optimized case
-				ti = ((SegmentTermEnum) termEnum).TermInfo();
-			// punt case
+			{
+				// optimized case
+				SegmentTermEnum segmentTermEnum = ((SegmentTermEnum) termEnum);
+				term = segmentTermEnum.Term();
+				ti = segmentTermEnum.TermInfo();
+			}
 			else
-				ti = parent.tis.Get(termEnum.Term());
+			{
+				// punt case
+				term = termEnum.Term();
+				ti = parent.tis.Get(term);
+			}
 			
-			Seek(ti);
+			Seek(ti, term);
 		}
 		
-		internal virtual void  Seek(TermInfo ti)
+		internal virtual void  Seek(TermInfo ti, Term term)
 		{
 			count = 0;
+			FieldInfo fi = parent.fieldInfos.FieldInfo(term.field);
+			currentFieldStoresPayloads = (fi != null) ? fi.storePayloads : false;
 			if (ti == null)
 			{
 				df = 0;
@@ -82,13 +96,10 @@
 			{
 				df = ti.docFreq;
 				doc = 0;
-				skipDoc = 0;
-				skipCount = 0;
-				numSkips = df / skipInterval;
-				freqPointer = ti.freqPointer;
-				proxPointer = ti.proxPointer;
-				skipPointer = freqPointer + ti.skipOffset;
-				freqStream.Seek(freqPointer);
+				freqBasePointer = ti.freqPointer;
+				proxBasePointer = ti.proxPointer;
+				skipPointer = freqBasePointer + ti.skipOffset;
+				freqStream.Seek(freqBasePointer);
 				haveSkipped = false;
 			}
 		}
@@ -96,8 +107,8 @@
 		public virtual void  Close()
 		{
 			freqStream.Close();
-			if (skipStream != null)
-				skipStream.Close();
+			if (skipListReader != null)
+				skipListReader.Close();
 		}
 		
 		public int Doc()
@@ -172,56 +183,34 @@
 		{
 		}
 		
+        protected internal virtual void SkipProx(long proxPointer, int payloadLength)
+        {
+        }
+		
 		/// <summary>Optimized implementation. </summary>
 		public virtual bool SkipTo(int target)
 		{
 			if (df >= skipInterval)
 			{
 				// optimized case
-				
-				if (skipStream == null)
-					skipStream = (IndexInput) freqStream.Clone(); // lazily clone
+				if (skipListReader == null)
+					skipListReader = new DefaultSkipListReader((IndexInput) freqStream.Clone(), maxSkipLevels, skipInterval); // lazily clone
 				
 				if (!haveSkipped)
 				{
-					// lazily seek skip stream
-					skipStream.Seek(skipPointer);
+					// lazily initialize skip stream
+					skipListReader.Init(skipPointer, freqBasePointer, proxBasePointer, df, currentFieldStoresPayloads);
 					haveSkipped = true;
 				}
 				
-				// scan skip data
-				int lastSkipDoc = skipDoc;
-				long lastFreqPointer = freqStream.GetFilePointer();
-				long lastProxPointer = - 1;
-				int numSkipped = - 1 - (count % skipInterval);
-				
-				while (target > skipDoc)
-				{
-					lastSkipDoc = skipDoc;
-					lastFreqPointer = freqPointer;
-					lastProxPointer = proxPointer;
-					
-					if (skipDoc != 0 && skipDoc >= doc)
-						numSkipped += skipInterval;
-					
-					if (skipCount >= numSkips)
-						break;
-					
-					skipDoc += skipStream.ReadVInt();
-					freqPointer += skipStream.ReadVInt();
-					proxPointer += skipStream.ReadVInt();
-					
-					skipCount++;
-				}
-				
-				// if we found something to skip, then skip it
-				if (lastFreqPointer > freqStream.GetFilePointer())
+				int newCount = skipListReader.SkipTo(target);
+				if (newCount > count)
 				{
-					freqStream.Seek(lastFreqPointer);
-					SkipProx(lastProxPointer);
+					freqStream.Seek(skipListReader.GetFreqPointer());
+					SkipProx(skipListReader.GetProxPointer(), skipListReader.GetPayloadLength());
 					
-					doc = lastSkipDoc;
-					count += numSkipped;
+					doc = skipListReader.GetDoc();
+					count = newCount;
 				}
 			}
 			

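The SkipTo() rewrite above swaps the old inline skip scanning for DefaultSkipListReader and its multi-level skip lists, but the public TermDocs contract is unchanged. A hedged consumer-side sketch (the field name, term, and target document are made up):

using Lucene.Net.Index;

class SkipToDemo
{
    // reader: any open IndexReader. SkipTo() positions the enumerator
    // at the first document >= target, using the skip lists internally.
    static void PrintDocsFrom(IndexReader reader, int target)
    {
        TermDocs td = reader.TermDocs();
        td.Seek(new Term("contents", "lucene"));   // hypothetical field/term
        if (td.SkipTo(target))
        {
            do
            {
                System.Console.WriteLine(td.Doc() + ": freq=" + td.Freq());
            }
            while (td.Next());
        }
        td.Close();
    }
}
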
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermEnum.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentTermEnum.cs?rev=671404&r1=671403&r2=671404&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermEnum.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermEnum.cs Tue Jun 24 19:52:22 2008
@@ -16,6 +16,7 @@
  */
 
 using System;
+
 using IndexInput = Lucene.Net.Store.IndexInput;
 
 namespace Lucene.Net.Index
@@ -39,6 +40,7 @@
 		internal long indexPointer = 0;
 		internal int indexInterval;
 		internal int skipInterval;
+		internal int maxSkipLevels;
 		private int formatM1SkipInterval;
 		
 		internal SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi)
@@ -46,6 +48,7 @@
 			input = i;
 			fieldInfos = fis;
 			isIndex = isi;
+			maxSkipLevels = 1; // use single-level skip lists for formats > -3 
 			
 			int firstInt = input.ReadInt();
 			if (firstInt >= 0)
@@ -65,7 +68,7 @@
 				
 				// check that it is a format we can understand
 				if (format < TermInfosWriter.FORMAT)
-					throw new System.IO.IOException("Unknown format version:" + format);
+					throw new CorruptIndexException("Unknown format version:" + format);
 				
 				size = input.ReadLong(); // read the size
 				
@@ -84,6 +87,11 @@
 				{
 					indexInterval = input.ReadInt();
 					skipInterval = input.ReadInt();
+					if (format == - 3)
+					{
+						// this new format introduces multi-level skipping
+						maxSkipLevels = input.ReadInt();
+					}
 				}
 			}
 		}
@@ -123,6 +131,7 @@
 		{
 			if (position++ >= size - 1)
 			{
+				prevBuffer.Set(termBuffer);
 				termBuffer.Reset();
 				return false;
 			}

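For orientation: a format -3 term dictionary adds exactly one header field, maxSkipLevels, after the existing indexInterval and skipInterval ints; all earlier formats imply single-level skip lists. A hypothetical helper (not in the patch) reading the fields in the order the constructor above consumes them, with the pre-versioned and format -1 legacy branches elided:

using IndexInput = Lucene.Net.Store.IndexInput;

class TermDictHeader
{
    static int ReadMaxSkipLevels(IndexInput input)
    {
        int format = input.ReadInt();       // negative: a format version marker
        long size = input.ReadLong();       // number of terms in the dictionary
        int indexInterval = input.ReadInt();
        int skipInterval = input.ReadInt();
        // only the new -3 format stores maxSkipLevels explicitly
        return (format == -3) ? input.ReadInt() : 1;
    }
}
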
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermPositionVector.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentTermPositionVector.cs?rev=671404&r1=671403&r2=671404&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermPositionVector.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermPositionVector.cs Tue Jun 24 19:52:22 2008
@@ -20,7 +20,7 @@
 namespace Lucene.Net.Index
 {
 	
-	public class SegmentTermPositionVector : SegmentTermVector, TermPositionVector
+	class SegmentTermPositionVector : SegmentTermVector, TermPositionVector
 	{
 		protected internal int[][] positions;
 		protected internal TermVectorOffsetInfo[][] offsets;

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermPositions.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentTermPositions.cs?rev=671404&r1=671403&r2=671404&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermPositions.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermPositions.cs Tue Jun 24 19:52:22 2008
@@ -1,20 +1,24 @@
-/// <summary> Licensed to the Apache Software Foundation (ASF) under one or more
-/// contributor license agreements.  See the NOTICE file distributed with
-/// this work for additional information regarding copyright ownership.
-/// The ASF licenses this file to You under the Apache License, Version 2.0
-/// (the "License"); you may not use this file except in compliance with
-/// the License.  You may obtain a copy of the License at
-/// 
-/// http://www.apache.org/licenses/LICENSE-2.0
-/// 
-/// Unless required by applicable law or agreed to in writing, software
-/// distributed under the License is distributed on an "AS IS" BASIS,
-/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-/// See the License for the specific language governing permissions and
-/// limitations under the License.
-/// </summary>
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 using System;
+
 using IndexInput = Lucene.Net.Store.IndexInput;
+
 namespace Lucene.Net.Index
 {
 	
@@ -24,30 +28,39 @@
 		private int proxCount;
 		private int position;
 		
+		// the current payload length
+		private int payloadLength;
+		// indicates whether the payload of the current position has
+		// been read from the proxStream yet
+		private bool needToLoadPayload;
+		
 		// these variables are being used to remember information
 		// for a lazy skip
-		private long lazySkipPointer = 0;
-		private int lazySkipDocCount = 0;
+		private long lazySkipPointer = - 1;
+		private int lazySkipProxCount = 0;
 		
-		internal SegmentTermPositions(SegmentReader p) : base(p)
+		internal SegmentTermPositions(SegmentReader p):base(p)
 		{
-			this.proxStream = (IndexInput) parent.proxStream.Clone();
+			this.proxStream = null; // the proxStream will be cloned lazily when nextPosition() is called for the first time
 		}
 		
-		internal override void  Seek(TermInfo ti)
+		internal override void  Seek(TermInfo ti, Term term)
 		{
-			base.Seek(ti);
+			base.Seek(ti, term);
 			if (ti != null)
 				lazySkipPointer = ti.proxPointer;
 			
-			lazySkipDocCount = 0;
+			lazySkipProxCount = 0;
 			proxCount = 0;
+			payloadLength = 0;
+			needToLoadPayload = false;
 		}
 		
 		public override void  Close()
 		{
 			base.Close();
-			proxStream.Close();
+			if (proxStream != null)
+				proxStream.Close();
 		}
 		
 		public int NextPosition()
@@ -55,20 +68,39 @@
 			// perform lazy skips if necessary
 			LazySkip();
 			proxCount--;
-			return position += proxStream.ReadVInt();
+			return position += ReadDeltaPosition();
+		}
+		
+		private int ReadDeltaPosition()
+		{
+			int delta = proxStream.ReadVInt();
+			if (currentFieldStoresPayloads)
+			{
+				// if the current field stores payloads then
+				// the position delta is shifted one bit to the left.
+				// if the LSB is set, then we have to read the current
+				// payload length
+				if ((delta & 1) != 0)
+				{
+					payloadLength = proxStream.ReadVInt();
+				}
+				delta = (int) (((uint) delta) >> 1);
+				needToLoadPayload = true;
+			}
+			return delta;
 		}
 		
 		protected internal override void  SkippingDoc()
 		{
-			// we remember to skip the remaining positions of the current
-			// document lazily
-			lazySkipDocCount += freq;
+			// we remember to skip a document lazily
+			lazySkipProxCount += freq;
 		}
 		
 		public override bool Next()
 		{
-			// we remember to skip a document lazily
-			lazySkipDocCount += proxCount;
+			// we remember to skip the remaining positions of the current
+			// document lazily
+			lazySkipProxCount += proxCount;
 			
 			if (base.Next())
 			{
@@ -87,19 +119,33 @@
 		
 		
 		/// <summary>Called by super.skipTo(). </summary>
-		protected internal override void  SkipProx(long proxPointer)
+		protected internal override void  SkipProx(long proxPointer, int payloadLength)
 		{
 			// we save the pointer, we might have to skip there lazily
 			lazySkipPointer = proxPointer;
-			lazySkipDocCount = 0;
+			lazySkipProxCount = 0;
 			proxCount = 0;
+			this.payloadLength = payloadLength;
+			needToLoadPayload = false;
 		}
 		
 		private void  SkipPositions(int n)
 		{
 			for (int f = n; f > 0; f--)
-			// skip unread positions
-				proxStream.ReadVInt();
+			{
+				// skip unread positions
+				ReadDeltaPosition();
+				SkipPayload();
+			}
+		}
+		
+		private void  SkipPayload()
+		{
+			if (needToLoadPayload && payloadLength > 0)
+			{
+				proxStream.Seek(proxStream.GetFilePointer() + payloadLength);
+			}
+			needToLoadPayload = false;
 		}
 		
 		// It is not always necessary to move the prox pointer
@@ -114,17 +160,64 @@
 		// as soon as positions are requested.
 		private void  LazySkip()
 		{
-			if (lazySkipPointer != 0)
+			if (proxStream == null)
+			{
+				// clone lazily
+				proxStream = (IndexInput) parent.proxStream.Clone();
+			}
+			
+			// we might have to skip the current payload
+			// if it was not read yet
+			SkipPayload();
+			
+			if (lazySkipPointer != - 1)
 			{
 				proxStream.Seek(lazySkipPointer);
-				lazySkipPointer = 0;
+				lazySkipPointer = - 1;
 			}
 			
-			if (lazySkipDocCount != 0)
+			if (lazySkipProxCount != 0)
 			{
-				SkipPositions(lazySkipDocCount);
-				lazySkipDocCount = 0;
+				SkipPositions(lazySkipProxCount);
+				lazySkipProxCount = 0;
 			}
 		}
+		
+		public int GetPayloadLength()
+		{
+			return payloadLength;
+		}
+		
+		public byte[] GetPayload(byte[] data, int offset)
+		{
+			if (!needToLoadPayload)
+			{
+				throw new System.IO.IOException("Payload cannot be loaded more than once for the same term position.");
+			}
+			
+			// read payloads lazily
+			byte[] retArray;
+			int retOffset;
+			if (data == null || data.Length - offset < payloadLength)
+			{
+				// the array is too small to store the payload data,
+				// so we allocate a new one
+				retArray = new byte[payloadLength];
+				retOffset = 0;
+			}
+			else
+			{
+				retArray = data;
+				retOffset = offset;
+			}
+			proxStream.ReadBytes(retArray, retOffset, payloadLength);
+			needToLoadPayload = false;
+			return retArray;
+		}
+		
+		public bool IsPayloadAvailable()
+		{
+			return needToLoadPayload && payloadLength > 0;
+		}
 	}
 }
\ No newline at end of file

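The encoding behind ReadDeltaPosition() above: for a payload-enabled field, each position delta is written shifted left by one bit, and a set low bit signals that a new payload length VInt follows inline. A self-contained worked example with made-up VInt values:

using System;

class PayloadDeltaDemo
{
    static void Main()
    {
        // Raw VInts as they might appear in the prox stream for one term
        // in a payload-enabled field (hypothetical values):
        int[] raw = { 7, 3, 4 };
        int payloadLength = 0, position = 0, i = 0;
        while (i < raw.Length)
        {
            int code = raw[i++];
            if ((code & 1) != 0)
                payloadLength = raw[i++];          // LSB set: new length follows
            position += (int) ((uint) code >> 1);  // unsigned shift, as in the patch
            Console.WriteLine("position=" + position + ", payloadLength=" + payloadLength);
        }
        // Output:
        //   position=3, payloadLength=3
        //   position=5, payloadLength=3
    }
}
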
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermVector.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentTermVector.cs?rev=671404&r1=671403&r2=671404&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermVector.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentTermVector.cs Tue Jun 24 19:52:22 2008
@@ -63,7 +63,7 @@
 		
 		public virtual int Size()
 		{
-			return terms == null?0:terms.Length;
+			return terms == null ? 0 : terms.Length;
 		}
 		
 		public virtual System.String[] GetTerms()

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SerialMergeScheduler.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SerialMergeScheduler.cs?rev=671404&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SerialMergeScheduler.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SerialMergeScheduler.cs Tue Jun 24 19:52:22 2008
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+	
+	/// <summary>A {@link MergeScheduler} that simply does each merge
+	/// sequentially, using the current thread. 
+	/// </summary>
+	public class SerialMergeScheduler : MergeScheduler
+	{
+		
+		/// <summary>Just do the merges in sequence. We do this
+		/// "synchronized" so that even if the application is using
+		/// multiple threads, only one merge may run at a time. 
+		/// </summary>
+		public override void  Merge(IndexWriter writer)
+		{
+			lock (this)
+			{
+				
+				while (true)
+				{
+					MergePolicy.OneMerge merge = writer.GetNextMerge();
+					if (merge == null)
+						break;
+					writer.Merge(merge);
+				}
+			}
+		}
+		
+		public override void  Close()
+		{
+		}
+	}
+}
\ No newline at end of file

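Hooking the new scheduler up is a one-liner. A hedged usage sketch (SetMergeScheduler is assumed here from the matching Java 2.3 API, and the index path is made up):

using Lucene.Net.Analysis.Standard;
using Lucene.Net.Index;
using Lucene.Net.Store;

class SerialMergeDemo
{
    static void Main()
    {
        // Run every merge inline on the indexing thread instead of
        // handing it off to a background merge thread.
        Directory dir = FSDirectory.GetDirectory("/tmp/demo-index");
        IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true);
        writer.SetMergeScheduler(new SerialMergeScheduler());
        // ... writer.AddDocument(...) calls ...
        writer.Close();
    }
}
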
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SnapshotDeletionPolicy.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SnapshotDeletionPolicy.cs?rev=671404&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SnapshotDeletionPolicy.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SnapshotDeletionPolicy.cs Tue Jun 24 19:52:22 2008
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+	
+	/// <summary>An {@link IndexDeletionPolicy} that wraps around any other
+	/// {@link IndexDeletionPolicy} and adds the ability to hold and
+	/// later release a single "snapshot" of an index.  While
+	/// the snapshot is held, the {@link IndexWriter} will not
+	/// remove any files associated with it even if the index is
+	/// otherwise being actively, arbitrarily changed.  Because
+	/// we wrap another arbitrary {@link IndexDeletionPolicy}, this
+	/// gives you the freedom to continue using whatever {@link
+	/// IndexDeletionPolicy} you would normally want to use with your
+	/// index. 
+	/// </summary>
+	
+	public class SnapshotDeletionPolicy : IndexDeletionPolicy
+	{
+		
+		private IndexCommitPoint lastCommit;
+		private IndexDeletionPolicy primary;
+		private IndexCommitPoint snapshot;
+		
+		public SnapshotDeletionPolicy(IndexDeletionPolicy primary)
+		{
+			this.primary = primary;
+		}
+		
+		public virtual void  OnInit(System.Collections.IList commits)
+		{
+			lock (this)
+			{
+				primary.OnInit(WrapCommits(commits));
+				lastCommit = (IndexCommitPoint) commits[commits.Count - 1];
+			}
+		}
+		
+		public virtual void  OnCommit(System.Collections.IList commits)
+		{
+			lock (this)
+			{
+				primary.OnCommit(WrapCommits(commits));
+				lastCommit = (IndexCommitPoint) commits[commits.Count - 1];
+			}
+		}
+		
+		/// <summary>Take a snapshot of the most recent commit to the
+		/// index.  You must call release() to free this snapshot.
+		/// Note that while the snapshot is held, the files it
+		/// references will not be deleted, which will consume
+		/// additional disk space in your index. If you take a
+		/// snapshot at a particularly bad time (say just before
+		/// you call optimize()) then in the worst case this could
+		/// consume an extra 1X of your total index size, until
+		/// you release the snapshot. 
+		/// </summary>
+		public virtual IndexCommitPoint Snapshot()
+		{
+			lock (this)
+			{
+				if (snapshot == null)
+					snapshot = lastCommit;
+				else
+					throw new System.SystemException("snapshot is already set; please call release() first");
+				return snapshot;
+			}
+		}
+		
+		/// <summary>Release the currently held snapshot. </summary>
+		public virtual void  Release()
+		{
+			lock (this)
+			{
+				if (snapshot != null)
+					snapshot = null;
+				else
+					throw new System.SystemException("snapshot was not set; please call snapshot() first");
+			}
+		}
+		
+		private class MyCommitPoint : IndexCommitPoint
+		{
+			private void  InitBlock(SnapshotDeletionPolicy enclosingInstance)
+			{
+				this.enclosingInstance = enclosingInstance;
+			}
+			private SnapshotDeletionPolicy enclosingInstance;
+			public SnapshotDeletionPolicy Enclosing_Instance
+			{
+				get
+				{
+					return enclosingInstance;
+				}
+				
+			}
+			internal IndexCommitPoint cp;
+			internal MyCommitPoint(SnapshotDeletionPolicy enclosingInstance, IndexCommitPoint cp)
+			{
+				InitBlock(enclosingInstance);
+				this.cp = cp;
+			}
+			public virtual System.String GetSegmentsFileName()
+			{
+				return cp.GetSegmentsFileName();
+			}
+			public virtual System.Collections.ICollection GetFileNames()
+			{
+				return cp.GetFileNames();
+			}
+			public virtual void  Delete()
+			{
+				lock (Enclosing_Instance)
+				{
+					// Suppress the delete request if this commit point is
+					// our current snapshot.
+					if (Enclosing_Instance.snapshot != cp)
+						cp.Delete();
+				}
+			}
+		}
+		
+		private System.Collections.IList WrapCommits(System.Collections.IList commits)
+		{
+			int count = commits.Count;
+			System.Collections.IList myCommits = new System.Collections.ArrayList(count);
+			for (int i = 0; i < count; i++)
+				myCommits.Add(new MyCommitPoint(this, (IndexCommitPoint) commits[i]));
+			return myCommits;
+		}
+	}
+}
\ No newline at end of file

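A minimal hot-backup sketch, assuming an IndexWriter constructor overload that accepts an IndexDeletionPolicy (the exact signature varies by release); dir, autoCommit, and analyzer are placeholders:

    SnapshotDeletionPolicy dp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
    IndexWriter writer = new IndexWriter(dir, autoCommit, analyzer, dp);
    try
    {
        IndexCommitPoint cp = dp.Snapshot();
        foreach (System.String fileName in cp.GetFileNames())
        {
            // copy fileName out of the index directory; none of these
            // files will be deleted until Release() is called
        }
    }
    finally
    {
        dp.Release();
    }
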
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SortedTermVectorMapper.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SortedTermVectorMapper.cs?rev=671404&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SortedTermVectorMapper.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SortedTermVectorMapper.cs Tue Jun 24 19:52:22 2008
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+	
+	/// <summary> Store a sorted collection of {@link Lucene.Net.Index.TermVectorEntry}s.  Collects all term information
+	/// into a single SortedSet.
+	/// <br/>
+	/// NOTE: This Mapper ignores all Field information for the Document.  This means that if you are using offset/positions you will not
+	/// know what Fields they correlate with.
+	/// <br/>
+	/// This is not thread-safe.
+	/// </summary>
+	public class SortedTermVectorMapper : TermVectorMapper
+	{
+		
+		
+		private System.Collections.Generic.SortedDictionary<Object, Object> currentSet;
+		private System.Collections.IDictionary termToTVE = new System.Collections.Hashtable();
+		private bool storeOffsets;
+		private bool storePositions;
+		/// <summary> Stand-in name for the field in {@link TermVectorEntry}.</summary>
+		public const System.String ALL = "_ALL_";
+		
+		/// <summary> </summary>
+		/// <param name="comparator">A Comparator for sorting {@link TermVectorEntry}s
+		/// </param>
+		public SortedTermVectorMapper(System.Collections.Generic.IComparer<Object> comparator) : this(false, false, comparator)
+		{
+		}
+		
+		
+		public SortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, System.Collections.Generic.IComparer<Object> comparator) : base(ignoringPositions, ignoringOffsets)
+		{
+			currentSet = new System.Collections.Generic.SortedDictionary<Object,Object>(comparator);
+		}
+		
+		/// <summary> </summary>
+		/// <param name="term">The term to map
+		/// </param>
+		/// <param name="frequency">The frequency of the term
+		/// </param>
+		/// <param name="offsets">Offset information, may be null
+		/// </param>
+		/// <param name="positions">Position information, may be null
+		/// </param>
+		//We need to combine any previous mentions of the term
+		public override void  Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
+		{
+			TermVectorEntry entry = (TermVectorEntry) termToTVE[term];
+			if (entry == null)
+			{
+				entry = new TermVectorEntry(ALL, term, frequency, storeOffsets == true ? offsets : null, storePositions == true ? positions : null);
+				termToTVE[term] = entry;
+				currentSet.Add(entry, entry);
+			}
+			else
+			{
+				entry.SetFrequency(entry.GetFrequency() + frequency);
+				if (storeOffsets)
+				{
+					TermVectorOffsetInfo[] existingOffsets = entry.GetOffsets();
+					//A few diff. cases here:  offsets is null, existing offsets is null, both are null, same for positions
+					if (existingOffsets != null && offsets != null && offsets.Length > 0)
+					{
+						//copy over the existing offsets
+						TermVectorOffsetInfo[] newOffsets = new TermVectorOffsetInfo[existingOffsets.Length + offsets.Length];
+						Array.Copy(existingOffsets, 0, newOffsets, 0, existingOffsets.Length);
+						Array.Copy(offsets, 0, newOffsets, existingOffsets.Length, offsets.Length);
+						entry.SetOffsets(newOffsets);
+					}
+					else if (existingOffsets == null && offsets != null && offsets.Length > 0)
+					{
+						entry.SetOffsets(offsets);
+					}
+					//else leave it alone
+				}
+				if (storePositions)
+				{
+					int[] existingPositions = entry.GetPositions();
+					if (existingPositions != null && positions != null && positions.Length > 0)
+					{
+						int[] newPositions = new int[existingPositions.Length + positions.Length];
+						Array.Copy(existingPositions, 0, newPositions, 0, existingPositions.Length);
+						Array.Copy(positions, 0, newPositions, existingPositions.Length, positions.Length);
+						entry.SetPositions(newPositions);
+					}
+					else if (existingPositions == null && positions != null && positions.Length > 0)
+					{
+						entry.SetPositions(positions);
+					}
+				}
+			}
+		}
+		
+		public override void  SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions)
+		{
+			
+			this.storeOffsets = storeOffsets;
+			this.storePositions = storePositions;
+		}
+		
+		/// <summary> The TermVectorEntrySet.  A SortedSet of {@link TermVectorEntry} objects.  Sort is by the comparator passed into the constructor.
+		/// <br/>
+		/// This set will be empty until after the mapping process takes place.
+		/// 
+		/// </summary>
+		/// <returns> The SortedSet of {@link TermVectorEntry}.
+		/// </returns>
+		public virtual System.Collections.Generic.SortedDictionary<Object, Object> GetTermVectorEntrySet()
+		{
+			return currentSet;
+		}
+	}
+}
\ No newline at end of file

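A usage sketch under two assumptions: TermVectorEntry exposes GetTerm() alongside the GetFrequency() used above, and IndexReader offers the TermVectorMapper overload of GetTermFreqVector introduced with this mapper family; reader, docId, and the field name are placeholders. Because the mapper stores entries as SortedDictionary keys, the comparer must tie-break so distinct terms never compare equal:

    class FreqThenTermComparer : System.Collections.Generic.IComparer<object>
    {
        public int Compare(object x, object y)
        {
            TermVectorEntry a = (TermVectorEntry) x;
            TermVectorEntry b = (TermVectorEntry) y;
            int d = b.GetFrequency() - a.GetFrequency(); // highest frequency first
            // tie-break on term text so no two distinct terms collide
            return d != 0 ? d : System.String.CompareOrdinal(a.GetTerm(), b.GetTerm());
        }
    }

    SortedTermVectorMapper mapper = new SortedTermVectorMapper(new FreqThenTermComparer());
    reader.GetTermFreqVector(docId, "body", mapper);
    foreach (TermVectorEntry entry in mapper.GetTermVectorEntrySet().Keys)
    {
        System.Console.WriteLine(entry.GetTerm() + ": " + entry.GetFrequency());
    }
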
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/StaleReaderException.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/StaleReaderException.cs?rev=671404&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/StaleReaderException.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/StaleReaderException.cs Tue Jun 24 19:52:22 2008
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+	
+	/// <summary> This exception is thrown when an {@link IndexReader}
+	/// tries to make changes to the index (via {@link
+	/// IndexReader#deleteDocument}, {@link
+	/// IndexReader#undeleteAll} or {@link IndexReader#setNorm})
+	/// but changes have already been committed to the index
+	/// since this reader was instantiated.  When this happens
+	/// you must open a new reader on the current index to make
+	/// the changes.
+	/// </summary>
+	[Serializable]
+	public class StaleReaderException : System.IO.IOException
+	{
+		public StaleReaderException(System.String message) : base(message)
+		{
+		}
+	}
+}
\ No newline at end of file

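A minimal sketch of the recovery path the summary prescribes; reader, dir, and docId are placeholders:

    try
    {
        reader.DeleteDocument(docId);
    }
    catch (StaleReaderException)
    {
        // Another commit landed after this reader was opened;
        // reopen against the current index and retry the change.
        reader.Close();
        reader = IndexReader.Open(dir);
        reader.DeleteDocument(docId);
    }
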
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/Term.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/Term.cs?rev=671404&r1=671403&r2=671404&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/Term.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/Term.cs Tue Jun 24 19:52:22 2008
@@ -33,7 +33,10 @@
 		internal System.String field;
 		public System.String text;
 		
-		/// <summary>Constructs a Term with the given field and text. </summary>
+		/// <summary>Constructs a Term with the given field and text.
+		/// <p>Note that a null field or null text value results in undefined
+		/// behavior for most Lucene APIs that accept a Term parameter. 
+		/// </summary>
 		public Term(System.String fld, System.String txt) : this(fld, txt, true)
 		{
 		}
@@ -77,8 +80,12 @@
 		/// </summary>
 		public  override bool Equals(System.Object o)
 		{
+			if (o == this)
+				return true;
 			if (o == null)
 				return false;
+			if (!(o is Term))
+				return false;
 			Term other = (Term) o;
 			return field == other.field && text.Equals(other.text);
 		}

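The tightened Equals above short-circuits on reference equality and rejects non-Term arguments instead of failing on the cast. Two hypothetical assertions illustrating the contract:

    Term a = new Term("body", "lucene");              // field and text are placeholders
    bool same = a.Equals(new Term("body", "lucene")); // true: field and text both match
    bool other = a.Equals("lucene");                  // false now, rather than an InvalidCastException
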
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermBuffer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermBuffer.cs?rev=671404&r1=671403&r2=671404&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermBuffer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermBuffer.cs Tue Jun 24 19:52:22 2008
@@ -16,6 +16,7 @@
  */
 
 using System;
+
 using IndexInput = Lucene.Net.Store.IndexInput;
 
 namespace Lucene.Net.Index

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermFreqVector.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermFreqVector.cs?rev=671404&r1=671403&r2=671404&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermFreqVector.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermFreqVector.cs Tue Jun 24 19:52:22 2008
@@ -21,12 +21,14 @@
 {
 	
 	/// <summary>Provides access to stored term vector of 
-	/// a document field.
+	/// a document field.  The vector consists of the name of the field, an array of the terms that occur in the field of the
+	/// {@link Lucene.Net.Documents.Document} and a parallel array of frequencies.  Thus, getTermFrequencies()[5] corresponds with the
+	/// frequency of getTerms()[5], assuming there are at least 6 terms in the Document.
 	/// </summary>
 	public interface TermFreqVector
 	{
-		/// <summary> </summary>
-		/// <returns> The field this vector is associated with.
+		/// <summary> The {@link Lucene.Net.Documents.Fieldable} name. </summary>
+		/// <returns> The name of the field this vector is associated with.
 		/// 
 		/// </returns>
 		System.String GetField();

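A short sketch of walking the parallel arrays the new summary describes, assuming the GetTermFrequencies() accessor declared elsewhere in this interface; reader, docId, and the field name are placeholders:

    TermFreqVector tfv = reader.GetTermFreqVector(docId, "body");
    if (tfv != null) // null when no term vector was stored for the field
    {
        System.String[] terms = tfv.GetTerms();
        int[] freqs = tfv.GetTermFrequencies();
        for (int i = 0; i < terms.Length; i++)
        {
            System.Console.WriteLine(terms[i] + " occurs " + freqs[i] + " time(s)");
        }
    }
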
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermInfosReader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermInfosReader.cs?rev=671404&r1=671403&r2=671404&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermInfosReader.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermInfosReader.cs Tue Jun 24 19:52:22 2008
@@ -16,6 +16,8 @@
  */
 
 using System;
+
+using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput;
 using Directory = Lucene.Net.Store.Directory;
 
 namespace Lucene.Net.Index
@@ -42,16 +44,43 @@
 		
 		private SegmentTermEnum indexEnum;
 		
-		public TermInfosReader(Directory dir, System.String seg, FieldInfos fis)
+		private int indexDivisor = 1;
+		private int totalIndexInterval;
+		
+		internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis) : this(dir, seg, fis, BufferedIndexInput.BUFFER_SIZE)
 		{
-			directory = dir;
-			segment = seg;
-			fieldInfos = fis;
-			
-			origEnum = new SegmentTermEnum(directory.OpenInput(segment + ".tis"), fieldInfos, false);
-			size = origEnum.size;
+		}
+		
+		internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis, int readBufferSize)
+		{
+			bool success = false;
 			
-			indexEnum = new SegmentTermEnum(directory.OpenInput(segment + ".tii"), fieldInfos, true);
+			try
+			{
+				directory = dir;
+				segment = seg;
+				fieldInfos = fis;
+				
+				origEnum = new SegmentTermEnum(directory.OpenInput(segment + ".tis", readBufferSize), fieldInfos, false);
+				size = origEnum.size;
+				totalIndexInterval = origEnum.indexInterval;
+				
+				indexEnum = new SegmentTermEnum(directory.OpenInput(segment + ".tii", readBufferSize), fieldInfos, true);
+				
+				success = true;
+			}
+			finally
+			{
+				// With lock-less commits, it's entirely possible (and
+				// fine) to hit a FileNotFound exception above. In
+				// this case, we want to explicitly close any subset
+				// of things that were opened so that we don't have to
+				// wait for a GC to do so.
+				if (!success)
+				{
+					Close();
+				}
+			}
 		}
 		
 		public int GetSkipInterval()
@@ -59,6 +88,49 @@
 			return origEnum.skipInterval;
 		}
 		
+		public int GetMaxSkipLevels()
+		{
+			return origEnum.maxSkipLevels;
+		}
+		
+		/// <summary> <p>Sets the indexDivisor, which subsamples the number
+		/// of indexed terms loaded into memory.  This has a
+		/// similar effect as {@link
+		/// IndexWriter#setTermIndexInterval} except that setting
+		/// must be done at indexing time while this setting can be
+		/// set per reader.  When set to N, then one in every
+		/// N*termIndexInterval terms in the index is loaded into
+		/// memory.  By setting this to a value > 1 you can reduce
+		/// memory usage, at the expense of higher latency when
+		/// loading a TermInfo.  The default value is 1.</p>
+		/// 
+		/// <b>NOTE:</b> you must call this before the term
+		/// index is loaded.  If the index is already loaded,
+		/// an IllegalStateException is thrown.
+		/// 
+		/// @throws IllegalStateException if the term index has
+		/// already been loaded into memory.
+		/// </summary>
+		public void  SetIndexDivisor(int indexDivisor)
+		{
+			if (indexDivisor < 1)
+				throw new System.ArgumentException("indexDivisor must be > 0: got " + indexDivisor);
+			
+			if (indexTerms != null)
+				throw new System.SystemException("index terms are already loaded");
+			
+			this.indexDivisor = indexDivisor;
+			totalIndexInterval = origEnum.indexInterval * indexDivisor;
+		}
+		
+		/// <summary>Returns the indexDivisor.</summary>
+		/// <seealso cref="setIndexDivisor">
+		/// </seealso>
+		public int GetIndexDivisor()
+		{
+			return indexDivisor;
+		}
+		
 		public void  Close()
 		{
 			if (origEnum != null)
@@ -93,7 +165,7 @@
 					return ; // do nothing
 				try
 				{
-					int indexSize = (int) indexEnum.size; // otherwise read index
+					int indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor; // otherwise read index
 					
 					indexTerms = new Term[indexSize];
 					indexInfos = new TermInfo[indexSize];
@@ -104,6 +176,10 @@
 						indexTerms[i] = indexEnum.Term();
 						indexInfos[i] = indexEnum.TermInfo();
 						indexPointers[i] = indexEnum.indexPointer;
+						
+						for (int j = 1; j < indexDivisor; j++)
+							if (!indexEnum.Next())
+								break;
 					}
 				}
 				finally
@@ -136,7 +212,7 @@
 		
 		private void  SeekEnum(int indexOffset)
 		{
-			GetEnum().Seek(indexPointers[indexOffset], (indexOffset * GetEnum().indexInterval) - 1, indexTerms[indexOffset], indexInfos[indexOffset]);
+			GetEnum().Seek(indexPointers[indexOffset], (indexOffset * totalIndexInterval) - 1, indexTerms[indexOffset], indexInfos[indexOffset]);
 		}
 		
 		/// <summary>Returns the TermInfo for a Term in the set, or null. </summary>
@@ -151,7 +227,7 @@
 			SegmentTermEnum enumerator = GetEnum();
 			if (enumerator.Term() != null && ((enumerator.Prev() != null && term.CompareTo(enumerator.Prev()) > 0) || term.CompareTo(enumerator.Term()) >= 0))
 			{
-				int enumOffset = (int) (enumerator.position / enumerator.indexInterval) + 1;
+				int enumOffset = (int) (enumerator.position / totalIndexInterval) + 1;
 				if (indexTerms.Length == enumOffset || term.CompareTo(indexTerms[enumOffset]) < 0)
 					return ScanEnum(term); // no need to seek
 			}
@@ -179,10 +255,10 @@
 				return null;
 			
 			SegmentTermEnum enumerator = GetEnum();
-			if (enumerator != null && enumerator.Term() != null && position >= enumerator.position && position < (enumerator.position + enumerator.indexInterval))
+			if (enumerator != null && enumerator.Term() != null && position >= enumerator.position && position < (enumerator.position + totalIndexInterval))
 				return ScanEnum(position); // can avoid seek
 			
-			SeekEnum(position / enumerator.indexInterval); // must seek
+			SeekEnum(position / totalIndexInterval); // must seek
 			return ScanEnum(position);
 		}
 		

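The memory trade-off behind the divisor is easiest to see with concrete numbers. A sketch mirroring the indexSize arithmetic in the hunk above; the 50M term count is an assumed figure and 128 is the usual write-time indexInterval default:

    long termCount = 50000000L;                       // unique terms in the segment (assumed)
    int indexInterval = 128;                          // fixed when the index was written
    long indexTermCount = termCount / indexInterval;  // entries in the .tii file: 390,625
    int indexDivisor = 4;                             // set per reader, before first use
    long loaded = 1 + (indexTermCount - 1) / indexDivisor; // 97,657 Term/TermInfo pairs in RAM
    // The saving costs lookup latency: a lookup may now scan up to
    // indexDivisor * indexInterval terms past the seek point.
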
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermInfosWriter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermInfosWriter.cs?rev=671404&r1=671403&r2=671404&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermInfosWriter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermInfosWriter.cs Tue Jun 24 19:52:22 2008
@@ -16,9 +16,9 @@
  */
 
 using System;
-using IndexOutput = Lucene.Net.Store.IndexOutput;
+
 using Directory = Lucene.Net.Store.Directory;
-using StringHelper = Lucene.Net.Util.StringHelper;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
 
 namespace Lucene.Net.Index
 {
@@ -30,13 +30,12 @@
 	public sealed class TermInfosWriter
 	{
 		/// <summary>The file format version, a negative number. </summary>
-		public const int FORMAT = - 2;
+		public const int FORMAT = - 3;
 		
 		private FieldInfos fieldInfos;
 		private IndexOutput output;
-		private Term lastTerm = new Term("", "");
 		private TermInfo lastTi = new TermInfo();
-		private long size = 0;
+		private long size;
 		
 		// TODO: the default values for these two parameters should be settable from
 		// IndexWriter.  However, once that's done, folks will start setting them to
@@ -61,10 +60,20 @@
 		/// </summary>
 		internal int skipInterval = 16;
 		
-		private long lastIndexPointer = 0;
-		private bool isIndex = false;
+		/// <summary>Expert: The maximum number of skip levels. Smaller values result in 
+		/// slightly smaller indexes, but slower skipping in big posting lists.
+		/// </summary>
+		internal int maxSkipLevels = 10;
+		
+		private long lastIndexPointer;
+		private bool isIndex;
+		private char[] lastTermText = new char[10];
+		private int lastTermTextLength;
+		private int lastFieldNumber = - 1;
 		
-		private TermInfosWriter other = null;
+		private char[] termTextBuffer = new char[10];
+		
+		private TermInfosWriter other;
 		
 		public TermInfosWriter(Directory directory, System.String segment, FieldInfos fis, int interval)
 		{
@@ -88,27 +97,86 @@
 			output.WriteLong(0); // leave space for size
 			output.WriteInt(indexInterval); // write indexInterval
 			output.WriteInt(skipInterval); // write skipInterval
+			output.WriteInt(maxSkipLevels); // write maxSkipLevels
+		}
+		
+		internal void  Add(Term term, TermInfo ti)
+		{
+			
+			int length = term.text.Length;
+			if (termTextBuffer.Length < length)
+			{
+				termTextBuffer = new char[(int) (length * 1.25)];
+			}
+
+            int i = 0;
+            System.Collections.Generic.IEnumerator<char> chars = term.text.GetEnumerator();
+            while (chars.MoveNext())
+            {
+                termTextBuffer[i++] = (char)chars.Current;
+            }
+			
+			Add(fieldInfos.FieldNumber(term.field), termTextBuffer, 0, length, ti);
+		}
+		
+		// Currently used only by assert statement
+		private int CompareToLastTerm(int fieldNumber, char[] termText, int start, int length)
+		{
+			int pos = 0;
+			
+			if (lastFieldNumber != fieldNumber)
+			{
+				int cmp = String.CompareOrdinal(fieldInfos.FieldName(lastFieldNumber), fieldInfos.FieldName(fieldNumber));
+				// If there is a field named "" (empty string) then we
+				// will get 0 on this comparison, yet, it's "OK".  But
+				// it's not OK if two different field numbers map to
+				// the same name.
+				if (cmp != 0 || lastFieldNumber != - 1)
+					return cmp;
+			}
+			
+			while (pos < length && pos < lastTermTextLength)
+			{
+				char c1 = lastTermText[pos];
+				char c2 = termText[pos + start];
+				if (c1 < c2)
+					return - 1;
+				else if (c1 > c2)
+					return 1;
+				pos++;
+			}
+			
+			if (pos < lastTermTextLength)
+			// Last term was longer
+				return 1;
+			else if (pos < length)
+			// Last term was shorter
+				return - 1;
+			else
+				return 0;
 		}
 		
-		/// <summary>Adds a new <Term, TermInfo> pair to the set.
+		/// <summary>Adds a new <<fieldNumber, termText>, TermInfo> pair to the set.
 		/// Term must be lexicographically greater than all previous Terms added.
 		/// TermInfo pointers must be positive and greater than all previous.
 		/// </summary>
-		public void  Add(Term term, TermInfo ti)
+		internal void  Add(int fieldNumber, char[] termText, int termTextStart, int termTextLength, TermInfo ti)
 		{
-			if (!isIndex && term.CompareTo(lastTerm) <= 0)
-			{
-				throw new System.IO.IOException("term out of order (\"" + term + "\".compareTo(\"" + lastTerm + "\") <= 0)");
-			}
-			if (ti.freqPointer < lastTi.freqPointer)
-				throw new System.IO.IOException("freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")");
-			if (ti.proxPointer < lastTi.proxPointer)
-				throw new System.IO.IOException("proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")");
+			
+			System.Diagnostics.Debug.Assert(CompareToLastTerm(fieldNumber, termText, termTextStart, termTextLength) < 0 ||
+				(isIndex && termTextLength == 0 && lastTermTextLength == 0),
+				"Terms are out of order: field=" + fieldInfos.FieldName(fieldNumber) +  "(number " + fieldNumber + ")" + 
+				" lastField=" + fieldInfos.FieldName(lastFieldNumber) + " (number " + lastFieldNumber + ")" + 
+				" text=" + new String(termText, termTextStart, termTextLength) + " lastText=" + new String(lastTermText, 0, lastTermTextLength));
+			
+			System.Diagnostics.Debug.Assert(ti.freqPointer >= lastTi.freqPointer, "freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")");
+			System.Diagnostics.Debug.Assert(ti.proxPointer >= lastTi.proxPointer, "proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")");
 			
 			if (!isIndex && size % indexInterval == 0)
-				other.Add(lastTerm, lastTi); // add an index term
+				other.Add(lastFieldNumber, lastTermText, 0, lastTermTextLength, lastTi); // add an index term
+			
+			WriteTerm(fieldNumber, termText, termTextStart, termTextLength); // write term
 			
-			WriteTerm(term); // write term
 			output.WriteVInt(ti.docFreq); // write doc freq
 			output.WriteVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
 			output.WriteVLong(ti.proxPointer - lastTi.proxPointer);
@@ -124,28 +192,41 @@
 				lastIndexPointer = other.output.GetFilePointer(); // write pointer
 			}
 			
+			if (lastTermText.Length < termTextLength)
+			{
+				lastTermText = new char[(int) (termTextLength * 1.25)];
+			}
+			Array.Copy(termText, termTextStart, lastTermText, 0, termTextLength);
+			lastTermTextLength = termTextLength;
+			lastFieldNumber = fieldNumber;
+			
 			lastTi.Set(ti);
 			size++;
 		}
 		
-		private void  WriteTerm(Term term)
+		private void  WriteTerm(int fieldNumber, char[] termText, int termTextStart, int termTextLength)
 		{
-			int start = StringHelper.StringDifference(lastTerm.text, term.text);
-			int length = term.text.Length - start;
 			
-			output.WriteVInt(start); // write shared prefix length
-			output.WriteVInt(length); // write delta length
-			output.WriteChars(term.text, start, length); // write delta chars
+			// Compute prefix in common with last term:
+			int start = 0;
+			int limit = termTextLength < lastTermTextLength ? termTextLength : lastTermTextLength;
+			while (start < limit)
+			{
+				if (termText[termTextStart + start] != lastTermText[start])
+					break;
+				start++;
+			}
 			
-			output.WriteVInt(fieldInfos.FieldNumber(term.field)); // write field num
+			int length = termTextLength - start;
 			
-			lastTerm = term;
+			output.WriteVInt(start); // write shared prefix length
+			output.WriteVInt(length); // write delta length
+			output.WriteChars(termText, start + termTextStart, length); // write delta chars
+			output.WriteVInt(fieldNumber); // write field num
 		}
 		
-		
-		
 		/// <summary>Called to complete TermInfos creation. </summary>
-		public void  Close()
+		internal void  Close()
 		{
 			output.Seek(4); // write size after format
 			output.WriteLong(size);

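The rewritten WriteTerm amounts to front coding. A standalone sketch of the same on-disk encoding over plain strings, using only the IndexOutput calls that appear above; for last = "lucene" and current = "lucid" it writes VInt(3), VInt(2), the chars "id", then the field number:

    static void WriteDelta(Lucene.Net.Store.IndexOutput output, System.String last, System.String current, int fieldNumber)
    {
        // length of the prefix shared with the previously written term
        int start = 0;
        int limit = System.Math.Min(last.Length, current.Length);
        while (start < limit && last[start] == current[start])
            start++;
        output.WriteVInt(start);                                    // shared prefix length
        output.WriteVInt(current.Length - start);                   // delta length
        output.WriteChars(current, start, current.Length - start);  // delta chars
        output.WriteVInt(fieldNumber);                              // field number
    }
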

