Reply-To: lucene-net-dev@incubator.apache.org
Subject: svn commit: r671404 [7/10] - /incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/
Date: Wed, 25 Jun 2008 02:52:24 -0000
To: lucene-net-commits@incubator.apache.org
From: aroush@apache.org
Message-Id: <20080625025227.2CE122388AB8@eris.apache.org>

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/MultiSegmentReader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/MultiSegmentReader.cs?rev=671404&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/MultiSegmentReader.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/MultiSegmentReader.cs Tue Jun 24 19:52:22 2008
@@ -0,0 +1,802 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using Document = Lucene.Net.Documents.Document;
+using FieldSelector = Lucene.Net.Documents.FieldSelector;
+using Directory = Lucene.Net.Store.Directory;
+
+namespace Lucene.Net.Index
+{
+
+ /// An IndexReader which reads indexes with multiple segments.
+ class MultiSegmentReader : DirectoryIndexReader + { + protected internal SegmentReader[] subReaders; + private int[] starts; // 1st docno for each segment + private System.Collections.Hashtable normsCache = System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable()); + private int maxDoc = 0; + private int numDocs = - 1; + private bool hasDeletions = false; + + /// Construct reading the named set of readers. + internal MultiSegmentReader(Directory directory, SegmentInfos sis, bool closeDirectory):base(directory, sis, closeDirectory) + { + // To reduce the chance of hitting FileNotFound + // (and having to retry), we open segments in + // reverse because IndexWriter merges & deletes + // the newest segments first. + + SegmentReader[] readers = new SegmentReader[sis.Count]; + for (int i = sis.Count - 1; i >= 0; i--) + { + try + { + readers[i] = SegmentReader.Get(sis.Info(i)); + } + catch (System.IO.IOException e) + { + // Close all readers we had opened: + for (i++; i < sis.Count; i++) + { + try + { + readers[i].Close(); + } + catch (System.IO.IOException ignore) + { + // keep going - we want to clean up as much as possible + } + } + throw e; + } + } + + Initialize(readers); + } + + /// This contructor is only used for {@link #Reopen()} + internal MultiSegmentReader(Directory directory, SegmentInfos infos, bool closeDirectory, SegmentReader[] oldReaders, int[] oldStarts, System.Collections.IDictionary oldNormsCache):base(directory, infos, closeDirectory) + { + + // we put the old SegmentReaders in a map, that allows us + // to lookup a reader using its segment name + System.Collections.IDictionary segmentReaders = new System.Collections.Hashtable(); + + if (oldReaders != null) + { + // create a Map SegmentName->SegmentReader + for (int i = 0; i < oldReaders.Length; i++) + { + segmentReaders[oldReaders[i].GetSegmentName()] = (System.Int32) i; + } + } + + SegmentReader[] newReaders = new SegmentReader[infos.Count]; + + // remember which readers are shared between the old and the re-opened + // MultiSegmentReader - we have to incRef those readers + bool[] readerShared = new bool[infos.Count]; + + for (int i = infos.Count - 1; i >= 0; i--) + { + // find SegmentReader for this segment + Object oldReaderIndex = segmentReaders[infos.Info(i).name]; + if (oldReaderIndex == null) + { + // this is a new segment, no old SegmentReader can be reused + newReaders[i] = null; + } + else + { + // there is an old reader for this segment - we'll try to reopen it + newReaders[i] = oldReaders[(System.Int32) oldReaderIndex]; + } + + bool success = false; + try + { + SegmentReader newReader; + if (newReaders[i] == null || infos.Info(i).GetUseCompoundFile() != newReaders[i].GetSegmentInfo().GetUseCompoundFile()) + { + // this is a new reader; in case we hit an exception we can close it safely + newReader = SegmentReader.Get(infos.Info(i)); + } + else + { + newReader = (SegmentReader) newReaders[i].ReopenSegment(infos.Info(i)); + } + if (newReader == newReaders[i]) + { + // this reader will be shared between the old and the new one, + // so we must incRef it + readerShared[i] = true; + newReader.IncRef(); + } + else + { + readerShared[i] = false; + newReaders[i] = newReader; + } + success = true; + } + finally + { + if (!success) + { + for (i++; i < infos.Count; i++) + { + if (newReaders[i] != null) + { + try + { + if (!readerShared[i]) + { + // this is a new subReader that is not used by the old one, + // we can close it + newReaders[i].Close(); + } + else + { + // this subReader is also used by 
the old reader, so instead + // closing we must decRef it + newReaders[i].DecRef(); + } + } + catch (System.IO.IOException ignore) + { + // keep going - we want to clean up as much as possible + } + } + } + } + } + } + + // initialize the readers to calculate maxDoc before we try to reuse the old normsCache + Initialize(newReaders); + + // try to copy unchanged norms from the old normsCache to the new one + if (oldNormsCache != null) + { + System.Collections.IEnumerator it = oldNormsCache.Keys.GetEnumerator(); + while (it.MoveNext()) + { + System.String field = (System.String) it.Current; + if (!HasNorms(field)) + { + continue; + } + + byte[] oldBytes = (byte[]) oldNormsCache[field]; + + byte[] bytes = new byte[MaxDoc()]; + + for (int i = 0; i < subReaders.Length; i++) + { + Object oldReaderIndex = segmentReaders[subReaders[i].GetSegmentName()]; + + // this SegmentReader was not re-opened, we can copy all of its norms + if (oldReaderIndex != null && (oldReaders[(System.Int32) oldReaderIndex] == subReaders[i] || oldReaders[(System.Int32) oldReaderIndex].norms[field] == subReaders[i].norms[field])) + { + // we don't have to synchronize here: either this constructor is called from a SegmentReader, + // in which case no old norms cache is present, or it is called from MultiReader.reopen(), + // which is synchronized + Array.Copy(oldBytes, oldStarts[(System.Int32) oldReaderIndex], bytes, starts[i], starts[i + 1] - starts[i]); + } + else + { + subReaders[i].Norms(field, bytes, starts[i]); + } + } + + normsCache[field] = bytes; // update cache + } + } + } + + private void Initialize(SegmentReader[] subReaders) + { + this.subReaders = subReaders; + starts = new int[subReaders.Length + 1]; // build starts array + for (int i = 0; i < subReaders.Length; i++) + { + starts[i] = maxDoc; + maxDoc += subReaders[i].MaxDoc(); // compute maxDocs + + if (subReaders[i].HasDeletions()) + hasDeletions = true; + } + starts[subReaders.Length] = maxDoc; + } + + protected internal override DirectoryIndexReader DoReopen(SegmentInfos infos) + { + lock (this) + { + if (infos.Count == 1) + { + // The index has only one segment now, so we can't refresh the MultiSegmentReader. 
+ // Return a new SegmentReader instead + SegmentReader newReader = SegmentReader.Get(infos, infos.Info(0), false); + return newReader; + } + else + { + return new MultiSegmentReader(directory, infos, closeDirectory, subReaders, starts, normsCache); + } + } + } + + public override TermFreqVector[] GetTermFreqVectors(int n) + { + EnsureOpen(); + int i = ReaderIndex(n); // find segment num + return subReaders[i].GetTermFreqVectors(n - starts[i]); // dispatch to segment + } + + public override TermFreqVector GetTermFreqVector(int n, System.String field) + { + EnsureOpen(); + int i = ReaderIndex(n); // find segment num + return subReaders[i].GetTermFreqVector(n - starts[i], field); + } + + + public override void GetTermFreqVector(int docNumber, System.String field, TermVectorMapper mapper) + { + EnsureOpen(); + int i = ReaderIndex(docNumber); // find segment num + subReaders[i].GetTermFreqVector(docNumber - starts[i], field, mapper); + } + + public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper) + { + EnsureOpen(); + int i = ReaderIndex(docNumber); // find segment num + subReaders[i].GetTermFreqVector(docNumber - starts[i], mapper); + } + + public override bool IsOptimized() + { + return false; + } + + public override int NumDocs() + { + lock (this) + { + // Don't call ensureOpen() here (it could affect performance) + if (numDocs == - 1) + { + // check cache + int n = 0; // cache miss--recompute + for (int i = 0; i < subReaders.Length; i++) + n += subReaders[i].NumDocs(); // sum from readers + numDocs = n; + } + return numDocs; + } + } + + public override int MaxDoc() + { + // Don't call ensureOpen() here (it could affect performance) + return maxDoc; + } + + // inherit javadoc + public override Document Document(int n, FieldSelector fieldSelector) + { + EnsureOpen(); + int i = ReaderIndex(n); // find segment num + return subReaders[i].Document(n - starts[i], fieldSelector); // dispatch to segment reader + } + + public override bool IsDeleted(int n) + { + // Don't call ensureOpen() here (it could affect performance) + int i = ReaderIndex(n); // find segment num + return subReaders[i].IsDeleted(n - starts[i]); // dispatch to segment reader + } + + public override bool HasDeletions() + { + // Don't call ensureOpen() here (it could affect performance) + return hasDeletions; + } + + protected internal override void DoDelete(int n) + { + numDocs = - 1; // invalidate cache + int i = ReaderIndex(n); // find segment num + subReaders[i].DeleteDocument(n - starts[i]); // dispatch to segment reader + hasDeletions = true; + } + + protected internal override void DoUndeleteAll() + { + for (int i = 0; i < subReaders.Length; i++) + subReaders[i].UndeleteAll(); + + hasDeletions = false; + numDocs = - 1; // invalidate cache + } + + private int ReaderIndex(int n) + { + // find reader for doc n: + return ReaderIndex(n, this.starts, this.subReaders.Length); + } + + internal static int ReaderIndex(int n, int[] starts, int numSubReaders) + { + // find reader for doc n: + int lo = 0; // search starts array + int hi = numSubReaders - 1; // for first element less + + while (hi >= lo) + { + int mid = (lo + hi) >> 1; + int midValue = starts[mid]; + if (n < midValue) + hi = mid - 1; + else if (n > midValue) + lo = mid + 1; + else + { + // found a match + while (mid + 1 < numSubReaders && starts[mid + 1] == midValue) + { + mid++; // scan to last match + } + return mid; + } + } + return hi; + } + + public override bool HasNorms(System.String field) + { + EnsureOpen(); + for (int i = 0; i < 
subReaders.Length; i++) + { + if (subReaders[i].HasNorms(field)) + return true; + } + return false; + } + + private byte[] ones; + private byte[] fakeNorms() + { + if (ones == null) + ones = SegmentReader.CreateFakeNorms(MaxDoc()); + return ones; + } + + public override byte[] Norms(System.String field) + { + lock (this) + { + EnsureOpen(); + byte[] bytes = (byte[]) normsCache[field]; + if (bytes != null) + return bytes; // cache hit + if (!HasNorms(field)) + return fakeNorms(); + + bytes = new byte[MaxDoc()]; + for (int i = 0; i < subReaders.Length; i++) + subReaders[i].Norms(field, bytes, starts[i]); + normsCache[field] = bytes; // update cache + return bytes; + } + } + + public override void Norms(System.String field, byte[] result, int offset) + { + lock (this) + { + EnsureOpen(); + byte[] bytes = (byte[]) normsCache[field]; + if (bytes == null && !HasNorms(field)) + bytes = fakeNorms(); + if (bytes != null) + // cache hit + Array.Copy(bytes, 0, result, offset, MaxDoc()); + + for (int i = 0; i < subReaders.Length; i++) + // read from segments + subReaders[i].Norms(field, result, offset + starts[i]); + } + } + + protected internal override void DoSetNorm(int n, System.String field, byte value_Renamed) + { + normsCache.Remove(field); // clear cache + int i = ReaderIndex(n); // find segment num + subReaders[i].SetNorm(n - starts[i], field, value_Renamed); // dispatch + } + + public override TermEnum Terms() + { + EnsureOpen(); + return new MultiTermEnum(subReaders, starts, null); + } + + public override TermEnum Terms(Term term) + { + EnsureOpen(); + return new MultiTermEnum(subReaders, starts, term); + } + + public override int DocFreq(Term t) + { + EnsureOpen(); + int total = 0; // sum freqs in segments + for (int i = 0; i < subReaders.Length; i++) + total += subReaders[i].DocFreq(t); + return total; + } + + public override TermDocs TermDocs() + { + EnsureOpen(); + return new MultiTermDocs(subReaders, starts); + } + + public override TermPositions TermPositions() + { + EnsureOpen(); + return new MultiTermPositions(subReaders, starts); + } + + protected internal override void CommitChanges() + { + for (int i = 0; i < subReaders.Length; i++) + subReaders[i].Commit(); + } + + internal override void StartCommit() + { + base.StartCommit(); + for (int i = 0; i < subReaders.Length; i++) + { + subReaders[i].StartCommit(); + } + } + + internal override void RollbackCommit() + { + base.RollbackCommit(); + for (int i = 0; i < subReaders.Length; i++) + { + subReaders[i].RollbackCommit(); + } + } + + protected internal override void DoClose() + { + lock (this) + { + for (int i = 0; i < subReaders.Length; i++) + subReaders[i].DecRef(); + + // maybe close directory + base.DoClose(); + } + } + + public override System.Collections.ICollection GetFieldNames(IndexReader.FieldOption fieldNames) + { + EnsureOpen(); + return GetFieldNames(fieldNames, this.subReaders); + } + + internal static System.Collections.ICollection GetFieldNames(IndexReader.FieldOption fieldNames, IndexReader[] subReaders) + { + // maintain a unique set of field names + System.Collections.Hashtable fieldSet = new System.Collections.Hashtable(); + for (int i = 0; i < subReaders.Length; i++) + { + IndexReader reader = subReaders[i]; + System.Collections.IEnumerator names = reader.GetFieldNames(fieldNames).GetEnumerator(); + while (names.MoveNext()) + { + fieldSet.Add(names.Current, names.Current); + } + } + return fieldSet.Keys; + } + + // for testing + internal virtual SegmentReader[] GetSubReaders() + { + return subReaders; + } + + 
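
For readers following this new class: the ReaderIndex() binary search above is what maps a global document number onto a (sub-reader, local document number) pair, and every dispatching method here (Document, IsDeleted, GetTermFreqVectors, Norms, and so on) relies on it. The following stand-alone C# sketch is illustrative only and not part of the committed file; the starts[] values and class name are made up.

// Illustrative sketch only; not part of the patch above.
using System;

class ReaderIndexDemo
{
    // Same binary search as MultiSegmentReader.ReaderIndex(int, int[], int):
    // find the segment whose doc-number range contains global doc 'n'.
    static int ReaderIndex(int n, int[] starts, int numSubReaders)
    {
        int lo = 0;
        int hi = numSubReaders - 1;
        while (hi >= lo)
        {
            int mid = (lo + hi) >> 1;
            int midValue = starts[mid];
            if (n < midValue)
                hi = mid - 1;
            else if (n > midValue)
                lo = mid + 1;
            else
            {
                // exact hit on a segment start; skip empty segments sharing it
                while (mid + 1 < numSubReaders && starts[mid + 1] == midValue)
                    mid++;
                return mid;
            }
        }
        return hi; // doc falls inside the segment whose start is just below n
    }

    static void Main()
    {
        // Hypothetical index: three segments of 100, 150 and 150 docs.
        // starts[] holds each segment's first global doc number, plus maxDoc,
        // exactly as Initialize() builds it.
        int[] starts = new int[] { 0, 100, 250, 400 };
        int n = 260;
        int seg = ReaderIndex(n, starts, 3);
        Console.WriteLine("doc {0} maps to segment {1}, local doc {2}", n, seg, n - starts[seg]);
        // prints: doc 260 maps to segment 2, local doc 10
    }
}

The committed methods then call the sub-reader with exactly that local number, e.g. subReaders[i].Document(n - starts[i], fieldSelector).
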
public override void SetTermInfosIndexDivisor(int indexDivisor) + { + for (int i = 0; i < subReaders.Length; i++) + subReaders[i].SetTermInfosIndexDivisor(indexDivisor); + } + + public override int GetTermInfosIndexDivisor() + { + if (subReaders.Length > 0) + return subReaders[0].GetTermInfosIndexDivisor(); + else + throw new System.SystemException("no readers"); + } + + internal class MultiTermEnum:TermEnum + { + private SegmentMergeQueue queue; + + private Term term; + private int docFreq; + + public MultiTermEnum(IndexReader[] readers, int[] starts, Term t) + { + queue = new SegmentMergeQueue(readers.Length); + for (int i = 0; i < readers.Length; i++) + { + IndexReader reader = readers[i]; + TermEnum termEnum; + + if (t != null) + { + termEnum = reader.Terms(t); + } + else + termEnum = reader.Terms(); + + SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader); + if (t == null ? smi.Next() : termEnum.Term() != null) + queue.Put(smi); + // initialize queue + else + smi.Close(); + } + + if (t != null && queue.Size() > 0) + { + Next(); + } + } + + public override bool Next() + { + SegmentMergeInfo top = (SegmentMergeInfo) queue.Top(); + if (top == null) + { + term = null; + return false; + } + + term = top.term; + docFreq = 0; + + while (top != null && term.CompareTo(top.term) == 0) + { + queue.Pop(); + docFreq += top.termEnum.DocFreq(); // increment freq + if (top.Next()) + queue.Put(top); + // restore queue + else + top.Close(); // done with a segment + top = (SegmentMergeInfo) queue.Top(); + } + return true; + } + + public override Term Term() + { + return term; + } + + public override int DocFreq() + { + return docFreq; + } + + public override void Close() + { + queue.Close(); + } + } + + internal class MultiTermDocs : TermDocs + { + protected internal IndexReader[] readers; + protected internal int[] starts; + protected internal Term term; + + protected internal int base_Renamed = 0; + protected internal int pointer = 0; + + private TermDocs[] readerTermDocs; + protected internal TermDocs current; // == readerTermDocs[pointer] + + public MultiTermDocs(IndexReader[] r, int[] s) + { + readers = r; + starts = s; + + readerTermDocs = new TermDocs[r.Length]; + } + + public virtual int Doc() + { + return base_Renamed + current.Doc(); + } + public virtual int Freq() + { + return current.Freq(); + } + + public virtual void Seek(Term term) + { + this.term = term; + this.base_Renamed = 0; + this.pointer = 0; + this.current = null; + } + + public virtual void Seek(TermEnum termEnum) + { + Seek(termEnum.Term()); + } + + public virtual bool Next() + { + for (; ; ) + { + if (current != null && current.Next()) + { + return true; + } + else if (pointer < readers.Length) + { + base_Renamed = starts[pointer]; + current = TermDocs(pointer++); + } + else + { + return false; + } + } + } + + /// Optimized implementation. 
+ public virtual int Read(int[] docs, int[] freqs) + { + while (true) + { + while (current == null) + { + if (pointer < readers.Length) + { + // try next segment + base_Renamed = starts[pointer]; + current = TermDocs(pointer++); + } + else + { + return 0; + } + } + int end = current.Read(docs, freqs); + if (end == 0) + { + // none left in segment + current = null; + } + else + { + // got some + int b = base_Renamed; // adjust doc numbers + for (int i = 0; i < end; i++) + docs[i] += b; + return end; + } + } + } + + /* A Possible future optimization could skip entire segments */ + public virtual bool SkipTo(int target) + { + for (; ; ) + { + if (current != null && current.SkipTo(target - base_Renamed)) + { + return true; + } + else if (pointer < readers.Length) + { + base_Renamed = starts[pointer]; + current = TermDocs(pointer++); + } + else + return false; + } + } + + private TermDocs TermDocs(int i) + { + if (term == null) + return null; + TermDocs result = readerTermDocs[i]; + if (result == null) + result = readerTermDocs[i] = TermDocs(readers[i]); + result.Seek(term); + return result; + } + + protected internal virtual TermDocs TermDocs(IndexReader reader) + { + return reader.TermDocs(); + } + + public virtual void Close() + { + for (int i = 0; i < readerTermDocs.Length; i++) + { + if (readerTermDocs[i] != null) + readerTermDocs[i].Close(); + } + } + } + + internal class MultiTermPositions:MultiTermDocs, TermPositions + { + public MultiTermPositions(IndexReader[] r, int[] s):base(r, s) + { + } + + protected internal override TermDocs TermDocs(IndexReader reader) + { + return (TermDocs) reader.TermPositions(); + } + + public virtual int NextPosition() + { + return ((TermPositions) current).NextPosition(); + } + + public virtual int GetPayloadLength() + { + return ((TermPositions) current).GetPayloadLength(); + } + + public virtual byte[] GetPayload(byte[] data, int offset) + { + return ((TermPositions) current).GetPayload(data, offset); + } + + + // TODO: Remove warning after API has been finalized + public virtual bool IsPayloadAvailable() + { + return ((TermPositions) current).IsPayloadAvailable(); + } + } + } +} \ No newline at end of file Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/MultipleTermPositions.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/MultipleTermPositions.cs?rev=671404&r1=671403&r2=671404&view=diff ============================================================================== --- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/MultipleTermPositions.cs (original) +++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/MultipleTermPositions.cs Tue Jun 24 19:52:22 2008 @@ -16,6 +16,7 @@ */ using System; + using PriorityQueue = Lucene.Net.Util.PriorityQueue; namespace Lucene.Net.Index @@ -217,5 +218,29 @@ { throw new System.NotSupportedException(); } + + + /// Not implemented. + /// UnsupportedOperationException + public virtual int GetPayloadLength() + { + throw new System.NotSupportedException(); + } + + /// Not implemented. 
+ /// UnsupportedOperationException + public virtual byte[] GetPayload(byte[] data, int offset) + { + throw new System.NotSupportedException(); + } + + /// + /// false + /// + // TODO: Remove warning after API has been finalized + public virtual bool IsPayloadAvailable() + { + return false; + } } } \ No newline at end of file Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/Package.html URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/Package.html?rev=671404&r1=671403&r2=671404&view=diff ============================================================================== --- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/Package.html (original) +++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/Package.html Tue Jun 24 19:52:22 2008 @@ -1,10 +1,10 @@ - - - - - - - -Code to maintain and access indices. - - + + + + + + + +Code to maintain and access indices. + + Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/ParallelReader.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/ParallelReader.cs?rev=671404&r1=671403&r2=671404&view=diff ============================================================================== --- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/ParallelReader.cs (original) +++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/ParallelReader.cs Tue Jun 24 19:52:22 2008 @@ -16,10 +16,11 @@ */ using System; + using Document = Lucene.Net.Documents.Document; -using Fieldable = Lucene.Net.Documents.Fieldable; using FieldSelector = Lucene.Net.Documents.FieldSelector; using FieldSelectorResult = Lucene.Net.Documents.FieldSelectorResult; +using Fieldable = Lucene.Net.Documents.Fieldable; namespace Lucene.Net.Index { @@ -45,22 +46,37 @@ public class ParallelReader : IndexReader { private System.Collections.ArrayList readers = new System.Collections.ArrayList(); + private System.Collections.IList decrefOnClose = new System.Collections.ArrayList(); // remember which subreaders to decRef on close + internal bool incRefReaders = false; private System.Collections.SortedList fieldToReader = new System.Collections.SortedList(); private System.Collections.IDictionary readerToFields = new System.Collections.Hashtable(); - private System.Collections.ArrayList storedFieldReaders = new System.Collections.ArrayList(); + private System.Collections.IList storedFieldReaders = new System.Collections.ArrayList(); private int maxDoc; private int numDocs; private bool hasDeletions; + /// Construct a ParallelReader. + ///

+ /// Note that all subreaders are closed if this ParallelReader is closed.

+ ///
+ public ParallelReader() : this(true) + { + } + /// Construct a ParallelReader. - public ParallelReader() : base(null) + /// indicates whether the subreaders should be closed + /// when this ParallelReader is closed + /// + public ParallelReader(bool closeSubReaders) : base() { + this.incRefReaders = !closeSubReaders; } /// Add an IndexReader. + /// IOException if there is a low-level IO error public virtual void Add(IndexReader reader) { + EnsureOpen(); Add(reader, false); } @@ -75,9 +91,11 @@ /// IllegalArgumentException if not all indexes have the same value /// of {@link IndexReader#MaxDoc()} /// + /// IOException if there is a low-level IO error public virtual void Add(IndexReader reader, bool ignoreStoredFields) { + EnsureOpen(); if (readers.Count == 0) { this.maxDoc = reader.MaxDoc(); @@ -107,26 +125,143 @@ if (!ignoreStoredFields) storedFieldReaders.Add(reader); // add to storedFieldReaders readers.Add(reader); + + if (incRefReaders) + { + reader.IncRef(); + } + decrefOnClose.Add(incRefReaders); } + /// Tries to reopen the subreaders. + ///
+ /// If one or more subreaders could be re-opened (i.e. subReader.reopen() + /// returned a new instance != subReader), then a new ParallelReader instance + /// is returned, otherwise this instance is returned. + ///

+ /// A re-opened instance might share one or more subreaders with the old + /// instance. Index modification operations result in undefined behavior + /// when performed before the old instance is closed. + /// (see {@link IndexReader#Reopen()}). + ///

+ /// If subreaders are shared, then the reference count of those + /// readers is increased to ensure that the subreaders remain open + /// until the last referring reader is closed. + /// + ///

+ /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error + public override IndexReader Reopen() + { + EnsureOpen(); + + bool reopened = false; + System.Collections.IList newReaders = new System.Collections.ArrayList(); + System.Collections.IList newDecrefOnClose = new System.Collections.ArrayList(); + + bool success = false; + + try + { + + for (int i = 0; i < readers.Count; i++) + { + IndexReader oldReader = (IndexReader) readers[i]; + IndexReader newReader = oldReader.Reopen(); + newReaders.Add(newReader); + // if at least one of the subreaders was updated we remember that + // and return a new MultiReader + if (newReader != oldReader) + { + reopened = true; + } + } + + if (reopened) + { + ParallelReader pr = new ParallelReader(); + for (int i = 0; i < readers.Count; i++) + { + IndexReader oldReader = (IndexReader) readers[i]; + IndexReader newReader = (IndexReader) newReaders[i]; + if (newReader == oldReader) + { + newDecrefOnClose.Add(true); + newReader.IncRef(); + } + else + { + // this is a new subreader instance, so on close() we don't + // decRef but close it + newDecrefOnClose.Add(false); + } + pr.Add(newReader, !storedFieldReaders.Contains(oldReader)); + } + pr.decrefOnClose = newDecrefOnClose; + pr.incRefReaders = incRefReaders; + success = true; + return pr; + } + else + { + success = true; + // No subreader was refreshed + return this; + } + } + finally + { + if (!success && reopened) + { + for (int i = 0; i < newReaders.Count; i++) + { + IndexReader r = (IndexReader) newReaders[i]; + if (r != null) + { + try + { + if (((System.Boolean) newDecrefOnClose[i])) + { + r.DecRef(); + } + else + { + r.Close(); + } + } + catch (System.IO.IOException ignore) + { + // keep going - we want to clean up as much as possible + } + } + } + } + } + } + + public override int NumDocs() { + // Don't call ensureOpen() here (it could affect performance) return numDocs; } public override int MaxDoc() { + // Don't call ensureOpen() here (it could affect performance) return maxDoc; } public override bool HasDeletions() { + // Don't call ensureOpen() here (it could affect performance) return hasDeletions; } // check first reader public override bool IsDeleted(int n) { + // Don't call ensureOpen() here (it could affect performance) if (readers.Count > 0) return ((IndexReader) readers[0]).IsDeleted(n); return false; @@ -155,6 +290,7 @@ // append fields from storedFieldReaders public override Document Document(int n, FieldSelector fieldSelector) { + EnsureOpen(); Document result = new Document(); for (int i = 0; i < storedFieldReaders.Count; i++) { @@ -188,6 +324,7 @@ // get all vectors public override TermFreqVector[] GetTermFreqVectors(int n) { + EnsureOpen(); System.Collections.ArrayList results = new System.Collections.ArrayList(); System.Collections.IEnumerator i = new System.Collections.Hashtable(fieldToReader).GetEnumerator(); while (i.MoveNext()) @@ -204,24 +341,54 @@ public override TermFreqVector GetTermFreqVector(int n, System.String field) { + EnsureOpen(); IndexReader reader = ((IndexReader) fieldToReader[field]); return reader == null ? 
null : reader.GetTermFreqVector(n, field); } + + public override void GetTermFreqVector(int docNumber, System.String field, TermVectorMapper mapper) + { + EnsureOpen(); + IndexReader reader = ((IndexReader) fieldToReader[field]); + if (reader != null) + { + reader.GetTermFreqVector(docNumber, field, mapper); + } + } + + public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper) + { + EnsureOpen(); + EnsureOpen(); + + System.Collections.IEnumerator i = fieldToReader.GetEnumerator(); + while (i.MoveNext()) + { + System.Collections.DictionaryEntry e = (System.Collections.DictionaryEntry) i.Current; + System.String field = (System.String) e.Key; + IndexReader reader = (IndexReader) e.Value; + reader.GetTermFreqVector(docNumber, field, mapper); + } + } + public override bool HasNorms(System.String field) { + EnsureOpen(); IndexReader reader = ((IndexReader) fieldToReader[field]); return reader == null ? false : reader.HasNorms(field); } public override byte[] Norms(System.String field) { + EnsureOpen(); IndexReader reader = ((IndexReader) fieldToReader[field]); return reader == null ? null : reader.Norms(field); } public override void Norms(System.String field, byte[] result, int offset) { + EnsureOpen(); IndexReader reader = ((IndexReader) fieldToReader[field]); if (reader != null) reader.Norms(field, result, offset); @@ -236,40 +403,91 @@ public override TermEnum Terms() { + EnsureOpen(); return new ParallelTermEnum(this); } public override TermEnum Terms(Term term) { + EnsureOpen(); return new ParallelTermEnum(this, term); } public override int DocFreq(Term term) { + EnsureOpen(); IndexReader reader = ((IndexReader) fieldToReader[term.Field()]); return reader == null ? 0 : reader.DocFreq(term); } public override TermDocs TermDocs(Term term) { + EnsureOpen(); return new ParallelTermDocs(this, term); } public override TermDocs TermDocs() { + EnsureOpen(); return new ParallelTermDocs(this); } public override TermPositions TermPositions(Term term) { + EnsureOpen(); return new ParallelTermPositions(this, term); } public override TermPositions TermPositions() { + EnsureOpen(); return new ParallelTermPositions(this); } + /// Checks recursively if all subreaders are up to date. + public override bool IsCurrent() + { + for (int i = 0; i < readers.Count; i++) + { + if (!((IndexReader) readers[i]).IsCurrent()) + { + return false; + } + } + + // all subreaders are up to date + return true; + } + + /// Checks recursively if all subindexes are optimized + public override bool IsOptimized() + { + for (int i = 0; i < readers.Count; i++) + { + if (!((IndexReader) readers[i]).IsOptimized()) + { + return false; + } + } + + // all subindexes are optimized + return true; + } + + + /// Not implemented. 
+ /// UnsupportedOperationException + public override long GetVersion() + { + throw new System.NotSupportedException("ParallelReader does not support this method."); + } + + // for testing + internal virtual IndexReader[] GetSubReaders() + { + return (IndexReader[]) readers.ToArray(typeof(IndexReader)); + } + protected internal override void DoCommit() { for (int i = 0; i < readers.Count; i++) @@ -281,13 +499,22 @@ lock (this) { for (int i = 0; i < readers.Count; i++) - ((IndexReader) readers[i]).Close(); + { + if (((System.Boolean) decrefOnClose[i])) + { + ((IndexReader) readers[i]).DecRef(); + } + else + { + ((IndexReader) readers[i]).Close(); + } + } } } - public override System.Collections.ICollection GetFieldNames(IndexReader.FieldOption fieldNames) { + EnsureOpen(); System.Collections.Hashtable fieldSet = new System.Collections.Hashtable(); for (int i = 0; i < readers.Count; i++) { @@ -512,6 +739,23 @@ // It is an error to call this if there is no next position, e.g. if termDocs==null return ((TermPositions) termDocs).NextPosition(); } + + public virtual int GetPayloadLength() + { + return ((TermPositions) termDocs).GetPayloadLength(); + } + + public virtual byte[] GetPayload(byte[] data, int offset) + { + return ((TermPositions) termDocs).GetPayload(data, offset); + } + + + // TODO: Remove warning after API has been finalized + public virtual bool IsPayloadAvailable() + { + return ((TermPositions) termDocs).IsPayloadAvailable(); + } } } } \ No newline at end of file Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/Payload.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/Payload.cs?rev=671404&view=auto ============================================================================== --- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/Payload.cs (added) +++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/Payload.cs Tue Jun 24 19:52:22 2008 @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using Token = Lucene.Net.Analysis.Token; +using TokenStream = Lucene.Net.Analysis.TokenStream; + +namespace Lucene.Net.Index +{ + + /// A Payload is metadata that can be stored together with each occurrence + /// of a term. This metadata is stored inline in the posting list of the + /// specific term. + ///

+ /// To store payloads in the index, a {@link TokenStream} has to be used that + /// produces {@link Token}s containing payload data. + ///

+ /// Use {@link TermPositions#GetPayloadLength()} and {@link TermPositions#GetPayload(byte[], int)} + /// to retrieve the payloads from the index.
+ /// + ///
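
As a usage illustration of the API described in the comment above (illustrative only, not part of the committed file): a TokenFilter attaches a payload to each token at indexing time, and TermPositions reads it back at search time. The filter class, field names, and the use of Token.SetPayload(Payload) are assumptions based on the analysis API in the same code base.

// Illustrative sketch only; not part of this commit. Names are hypothetical.
using System;
using Lucene.Net.Analysis;
using Lucene.Net.Index;

// Indexing side: a TokenFilter that attaches the same small payload to every
// token it emits (assumes Token.SetPayload(Payload) from this code base).
class MarkerPayloadFilter : TokenFilter
{
    private byte[] marker = System.Text.Encoding.UTF8.GetBytes("A");

    public MarkerPayloadFilter(TokenStream input) : base(input)
    {
    }

    public override Token Next()
    {
        Token t = input.Next();
        if (t != null)
            t.SetPayload(new Payload(marker));
        return t;
    }
}

class PayloadDump
{
    // Search side: walk the positions of a term and read the stored payloads
    // back with the TermPositions methods referenced above.
    public static void Dump(IndexReader reader, Term term)
    {
        TermPositions tp = reader.TermPositions(term);
        while (tp.Next())
        {
            for (int i = 0; i < tp.Freq(); i++)
            {
                tp.NextPosition();
                if (tp.IsPayloadAvailable())
                {
                    byte[] data = tp.GetPayload(new byte[tp.GetPayloadLength()], 0);
                    Console.WriteLine("doc " + tp.Doc() + ": " + data.Length + " payload bytes");
                }
            }
        }
        tp.Close();
    }
}
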

+ [Serializable] + public class Payload : System.ICloneable + { + /// the byte array containing the payload data + protected internal byte[] data; + + /// the offset within the byte array + protected internal int offset; + + /// the length of the payload data + protected internal int length; + + /// Creates an empty payload and does not allocate a byte array. + public Payload() + { + // nothing to do + } + + /// Creates a new payload with the the given array as data. + /// A reference to the passed-in array is held, i. e. no + /// copy is made. + /// + /// + /// the data of this payload + /// + public Payload(byte[] data):this(data, 0, data.Length) + { + } + + /// Creates a new payload with the the given array as data. + /// A reference to the passed-in array is held, i. e. no + /// copy is made. + /// + /// + /// the data of this payload + /// + /// the offset in the data byte array + /// + /// the length of the data + /// + public Payload(byte[] data, int offset, int length) + { + if (offset < 0 || offset + length > data.Length) + { + throw new System.ArgumentException(); + } + this.data = data; + this.offset = offset; + this.length = length; + } + + /// Sets this payloads data. + /// A reference to the passed-in array is held, i. e. no + /// copy is made. + /// + public virtual void SetData(byte[] data) + { + SetData(data, 0, data.Length); + } + + /// Sets this payloads data. + /// A reference to the passed-in array is held, i. e. no + /// copy is made. + /// + public virtual void SetData(byte[] data, int offset, int length) + { + this.data = data; + this.offset = offset; + this.length = length; + } + + /// Returns a reference to the underlying byte array + /// that holds this payloads data. + /// + public virtual byte[] GetData() + { + return this.data; + } + + /// Returns the offset in the underlying byte array + public virtual int GetOffset() + { + return this.offset; + } + + /// Returns the length of the payload data. + public virtual int Length() + { + return this.length; + } + + /// Returns the byte at the given index. + public virtual byte ByteAt(int index) + { + if (0 <= index && index < this.length) + { + return this.data[this.offset + index]; + } + throw new System. IndexOutOfRangeException("Index of bound " + index); + } + + /// Allocates a new byte array, copies the payload data into it and returns it. + public virtual byte[] ToByteArray() + { + byte[] retArray = new byte[this.length]; + Array.Copy(this.data, this.offset, retArray, 0, this.length); + return retArray; + } + + /// Copies the payload data to a byte array. + /// + /// + /// the target byte array + /// + /// the offset in the target byte array + /// + public virtual void CopyTo(byte[] target, int targetOffset) + { + if (this.length > target.Length + targetOffset) + { + throw new System.IndexOutOfRangeException(); + } + Array.Copy(this.data, this.offset, target, targetOffset, this.length); + } + + /// Clones this payload by creating a copy of the underlying + /// byte array. 
+ /// + public virtual System.Object Clone() + { + Payload clone = new Payload(this.ToByteArray()); + return clone; + } + } +} \ No newline at end of file Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/PositionBasedTermVectorMapper.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/PositionBasedTermVectorMapper.cs?rev=671404&view=auto ============================================================================== --- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/PositionBasedTermVectorMapper.cs (added) +++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/PositionBasedTermVectorMapper.cs Tue Jun 24 19:52:22 2008 @@ -0,0 +1,172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + /// For each Field, store position by position information. It ignores frequency information + ///

+ /// This is not thread-safe. + ///
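
A brief usage sketch for context (illustrative only, not part of the committed file; the reader, document number, and field name are hypothetical): the mapper is handed to one of the new GetTermFreqVector(..., TermVectorMapper) overloads and then queried for its position-keyed map. This assumes the field was indexed with term vectors that store positions.

// Illustrative sketch only; not part of this commit.
using System.Collections;
using Lucene.Net.Index;

class PositionMapperDemo
{
    public static void Show(IndexReader reader, int docNumber, System.String field)
    {
        PositionBasedTermVectorMapper mapper = new PositionBasedTermVectorMapper();
        reader.GetTermFreqVector(docNumber, field, mapper);

        // field -> (position -> TVPositionInfo), as built by SetExpectations()/Map()
        IDictionary fieldToTerms = mapper.GetFieldToTerms();
        IDictionary byPosition = (IDictionary) fieldToTerms[field];

        foreach (DictionaryEntry e in byPosition)
        {
            PositionBasedTermVectorMapper.TVPositionInfo info =
                (PositionBasedTermVectorMapper.TVPositionInfo) e.Value;
            System.Console.WriteLine("position " + info.GetPosition() + ": " +
                info.GetTerms().Count + " term(s)");
        }
    }
}
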

+ public class PositionBasedTermVectorMapper : TermVectorMapper + { + private System.Collections.IDictionary fieldToTerms; + + private System.String currentField; + /// A Map of Integer and TVPositionInfo + private System.Collections.IDictionary currentPositions; + private bool storeOffsets; + + + + + /// + /// + /// + public PositionBasedTermVectorMapper():base(false, false) + { + } + + public PositionBasedTermVectorMapper(bool ignoringOffsets):base(false, ignoringOffsets) + { + } + + /// Never ignores positions. This mapper doesn't make much sense unless there are positions + /// false + /// + public override bool IsIgnoringPositions() + { + return false; + } + + /// Callback for the TermVectorReader. + /// + /// + /// + /// + /// + /// + /// + /// + public override void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) + { + for (int i = 0; i < positions.Length; i++) + { + System.Int32 posVal = (System.Int32) positions[i]; + TVPositionInfo pos = (TVPositionInfo) currentPositions[posVal]; + if (pos == null) + { + pos = new TVPositionInfo(positions[i], storeOffsets); + currentPositions[posVal] = pos; + } + pos.AddTerm(term, offsets != null?offsets[i]:null); + } + } + + /// Callback mechanism used by the TermVectorReader + /// The field being read + /// + /// The number of terms in the vector + /// + /// Whether offsets are available + /// + /// Whether positions are available + /// + public override void SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions) + { + if (storePositions == false) + { + throw new System.SystemException("You must store positions in order to use this Mapper"); + } + if (storeOffsets == true) + { + //ignoring offsets + } + fieldToTerms = new System.Collections.Hashtable(numTerms); + this.storeOffsets = storeOffsets; + currentField = field; + currentPositions = new System.Collections.Hashtable(); + fieldToTerms[currentField] = currentPositions; + } + + /// Get the mapping between fields and terms, sorted by the comparator + /// + /// + /// A map between field names and a Map. The sub-Map key is the position as the integer, the value is {@link Lucene.Net.Index.PositionBasedTermVectorMapper.TVPositionInfo}. + /// + public virtual System.Collections.IDictionary GetFieldToTerms() + { + return fieldToTerms; + } + + /// Container for a term at a position + public class TVPositionInfo + { + private int position; + //a list of Strings + private System.Collections.IList terms; + //A list of TermVectorOffsetInfo + private System.Collections.IList offsets; + + + public TVPositionInfo(int position, bool storeOffsets) + { + this.position = position; + terms = new System.Collections.ArrayList(); + if (storeOffsets) + { + offsets = new System.Collections.ArrayList(); + } + } + + internal virtual void AddTerm(System.String term, TermVectorOffsetInfo info) + { + terms.Add(term); + if (offsets != null) + { + offsets.Add(info); + } + } + + /// + /// The position of the term + /// + public virtual int GetPosition() + { + return position; + } + + /// Note, there may be multiple terms at the same position + /// A List of Strings + /// + public virtual System.Collections.IList GetTerms() + { + return terms; + } + + /// Parallel list (to {@link #GetTerms()}) of TermVectorOffsetInfo objects. There may be multiple entries since there may be multiple terms at a position + /// A List of TermVectorOffsetInfo objects, if offsets are store. 
+ /// + public virtual System.Collections.IList GetOffsets() + { + return offsets; + } + } + } +} \ No newline at end of file Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentInfo.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentInfo.cs?rev=671404&r1=671403&r2=671404&view=diff ============================================================================== --- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentInfo.cs (original) +++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentInfo.cs Tue Jun 24 19:52:22 2008 @@ -16,6 +16,7 @@ */ using System; + using Directory = Lucene.Net.Store.Directory; using IndexOutput = Lucene.Net.Store.IndexOutput; using IndexInput = Lucene.Net.Store.IndexInput; @@ -25,6 +26,12 @@ sealed public class SegmentInfo : System.ICloneable { + + internal const int NO = - 1; // e.g. no norms; no deletes; + internal const int YES = 1; // e.g. have norms; have deletes; + internal const int CHECK_DIR = 0; // e.g. must check dir to see if there are norms/deletions + internal const int WITHOUT_GEN = 0; // a file name that has no GEN in it. + public System.String name; // unique name in dir public int docCount; // number of docs in seg public Directory dir; // where segment resides @@ -32,17 +39,21 @@ private bool preLockless; // true if this is a segments file written before // lock-less commits (2.1) - private long delGen; // current generation of del file; -1 if there - // are no deletes; 0 if it's a pre-2.1 segment - // (and we must check filesystem); 1 or higher if + private long delGen; // current generation of del file; NO if there + // are no deletes; CHECK_DIR if it's a pre-2.1 segment + // (and we must check filesystem); YES or higher if // there are deletes at generation N - private long[] normGen; // current generations of each field's norm file. - // If this array is null, we must check filesystem - // when preLockLess is true. Else, - // there are no separate norms + private long[] normGen; // current generation of each field's norm file. + // If this array is null, for lockLess this means no + // separate norms. For preLockLess this means we must + // check filesystem. If this array is not null, its + // values mean: NO says this field has no separate + // norms; CHECK_DIR says it is a preLockLess segment and + // filesystem must be checked; >= YES says this field + // has separate norms with the specified generation - private sbyte isCompoundFile; // -1 if it is not; 1 if it is; 0 if it's + private sbyte isCompoundFile; // NO if it is not; YES if it is; CHECK_DIR if it's // pre-2.1 (ie, must check file system to see // if .cfs and .nrm exist) @@ -52,32 +63,57 @@ // and true for newly created merged segments (both // compound and non compound). 
+ private System.Collections.IList files; // cached list of files that this segment uses + // in the Directory + + internal long sizeInBytes = - 1; // total byte size of all of our files (computed on demand) + + private int docStoreOffset; // if this segment shares stored fields & vectors, this + // offset is where in that file this segment's docs begin + private System.String docStoreSegment; // name used to derive fields/vectors file we share with + // other segments + private bool docStoreIsCompoundFile; // whether doc store files are stored in compound file (*.cfx) + public SegmentInfo(System.String name, int docCount, Directory dir) { this.name = name; this.docCount = docCount; this.dir = dir; - delGen = - 1; - isCompoundFile = 0; + delGen = NO; + isCompoundFile = (sbyte) (CHECK_DIR); preLockless = true; hasSingleNormFile = false; + docStoreOffset = - 1; + docStoreSegment = name; + docStoreIsCompoundFile = false; + } + + public SegmentInfo(System.String name, int docCount, Directory dir, bool isCompoundFile, bool hasSingleNormFile) : this(name, docCount, dir, isCompoundFile, hasSingleNormFile, - 1, null, false) + { } - public SegmentInfo(System.String name, int docCount, Directory dir, bool isCompoundFile, bool hasSingleNormFile) : this(name, docCount, dir) + public SegmentInfo(System.String name, int docCount, Directory dir, bool isCompoundFile, bool hasSingleNormFile, int docStoreOffset, System.String docStoreSegment, bool docStoreIsCompoundFile) : this(name, docCount, dir) { - this.isCompoundFile = (sbyte) (isCompoundFile ? 1 : - 1); + this.isCompoundFile = (sbyte) (isCompoundFile ? YES : NO); this.hasSingleNormFile = hasSingleNormFile; preLockless = false; + this.docStoreOffset = docStoreOffset; + this.docStoreSegment = docStoreSegment; + this.docStoreIsCompoundFile = docStoreIsCompoundFile; + System.Diagnostics.Debug.Assert(docStoreOffset == - 1 || docStoreSegment != null); } /// Copy everything from src SegmentInfo into our instance. 
internal void Reset(SegmentInfo src) { + ClearFiles(); name = src.name; docCount = src.docCount; dir = src.dir; preLockless = src.preLockless; delGen = src.delGen; + docStoreOffset = src.docStoreOffset; + docStoreIsCompoundFile = src.docStoreIsCompoundFile; if (src.normGen == null) { normGen = null; @@ -101,7 +137,7 @@ /// /// input handle to read segment info from /// - public SegmentInfo(Directory dir, int format, IndexInput input) + internal SegmentInfo(Directory dir, int format, IndexInput input) { this.dir = dir; name = input.ReadString(); @@ -109,6 +145,26 @@ if (format <= SegmentInfos.FORMAT_LOCKLESS) { delGen = input.ReadLong(); + if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE) + { + docStoreOffset = input.ReadInt(); + if (docStoreOffset != - 1) + { + docStoreSegment = input.ReadString(); + docStoreIsCompoundFile = (1 == input.ReadByte()); + } + else + { + docStoreSegment = name; + docStoreIsCompoundFile = false; + } + } + else + { + docStoreOffset = - 1; + docStoreSegment = name; + docStoreIsCompoundFile = false; + } if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) { hasSingleNormFile = (1 == input.ReadByte()); @@ -118,7 +174,7 @@ hasSingleNormFile = false; } int numNormGen = input.ReadInt(); - if (numNormGen == - 1) + if (numNormGen == NO) { normGen = null; } @@ -131,15 +187,18 @@ } } isCompoundFile = (sbyte) input.ReadByte(); - preLockless = isCompoundFile == 0; + preLockless = (isCompoundFile == CHECK_DIR); } else { - delGen = 0; + delGen = CHECK_DIR; normGen = null; - isCompoundFile = 0; + isCompoundFile = (sbyte) (CHECK_DIR); preLockless = true; hasSingleNormFile = false; + docStoreOffset = - 1; + docStoreIsCompoundFile = false; + docStoreSegment = null; } } @@ -152,39 +211,66 @@ // norms set against it yet: normGen = new long[numFields]; - if (!preLockless) + if (preLockless) + { + // Do nothing: thus leaving normGen[k]==CHECK_DIR (==0), so that later we know + // we have to check filesystem for norm files, because this is prelockless. + } + else { // This is a FORMAT_LOCKLESS segment, which means - // there are no norms: + // there are no separate norms: for (int i = 0; i < numFields; i++) { - normGen[i] = - 1; + normGen[i] = NO; } } } } + /// Returns total size in bytes of all of files used by + /// this segment. 
+ /// + internal long SizeInBytes() + { + if (sizeInBytes == - 1) + { + System.Collections.IList files = Files(); + int size = files.Count; + sizeInBytes = 0; + for (int i = 0; i < size; i++) + { + System.String fileName = (System.String) files[i]; + // We don't count bytes used by a shared doc store + // against this segment: + if (docStoreOffset == - 1 || !IndexFileNames.IsDocStoreFile(fileName)) + sizeInBytes += dir.FileLength(fileName); + } + } + return sizeInBytes; + } + internal bool HasDeletions() { // Cases: // - // delGen == -1: this means this segment was written + // delGen == NO: this means this segment was written // by the LOCKLESS code and for certain does not have // deletions yet // - // delGen == 0: this means this segment was written by + // delGen == CHECK_DIR: this means this segment was written by // pre-LOCKLESS code which means we must check // directory to see if .del file exists // - // delGen > 0: this means this segment was written by + // delGen >= YES: this means this segment was written by // the LOCKLESS code and for certain has // deletions // - if (delGen == - 1) + if (delGen == NO) { return false; } - else if (delGen > 0) + else if (delGen >= YES) { return true; } @@ -197,19 +283,21 @@ internal void AdvanceDelGen() { // delGen 0 is reserved for pre-LOCKLESS format - if (delGen == - 1) + if (delGen == NO) { - delGen = 1; + delGen = YES; } else { delGen++; } + ClearFiles(); } internal void ClearDelGen() { - delGen = - 1; + delGen = NO; + ClearFiles(); } public System.Object Clone() @@ -224,12 +312,15 @@ si.normGen = new long[normGen.Length]; normGen.CopyTo(si.normGen, 0); } + si.docStoreOffset = docStoreOffset; + si.docStoreSegment = docStoreSegment; + si.docStoreIsCompoundFile = docStoreIsCompoundFile; return si; } internal System.String GetDelFileName() { - if (delGen == - 1) + if (delGen == NO) { // In this case we know there is no deletion filename // against this segment @@ -237,8 +328,8 @@ } else { - // If delGen is 0, it's the pre-lockless-commit file format - return IndexFileNames.FileNameFromGeneration(name, ".del", delGen); + // If delGen is CHECK_DIR, it's the pre-lockless-commit file format + return IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen); } } @@ -249,13 +340,13 @@ /// internal bool HasSeparateNorms(int fieldNumber) { - if ((normGen == null && preLockless) || (normGen != null && normGen[fieldNumber] == 0)) + if ((normGen == null && preLockless) || (normGen != null && normGen[fieldNumber] == CHECK_DIR)) { // Must fallback to directory file exists check: System.String fileName = name + ".s" + fieldNumber; return dir.FileExists(fileName); } - else if (normGen == null || normGen[fieldNumber] == - 1) + else if (normGen == null || normGen[fieldNumber] == NO) { return false; } @@ -282,6 +373,11 @@ // code. 
So we must fallback to the original // directory list check: System.String[] result = dir.List(); + if (result == null) + { + throw new System.IO.IOException("cannot read directory " + dir + ": list() returned null"); + } + System.String pattern; pattern = name + ".s"; int patternLength = pattern.Length; @@ -296,11 +392,11 @@ else { // This means this segment was saved with LOCKLESS - // code so we first check whether any normGen's are > - // 0 (meaning they definitely have separate norms): + // code so we first check whether any normGen's are >= 1 + // (meaning they definitely have separate norms): for (int i = 0; i < normGen.Length; i++) { - if (normGen[i] > 0) + if (normGen[i] >= YES) { return true; } @@ -309,7 +405,7 @@ // pre-LOCKLESS and must be checked in directory: for (int i = 0; i < normGen.Length; i++) { - if (normGen[i] == 0) + if (normGen[i] == CHECK_DIR) { if (HasSeparateNorms(i)) { @@ -330,14 +426,15 @@ /// internal void AdvanceNormGen(int fieldIndex) { - if (normGen[fieldIndex] == - 1) + if (normGen[fieldIndex] == NO) { - normGen[fieldIndex] = 1; + normGen[fieldIndex] = YES; } else { normGen[fieldIndex]++; } + ClearFiles(); } /// Get the file name for the norms file for this field. @@ -352,7 +449,7 @@ long gen; if (normGen == null) { - gen = 0; + gen = CHECK_DIR; } else { @@ -370,12 +467,12 @@ { // case 2: lockless (or nrm file exists) - single file for all norms prefix = "." + IndexFileNames.NORMS_EXTENSION; - return IndexFileNames.FileNameFromGeneration(name, prefix, 0); + return IndexFileNames.FileNameFromGeneration(name, prefix, WITHOUT_GEN); } // case 3: norm file for each field prefix = ".f"; - return IndexFileNames.FileNameFromGeneration(name, prefix + number, 0); + return IndexFileNames.FileNameFromGeneration(name, prefix + number, WITHOUT_GEN); } /// Mark whether this segment is stored as a compound file. @@ -388,12 +485,13 @@ { if (isCompoundFile) { - this.isCompoundFile = 1; + this.isCompoundFile = (sbyte) (YES); } else { - this.isCompoundFile = - 1; + this.isCompoundFile = (sbyte) (NO); } + ClearFiles(); } /// Returns true if this segment is stored as a compound @@ -401,30 +499,64 @@ /// internal bool GetUseCompoundFile() { - if (isCompoundFile == - 1) + if (isCompoundFile == NO) { return false; } - else if (isCompoundFile == 1) + else if (isCompoundFile == YES) { return true; } else { - return dir.FileExists(name + ".cfs"); + return dir.FileExists(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION); } } + internal int GetDocStoreOffset() + { + return docStoreOffset; + } + + internal bool GetDocStoreIsCompoundFile() + { + return docStoreIsCompoundFile; + } + + internal void SetDocStoreIsCompoundFile(bool v) + { + docStoreIsCompoundFile = v; + ClearFiles(); + } + + internal System.String GetDocStoreSegment() + { + return docStoreSegment; + } + + internal void SetDocStoreOffset(int offset) + { + docStoreOffset = offset; + ClearFiles(); + } + /// Save this segment's info. internal void Write(IndexOutput output) { output.WriteString(name); output.WriteInt(docCount); output.WriteLong(delGen); + output.WriteInt(docStoreOffset); + if (docStoreOffset != - 1) + { + output.WriteString(docStoreSegment); + output.WriteByte((byte) (docStoreIsCompoundFile ? 1 : 0)); + } + output.WriteByte((byte) (hasSingleNormFile ? 
1 : 0)); if (normGen == null) { - output.WriteInt(- 1); + output.WriteInt(NO); } else { @@ -436,5 +568,198 @@ } output.WriteByte((byte) isCompoundFile); } + + private void AddIfExists(System.Collections.IList files, System.String fileName) + { + if (dir.FileExists(fileName)) + files.Add(fileName); + } + + /* + * Return all files referenced by this SegmentInfo. The + * returns List is a locally cached List so you should not + * modify it. + */ + + public System.Collections.IList Files() + { + + if (files != null) + { + // Already cached: + return files; + } + + files = new System.Collections.ArrayList(); + + bool useCompoundFile = GetUseCompoundFile(); + + if (useCompoundFile) + { + files.Add(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION); + } + else + { + System.String[] exts = IndexFileNames.NON_STORE_INDEX_EXTENSIONS; + for (int i = 0; i < exts.Length; i++) + AddIfExists(files, name + "." + exts[i]); + } + + if (docStoreOffset != - 1) + { + // We are sharing doc stores (stored fields, term + // vectors) with other segments + System.Diagnostics.Debug.Assert(docStoreSegment != null); + if (docStoreIsCompoundFile) + { + files.Add(docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION); + } + else + { + System.String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS; + for (int i = 0; i < exts.Length; i++) + AddIfExists(files, docStoreSegment + "." + exts[i]); + } + } + else if (!useCompoundFile) + { + // We are not sharing, and, these files were not + // included in the compound file + System.String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS; + for (int i = 0; i < exts.Length; i++) + AddIfExists(files, name + "." + exts[i]); + } + + System.String delFileName = IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen); + if (delFileName != null && (delGen >= YES || dir.FileExists(delFileName))) + { + files.Add(delFileName); + } + + // Careful logic for norms files + if (normGen != null) + { + for (int i = 0; i < normGen.Length; i++) + { + long gen = normGen[i]; + if (gen >= YES) + { + // Definitely a separate norm file, with generation: + files.Add(IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen)); + } + else if (NO == gen) + { + // No separate norms but maybe plain norms + // in the non compound file case: + if (!hasSingleNormFile && !useCompoundFile) + { + System.String fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i; + if (dir.FileExists(fileName)) + { + files.Add(fileName); + } + } + } + else if (CHECK_DIR == gen) + { + // Pre-2.1: we have to check file existence + System.String fileName = null; + if (useCompoundFile) + { + fileName = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i; + } + else if (!hasSingleNormFile) + { + fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i; + } + if (fileName != null && dir.FileExists(fileName)) + { + files.Add(fileName); + } + } + } + } + else if (preLockless || (!hasSingleNormFile && !useCompoundFile)) + { + // Pre-2.1: we have to scan the dir to find all + // matching _X.sN/_X.fN files for our segment: + System.String prefix; + if (useCompoundFile) + prefix = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION; + else + prefix = name + "." 
+ IndexFileNames.PLAIN_NORMS_EXTENSION; + int prefixLength = prefix.Length; + System.String[] allFiles = dir.List(); + if (allFiles == null) + { + throw new System.IO.IOException("cannot read directory " + dir + ": list() returned null"); + } + for (int i = 0; i < allFiles.Length; i++) + { + System.String fileName = allFiles[i]; + if (fileName.Length > prefixLength && System.Char.IsDigit(fileName[prefixLength]) && fileName.StartsWith(prefix)) + { + files.Add(fileName); + } + } + } + return files; + } + + /* Called whenever any change is made that affects which + * files this segment has. */ + private void ClearFiles() + { + files = null; + sizeInBytes = - 1; + } + + /// Used for debugging + public System.String SegString(Directory dir) + { + System.String cfs; + try + { + if (GetUseCompoundFile()) + cfs = "c"; + else + cfs = "C"; + } + catch (System.IO.IOException ioe) + { + cfs = "?"; + } + + System.String docStore; + + if (docStoreOffset != - 1) + docStore = "->" + docStoreSegment; + else + docStore = ""; + + return name + ":" + cfs + (this.dir == dir ? "" : "x") + docCount + docStore; + } + + /// We consider another SegmentInfo instance equal if it + /// has the same dir and same name. + /// + public override bool Equals(System.Object obj) + { + SegmentInfo other; + try + { + other = (SegmentInfo) obj; + } + catch (System.InvalidCastException cce) + { + return false; + } + return other.dir == dir && other.name.Equals(name); + } + + public override int GetHashCode() + { + return dir.GetHashCode() + name.GetHashCode(); + } } } \ No newline at end of file