lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [1/3] lucenenet git commit: match Lucene logic for merging values
Date Sun, 25 Jan 2015 14:06:30 GMT
Repository: lucenenet
Updated Branches:
  refs/heads/master 56bfeaab2 -> 8520cba87


match Lucene logic for merging values


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/4edb41be
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/4edb41be
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/4edb41be

Branch: refs/heads/master
Commit: 4edb41be48fc2bd73c7ce9260ac7ec692b6861ed
Parents: 593231b
Author: Laimonas Simutis <laimis@gmail.com>
Authored: Sun Jan 25 07:36:37 2015 -0500
Committer: Laimonas Simutis <laimis@gmail.com>
Committed: Sun Jan 25 07:36:37 2015 -0500

----------------------------------------------------------------------
 src/Lucene.Net.Core/Codecs/DocValuesConsumer.cs | 256 +++----------------
 .../Index/BinaryDocValuesWriter.cs              |  23 +-
 src/Lucene.Net.Core/Index/SegmentMerger.cs      |  18 +-
 3 files changed, 55 insertions(+), 242 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4edb41be/src/Lucene.Net.Core/Codecs/DocValuesConsumer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Codecs/DocValuesConsumer.cs b/src/Lucene.Net.Core/Codecs/DocValuesConsumer.cs
index ecec5e1..89693c5 100644
--- a/src/Lucene.Net.Core/Codecs/DocValuesConsumer.cs
+++ b/src/Lucene.Net.Core/Codecs/DocValuesConsumer.cs
@@ -112,19 +112,19 @@ namespace Lucene.Net.Codecs
         /// The default implementation calls <seealso cref="#addNumericField"/>, passing
         /// an Iterable that merges and filters deleted documents on the fly.
         /// </summary>
-        // LUCENE TO-DO This is a bit wacky
-        public virtual void MergeNumericField(FieldInfo fieldInfo, MergeState mergeState, IList<NumericDocValues> toMerge/*, IList<Bits> docsWithField*/)
+        public virtual void MergeNumericField(FieldInfo fieldInfo, MergeState mergeState, IList<NumericDocValues> toMerge, IList<Bits> docsWithField)
         {
-            AddNumericField(fieldInfo, GetMergeNumericFieldEnumerable(fieldInfo, mergeState, toMerge));
+            AddNumericField(fieldInfo, GetMergeNumericFieldEnumerable(fieldInfo, mergeState, toMerge, docsWithField));
         }
 
-        private IEnumerable<long?> GetMergeNumericFieldEnumerable(FieldInfo fieldinfo, MergeState mergeState, IList<NumericDocValues> toMerge)
+        private IEnumerable<long?> GetMergeNumericFieldEnumerable(FieldInfo fieldinfo, MergeState mergeState, IList<NumericDocValues> toMerge, IList<Bits> docsWithField)
         {
             int readerUpto = -1;
             int docIDUpto = 0;
             AtomicReader currentReader = null;
             NumericDocValues currentValues = null;
             Bits currentLiveDocs = null;
+            Bits currentDocsWithField = null;
 
             while (true)
             {
@@ -140,6 +140,7 @@ namespace Lucene.Net.Codecs
                     {
                         currentReader = mergeState.Readers[readerUpto];
                         currentValues = toMerge[readerUpto];
+                        currentDocsWithField = docsWithField[readerUpto];
                         currentLiveDocs = currentReader.LiveDocs;
                     }
                     docIDUpto = 0;
@@ -148,7 +149,18 @@ namespace Lucene.Net.Codecs
 
                 if (currentLiveDocs == null || currentLiveDocs.Get(docIDUpto))
                 {
-                    yield return currentValues.Get(docIDUpto++);
+                    long? nextValue;
+                    if (currentDocsWithField.Get(docIDUpto))
+                    {
+                        nextValue = currentValues.Get(docIDUpto);
+                    }
+                    else
+                    {
+                        nextValue = null;
+                    }
+
+                    docIDUpto++;
+                    yield return nextValue;
                     continue;
                 }
 
@@ -156,131 +168,25 @@ namespace Lucene.Net.Codecs
             }
         }
 
-        /*
-	  private class IterableAnonymousInnerClassHelper : IEnumerable<Number>
-	  {
-		  private readonly DocValuesConsumer OuterInstance;
-
-		  private MergeState MergeState;
-		  private IList<NumericDocValues> ToMerge;
-		  private IList<Bits> DocsWithField;
-
-		  public IterableAnonymousInnerClassHelper(DocValuesConsumer outerInstance, MergeState mergeState, IList<NumericDocValues> toMerge, IList<Bits> docsWithField)
-		  {
-			  this.OuterInstance = outerInstance;
-			  this.MergeState = mergeState;
-			  this.ToMerge = toMerge;
-			  this.DocsWithField = docsWithField;
-		  }
-
-		  public virtual IEnumerator<Number> GetEnumerator()
-		  {
-			return new IteratorAnonymousInnerClassHelper(this);
-		  }
-
-		  private class IteratorAnonymousInnerClassHelper : IEnumerator<Number>
-		  {
-			  private readonly IterableAnonymousInnerClassHelper OuterInstance;
-
-			  public IteratorAnonymousInnerClassHelper(IterableAnonymousInnerClassHelper outerInstance)
-			  {
-				  this.OuterInstance = outerInstance;
-				  readerUpto = -1;
-			  }
-
-			  internal int readerUpto;
-			  internal int docIDUpto;
-			  internal long? nextValue;
-			  internal AtomicReader currentReader;
-			  internal NumericDocValues currentValues;
-			  internal Bits currentLiveDocs;
-			  internal Bits currentDocsWithField;
-			  internal bool nextIsSet;
-
-			  public virtual bool HasNext()
-			  {
-				return nextIsSet || SetNext();
-			  }
-
-			  public virtual void Remove()
-			  {
-				throw new System.NotSupportedException();
-			  }
-
-			  public virtual Number Next()
-			  {
-				if (!HasNext())
-				{
-				  throw new Exception();
-				}
-				Debug.Assert(nextIsSet);
-				nextIsSet = false;
-				return nextValue;
-			  }
-
-			  private bool SetNext()
-			  {
-				while (true)
-				{
-				  if (readerUpto == OuterInstance.ToMerge.Count)
-				  {
-					return false;
-				  }
-
-				  if (currentReader == null || docIDUpto == currentReader.MaxDoc)
-				  {
-					readerUpto++;
-					if (readerUpto < OuterInstance.ToMerge.Count)
-					{
-					  currentReader = OuterInstance.MergeState.Readers.get(readerUpto);
-					  currentValues = OuterInstance.ToMerge[readerUpto];
-					  currentLiveDocs = currentReader.LiveDocs;
-					  currentDocsWithField = OuterInstance.DocsWithField[readerUpto];
-					}
-					docIDUpto = 0;
-					continue;
-				  }
-
-				  if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto))
-				  {
-					nextIsSet = true;
-					if (currentDocsWithField.get(docIDUpto))
-					{
-					  nextValue = currentValues.get(docIDUpto);
-					}
-					else
-					{
-					  nextValue = null;
-					}
-					docIDUpto++;
-					return true;
-				  }
-
-				  docIDUpto++;
-				}
-			  }
-		  }
-	  }*/
-
         /// <summary>
         /// Merges the binary docvalues from <code>toMerge</code>.
         /// <p>
         /// The default implementation calls <seealso cref="#addBinaryField"/>, passing
         /// an Iterable that merges and filters deleted documents on the fly.
         /// </summary>
-        public void MergeBinaryField(FieldInfo fieldInfo, MergeState mergeState, IList<BinaryDocValues> toMerge/*, IList<Bits> docsWithField*/)
+        public void MergeBinaryField(FieldInfo fieldInfo, MergeState mergeState, IList<BinaryDocValues> toMerge, IList<Bits> docsWithField)
         {
-            AddBinaryField(fieldInfo, GetMergeBinaryFieldEnumerable(fieldInfo, mergeState, toMerge/*, docsWithField*/));
+            AddBinaryField(fieldInfo, GetMergeBinaryFieldEnumerable(fieldInfo, mergeState, toMerge, docsWithField));
         }
 
-        private IEnumerable<BytesRef> GetMergeBinaryFieldEnumerable(FieldInfo fieldInfo, MergeState mergeState, IList<BinaryDocValues> toMerge)
+        private IEnumerable<BytesRef> GetMergeBinaryFieldEnumerable(FieldInfo fieldInfo, MergeState mergeState, IList<BinaryDocValues> toMerge, IList<Bits> docsWithField)
         {
             int readerUpto = -1;
             int docIDUpto = 0;
             AtomicReader currentReader = null;
             BinaryDocValues currentValues = null;
             Bits currentLiveDocs = null;
-            BytesRef nextValue = new BytesRef();
+            Bits currentDocsWithField = null;
 
             while (true)
             {
@@ -296,6 +202,7 @@ namespace Lucene.Net.Codecs
                     {
                         currentReader = mergeState.Readers[readerUpto];
                         currentValues = toMerge[readerUpto];
+                        currentDocsWithField = docsWithField[readerUpto];
                         currentLiveDocs = currentReader.LiveDocs;
                     }
                     docIDUpto = 0;
@@ -304,124 +211,25 @@ namespace Lucene.Net.Codecs
 
                 if (currentLiveDocs == null || currentLiveDocs.Get(docIDUpto))
                 {
-                    currentValues.Get(docIDUpto, nextValue);
-                    docIDUpto++;
-                    yield return nextValue;
-                    continue;
-                }
-
-                docIDUpto++;
-            }
-        }
-
-        /*
-        private class IterableAnonymousInnerClassHelper2 : IEnumerable<BytesRef>
-        {
-            private readonly DocValuesConsumer OuterInstance;
-
-            private MergeState MergeState;
-            private IList<BinaryDocValues> ToMerge;
-            private IList<Bits> DocsWithField;
-
-            public IterableAnonymousInnerClassHelper2(DocValuesConsumer outerInstance, MergeState mergeState, IList<BinaryDocValues> toMerge, IList<Bits> docsWithField)
-            {
-                this.OuterInstance = outerInstance;
-                this.MergeState = mergeState;
-                this.ToMerge = toMerge;
-                this.DocsWithField = docsWithField;
-            }
-
-            public virtual IEnumerator<BytesRef> GetEnumerator()
-            {
-              return new IteratorAnonymousInnerClassHelper2(this);
-            }
-
-            private class IteratorAnonymousInnerClassHelper2 : IEnumerator<BytesRef>
-            {
-                private readonly IterableAnonymousInnerClassHelper2 OuterInstance;
-
-                public IteratorAnonymousInnerClassHelper2(IterableAnonymousInnerClassHelper2 outerInstance)
-                {
-                    this.OuterInstance = outerInstance;
-                    readerUpto = -1;
-                    nextValue = new BytesRef();
-                }
-
-                internal int readerUpto;
-                internal int docIDUpto;
-                internal BytesRef nextValue;
-                internal BytesRef nextPointer; // points to null if missing, or nextValue
-                internal AtomicReader currentReader;
-                internal BinaryDocValues currentValues;
-                internal Bits currentLiveDocs;
-                internal Bits currentDocsWithField;
-                internal bool nextIsSet;
-
-                public virtual bool HasNext()
-                {
-                  return nextIsSet || SetNext();
-                }
-
-                public virtual void Remove()
-                {
-                  throw new System.NotSupportedException();
-                }
-
-                public virtual BytesRef Next()
-                {
-                  if (!HasNext())
-                  {
-                    throw new Exception();
-                  }
-                  Debug.Assert(nextIsSet);
-                  nextIsSet = false;
-                  return nextPointer;
-                }
-
-                private bool SetNext()
-                {
-                  while (true)
-                  {
-                    if (readerUpto == OuterInstance.ToMerge.Count)
-                    {
-                      return false;
-                    }
+                    BytesRef nextValue = new BytesRef();
 
-                    if (currentReader == null || docIDUpto == currentReader.MaxDoc)
+                    if (currentDocsWithField.Get(docIDUpto))
                     {
-                      readerUpto++;
-                      if (readerUpto < OuterInstance.ToMerge.Count)
-                      {
-                        currentReader = OuterInstance.MergeState.Readers[readerUpto];
-                        currentValues = OuterInstance.ToMerge[readerUpto];
-                        currentDocsWithField = OuterInstance.DocsWithField[readerUpto];
-                        currentLiveDocs = currentReader.LiveDocs;
-                      }
-                      docIDUpto = 0;
-                      continue;
+                        currentValues.Get(docIDUpto, nextValue);
                     }
-
-                    if (currentLiveDocs == null || currentLiveDocs.Get(docIDUpto))
+                    else
                     {
-                      nextIsSet = true;
-                      if (currentDocsWithField.Get(docIDUpto))
-                      {
-                        currentValues.Get(docIDUpto, nextValue);
-                        nextPointer = nextValue;
-                      }
-                      else
-                      {
-                        nextPointer = null;
-                      }
-                      docIDUpto++;
-                      return true;
+                        nextValue = null;
                     }
 
                     docIDUpto++;
-                  }
+                    yield return nextValue;
+                    continue;
                 }
+
+                docIDUpto++;
             }
-        }*/
+        }
 
         /// <summary>
         /// Merges the sorted docvalues from <code>toMerge</code>.

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4edb41be/src/Lucene.Net.Core/Index/BinaryDocValuesWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Index/BinaryDocValuesWriter.cs b/src/Lucene.Net.Core/Index/BinaryDocValuesWriter.cs
index 47f9710..a1b527d 100644
--- a/src/Lucene.Net.Core/Index/BinaryDocValuesWriter.cs
+++ b/src/Lucene.Net.Core/Index/BinaryDocValuesWriter.cs
@@ -159,33 +159,38 @@ namespace Lucene.Net.Index
             // Use yield return instead of ucsom IEnumerable
 
             BytesRef value = new BytesRef();
-            AppendingDeltaPackedLongBuffer.Iterator lengthsIterator = (AppendingDeltaPackedLongBuffer.Iterator)Lengths.GetIterator();
+            AppendingDeltaPackedLongBuffer.Iterator lengthsIterator = Lengths.GetIterator();
             int size = (int)Lengths.Size();
             DataInput bytesIterator = Bytes.DataInput;
             int maxDoc = maxDocParam;
             int upto = 0;
-            long byteOffset = 0L;
 
             while (upto < maxDoc)
             {
+                BytesRef v;
                 if (upto < size)
                 {
                     int length = (int)lengthsIterator.Next();
                     value.Grow(length);
                     value.Length = length;
-                    //LUCENE TODO: This modification is slightly fishy, 4x port uses ByteBlockPool
-                    bytesIterator.ReadBytes(/*byteOffset,*/ value.Bytes, value.Offset, value.Length);
-                    byteOffset += length;
+                    bytesIterator.ReadBytes(value.Bytes, value.Offset, value.Length);
+
+                    if (DocsWithField.Get(upto))
+                    {
+                        v = value;
+                    }
+                    else
+                    {
+                        v = null;
+                    }
                 }
                 else
                 {
-                    // This is to handle last N documents not having
-                    // this DV field in the end of the segment:
-                    value.Length = 0;
+                    v = null;
                 }
 
                 upto++;
-                yield return value;
+                yield return v;
             }
         }
 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4edb41be/src/Lucene.Net.Core/Index/SegmentMerger.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Index/SegmentMerger.cs b/src/Lucene.Net.Core/Index/SegmentMerger.cs
index bb7e444..28af2dd 100644
--- a/src/Lucene.Net.Core/Index/SegmentMerger.cs
+++ b/src/Lucene.Net.Core/Index/SegmentMerger.cs
@@ -189,7 +189,7 @@ namespace Lucene.Net.Index
                         if (type == DocValuesType_e.NUMERIC)
                         {
                             IList<NumericDocValues> toMerge = new List<NumericDocValues>();
-                            //IList<Bits> docsWithField = new List<Bits>();
+                            IList<Bits> docsWithField = new List<Bits>();
                             foreach (AtomicReader reader in MergeState.Readers)
                             {
                                 NumericDocValues values = reader.GetNumericDocValues(field.Name);
@@ -200,14 +200,14 @@ namespace Lucene.Net.Index
                                     bits = new Lucene.Net.Util.Bits_MatchNoBits(reader.MaxDoc);
                                 }
                                 toMerge.Add(values);
-                                //docsWithField.Add(bits);
+                                docsWithField.Add(bits);
                             }
-                            consumer.MergeNumericField(field, MergeState, toMerge/*, docsWithField*/);
+                            consumer.MergeNumericField(field, MergeState, toMerge, docsWithField);
                         }
                         else if (type == DocValuesType_e.BINARY)
                         {
                             IList<BinaryDocValues> toMerge = new List<BinaryDocValues>();
-                            //IList<Bits> docsWithField = new List<Bits>();
+                            IList<Bits> docsWithField = new List<Bits>();
                             foreach (AtomicReader reader in MergeState.Readers)
                             {
                                 BinaryDocValues values = reader.GetBinaryDocValues(field.Name);
@@ -218,9 +218,9 @@ namespace Lucene.Net.Index
                                     bits = new Lucene.Net.Util.Bits_MatchNoBits(reader.MaxDoc);
                                 }
                                 toMerge.Add(values);
-                                //docsWithField.Add(bits);
+                                docsWithField.Add(bits);
                             }
-                            consumer.MergeBinaryField(field, MergeState, toMerge/*, docsWithField*/);
+                            consumer.MergeBinaryField(field, MergeState, toMerge, docsWithField);
                         }
                         else if (type == DocValuesType_e.SORTED)
                         {
@@ -282,7 +282,7 @@ namespace Lucene.Net.Index
                     if (field.HasNorms())
                     {
                         IList<NumericDocValues> toMerge = new List<NumericDocValues>();
-                        //IList<Bits> docsWithField = new List<Bits>();
+                        IList<Bits> docsWithField = new List<Bits>();
                         foreach (AtomicReader reader in MergeState.Readers)
                         {
                             NumericDocValues norms = reader.GetNormValues(field.Name);
@@ -291,9 +291,9 @@ namespace Lucene.Net.Index
                                 norms = DocValues.EMPTY_NUMERIC;
                             }
                             toMerge.Add(norms);
-                            //docsWithField.Add(new Lucene.Net.Util.Bits_MatchAllBits(reader.MaxDoc));
+                            docsWithField.Add(new Lucene.Net.Util.Bits_MatchAllBits(reader.MaxDoc));
                         }
-                        consumer.MergeNumericField(field, MergeState, toMerge/*, docsWithField*/);
+                        consumer.MergeNumericField(field, MergeState, toMerge, docsWithField);
                     }
                 }
                 success = true;


Mime
View raw message