lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From paulir...@apache.org
Subject [41/53] [abbrv] git commit: Port Facet.Index
Date Thu, 07 Nov 2013 13:53:56 GMT
Port Facet.Index


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/71e218cb
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/71e218cb
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/71e218cb

Branch: refs/heads/branch_4x
Commit: 71e218cb010cd5819b9774fe76c41b9a3344dc8a
Parents: b362722
Author: Paul Irwin <paulirwin@gmail.com>
Authored: Wed Nov 6 09:49:10 2013 -0500
Committer: Paul Irwin <paulirwin@gmail.com>
Committed: Wed Nov 6 09:49:10 2013 -0500

----------------------------------------------------------------------
 src/contrib/Facet/Contrib.Facet.csproj          |   4 +
 src/contrib/Facet/Index/CountingListBuilder.cs  | 139 +++++++++++++++++++
 src/contrib/Facet/Index/DrillDownStream.cs      |  62 +++++++++
 src/contrib/Facet/Index/FacetFields.cs          | 121 ++++++++++++++++
 src/contrib/Facet/Index/ICategoryListBuilder.cs |  14 ++
 5 files changed, 340 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/71e218cb/src/contrib/Facet/Contrib.Facet.csproj
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Contrib.Facet.csproj b/src/contrib/Facet/Contrib.Facet.csproj
index 1c26fcb..5c15e5b 100644
--- a/src/contrib/Facet/Contrib.Facet.csproj
+++ b/src/contrib/Facet/Contrib.Facet.csproj
@@ -74,6 +74,10 @@
     <Compile Include="Encoding\UniqueValuesIntEncoder.cs" />
     <Compile Include="Encoding\VInt8IntDecoder.cs" />
     <Compile Include="Encoding\VInt8IntEncoder.cs" />
+    <Compile Include="Index\CountingListBuilder.cs" />
+    <Compile Include="Index\DrillDownStream.cs" />
+    <Compile Include="Index\FacetFields.cs" />
+    <Compile Include="Index\ICategoryListBuilder.cs" />
     <Compile Include="Params\CategoryListParams.cs" />
     <Compile Include="Params\FacetIndexingParams.cs" />
     <Compile Include="Params\FacetSearchParams.cs" />

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/71e218cb/src/contrib/Facet/Index/CountingListBuilder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Index/CountingListBuilder.cs b/src/contrib/Facet/Index/CountingListBuilder.cs
new file mode 100644
index 0000000..3082c35
--- /dev/null
+++ b/src/contrib/Facet/Index/CountingListBuilder.cs
@@ -0,0 +1,139 @@
+using Lucene.Net.Facet.Encoding;
+using Lucene.Net.Facet.Params;
+using Lucene.Net.Facet.Taxonomy;
+using Lucene.Net.Facet.Util;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Facet.Index
+{
+    /// <summary>
+    /// An <see cref="ICategoryListBuilder"/> that extends the supplied category ordinals with
+    /// their taxonomy parents (as allowed by each dimension's ordinal policy) and then encodes
+    /// the resulting ordinals into one <see cref="BytesRef"/> per partition name.
+    /// </summary>
+    public class CountingListBuilder : ICategoryListBuilder
+    {
+        /// <summary>Strategy that turns a list of ordinals into encoded bytes keyed by name.</summary>
+        private abstract class OrdinalsEncoder
+        {
+            internal OrdinalsEncoder()
+            {
+            }
+
+            /// <summary>Encodes <paramref name="ordinals"/> and returns the bytes keyed by name.</summary>
+            public abstract IDictionary<string, BytesRef> Encode(IntsRef ordinals);
+        }
+
+        /// <summary>
+        /// Encodes all ordinals with a single encoder and returns them under the empty name.
+        /// </summary>
+        private sealed class NoPartitionsOrdinalsEncoder : OrdinalsEncoder
+        {
+            private readonly IntEncoder encoder;
+            private readonly string name = @"";
+
+            internal NoPartitionsOrdinalsEncoder(CategoryListParams categoryListParams)
+            {
+                encoder = categoryListParams.CreateEncoder();
+            }
+
+            public override IDictionary<string, BytesRef> Encode(IntsRef ordinals)
+            {
+                BytesRef bytes = new BytesRef(128);
+                encoder.Encode(ordinals, bytes);
+                return new Dictionary<string, BytesRef>() { { name, bytes } };
+            }
+        }
+
+        /// <summary>
+        /// Groups ordinals by the partition name reported by <see cref="PartitionsUtils"/>,
+        /// stores each one modulo the partition size, and encodes every group separately.
+        /// </summary>
+        private sealed class PerPartitionOrdinalsEncoder : OrdinalsEncoder
+        {
+            private readonly FacetIndexingParams indexingParams;
+            private readonly CategoryListParams categoryListParams;
+            private readonly int partitionSize;
+            private readonly HashMap<String, IntEncoder> partitionEncoder = new HashMap<String, IntEncoder>();
+
+            internal PerPartitionOrdinalsEncoder(FacetIndexingParams indexingParams, CategoryListParams categoryListParams)
+            {
+                this.indexingParams = indexingParams;
+                this.categoryListParams = categoryListParams;
+                this.partitionSize = indexingParams.PartitionSize;
+            }
+
+            public override IDictionary<String, BytesRef> Encode(IntsRef ordinals)
+            {
+                // First pass: bucket the ordinals by partition name.
+                HashMap<String, IntsRef> partitionOrdinals = new HashMap<String, IntsRef>();
+                for (int i = 0; i < ordinals.length; i++)
+                {
+                    int ordinal = ordinals.ints[i];
+                    string name = PartitionsUtils.PartitionNameByOrdinal(indexingParams, ordinal);
+                    IntsRef partitionOrds = partitionOrdinals[name];
+                    if (partitionOrds == null)
+                    {
+                        partitionOrds = new IntsRef(32);
+                        partitionOrdinals[name] = partitionOrds;
+                        partitionEncoder[name] = categoryListParams.CreateEncoder();
+                    }
+
+                    // The bucket starts with room for 32 values only; AppendOrdinal grows it
+                    // on demand so that larger partitions no longer overrun the array.
+                    AppendOrdinal(partitionOrds, ordinal % partitionSize);
+                }
+
+                // Second pass: encode each bucket with the encoder created for it.
+                HashMap<String, BytesRef> partitionBytes = new HashMap<String, BytesRef>();
+                foreach (KeyValuePair<String, IntsRef> e in partitionOrdinals)
+                {
+                    string name = e.Key;
+                    IntEncoder encoder = partitionEncoder[name];
+                    BytesRef bytes = new BytesRef(128);
+                    encoder.Encode(e.Value, bytes);
+                    partitionBytes[name] = bytes;
+                }
+
+                return partitionBytes;
+            }
+        }
+
+        private readonly OrdinalsEncoder ordinalsEncoder;
+        private readonly ITaxonomyWriter taxoWriter;
+        private readonly CategoryListParams clp;
+
+        /// <summary>
+        /// Creates a builder for the given category list.
+        /// </summary>
+        /// <param name="categoryListParams">Supplies the encoder and the per-dimension ordinal policy.</param>
+        /// <param name="indexingParams">
+        /// Supplies the partition size; <see cref="int.MaxValue"/> selects the single,
+        /// un-partitioned encoding.
+        /// </param>
+        /// <param name="taxoWriter">Used to look up the parent of each ordinal.</param>
+        public CountingListBuilder(CategoryListParams categoryListParams, FacetIndexingParams indexingParams, ITaxonomyWriter taxoWriter)
+        {
+            this.taxoWriter = taxoWriter;
+            this.clp = categoryListParams;
+            if (indexingParams.PartitionSize == int.MaxValue)
+            {
+                ordinalsEncoder = new NoPartitionsOrdinalsEncoder(categoryListParams);
+            }
+            else
+            {
+                ordinalsEncoder = new PerPartitionOrdinalsEncoder(indexingParams, categoryListParams);
+            }
+        }
+
+        /// <summary>
+        /// Appends <paramref name="value"/> to <paramref name="target"/>, growing the backing
+        /// array first when it is already full.
+        /// </summary>
+        private static void AppendOrdinal(IntsRef target, int value)
+        {
+            if (target.length >= target.ints.Length)
+            {
+                target.Grow(target.length + 1);
+            }
+
+            target.ints[target.length++] = value;
+        }
+
+        /// <summary>
+        /// Appends the parents of every ordinal to <paramref name="ordinals"/> (modifying it in
+        /// place) and returns the encoded result.
+        /// </summary>
+        /// <param name="ordinals">Category ordinals; parent ordinals are appended after the existing entries.</param>
+        /// <param name="categories">The category paths, in the same order as <paramref name="ordinals"/>.</param>
+        /// <returns>The encoded ordinals keyed by name, as produced by the selected encoder.</returns>
+        /// <exception cref="ArgumentException">
+        /// <paramref name="categories"/> yields fewer items than there are ordinals.
+        /// </exception>
+        public IDictionary<string, BytesRef> Build(IntsRef ordinals, IEnumerable<CategoryPath> categories)
+        {
+            // Parents are appended past this point, so capture the original count up front.
+            int upto = ordinals.length;
+            using (IEnumerator<CategoryPath> iter = categories.GetEnumerator())
+            {
+                for (int i = 0; i < upto; i++)
+                {
+                    if (!iter.MoveNext())
+                    {
+                        throw new ArgumentException("categories must contain one entry per ordinal", "categories");
+                    }
+
+                    int ordinal = ordinals.ints[i];
+                    CategoryPath cp = iter.Current;
+                    CategoryListParams.OrdinalPolicy op = clp.GetOrdinalPolicy(cp.components[0]);
+                    if (op == CategoryListParams.OrdinalPolicy.NO_PARENTS)
+                    {
+                        continue;
+                    }
+
+                    int parent = taxoWriter.GetParent(ordinal);
+                    if (parent <= 0)
+                    {
+                        // No parent to add for this ordinal, so there is nothing to discard either.
+                        continue;
+                    }
+
+                    while (parent > 0)
+                    {
+                        AppendOrdinal(ordinals, parent);
+                        parent = taxoWriter.GetParent(parent);
+                    }
+
+                    if (op == CategoryListParams.OrdinalPolicy.ALL_BUT_DIMENSION)
+                    {
+                        // Drop the last parent appended by the loop above.
+                        ordinals.length--;
+                    }
+                }
+            }
+
+            return ordinalsEncoder.Encode(ordinals);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/71e218cb/src/contrib/Facet/Index/DrillDownStream.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Index/DrillDownStream.cs b/src/contrib/Facet/Index/DrillDownStream.cs
new file mode 100644
index 0000000..64c998e
--- /dev/null
+++ b/src/contrib/Facet/Index/DrillDownStream.cs
@@ -0,0 +1,62 @@
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Facet.Params;
+using Lucene.Net.Facet.Taxonomy;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Facet.Index
+{
+    /// <summary>
+    /// A <see cref="TokenStream"/> that emits one drill-down term for every category path it is
+    /// given, followed by one term for each progressively shorter prefix of that path.
+    /// </summary>
+    public class DrillDownStream : TokenStream
+    {
+        private readonly FacetIndexingParams indexingParams;
+        private readonly IEnumerator<CategoryPath> categories;
+        private readonly ICharTermAttribute termAttribute;
+
+        // The path whose term is emitted next; null until a category has been fetched.
+        private CategoryPath current;
+        private bool isParent;
+
+        /// <summary>
+        /// Creates a stream over <paramref name="categories"/>.
+        /// </summary>
+        /// <param name="categories">The category paths to emit terms for.</param>
+        /// <param name="indexingParams">Produces the drill-down term text for each path.</param>
+        public DrillDownStream(IEnumerable<CategoryPath> categories, FacetIndexingParams indexingParams)
+        {
+            termAttribute = AddAttribute<ICharTermAttribute>();
+            this.categories = categories.GetEnumerator();
+            this.indexingParams = indexingParams;
+        }
+
+        /// <summary>
+        /// Extension hook invoked for every emitted term; the default implementation does nothing.
+        /// </summary>
+        /// <param name="category">The path whose term was just written.</param>
+        /// <param name="isParent">
+        /// <c>false</c> for a path taken directly from the input, <c>true</c> for the shorter
+        /// prefixes derived from it.
+        /// </param>
+        protected virtual void AddAdditionalAttributes(CategoryPath category, bool isParent)
+        {
+        }
+
+        /// <summary>
+        /// Writes the next drill-down term into the term attribute.
+        /// </summary>
+        /// <returns><c>true</c> if a term was produced; <c>false</c> once all categories are consumed.</returns>
+        public override bool IncrementToken()
+        {
+            // current is null when Reset() found no categories (or was never called); treat that
+            // the same as a fully consumed path instead of dereferencing it.
+            if (current == null || current.length == 0)
+            {
+                if (!TryAdvance())
+                {
+                    return false;
+                }
+            }
+
+            int nChars = indexingParams.DrillDownTermText(current, termAttribute.Buffer);
+            termAttribute.SetLength(nChars);
+            AddAdditionalAttributes(current, isParent);
+
+            // Continue with the path shortened by one component on the next call.
+            current = current.Subpath(current.length - 1);
+            isParent = true;
+            return true;
+        }
+
+        /// <summary>
+        /// Positions the stream on the first category, or leaves it empty when there is none.
+        /// </summary>
+        public override void Reset()
+        {
+            if (!TryAdvance())
+            {
+                // Empty input: IncrementToken() will simply report end of stream.
+                current = null;
+            }
+        }
+
+        /// <summary>
+        /// Moves to the next input category and prepares the term buffer for it.
+        /// </summary>
+        /// <returns><c>false</c> if the input is exhausted, in which case no state is changed.</returns>
+        private bool TryAdvance()
+        {
+            if (!categories.MoveNext())
+            {
+                return false;
+            }
+
+            current = categories.Current;
+            termAttribute.ResizeBuffer(current.FullPathLength());
+            isParent = false;
+            return true;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/71e218cb/src/contrib/Facet/Index/FacetFields.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Index/FacetFields.cs b/src/contrib/Facet/Index/FacetFields.cs
new file mode 100644
index 0000000..e950a3d
--- /dev/null
+++ b/src/contrib/Facet/Index/FacetFields.cs
@@ -0,0 +1,121 @@
+using Lucene.Net.Documents;
+using Lucene.Net.Facet.Params;
+using Lucene.Net.Facet.Taxonomy;
+using Lucene.Net.Index;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Facet.Index
+{
+    /// <summary>
+    /// Adds the fields needed for faceting to a <see cref="Document"/>: the encoded category
+    /// ordinals (as binary doc values) and a drill-down term field, one pair per category list.
+    /// </summary>
+    public class FacetFields
+    {
+        // Field type of the drill-down field: TextField.TYPE_NOT_STORED restricted to
+        // docs-only postings without norms, frozen so that it cannot be altered afterwards.
+        private static readonly FieldType DRILL_DOWN_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
+
+        static FacetFields()
+        {
+            DRILL_DOWN_TYPE.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
+            DRILL_DOWN_TYPE.OmitNorms = true;
+            DRILL_DOWN_TYPE.Freeze();
+        }
+
+        protected readonly ITaxonomyWriter taxonomyWriter;
+        protected readonly FacetIndexingParams indexingParams;
+
+        /// <summary>
+        /// Creates an instance that uses <see cref="FacetIndexingParams.DEFAULT"/>.
+        /// </summary>
+        /// <param name="taxonomyWriter">The taxonomy to which categories are added.</param>
+        public FacetFields(ITaxonomyWriter taxonomyWriter)
+            : this(taxonomyWriter, FacetIndexingParams.DEFAULT)
+        {
+        }
+
+        /// <summary>
+        /// Creates an instance with explicit indexing parameters.
+        /// </summary>
+        /// <param name="taxonomyWriter">The taxonomy to which categories are added.</param>
+        /// <param name="params_renamed">The indexing parameters to use.</param>
+        public FacetFields(ITaxonomyWriter taxonomyWriter, FacetIndexingParams params_renamed)
+        {
+            this.taxonomyWriter = taxonomyWriter;
+            this.indexingParams = params_renamed;
+        }
+
+        /// <summary>
+        /// Groups <paramref name="categories"/> by the category list each one belongs to.
+        /// </summary>
+        /// <param name="categories">The categories to group.</param>
+        /// <returns>
+        /// A map from category list parameters to its categories. When only one category list
+        /// is configured, the input sequence is returned as-is under that single key.
+        /// </returns>
+        protected virtual IDictionary<CategoryListParams, IEnumerable<CategoryPath>> CreateCategoryListMapping(IEnumerable<CategoryPath> categories)
+        {
+            if (indexingParams.AllCategoryListParams.Count == 1)
+            {
+                return new Dictionary<CategoryListParams, IEnumerable<CategoryPath>>() { { indexingParams.GetCategoryListParams(null), categories } };
+            }
+
+            HashMap<CategoryListParams, IEnumerable<CategoryPath>> categoryLists = new HashMap<CategoryListParams, IEnumerable<CategoryPath>>();
+            foreach (CategoryPath cp in categories)
+            {
+                CategoryListParams clp = indexingParams.GetCategoryListParams(cp);
+                // Every value stored in this map is a List<CategoryPath> created below.
+                List<CategoryPath> list = (List<CategoryPath>)categoryLists[clp];
+                if (list == null)
+                {
+                    list = new List<CategoryPath>();
+                    categoryLists[clp] = list;
+                }
+
+                list.Add(cp);
+            }
+
+            return categoryLists;
+        }
+
+        /// <summary>
+        /// Builds the encoded category list data using a <see cref="CountingListBuilder"/>.
+        /// </summary>
+        /// <param name="categoryListParams">The category list being built.</param>
+        /// <param name="ordinals">The ordinals of <paramref name="categories"/>.</param>
+        /// <param name="categories">The categories of this category list.</param>
+        /// <returns>The encoded bytes keyed by the name returned from the builder.</returns>
+        protected virtual IDictionary<String, BytesRef> GetCategoryListData(CategoryListParams categoryListParams, IntsRef ordinals, IEnumerable<CategoryPath> categories)
+        {
+            return new CountingListBuilder(categoryListParams, indexingParams, taxonomyWriter).Build(ordinals, categories);
+        }
+
+        /// <summary>
+        /// Creates the token stream that supplies the drill-down terms for <paramref name="categories"/>.
+        /// </summary>
+        protected virtual DrillDownStream GetDrillDownStream(IEnumerable<CategoryPath> categories)
+        {
+            return new DrillDownStream(categories, indexingParams);
+        }
+
+        /// <summary>
+        /// Returns the field type used for the drill-down field.
+        /// </summary>
+        protected virtual FieldType DrillDownFieldType()
+        {
+            return DRILL_DOWN_TYPE;
+        }
+
+        /// <summary>
+        /// Adds one <see cref="BinaryDocValuesField"/> per entry of <paramref name="categoriesData"/>,
+        /// named <paramref name="field"/> followed by the entry's key.
+        /// </summary>
+        protected virtual void AddCountingListData(Document doc, IDictionary<String, BytesRef> categoriesData, string field)
+        {
+            foreach (KeyValuePair<String, BytesRef> entry in categoriesData)
+            {
+                doc.Add(new BinaryDocValuesField(field + entry.Key, entry.Value));
+            }
+        }
+
+        /// <summary>
+        /// Adds the facet fields for <paramref name="categories"/> to <paramref name="doc"/>,
+        /// adding each category to the taxonomy along the way.
+        /// </summary>
+        /// <param name="doc">The document to receive the fields.</param>
+        /// <param name="categories">
+        /// The categories of the document. The sequence is enumerated more than once; an empty
+        /// sequence adds no fields.
+        /// </param>
+        /// <exception cref="ArgumentNullException"><paramref name="categories"/> is <c>null</c>.</exception>
+        public virtual void AddFields(Document doc, IEnumerable<CategoryPath> categories)
+        {
+            if (categories == null)
+            {
+                // ArgumentNullException derives from ArgumentException, so existing handlers still match.
+                throw new ArgumentNullException("categories", @"categories should not be null");
+            }
+
+            // Nothing to index. This matches the multi-list grouping, which yields no entries
+            // (and therefore no fields) for an empty input.
+            using (IEnumerator<CategoryPath> probe = categories.GetEnumerator())
+            {
+                if (!probe.MoveNext())
+                {
+                    return;
+                }
+            }
+
+            IDictionary<CategoryListParams, IEnumerable<CategoryPath>> categoryLists = CreateCategoryListMapping(categories);
+
+            // A single buffer is reused for all category lists.
+            IntsRef ordinals = new IntsRef(32);
+            foreach (KeyValuePair<CategoryListParams, IEnumerable<CategoryPath>> e in categoryLists)
+            {
+                CategoryListParams clp = e.Key;
+                string field = clp.field;
+                ordinals.length = 0;
+
+                // Reserve one slot per path component so that the category list builder has
+                // room to append parent ordinals after the category's own ordinal.
+                int maxNumOrds = 0;
+                foreach (CategoryPath cp in e.Value)
+                {
+                    int ordinal = taxonomyWriter.AddCategory(cp);
+                    maxNumOrds += cp.length;
+                    if (ordinals.ints.Length < maxNumOrds)
+                    {
+                        ordinals.Grow(maxNumOrds);
+                    }
+
+                    ordinals.ints[ordinals.length++] = ordinal;
+                }
+
+                IDictionary<String, BytesRef> categoriesData = GetCategoryListData(clp, ordinals, e.Value);
+                AddCountingListData(doc, categoriesData, field);
+                DrillDownStream drillDownStream = GetDrillDownStream(e.Value);
+                Field drillDown = new Field(field, drillDownStream, DrillDownFieldType());
+                doc.Add(drillDown);
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/71e218cb/src/contrib/Facet/Index/ICategoryListBuilder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Facet/Index/ICategoryListBuilder.cs b/src/contrib/Facet/Index/ICategoryListBuilder.cs
new file mode 100644
index 0000000..1b0890f
--- /dev/null
+++ b/src/contrib/Facet/Index/ICategoryListBuilder.cs
@@ -0,0 +1,14 @@
+using Lucene.Net.Facet.Taxonomy;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Facet.Index
+{
+    /// <summary>
+    /// Builds the encoded category list data that is stored with a document.
+    /// </summary>
+    public interface ICategoryListBuilder
+    {
+        /// <summary>
+        /// Encodes the given ordinals.
+        /// </summary>
+        /// <param name="ordinals">The ordinals of <paramref name="categories"/>.</param>
+        /// <param name="categories">The categories the ordinals were assigned to.</param>
+        /// <returns>The encoded data, keyed by a name chosen by the implementation.</returns>
+        IDictionary<string, BytesRef> Build(IntsRef ordinals, IEnumerable<CategoryPath> categories);
+    }
+}


Mime
View raw message