Return-Path: X-Original-To: apmail-lucenenet-commits-archive@www.apache.org Delivered-To: apmail-lucenenet-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id DA378105E4 for ; Thu, 7 Nov 2013 13:56:34 +0000 (UTC) Received: (qmail 59079 invoked by uid 500); 7 Nov 2013 13:53:29 -0000 Delivered-To: apmail-lucenenet-commits-archive@lucenenet.apache.org Received: (qmail 58908 invoked by uid 500); 7 Nov 2013 13:53:27 -0000 Mailing-List: contact commits-help@lucenenet.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: lucene-net-dev@lucenenet.apache.org Delivered-To: mailing list commits@lucenenet.apache.org Received: (qmail 58735 invoked by uid 99); 7 Nov 2013 13:53:19 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 07 Nov 2013 13:53:19 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id E855231D01B; Thu, 7 Nov 2013 13:53:17 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit From: paulirwin@apache.org To: commits@lucenenet.apache.org Date: Thu, 07 Nov 2013 13:53:56 -0000 Message-Id: In-Reply-To: <8a96de0ee3994a3abe197590689f5335@git.apache.org> References: <8a96de0ee3994a3abe197590689f5335@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [41/53] [abbrv] git commit: Port Facet.Index Port Facet.Index Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/71e218cb Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/71e218cb Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/71e218cb Branch: refs/heads/branch_4x Commit: 71e218cb010cd5819b9774fe76c41b9a3344dc8a Parents: b362722 Author: Paul Irwin Authored: Wed Nov 6 09:49:10 2013 -0500 Committer: Paul Irwin Committed: Wed Nov 6 09:49:10 2013 -0500 ---------------------------------------------------------------------- src/contrib/Facet/Contrib.Facet.csproj | 4 + src/contrib/Facet/Index/CountingListBuilder.cs | 139 +++++++++++++++++++ src/contrib/Facet/Index/DrillDownStream.cs | 62 +++++++++ src/contrib/Facet/Index/FacetFields.cs | 121 ++++++++++++++++ src/contrib/Facet/Index/ICategoryListBuilder.cs | 14 ++ 5 files changed, 340 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/71e218cb/src/contrib/Facet/Contrib.Facet.csproj ---------------------------------------------------------------------- diff --git a/src/contrib/Facet/Contrib.Facet.csproj b/src/contrib/Facet/Contrib.Facet.csproj index 1c26fcb..5c15e5b 100644 --- a/src/contrib/Facet/Contrib.Facet.csproj +++ b/src/contrib/Facet/Contrib.Facet.csproj @@ -74,6 +74,10 @@ + + + + http://git-wip-us.apache.org/repos/asf/lucenenet/blob/71e218cb/src/contrib/Facet/Index/CountingListBuilder.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Facet/Index/CountingListBuilder.cs b/src/contrib/Facet/Index/CountingListBuilder.cs new file mode 100644 index 0000000..3082c35 --- /dev/null +++ b/src/contrib/Facet/Index/CountingListBuilder.cs @@ -0,0 +1,139 @@ +using Lucene.Net.Facet.Encoding; +using Lucene.Net.Facet.Params; +using Lucene.Net.Facet.Taxonomy; +using Lucene.Net.Facet.Util; +using Lucene.Net.Support; +using Lucene.Net.Util; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; + +namespace Lucene.Net.Facet.Index +{ + public class CountingListBuilder : ICategoryListBuilder + { + private abstract class OrdinalsEncoder + { + internal OrdinalsEncoder() + { + } + + public abstract IDictionary Encode(IntsRef ordinals); + } + + private sealed class NoPartitionsOrdinalsEncoder : OrdinalsEncoder + { + private readonly IntEncoder encoder; + private readonly string name = @""; + + internal NoPartitionsOrdinalsEncoder(CategoryListParams categoryListParams) + { + encoder = categoryListParams.CreateEncoder(); + } + + public override IDictionary Encode(IntsRef ordinals) + { + BytesRef bytes = new BytesRef(128); + encoder.Encode(ordinals, bytes); + return new Dictionary() { { name, bytes } }; + } + } + + private sealed class PerPartitionOrdinalsEncoder : OrdinalsEncoder + { + private readonly FacetIndexingParams indexingParams; + private readonly CategoryListParams categoryListParams; + private readonly int partitionSize; + private readonly HashMap partitionEncoder = new HashMap(); + + internal PerPartitionOrdinalsEncoder(FacetIndexingParams indexingParams, CategoryListParams categoryListParams) + { + this.indexingParams = indexingParams; + this.categoryListParams = categoryListParams; + this.partitionSize = indexingParams.PartitionSize; + } + + public override IDictionary Encode(IntsRef ordinals) + { + HashMap partitionOrdinals = new HashMap(); + for (int i = 0; i < ordinals.length; i++) + { + int ordinal = ordinals.ints[i]; + string name = PartitionsUtils.PartitionNameByOrdinal(indexingParams, ordinal); + IntsRef partitionOrds = partitionOrdinals[name]; + if (partitionOrds == null) + { + partitionOrds = new IntsRef(32); + partitionOrdinals[name] = partitionOrds; + partitionEncoder[name] = categoryListParams.CreateEncoder(); + } + + partitionOrds.ints[partitionOrds.length++] = ordinal % partitionSize; + } + + HashMap partitionBytes = new HashMap(); + foreach (KeyValuePair e in partitionOrdinals) + { + string name = e.Key; + IntEncoder encoder = partitionEncoder[name]; + BytesRef bytes = new BytesRef(128); + encoder.Encode(e.Value, bytes); + partitionBytes[name] = bytes; + } + + return partitionBytes; + } + } + + private readonly OrdinalsEncoder ordinalsEncoder; + private readonly ITaxonomyWriter taxoWriter; + private readonly CategoryListParams clp; + + public CountingListBuilder(CategoryListParams categoryListParams, FacetIndexingParams indexingParams, ITaxonomyWriter taxoWriter) + { + this.taxoWriter = taxoWriter; + this.clp = categoryListParams; + if (indexingParams.PartitionSize == int.MaxValue) + { + ordinalsEncoder = new NoPartitionsOrdinalsEncoder(categoryListParams); + } + else + { + ordinalsEncoder = new PerPartitionOrdinalsEncoder(indexingParams, categoryListParams); + } + } + + public IDictionary Build(IntsRef ordinals, IEnumerable categories) + { + int upto = ordinals.length; + IEnumerator iter = categories.GetEnumerator(); + for (int i = 0; i < upto; i++) + { + int ordinal = ordinals.ints[i]; + iter.MoveNext(); + CategoryPath cp = iter.Current; + CategoryListParams.OrdinalPolicy op = clp.GetOrdinalPolicy(cp.components[0]); + if (op != CategoryListParams.OrdinalPolicy.NO_PARENTS) + { + int parent = taxoWriter.GetParent(ordinal); + if (parent > 0) + { + while (parent > 0) + { + ordinals.ints[ordinals.length++] = parent; + parent = taxoWriter.GetParent(parent); + } + + if (op == CategoryListParams.OrdinalPolicy.ALL_BUT_DIMENSION) + { + ordinals.length--; + } + } + } + } + + return ordinalsEncoder.Encode(ordinals); + } + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/71e218cb/src/contrib/Facet/Index/DrillDownStream.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Facet/Index/DrillDownStream.cs b/src/contrib/Facet/Index/DrillDownStream.cs new file mode 100644 index 0000000..64c998e --- /dev/null +++ b/src/contrib/Facet/Index/DrillDownStream.cs @@ -0,0 +1,62 @@ +using Lucene.Net.Analysis; +using Lucene.Net.Analysis.Tokenattributes; +using Lucene.Net.Facet.Params; +using Lucene.Net.Facet.Taxonomy; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; + +namespace Lucene.Net.Facet.Index +{ + public class DrillDownStream : TokenStream + { + private readonly FacetIndexingParams indexingParams; + private readonly IEnumerator categories; + private readonly ICharTermAttribute termAttribute; + private CategoryPath current; + private bool isParent; + + public DrillDownStream(IEnumerable categories, FacetIndexingParams indexingParams) + { + termAttribute = AddAttribute(); + this.categories = categories.GetEnumerator(); + this.indexingParams = indexingParams; + } + + protected virtual void AddAdditionalAttributes(CategoryPath category, bool isParent) + { + } + + public override bool IncrementToken() + { + if (current.length == 0) + { + if (!categories.MoveNext()) + { + return false; + } + + current = categories.Current; + termAttribute.ResizeBuffer(current.FullPathLength()); + isParent = false; + } + + int nChars = indexingParams.DrillDownTermText(current, termAttribute.Buffer); + termAttribute.SetLength(nChars); + AddAdditionalAttributes(current, isParent); + current = current.Subpath(current.length - 1); + isParent = true; + return true; + } + + public override void Reset() + { + // TODO: validate this logic + categories.MoveNext(); + current = categories.Current; + termAttribute.ResizeBuffer(current.FullPathLength()); + isParent = false; + } + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/71e218cb/src/contrib/Facet/Index/FacetFields.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Facet/Index/FacetFields.cs b/src/contrib/Facet/Index/FacetFields.cs new file mode 100644 index 0000000..e950a3d --- /dev/null +++ b/src/contrib/Facet/Index/FacetFields.cs @@ -0,0 +1,121 @@ +using Lucene.Net.Documents; +using Lucene.Net.Facet.Params; +using Lucene.Net.Facet.Taxonomy; +using Lucene.Net.Index; +using Lucene.Net.Support; +using Lucene.Net.Util; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; + +namespace Lucene.Net.Facet.Index +{ + public class FacetFields + { + private static readonly FieldType DRILL_DOWN_TYPE = new FieldType(TextField.TYPE_NOT_STORED); + + static FacetFields() + { + DRILL_DOWN_TYPE.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY; + DRILL_DOWN_TYPE.OmitNorms = true; + DRILL_DOWN_TYPE.Freeze(); + } + + protected readonly ITaxonomyWriter taxonomyWriter; + protected readonly FacetIndexingParams indexingParams; + + public FacetFields(ITaxonomyWriter taxonomyWriter) + : this(taxonomyWriter, FacetIndexingParams.DEFAULT) + { + } + + public FacetFields(ITaxonomyWriter taxonomyWriter, FacetIndexingParams params_renamed) + { + this.taxonomyWriter = taxonomyWriter; + this.indexingParams = params_renamed; + } + + protected virtual IDictionary> CreateCategoryListMapping(IEnumerable categories) + { + if (indexingParams.AllCategoryListParams.Count == 1) + { + return new Dictionary>() { { indexingParams.GetCategoryListParams(null), categories } }; + } + + HashMap> categoryLists = new HashMap>(); + foreach (CategoryPath cp in categories) + { + CategoryListParams clp = indexingParams.GetCategoryListParams(cp); + List list = (List)categoryLists[clp]; + if (list == null) + { + list = new List(); + categoryLists[clp] = list; + } + + list.Add(cp); + } + + return categoryLists; + } + + protected virtual IDictionary GetCategoryListData(CategoryListParams categoryListParams, IntsRef ordinals, IEnumerable categories) + { + return new CountingListBuilder(categoryListParams, indexingParams, taxonomyWriter).Build(ordinals, categories); + } + + protected virtual DrillDownStream GetDrillDownStream(IEnumerable categories) + { + return new DrillDownStream(categories, indexingParams); + } + + protected virtual FieldType DrillDownFieldType() + { + return DRILL_DOWN_TYPE; + } + + protected virtual void AddCountingListData(Document doc, IDictionary categoriesData, string field) + { + foreach (KeyValuePair entry in categoriesData) + { + doc.Add(new BinaryDocValuesField(field + entry.Key, entry.Value)); + } + } + + public virtual void AddFields(Document doc, IEnumerable categories) + { + if (categories == null) + { + throw new ArgumentException(@"categories should not be null"); + } + + IDictionary> categoryLists = CreateCategoryListMapping(categories); + IntsRef ordinals = new IntsRef(32); + foreach (KeyValuePair> e in categoryLists) + { + CategoryListParams clp = e.Key; + string field = clp.field; + ordinals.length = 0; + int maxNumOrds = 0; + foreach (CategoryPath cp in e.Value) + { + int ordinal = taxonomyWriter.AddCategory(cp); + maxNumOrds += cp.length; + if (ordinals.ints.Length < maxNumOrds) + { + ordinals.Grow(maxNumOrds); + } + + ordinals.ints[ordinals.length++] = ordinal; + } + + IDictionary categoriesData = GetCategoryListData(clp, ordinals, e.Value); + AddCountingListData(doc, categoriesData, field); + DrillDownStream drillDownStream = GetDrillDownStream(e.Value); + Field drillDown = new Field(field, drillDownStream, DrillDownFieldType()); + doc.Add(drillDown); + } + } + } +} http://git-wip-us.apache.org/repos/asf/lucenenet/blob/71e218cb/src/contrib/Facet/Index/ICategoryListBuilder.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Facet/Index/ICategoryListBuilder.cs b/src/contrib/Facet/Index/ICategoryListBuilder.cs new file mode 100644 index 0000000..1b0890f --- /dev/null +++ b/src/contrib/Facet/Index/ICategoryListBuilder.cs @@ -0,0 +1,14 @@ +using Lucene.Net.Facet.Taxonomy; +using Lucene.Net.Util; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; + +namespace Lucene.Net.Facet.Index +{ + public interface ICategoryListBuilder + { + IDictionary Build(IntsRef ordinals, IEnumerable categories); + } +}