lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [04/16] lucenenet git commit: Move facets into src folder
Date Tue, 25 Nov 2014 18:52:05 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/src/Lucene.Net.Facet/Taxonomy/Directory/Consts.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/Taxonomy/Directory/Consts.cs b/src/Lucene.Net.Facet/Taxonomy/Directory/Consts.cs
new file mode 100644
index 0000000..5b69985
--- /dev/null
+++ b/src/Lucene.Net.Facet/Taxonomy/Directory/Consts.cs
@@ -0,0 +1,34 @@
+namespace Lucene.Net.Facet.Taxonomy.Directory
+{
+
+    using BytesRef = Lucene.Net.Util.BytesRef;
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Field names and payload constants shared by the directory-based taxonomy
+    /// reader and writer.
+    /// 
+    /// @lucene.experimental
+    /// </summary>
+    internal abstract class Consts
+    {
+        /// <summary>Name of the field that holds a category's full path; it is both searched and read back as a stored value.</summary>
+        internal const string FULL = "$full_path$";
+        /// <summary>Name of the field that carries the parent token stream.</summary>
+        internal const string FIELD_PAYLOADS = "$payloads$";
+        /// <summary>Text handed to the writer's single-position token stream.</summary>
+        internal const string PAYLOAD_PARENT = "p";
+        /// <summary><see cref="PAYLOAD_PARENT"/> pre-wrapped as a <see cref="BytesRef"/>.</summary>
+        internal static readonly BytesRef PAYLOAD_PARENT_BYTES_REF = new BytesRef(PAYLOAD_PARENT);
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/src/Lucene.Net.Facet/Taxonomy/Directory/DirectoryTaxonomyReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/Taxonomy/Directory/DirectoryTaxonomyReader.cs b/src/Lucene.Net.Facet/Taxonomy/Directory/DirectoryTaxonomyReader.cs
new file mode 100644
index 0000000..a567210
--- /dev/null
+++ b/src/Lucene.Net.Facet/Taxonomy/Directory/DirectoryTaxonomyReader.cs
@@ -0,0 +1,450 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+using Lucene.Net.Store;
+
+namespace Lucene.Net.Facet.Taxonomy.Directory
+{
+    using Document = Lucene.Net.Documents.Document;
+    using Lucene.Net.Facet.Taxonomy;
+    using CorruptIndexException = Lucene.Net.Index.CorruptIndexException; // javadocs
+    using DirectoryReader = Lucene.Net.Index.DirectoryReader;
+    using DocsEnum = Lucene.Net.Index.DocsEnum;
+    using IndexWriter = Lucene.Net.Index.IndexWriter;
+    using MultiFields = Lucene.Net.Index.MultiFields;
+    using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+    using Directory = Lucene.Net.Store.Directory;
+    using BytesRef = Lucene.Net.Util.BytesRef;
+    using IOUtils = Lucene.Net.Util.IOUtils;
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// A <seealso cref="TaxonomyReader"/> which retrieves stored taxonomy information from a
+    /// <seealso cref="Directory"/>.
+    /// <para>
+    /// Reading from the on-disk index on every method call is too slow, so this
+    /// implementation employs caching: Some methods cache recent requests and their
+    /// results, while other methods prefetch all the data into memory and then
+    /// provide answers directly from in-memory tables. See the documentation of
+    /// individual methods for comments on their performance.
+    /// </para>
+    /// <para>
+    /// @lucene.experimental
+    /// </para>
+    /// </summary>
+    public class DirectoryTaxonomyReader : TaxonomyReader, IDisposable
+    {
+
+        /// <summary>
+        /// Reference-type holder for an optional ordinal; used as the value type of
+        /// the ordinal cache.
+        /// </summary>
+        public class IntClass
+        {
+            private int? intItem;
+
+            /// <summary>The wrapped ordinal, or <c>null</c> when none has been assigned.</summary>
+            public int? IntItem
+            {
+                get { return intItem; }
+                set { intItem = value; }
+            }
+        }
+        // Initial maximum size given to each of the two LRU caches below.
+        private const int DEFAULT_CACHE_VALUE = 4000;
+
+        // Writer this reader was opened from (NRT), or null when opened from a Directory.
+        private readonly DirectoryTaxonomyWriter taxoWriter;
+        // Writer epoch captured at construction (-1 without a writer); compared in DoOpenIfChanged.
+        private readonly long taxoEpoch; // used in doOpenIfChanged
+        // Reader over the index that stores the taxonomy.
+        private readonly DirectoryReader indexReader;
+
+        // TODO: test DoubleBarrelLRUCache and consider using it instead
+        // label -> ordinal cache; accessed under lock (ordinalCache).
+        private LRUHashMap<FacetLabel, IntClass> ordinalCache;
+        // ordinal -> label cache; accessed under lock (categoryCache).
+        private LRUHashMap<int, FacetLabel> categoryCache;
+
+        // Lazily built by InitTaxoArrays(); reset to null by DoClose().
+        private volatile TaxonomyIndexArrays taxoArrays;
+
+        /// <summary>
+        /// Called only from <see cref="DoOpenIfChanged()"/>. If the taxonomy has been
+        /// recreated, pass <c>null</c> for the caches and for the parent/children
+        /// arrays so that nothing from the previous generation is reused.
+        /// </summary>
+        internal DirectoryTaxonomyReader(DirectoryReader indexReader, DirectoryTaxonomyWriter taxoWriter, LRUHashMap<FacetLabel, IntClass> ordinalCache, LRUHashMap<int, FacetLabel> categoryCache, TaxonomyIndexArrays taxoArrays)
+        {
+            this.indexReader = indexReader;
+            this.taxoWriter = taxoWriter;
+            if (taxoWriter != null)
+            {
+                this.taxoEpoch = taxoWriter.TaxonomyEpoch;
+            }
+            else
+            {
+                this.taxoEpoch = -1;
+            }
+
+            // Share the supplied cache instances when given (GetOrdinal and GetPath
+            // guard against entries unknown to this reader); otherwise start fresh.
+            this.ordinalCache = ordinalCache ?? new LRUHashMap<FacetLabel, IntClass>(DEFAULT_CACHE_VALUE);
+            this.categoryCache = categoryCache ?? new LRUHashMap<int, FacetLabel>(DEFAULT_CACHE_VALUE);
+
+            if (taxoArrays != null)
+            {
+                this.taxoArrays = new TaxonomyIndexArrays(indexReader, taxoArrays);
+            }
+            else
+            {
+                this.taxoArrays = null;
+            }
+        }
+
+        /// <summary>
+        /// Open for reading a taxonomy stored in a given <seealso cref="Directory"/>.
+        /// </summary>
+        /// <param name="directory">
+        ///          The <seealso cref="Directory"/> in which the taxonomy resides. </param>
+        /// <exception cref="CorruptIndexException">
+        ///           if the Taxonomy is corrupt. </exception>
+        /// <exception cref="IOException">
+        ///           if another error occurred. </exception>
+        public DirectoryTaxonomyReader(Directory directory)
+        {
+            // Opened straight from a Directory: there is no writer to track, so the
+            // epoch is fixed at -1.
+            this.indexReader = OpenIndexReader(directory);
+            this.taxoWriter = null;
+            this.taxoEpoch = -1;
+
+            // Both caches start at the default size; it can be changed after
+            // construction through the CacheSize property.
+            this.ordinalCache = new LRUHashMap<FacetLabel, IntClass>(DEFAULT_CACHE_VALUE);
+            this.categoryCache = new LRUHashMap<int, FacetLabel>(DEFAULT_CACHE_VALUE);
+        }
+
+        /// <summary>
+        /// Opens a <seealso cref="DirectoryTaxonomyReader"/> over the given
+        /// <seealso cref="DirectoryTaxonomyWriter"/> (for NRT).
+        /// </summary>
+        /// <param name="taxoWriter">
+        ///          The <seealso cref="DirectoryTaxonomyWriter"/> from which to obtain newly
+        ///          added categories, in real-time. </param>
+        public DirectoryTaxonomyReader(DirectoryTaxonomyWriter taxoWriter)
+        {
+            // Remember the writer and the epoch it reports right now, so that
+            // DoOpenIfChanged can later tell whether the taxonomy was recreated.
+            this.taxoWriter = taxoWriter;
+            this.taxoEpoch = taxoWriter.TaxonomyEpoch;
+            IndexWriter internalWriter = taxoWriter.InternalIndexWriter;
+            this.indexReader = OpenIndexReader(internalWriter);
+
+            // Both caches start at the default size; it can be changed after
+            // construction through the CacheSize property.
+            this.ordinalCache = new LRUHashMap<FacetLabel, IntClass>(DEFAULT_CACHE_VALUE);
+            this.categoryCache = new LRUHashMap<int, FacetLabel>(DEFAULT_CACHE_VALUE);
+        }
+
+        /// <summary>
+        /// Builds <c>taxoArrays</c> from the index reader unless it is already set.
+        /// The check and the assignment both happen while holding the lock on this
+        /// instance.
+        /// </summary>
+        private void InitTaxoArrays()
+        {
+            lock (this)
+            {
+                if (taxoArrays != null)
+                {
+                    return; // already built, nothing to do
+                }
+
+                // Construct into a local first so that the volatile field is written
+                // exactly once, with the finished object.
+                TaxonomyIndexArrays built = new TaxonomyIndexArrays(indexReader);
+                taxoArrays = built;
+            }
+        }
+
+        /// <summary>
+        /// Disposes the underlying index reader and then drops this instance's
+        /// references to the taxonomy arrays and to both caches.
+        /// </summary>
+        protected internal override void DoClose()
+        {
+            // Disposed first: if this throws, the fields below keep their values.
+            indexReader.Dispose();
+            taxoArrays = null;
+            // do not clear() the caches, as they may be used by other DTR instances.
+            ordinalCache = null;
+            categoryCache = null;
+        }
+
+        /// <summary>
+        /// Implements the opening of a new <see cref="DirectoryTaxonomyReader"/> instance if
+        /// the taxonomy has changed.
+        /// <para>
+        /// <b>NOTE:</b> the returned <see cref="DirectoryTaxonomyReader"/> shares the
+        /// ordinal and category caches with this reader. This is not expected to cause
+        /// any issues, unless the two instances continue to live. The reader
+        /// guarantees that the two instances cannot affect each other in terms of
+        /// correctness of the caches, however if the size of the cache is changed
+        /// through <see cref="CacheSize"/>, it will affect both reader instances.
+        /// </para>
+        /// </summary>
+        /// <returns>
+        /// A new reader over the changed index, or <c>null</c> when the underlying
+        /// index reader reports no change.
+        /// </returns>
+        protected override TaxonomyReader DoOpenIfChanged()
+        {
+            EnsureOpen();
+
+            // This works for both NRT and non-NRT readers (i.e. an NRT reader remains NRT).
+            var r2 = DirectoryReader.OpenIfChanged(indexReader);
+            if (r2 == null)
+            {
+                return null; // no changes, nothing to do
+            }
+
+            // check if the taxonomy was recreated
+            bool success = false;
+            try
+            {
+                bool recreated;
+                if (taxoWriter == null)
+                {
+                    // not NRT, check epoch from commit data.
+                    // TryGetValue leaves the value null when the key is absent; the
+                    // dictionary indexer would throw KeyNotFoundException instead.
+                    string t1;
+                    string t2;
+                    indexReader.IndexCommit.UserData.TryGetValue(DirectoryTaxonomyWriter.INDEX_EPOCH, out t1);
+                    r2.IndexCommit.UserData.TryGetValue(DirectoryTaxonomyWriter.INDEX_EPOCH, out t2);
+
+                    // The two epoch strings must be exactly the same. The static,
+                    // null-safe comparison covers every case: both missing -> not
+                    // recreated; only one missing, or different values -> recreated.
+                    recreated = !string.Equals(t1, t2, StringComparison.Ordinal);
+                }
+                else
+                {
+                    // NRT, compare current taxoWriter.epoch() vs the one that was given at construction
+                    recreated = taxoEpoch != taxoWriter.TaxonomyEpoch;
+                }
+
+                DirectoryTaxonomyReader newtr;
+                if (recreated)
+                {
+                    // if recreated, do not reuse anything from this instance. the information
+                    // will be lazily computed by the new instance when needed.
+                    newtr = new DirectoryTaxonomyReader(r2, taxoWriter, null, null, null);
+                }
+                else
+                {
+                    newtr = new DirectoryTaxonomyReader(r2, taxoWriter, ordinalCache, categoryCache, taxoArrays);
+                }
+
+                success = true;
+                return newtr;
+            }
+            finally
+            {
+                // Do not leak the freshly opened reader if anything above failed.
+                if (!success)
+                {
+                    IOUtils.CloseWhileHandlingException(r2);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Opens the <see cref="DirectoryReader"/> over the given <see cref="Directory"/>.
+        /// Virtual so that subclasses can control how the reader is opened.
+        /// </summary>
+        protected virtual DirectoryReader OpenIndexReader(Directory directory)
+        {
+            DirectoryReader reader = DirectoryReader.Open(directory);
+            return reader;
+        }
+
+        /// <summary>
+        /// Opens the <see cref="DirectoryReader"/> over the given <see cref="IndexWriter"/>.
+        /// Virtual so that subclasses can control how the reader is opened.
+        /// </summary>
+        protected virtual DirectoryReader OpenIndexReader(IndexWriter writer)
+        {
+            // NOTE(review): the second argument is presumably Lucene's applyAllDeletes flag - confirm.
+            DirectoryReader reader = DirectoryReader.Open(writer, false);
+            return reader;
+        }
+
+        /// <summary>
+        /// Expert: returns the underlying <seealso cref="DirectoryReader"/> instance that is
+        /// used by this <seealso cref="TaxonomyReader"/>.
+        /// </summary>
+        internal virtual DirectoryReader InternalIndexReader
+        {
+            get
+            {
+                // NOTE(review): EnsureOpen is declared outside this file; presumably it
+                // rejects use of a closed reader - confirm.
+                EnsureOpen();
+                return indexReader;
+            }
+        }
+
+        /// <summary>
+        /// Returns the taxonomy arrays of this reader, building them on first access
+        /// through <c>InitTaxoArrays()</c>.
+        /// </summary>
+        public override ParallelTaxonomyArrays ParallelTaxonomyArrays
+        {
+            get
+            {
+                EnsureOpen();
+                bool needsInit = taxoArrays == null;
+                if (needsInit)
+                {
+                    InitTaxoArrays();
+                }
+                // Re-read the field: InitTaxoArrays() is what assigns it.
+                return taxoArrays;
+            }
+        }
+
+        /// <summary>
+        /// Returns the user data of the commit that the underlying index reader
+        /// is opened on.
+        /// </summary>
+        public override IDictionary<string, string> CommitUserData
+        {
+            get
+            {
+                EnsureOpen();
+                IDictionary<string, string> userData = indexReader.IndexCommit.UserData;
+                return userData;
+            }
+        }
+
+        /// <summary>
+        /// Returns the ordinal of the given category as seen by this reader.
+        /// </summary>
+        /// <param name="cp">The category label to look up.</param>
+        /// <returns>
+        /// <c>ROOT_ORDINAL</c> for an empty label; the category's ordinal when it is
+        /// found in the cache or in the index; otherwise
+        /// <c>TaxonomyReader.INVALID_ORDINAL</c>.
+        /// </returns>
+        public override int GetOrdinal(FacetLabel cp)
+        {
+            EnsureOpen();
+            if (cp.Length == 0)
+            {
+                return ROOT_ORDINAL;
+            }
+
+            // Step 1: consult the LRU cache.
+            lock (ordinalCache)
+            {
+                IntClass cached = ordinalCache.Get(cp);
+                if (cached != null && cached.IntItem.HasValue)
+                {
+                    int cachedOrdinal = cached.IntItem.Value;
+
+                    // The cache is shared with DTR instances created by DoOpenIfChanged,
+                    // so an entry may carry an ordinal this reader does not know yet.
+                    // Such a category will not be found on disk by this reader either,
+                    // so the answer is INVALID_ORDINAL without searching further.
+                    return cachedOrdinal < indexReader.MaxDoc
+                        ? cachedOrdinal
+                        : TaxonomyReader.INVALID_ORDINAL;
+                }
+            }
+
+            // Step 2: cache miss - look the full path up in the index.
+            BytesRef pathTerm = new BytesRef(FacetsConfig.PathToString(cp.Components, cp.Length));
+            DocsEnum docs = MultiFields.GetTermDocsEnum(indexReader, null, Consts.FULL, pathTerm, 0);
+            if (docs == null || docs.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
+            {
+                // Absence is deliberately NOT cached: the caches are shared with newer
+                // DTR generations, which must never be told by accident that a
+                // category does not exist.
+                return TaxonomyReader.INVALID_ORDINAL;
+            }
+
+            int found = docs.DocID();
+            lock (ordinalCache)
+            {
+                ordinalCache.Put(cp, new IntClass { IntItem = found });
+            }
+
+            return found;
+        }
+
+        /// <summary>
+        /// Returns the label of the category with the given ordinal.
+        /// </summary>
+        /// <param name="ordinal">The category ordinal to resolve.</param>
+        /// <returns>
+        /// The category's label, or <c>null</c> when the ordinal is negative or not
+        /// below the index reader's <c>MaxDoc</c>.
+        /// </returns>
+        public override FacetLabel GetPath(int ordinal)
+        {
+            EnsureOpen();
+
+            // The cache is shared with DTR instances created by DoOpenIfChanged, so the
+            // range check has to come before the cache lookup: only ordinals this
+            // reader recognizes may be answered.
+            bool recognized = ordinal >= 0 && ordinal < indexReader.MaxDoc;
+            if (!recognized)
+            {
+                return null;
+            }
+
+            // TODO: can we use an int-based hash impl, such as IntToObjectMap,
+            // wrapped as LRU?
+            lock (categoryCache)
+            {
+                var cached = categoryCache.Get(ordinal, false);
+                if (cached != null)
+                {
+                    return cached;
+                }
+            }
+
+            // Cache miss: read the stored full path and remember the parsed label.
+            Document doc = indexReader.Document(ordinal);
+            string fullPath = doc.Get(Consts.FULL);
+            FacetLabel label = new FacetLabel(FacetsConfig.StringToPath(fullPath));
+            lock (categoryCache)
+            {
+                categoryCache.Put(ordinal, label);
+            }
+
+            return label;
+        }
+
+        /// <summary>
+        /// Number of documents reported by the underlying index reader
+        /// (<c>NumDocs</c>).
+        /// </summary>
+        public override int Size
+        {
+            get
+            {
+                EnsureOpen();
+                int numDocs = indexReader.NumDocs;
+                return numDocs;
+            }
+        }
+
+        /// <summary>
+        /// Controls the maximum allowed size of each of the caches used by
+        /// <see cref="GetPath(int)"/> and <see cref="GetOrdinal(FacetLabel)"/>.
+        /// <para>
+        /// Currently, if the given size is smaller than the current size of
+        /// a cache, it will not shrink, and rather will be limited to its current
+        /// size.
+        /// </para>
+        /// </summary>
+        /// <value> the new maximum cache size, in number of entries. </value>
+        public virtual int CacheSize
+        {
+            set
+            {
+                EnsureOpen();
+                int newMaxSize = value;
+
+                // Each cache is updated under its own lock, the same lock that
+                // GetPath and GetOrdinal take when they touch it.
+                lock (categoryCache)
+                {
+                    categoryCache.MaxSize = newMaxSize;
+                }
+                lock (ordinalCache)
+                {
+                    ordinalCache.MaxSize = newMaxSize;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Returns ordinal -> label mapping, up to the provided
+        ///  max ordinal or number of ordinals, whichever is
+        ///  smaller.
+        /// </summary>
+        /// <param name="max">Upper bound (exclusive) on the ordinals to print.</param>
+        /// <returns>
+        /// One line per ordinal in the form <c>"ordinal: label"</c>; an ordinal whose
+        /// label is <c>null</c> or empty is printed with a <c>NULL!!</c> or
+        /// <c>EMPTY STRING!!</c> marker instead.
+        /// </returns>
+        public virtual string ToString(int max)
+        {
+            EnsureOpen();
+            StringBuilder sb = new StringBuilder();
+            int upperl = Math.Min(max, indexReader.MaxDoc);
+            for (int i = 0; i < upperl; i++)
+            {
+                // Exceptions from GetPath simply propagate; no catch-and-rethrow is needed.
+                FacetLabel category = this.GetPath(i);
+
+                // Append the pieces directly rather than concatenating a temporary
+                // string for every line.
+                sb.Append(i).Append(": ");
+                if (category == null)
+                {
+                    sb.Append("NULL!! ");
+                }
+                else if (category.Length == 0)
+                {
+                    sb.Append("EMPTY STRING!! ");
+                }
+                else
+                {
+                    sb.Append(category.ToString());
+                }
+                sb.Append("\n");
+            }
+            return sb.ToString();
+        }
+
+        /// <summary>
+        /// Releases this reader by delegating to <c>Dispose(true)</c>.
+        /// </summary>
+        public void Dispose()
+        {
+            // NOTE(review): Dispose(bool) is not declared in this file; presumably it is
+            // inherited from TaxonomyReader - confirm.
+            Dispose(true);
+        }
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/src/Lucene.Net.Facet/Taxonomy/Directory/DirectoryTaxonomyWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/Taxonomy/Directory/DirectoryTaxonomyWriter.cs b/src/Lucene.Net.Facet/Taxonomy/Directory/DirectoryTaxonomyWriter.cs
new file mode 100644
index 0000000..63967ee
--- /dev/null
+++ b/src/Lucene.Net.Facet/Taxonomy/Directory/DirectoryTaxonomyWriter.cs
@@ -0,0 +1,1202 @@
+using System;
+using System.Collections;
+using System.Collections.Concurrent;
+using System.Diagnostics;
+using System.Collections.Generic;
+using System.IO;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Facet.Taxonomy.Directory
+{
+
+    using TokenStream = Lucene.Net.Analysis.TokenStream;
+    using CharTermAttribute = Lucene.Net.Analysis.Tokenattributes.CharTermAttribute;
+    using PositionIncrementAttribute = Lucene.Net.Analysis.Tokenattributes.PositionIncrementAttribute;
+    using Document = Lucene.Net.Documents.Document;
+    using Field = Lucene.Net.Documents.Field;
+    using FieldType = Lucene.Net.Documents.FieldType;
+    using StringField = Lucene.Net.Documents.StringField;
+    using TextField = Lucene.Net.Documents.TextField;
+    using TaxonomyWriterCache = Lucene.Net.Facet.Taxonomy.WriterCache.TaxonomyWriterCache;
+    using Cl2oTaxonomyWriterCache = Lucene.Net.Facet.Taxonomy.WriterCache.Cl2oTaxonomyWriterCache;
+    using LruTaxonomyWriterCache = Lucene.Net.Facet.Taxonomy.WriterCache.LruTaxonomyWriterCache;
+    using AtomicReader = Lucene.Net.Index.AtomicReader;
+    using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;
+    using CorruptIndexException = Lucene.Net.Index.CorruptIndexException; // javadocs
+    using DirectoryReader = Lucene.Net.Index.DirectoryReader;
+    using DocsEnum = Lucene.Net.Index.DocsEnum;
+    using IndexReader = Lucene.Net.Index.IndexReader;
+    using IndexWriter = Lucene.Net.Index.IndexWriter;
+    using OpenMode = Lucene.Net.Index.IndexWriterConfig.OpenMode_e;
+    using IndexWriterConfig = Lucene.Net.Index.IndexWriterConfig;
+    using LogByteSizeMergePolicy = Lucene.Net.Index.LogByteSizeMergePolicy;
+    using ReaderManager = Lucene.Net.Index.ReaderManager;
+    using SegmentInfos = Lucene.Net.Index.SegmentInfos;
+    using Terms = Lucene.Net.Index.Terms;
+    using TermsEnum = Lucene.Net.Index.TermsEnum;
+    using TieredMergePolicy = Lucene.Net.Index.TieredMergePolicy;
+    using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException;
+    using Directory = Lucene.Net.Store.Directory;
+    using LockObtainFailedException = Lucene.Net.Store.LockObtainFailedException; // javadocs
+    using NativeFSLockFactory = Lucene.Net.Store.NativeFSLockFactory;
+    using SimpleFSLockFactory = Lucene.Net.Store.SimpleFSLockFactory;
+    using BytesRef = Lucene.Net.Util.BytesRef;
+    using Version = Lucene.Net.Util.Version;
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// <seealso cref="TaxonomyWriter"/> which uses a <seealso cref="Directory"/> to store the taxonomy
+    /// information on disk, and keeps an additional in-memory cache of some or all
+    /// categories.
+    /// <para>
+    /// In addition to the permanently-stored information in the <seealso cref="Directory"/>,
+    /// efficiency dictates that we also keep an in-memory cache of <B>recently
+    /// seen</B> or <B>all</B> categories, so that we do not need to go back to disk
+    /// for every category addition to see which ordinal this category already has,
+    /// if any. A <seealso cref="TaxonomyWriterCache"/> object determines the specific caching
+    /// algorithm used.
+    /// </para>
+    /// <para>
+    /// This class offers some hooks for extending classes to control the
+    /// <seealso cref="IndexWriter"/> instance that is used. See <seealso cref="#openIndexWriter"/>.
+    /// 
+    /// @lucene.experimental
+    /// </para>
+    /// </summary>
+    public class DirectoryTaxonomyWriter : TaxonomyWriter
+    {
+
+        /// <summary>
+        /// Property name of user commit data that contains the index epoch. The epoch
+        /// changes whenever the taxonomy is recreated (i.e. opened with
+        /// <seealso cref="OpenMode.CREATE"/>).
+        /// <para>
+        /// Applications should not use this property in their commit data because it
+        /// will be overridden by this taxonomy writer.
+        /// </para>
+        /// </summary>
+        public const string INDEX_EPOCH = "index.epoch";
+
+        private readonly Directory dir;
+        private readonly IndexWriter indexWriter;
+        private readonly TaxonomyWriterCache cache;
+        private readonly AtomicInteger cacheMisses = new AtomicInteger(0);
+
+        // Records the taxonomy index epoch, updated on replaceTaxonomy as well.
+        private long indexEpoch;
+
+        private SinglePositionTokenStream parentStream = new SinglePositionTokenStream(Consts.PAYLOAD_PARENT);
+        private Field parentStreamField;
+        private Field fullPathField;
+        private int cacheMissesUntilFill = 11;
+        private bool shouldFillCache = true;
+
+        // even though lazily initialized, not volatile so that access to it is
+        // faster. we keep a volatile boolean init instead.
+        private ReaderManager readerManager;
+        private volatile bool initializedReaderManager = false;
+        private volatile bool shouldRefreshReaderManager;
+
+        /// <summary>
+        /// We call the cache "complete" if we know that every category in our
+        /// taxonomy is in the cache. When the cache is <B>not</B> complete, and
+        /// we can't find a category in the cache, we still need to look for it
+        /// in the on-disk index; Therefore when the cache is not complete, we
+        /// need to open a "reader" to the taxonomy index.
+        /// The cache becomes incomplete if it was never filled with the existing
+        /// categories, or if a put() to the cache ever returned true (meaning
+        /// that some of the cached data was cleared).
+        /// </summary>
+        private volatile bool cacheIsComplete;
+        private volatile bool isClosed = false;
+        private volatile TaxonomyIndexArrays taxoArrays;
+        private volatile int nextID;
+
+        /// <summary>
+        /// Loads the <see cref="SegmentInfos"/> of the given directory and returns the
+        /// user data recorded in them.
+        /// </summary>
+        private static IDictionary<string, string> ReadCommitData(Directory dir)
+        {
+            SegmentInfos segmentInfos = new SegmentInfos();
+            segmentInfos.Read(dir);
+            IDictionary<string, string> userData = segmentInfos.UserData;
+            return userData;
+        }
+
+        /// <summary>
+        /// Forcibly unlocks the taxonomy in the named directory.
+        /// <para>
+        /// Caution: this should only be used by failure recovery code, when it is
+        /// known that no other process nor thread is in fact currently accessing
+        /// this taxonomy.
+        /// </para>
+        /// <para>
+        /// This method is unnecessary if your <seealso cref="Directory"/> uses a
+        /// <seealso cref="NativeFSLockFactory"/> instead of the default
+        /// <seealso cref="SimpleFSLockFactory"/>. When the "native" lock is used, a lock
+        /// does not stay behind forever when the process using it dies.
+        /// </para>
+        /// </summary>
+        /// <param name="directory">The directory to unlock; passed straight to <c>IndexWriter.Unlock</c>.</param>
+        public static void Unlock(Directory directory)
+        {
+            IndexWriter.Unlock(directory);
+        }
+
+        /// <summary>
+        /// Construct a Taxonomy writer.
+        /// </summary>
+        /// <param name="directory">
+        ///    The <seealso cref="Directory"/> in which to store the taxonomy. Note that
+        ///    the taxonomy is written directly to that directory (not to a
+        ///    subdirectory of it). </param>
+        /// <param name="openMode">
+        ///    Specifies how to open a taxonomy for writing: <code>APPEND</code>
+        ///    means open an existing index for append (failing if the index does
+        ///    not yet exist). <code>CREATE</code> means create a new index (first
+        ///    deleting the old one if it already existed).
+        ///    <code>APPEND_OR_CREATE</code> appends to an existing index if there
+        ///    is one, otherwise it creates a new index. </param>
+        /// <param name="cache">
+        ///    A <seealso cref="TaxonomyWriterCache"/> implementation which determines
+        ///    the in-memory caching policy. See for example
+        ///    <seealso cref="LruTaxonomyWriterCache"/> and <seealso cref="Cl2oTaxonomyWriterCache"/>.
+        ///    If null or missing, <seealso cref="#defaultTaxonomyWriterCache()"/> is used. </param>
+        /// <exception cref="CorruptIndexException">
+        ///     if the taxonomy is corrupted. </exception>
+        /// <exception cref="LockObtainFailedException">
+        ///     if the taxonomy is locked by another writer. If it is known
+        ///     that no other concurrent writer is active, the lock might
+        ///     have been left around by an old dead process, and should be
+        ///     removed using <seealso cref="#unlock(Directory)"/>. </exception>
+        /// <exception cref="IOException">
+        ///     if another error occurred. </exception>
+        public DirectoryTaxonomyWriter(Directory directory, OpenMode openMode, TaxonomyWriterCache cache)
+        {
+            dir = directory;
+            IndexWriterConfig config = CreateIndexWriterConfig(openMode);
+            indexWriter = OpenIndexWriter(dir, config);
+
+            // verify (to some extent) that merge policy in effect would preserve category docids
+            if (indexWriter != null)
+            {
+                Debug.Assert(!(indexWriter.Config.MergePolicy is TieredMergePolicy), "for preserving category docids, merging none-adjacent segments is not allowed");
+            }
+
+            // Now that the writer is open and the index is locked, it is safe to look
+            // at the commit data. From here on, use the open mode recorded in the config.
+            openMode = config.OpenMode ?? OpenMode.CREATE_OR_APPEND;
+
+            // Epoch defaults to 1: either there is no index yet, or it is an old
+            // taxonomy without commit data / without an epoch entry.
+            long epoch = 1;
+            if (DirectoryReader.IndexExists(directory))
+            {
+                IDictionary<string, string> commitData = ReadCommitData(directory);
+                string epochStr;
+                if (commitData != null && commitData.TryGetValue(INDEX_EPOCH, out epochStr) && epochStr != null)
+                {
+                    // the epoch is parsed as a base-16 number
+                    epoch = Convert.ToInt64(epochStr, 16);
+                }
+            }
+
+            // Opening with CREATE starts a new taxonomy generation.
+            if (openMode == OpenMode.CREATE)
+            {
+                epoch++;
+            }
+            indexEpoch = epoch;
+
+            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED) { OmitNorms = true };
+            parentStreamField = new Field(Consts.FIELD_PAYLOADS, parentStream, ft);
+            fullPathField = new StringField(Consts.FULL, "", Field.Store.YES);
+
+            // OpenIndexWriter may have handed back no writer; nothing more to set up then.
+            if (indexWriter == null)
+            {
+                return;
+            }
+
+            nextID = indexWriter.MaxDoc;
+            this.cache = cache ?? DefaultTaxonomyWriterCache();
+
+            // With an empty index the (empty) cache trivially holds every category.
+            // Otherwise there are categories on disk that were not read into the
+            // cache; they are deliberately not loaded now, to avoid terrible
+            // performance when a taxonomy is opened just to add a single category.
+            // That happens later, after a few cache misses have been noticed.
+            bool taxonomyIsEmpty = nextID == 0;
+            cacheIsComplete = taxonomyIsEmpty;
+            if (taxonomyIsEmpty)
+            {
+                // Make sure that the taxonomy always contains the root category
+                // with category id 0.
+                AddCategory(new FacetLabel());
+            }
+        }
+
+        /// <summary>
+        /// Opens the internal index writer, which contains the taxonomy data.
+        /// <para>
+        /// Extensions may provide their own <see cref="IndexWriter"/> implementation or instance.
+        /// <br/><b>NOTE:</b> the instance this method returns is disposed when this
+        /// taxonomy writer is closed through <see cref="Dispose()"/>.
+        /// <br/><b>NOTE:</b> the merge policy in effect must not merge non-adjacent segments. See
+        /// the comment in <see cref="CreateIndexWriterConfig(OpenMode)"/> for the logic behind this.
+        /// </para>
+        /// </summary>
+        /// <seealso cref="CreateIndexWriterConfig(OpenMode)"/>
+        /// <param name="directory">
+        ///          the <see cref="Directory"/> on top of which an <see cref="IndexWriter"/>
+        ///          should be opened. </param>
+        /// <param name="config">
+        ///          configuration for the internal index writer. </param>
+        /// <returns> a new <see cref="IndexWriter"/> over <paramref name="directory"/>. </returns>
+        protected virtual IndexWriter OpenIndexWriter(Directory directory, IndexWriterConfig config)
+        {
+            IndexWriter taxonomyIndexWriter = new IndexWriter(directory, config);
+            return taxonomyIndexWriter;
+        }
+
+        /// <summary>
+        /// Creates the <see cref="IndexWriterConfig"/> that is used for opening the internal index writer.
+        /// <br/>Extensions can configure the <see cref="IndexWriter"/> as they see fit,
+        /// including setting a merge scheduler, a deletion policy, a different RAM size etc.
+        /// <br/><b>NOTE:</b> internal docids of the configured index must not be altered.
+        /// For that, categories are never deleted from the taxonomy index.
+        /// In addition, the merge policy in effect must not merge non-adjacent segments.
+        /// </summary>
+        /// <seealso cref="OpenIndexWriter(Directory, IndexWriterConfig)"/>
+        /// <param name="openMode"> the <see cref="OpenMode"/> to set on the returned configuration. </param>
+        /// <returns> a configuration with the given open mode and a <see cref="LogByteSizeMergePolicy"/>. </returns>
+        protected virtual IndexWriterConfig CreateIndexWriterConfig(OpenMode openMode)
+        {
+            // TODO: should we use a more optimized Codec, e.g. Pulsing (or write custom)?
+            // The taxonomy has a unique structure, where each term is associated with one document
+
+            // :Post-Release-Update-Version.LUCENE_XY:
+            IndexWriterConfig baseConfig = new IndexWriterConfig(Version.LUCENE_48, null);
+
+            // Make sure we use a MergePolicy which always merges adjacent segments and thus
+            // keeps the doc IDs ordered as well (this is crucial for the taxonomy index).
+            LogByteSizeMergePolicy adjacentMergePolicy = new LogByteSizeMergePolicy();
+
+            return baseConfig.SetOpenMode(openMode).SetMergePolicy(adjacentMergePolicy);
+        }
+
+        /// <summary>
+        /// Lazily opens a <see cref="ReaderManager"/> on top of the internal <see cref="IndexWriter"/>.
+        /// Does nothing when the manager has already been initialized.
+        /// </summary>
+        private void InitReaderManager()
+        {
+            // Cheap check without the lock first; it is repeated under the lock below.
+            if (initializedReaderManager)
+            {
+                return;
+            }
+
+            lock (this)
+            {
+                // verify that the taxo-writer hasn't been closed on us.
+                EnsureOpen();
+                if (initializedReaderManager)
+                {
+                    return;
+                }
+
+                readerManager = new ReaderManager(indexWriter, false);
+                shouldRefreshReaderManager = false;
+                initializedReaderManager = true;
+            }
+        }
+
+        /// <summary>
+        /// Creates a new instance that uses the cache returned by <see cref="DefaultTaxonomyWriterCache()"/>;
+        /// <paramref name="openMode"/> defaults to <c>OpenMode.CREATE_OR_APPEND</c> when omitted.
+        /// </summary>
+        public DirectoryTaxonomyWriter(Directory directory, OpenMode openMode = OpenMode.CREATE_OR_APPEND)
+            : this(directory, openMode, DefaultTaxonomyWriterCache())
+        {
+        }
+
+        /// <summary>
+        /// Defines the default <see cref="TaxonomyWriterCache"/> to use in constructors
+        /// which do not specify one.
+        /// <para>
+        /// The current default is a <see cref="Cl2oTaxonomyWriterCache"/> constructed
+        /// with the parameters (1024, 0.15f, 3), i.e., the entire taxonomy is
+        /// cached in memory while building it.
+        /// </para>
+        /// </summary>
+        /// <returns> a newly created cache instance on every call. </returns>
+        public static TaxonomyWriterCache DefaultTaxonomyWriterCache()
+        {
+            TaxonomyWriterCache defaultCache = new Cl2oTaxonomyWriterCache(1024, 0.15f, 3);
+            return defaultCache;
+        }
+
+        /// <summary>
+        /// Commits whatever changes were made and then closes the underlying
+        /// <see cref="IndexWriter"/> and frees the other resources in use.
+        /// Calling this on an already closed instance does nothing.
+        /// </summary>
+        public void Dispose()
+        {
+            lock (this)
+            {
+                if (isClosed)
+                {
+                    return;
+                }
+
+                Commit();
+                DoClose();
+            }
+        }
+
+        private void DoClose() // disposes the index writer, marks this instance closed, then releases the remaining resources
+        {
+            indexWriter.Dispose();
+            isClosed = true; // reached only if the Dispose() call above did not throw
+            CloseResources(); // overridable hook; see CloseResources()
+        }
+
+        /// <summary>
+        /// A hook for extending classes to close additional resources that were used.
+        /// The default implementation disposes the reader manager, if one was initialized, and
+        /// closes the <see cref="TaxonomyWriterCache"/>, if one is set. <br/>
+        /// <b>NOTE:</b> if you override this method, you should include a
+        /// <c>base.CloseResources()</c> call in your implementation.
+        /// </summary>
+        protected virtual void CloseResources()
+        {
+            lock (this)
+            {
+                if (initializedReaderManager)
+                {
+                    readerManager.Dispose();
+                    readerManager = null;
+                    initializedReaderManager = false; // lets InitReaderManager() create a fresh manager if it is called again
+                }
+                if (cache != null)
+                {
+                    cache.Close();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Resolves the ordinal of the given category, consulting the cache first and
+        /// falling back to the on-disk index when the cache cannot answer.
+        /// </summary>
+        /// <param name="categoryPath"> the category to look up. </param>
+        /// <returns>
+        /// the category's ordinal, or a negative number in case the category does
+        /// not yet exist in the taxonomy.
+        /// </returns>
+        protected virtual int FindCategory(FacetLabel categoryPath)
+        {
+            lock (this)
+            {
+                // A cache hit, or any answer from a complete cache, can be returned as is.
+                int ordinal = cache.Get(categoryPath);
+                if (ordinal >= 0 || cacheIsComplete)
+                {
+                    return ordinal;
+                }
+
+                cacheMisses.IncrementAndGet();
+                // After a few cache misses, it makes sense to read all the categories
+                // from disk into the cache. This is not done on the first miss (or when
+                // opening the writer) because it would significantly slow down the case
+                // where a taxonomy is opened just to add one category. Spending a long
+                // time on reading only after enough time was spent on cache misses is
+                // known as an "online algorithm".
+                PerhapsFillCache();
+                ordinal = cache.Get(categoryPath);
+                if (ordinal >= 0 || cacheIsComplete)
+                {
+                    // Either the fill brought the category in, or the cache is now
+                    // complete; in both cases the cache's answer is final.
+                    return ordinal;
+                }
+
+                // Still unknown and the cache is incomplete: ask the on-disk index.
+                InitReaderManager();
+
+                int foundDoc = -1;
+                DirectoryReader reader = readerManager.Acquire();
+                try
+                {
+                    BytesRef pathTerm = new BytesRef(FacetsConfig.PathToString(categoryPath.Components, categoryPath.Length));
+                    TermsEnum termsEnum = null; // reused across leaves
+                    DocsEnum docsEnum = null; // reused across leaves
+                    foreach (AtomicReaderContext leaf in reader.Leaves)
+                    {
+                        Terms terms = leaf.AtomicReader.Terms(Consts.FULL);
+                        if (terms == null)
+                        {
+                            continue;
+                        }
+
+                        termsEnum = terms.Iterator(termsEnum);
+                        if (!termsEnum.SeekExact(pathTerm))
+                        {
+                            continue;
+                        }
+
+                        // liveDocs=null because the taxonomy has no deletes; freqs are not required.
+                        docsEnum = termsEnum.Docs(null, docsEnum, 0);
+                        // if the term was found, we know it has exactly one document.
+                        foundDoc = docsEnum.NextDoc() + leaf.DocBase;
+                        break;
+                    }
+                }
+                finally
+                {
+                    readerManager.Release(reader);
+                }
+
+                // NOTE: only strictly positive ordinals are put into the cache here;
+                // ordinal 0 is returned without being cached.
+                if (foundDoc > 0)
+                {
+                    AddToCache(categoryPath, foundDoc);
+                }
+                return foundDoc;
+            }
+        }
+
+        /// <summary>
+        /// Returns the ordinal of the given category, adding the category to the
+        /// taxonomy first if it is not there yet.
+        /// </summary>
+        /// <param name="categoryPath"> the category to look up or add. </param>
+        /// <returns> the ordinal of the (existing or newly added) category. </returns>
+        public virtual int AddCategory(FacetLabel categoryPath)
+        {
+            EnsureOpen();
+
+            // Check the cache outside the lock: this gives better concurrency when
+            // the category is already cached.
+            int ordinal = cache.Get(categoryPath);
+            if (ordinal >= 0)
+            {
+                return ordinal;
+            }
+
+            // The category is not in the cache - the following cannot run in parallel.
+            lock (this)
+            {
+                ordinal = FindCategory(categoryPath);
+                if (ordinal >= 0)
+                {
+                    return ordinal;
+                }
+
+                // This is a new category which must be inserted into the index (and
+                // the cache). Some of its ancestors may have to be added first, so
+                // that a parent is always added to the taxonomy before its child;
+                // InternalAddCategory() takes care of that recursively.
+                return InternalAddCategory(categoryPath);
+            }
+        }
+
+        /// <summary>
+        /// Adds a new category to the index (and the cache) and returns its new
+        /// ordinal.
+        /// <para>
+        /// Missing ancestors are added first, by recursion, which keeps the
+        /// invariant that a parent is always added to the taxonomy before its child.
+        /// </para>
+        /// </summary>
+        /// <param name="cp"> the category to add. </param>
+        /// <returns> the ordinal assigned to the new category. </returns>
+        private int InternalAddCategory(FacetLabel cp)
+        {
+            // Determine the parent's ordinal first; it is written together with
+            // the category document by AddCategoryDocument().
+            int depth = cp.Length;
+            int parentOrdinal;
+            if (depth > 1)
+            {
+                // Look the parent up, adding it (recursively) when it does not exist yet.
+                FacetLabel parentPath = cp.Subpath(depth - 1);
+                parentOrdinal = FindCategory(parentPath);
+                if (parentOrdinal < 0)
+                {
+                    parentOrdinal = InternalAddCategory(parentPath);
+                }
+            }
+            else
+            {
+                // A single-component category hangs directly under the root; an
+                // empty path has no valid parent.
+                parentOrdinal = depth == 1 ? TaxonomyReader.ROOT_ORDINAL : TaxonomyReader.INVALID_ORDINAL;
+            }
+
+            return AddCategoryDocument(cp, parentOrdinal);
+        }
+
+        /// <summary>
+        /// Verifies that this instance has not been closed.
+        /// </summary>
+        /// <exception cref="AlreadyClosedException"> if this instance is closed. </exception>
+        protected internal void EnsureOpen()
+        {
+            if (!isClosed)
+            {
+                return;
+            }
+
+            throw new AlreadyClosedException("The taxonomy writer has already been closed");
+        }
+
+        /// <summary>
+        /// Writes one category document (parent stream field plus full-path field) and returns its new ordinal.
+        /// Takes no lock itself: the visible call chain (AddCategory -> InternalAddCategory) already runs under lock (this).
+        /// </summary>
+        private int AddCategoryDocument(FacetLabel categoryPath, int parent)
+        {
+            // Before Lucene 2.9, position increments >=0 were supported, so we
+            // added 1 to parent to allow the parent -1 (the parent of the root).
+            // Unfortunately, starting with Lucene 2.9, after LUCENE-1542, this is
+            // no longer enough, since 0 is not encoded consistently either (see
+            // comment in SinglePositionTokenStream). But because we must be
+            // backward-compatible with existing indexes, we can't just fix what
+            // we write here (e.g., to write parent+2), and need to do a workaround
+            // in the reader (which knows that anyway only category 0 has a parent
+            // -1). Hence the value written is parent + 1, clamped to a minimum of 1.
+            parentStream.Set(Math.Max(parent + 1, 1));
+            Document d = new Document();
+            d.Add(parentStreamField);
+
+            fullPathField.StringValue = FacetsConfig.PathToString(categoryPath.Components, categoryPath.Length);
+            d.Add(fullPathField);
+
+            // Note that we do not pass an Analyzer here because the fields that are
+            // added to the Document are untokenized or contain their own TokenStream.
+            // Therefore the IndexWriter's Analyzer has no effect.
+            indexWriter.AddDocument(d);
+            int id = nextID++;
+
+            // added a category document, mark that ReaderManager is not up-to-date
+            shouldRefreshReaderManager = true;
+
+            // also add to the parent array (the result of Add() replaces the taxoArrays field)
+            taxoArrays = TaxoArrays.Add(id, parent);
+
+            // NOTE: this line must be executed last, or else the cache gets updated
+            // before the parents array (LUCENE-4596)
+            AddToCache(categoryPath, id);
+
+            return id;
+        }
+
+        /// <summary>
+        /// Token stream that emits at most one token after each <see cref="Set(int)"/> call:
+        /// the word given at construction, with the stored value as its position increment.
+        /// </summary>
+        private class SinglePositionTokenStream : TokenStream
+        {
+            internal ICharTermAttribute termAtt;
+            internal IPositionIncrementAttribute posIncrAtt;
+            // true once the single token was handed out (and initially, before any Set() call)
+            internal bool returned;
+            // the position increment to emit with the next token
+            internal int val;
+            internal readonly string word;
+
+            /// <summary>
+            /// Creates a stream for the given word. The stream yields no token
+            /// until <see cref="Set(int)"/> is called.
+            /// </summary>
+            public SinglePositionTokenStream(string word)
+            {
+                this.word = word;
+                termAtt = AddAttribute<ICharTermAttribute>();
+                posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
+                returned = true;
+            }
+
+            /// <summary>
+            /// Sets the value to keep as the position increment and re-arms the
+            /// stream so that it yields one more token.
+            /// Note that when TermPositions.nextPosition() is later used to
+            /// retrieve this value, val-1 will be returned, not val.
+            /// <para>
+            /// IMPORTANT NOTE: Before Lucene 2.9, val>=0 were safe (for val==0,
+            /// the retrieved position would be -1). But starting with Lucene 2.9,
+            /// this unfortunately changed, and only val>0 are safe. val=0 can
+            /// still be used, but don't count on the value you retrieve later
+            /// (it could be 0 or -1, depending on circumstances or versions).
+            /// This change is described in Lucene's JIRA: LUCENE-1542.
+            /// </para>
+            /// </summary>
+            public virtual void Set(int val)
+            {
+                returned = false;
+                this.val = val;
+            }
+
+            /// <summary>
+            /// Emits the single pending token, if any.
+            /// </summary>
+            /// <returns> true if a token was produced, false if it was already consumed. </returns>
+            public override bool IncrementToken()
+            {
+                if (!returned)
+                {
+                    ClearAttributes();
+                    posIncrAtt.PositionIncrement = val;
+                    termAtt.SetEmpty();
+                    termAtt.Append(word);
+                    returned = true;
+                    return true;
+                }
+
+                return false;
+            }
+        }
+
+        /// <summary>
+        /// Puts the category and its ordinal into the cache, and reacts to the
+        /// cache reporting an eviction.
+        /// </summary>
+        private void AddToCache(FacetLabel categoryPath, int id)
+        {
+            bool evicted = cache.Put(categoryPath, id);
+            if (!evicted)
+            {
+                return;
+            }
+
+            // cache.Put() returning true means the cache was limited in size,
+            // became full, and parts of it had to be evicted. A relatively new
+            // category that is not yet visible to our reader may have been among
+            // them, so the reader must be refreshed, and the cache can no longer
+            // be considered complete.
+            RefreshReaderManager();
+            cacheIsComplete = false;
+        }
+
+        /// <summary>
+        /// Refreshes the reader manager if it was initialized and is marked as
+        /// out of date; otherwise does nothing.
+        /// </summary>
+        private void RefreshReaderManager()
+        {
+            lock (this)
+            {
+                // This runs under the lock because it must not happen concurrently
+                // with AddCategoryDocument(): when this method returns, the reader
+                // manager's state must be current, and clearing the refresh flag
+                // must not overlap with a document being added either.
+                // NOTE: because of the lock, MaybeRefresh() can be used instead of a
+                // blocking refresh. If the locking is ever changed, change the call too.
+                if (!shouldRefreshReaderManager || !initializedReaderManager)
+                {
+                    return;
+                }
+
+                readerManager.MaybeRefresh();
+                shouldRefreshReaderManager = false;
+            }
+        }
+
+        /// <summary>
+        /// Commits the internal index writer, first storing the current index
+        /// epoch in the commit data when the stored epoch is missing or different.
+        /// </summary>
+        /// <exception cref="AlreadyClosedException"> if this writer has already been closed. </exception>
+        public virtual void Commit()
+        {
+            lock (this)
+            {
+                EnsureOpen();
+
+                // LUCENE-4972: if we always call setCommitData, we create empty commits
+                string storedEpoch;
+                bool hasEpoch = indexWriter.CommitData.TryGetValue(INDEX_EPOCH, out storedEpoch);
+                bool epochIsCurrent = hasEpoch && storedEpoch != null && Convert.ToInt64(storedEpoch, 16) == indexEpoch;
+                if (!epochIsCurrent)
+                {
+                    indexWriter.CommitData = CombinedCommitData(indexWriter.CommitData);
+                }
+
+                indexWriter.Commit();
+            }
+        }
+
+        /// <summary>
+        /// Combines the original user data with the taxonomy epoch.
+        /// </summary>
+        /// <param name="commitData"> the user data to copy; may be null. </param>
+        /// <returns>
+        /// a new dictionary holding the entries of <paramref name="commitData"/> plus
+        /// the index epoch, stored as a hexadecimal string under the epoch key.
+        /// </returns>
+        private IDictionary<string, string> CombinedCommitData(IDictionary<string, string> commitData)
+        {
+            string epochHex = Convert.ToString(indexEpoch, 16);
+
+            IDictionary<string, string> combined = new Dictionary<string, string>();
+            if (commitData != null)
+            {
+                combined.PutAll(commitData);
+            }
+
+            // written last, so it overrides any epoch entry copied from commitData
+            combined[INDEX_EPOCH] = epochHex;
+            return combined;
+        }
+
+        /// <summary>
+        /// Gets or sets the commit user data of the internal index writer. In both
+        /// directions the data is combined with the taxonomy's index epoch.
+        /// </summary>
+        public virtual IDictionary<string, string> CommitData
+        {
+            get
+            {
+                IDictionary<string, string> writerData = indexWriter.CommitData;
+                return CombinedCommitData(writerData);
+            }
+            set
+            {
+                IDictionary<string, string> dataWithEpoch = CombinedCommitData(value);
+                indexWriter.CommitData = dataWithEpoch;
+            }
+        }
+
+
+        /// <summary>
+        /// Prepares most of the work needed for a two-phase commit.
+        /// See <see cref="IndexWriter.PrepareCommit()"/>.
+        /// <para>
+        /// Before preparing, the current index epoch is stored in the commit data
+        /// when the stored epoch is missing or different.
+        /// </para>
+        /// </summary>
+        /// <exception cref="AlreadyClosedException"> if this writer has already been closed. </exception>
+        public virtual void PrepareCommit()
+        {
+            lock (this)
+            {
+                EnsureOpen();
+                // LUCENE-4972: if we always call setCommitData, we create empty commits
+                // The dictionary indexer throws KeyNotFoundException when the epoch key
+                // is absent (e.g. no epoch was ever committed), so probe with
+                // TryGetValue, which leaves epochStr null in that case - the same
+                // approach Commit() uses.
+                string epochStr = null;
+                indexWriter.CommitData.TryGetValue(INDEX_EPOCH, out epochStr);
+                if (epochStr == null || Convert.ToInt64(epochStr, 16) != indexEpoch)
+                {
+                    indexWriter.CommitData = CombinedCommitData(indexWriter.CommitData);
+                }
+                indexWriter.PrepareCommit();
+            }
+        }
+
+        public virtual int Size // current value of nextID, the ordinal the next added category document will receive
+        {
+            get
+            {
+                EnsureOpen(); // throws AlreadyClosedException once this writer has been closed
+                return nextID;
+            }
+        }
+
+        /// <summary>
+        /// Sets the number of cache misses before an attempt is made to read the entire
+        /// taxonomy into the in-memory cache.
+        /// <para>
+        /// This taxonomy writer holds an in-memory cache of recently seen categories
+        /// to speed up operation. On each cache-miss, the on-disk index needs to be
+        /// consulted. When an existing taxonomy is opened, a lot of slow disk reads
+        /// like that are needed until the cache is filled, so it is more efficient to
+        /// read the entire taxonomy into memory at once. We do this complete read
+        /// after a certain number (defined by this property) of cache misses.
+        /// </para>
+        /// <para>
+        /// If the number is set to <c>0</c>, the entire taxonomy is read into the
+        /// cache on first use, without fetching individual categories first.
+        /// </para>
+        /// <para>
+        /// NOTE: it is assumed that this property is set immediately after the
+        /// taxonomy writer has been created.
+        /// </para>
+        /// </summary>
+        public virtual int CacheMissesUntilFill
+        {
+            set
+            {
+                EnsureOpen(); // setting the value on a closed writer throws AlreadyClosedException
+                cacheMissesUntilFill = value;
+            }
+        }
+
+        // Guarantees that when several threads call this concurrently only one of
+        // them executes the fill, and that after it returns the cache is updated
+        // and is either complete or not.
+        private void PerhapsFillCache()
+        {
+            lock (this)
+            {
+                // Nothing to do while there were too few misses, or when the cache
+                // was already filled once (there is no need to re-fill it).
+                if (cacheMisses.Get() < cacheMissesUntilFill || !shouldFillCache)
+                {
+                    return;
+                }
+                shouldFillCache = false;
+
+                InitReaderManager();
+
+                bool cacheFilledUp = false;
+                DirectoryReader reader = readerManager.Acquire();
+                try
+                {
+                    TermsEnum termsEnum = null;
+                    DocsEnum docsEnum = null;
+                    foreach (AtomicReaderContext leaf in reader.Leaves)
+                    {
+                        Terms terms = leaf.AtomicReader.Terms(Consts.FULL);
+                        if (terms == null)
+                        {
+                            // cannot really happen, but be on the safe side
+                            continue;
+                        }
+
+                        termsEnum = terms.Iterator(termsEnum);
+                        while (termsEnum.Next() != null)
+                        {
+                            if (cache.Full)
+                            {
+                                // the next Put() would evict entries from the cache, so stop here.
+                                cacheFilledUp = true;
+                                break;
+                            }
+
+                            // Since we guarantee uniqueness of categories, each term has exactly
+                            // one document. Also, since we do not allow removing categories (and
+                            // hence documents), there are no deletions in the index. Therefore a
+                            // single NextDoc() call, without any validation, is sufficient.
+                            BytesRef term = termsEnum.Term();
+                            FacetLabel label = new FacetLabel(FacetsConfig.StringToPath(term.Utf8ToString()));
+                            docsEnum = termsEnum.Docs(null, docsEnum, DocsEnum.FLAG_NONE);
+                            bool evicted = cache.Put(label, docsEnum.NextDoc() + leaf.DocBase);
+                            Debug.Assert(!evicted, "entries should not have been evicted from the cache");
+                        }
+
+                        if (cacheFilledUp)
+                        {
+                            break;
+                        }
+                    }
+                }
+                finally
+                {
+                    readerManager.Release(reader);
+                }
+
+                cacheIsComplete = !cacheFilledUp;
+                if (!cacheFilledUp)
+                {
+                    // Everything is in the cache, so there is no need to keep the
+                    // reader manager open. The lock on this instance is already held
+                    // here, which keeps this consistent with InitReaderManager().
+                    readerManager.Dispose();
+                    readerManager = null;
+                    initializedReaderManager = false;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Gets the taxonomy arrays, building them from the index on first access.
+        /// </summary>
+        private TaxonomyIndexArrays TaxoArrays
+        {
+            get
+            {
+                // Already built: no locking needed.
+                if (taxoArrays != null)
+                {
+                    return taxoArrays;
+                }
+
+                lock (this)
+                {
+                    // Re-check under the lock; another thread may have built them meanwhile.
+                    if (taxoArrays == null)
+                    {
+                        InitReaderManager();
+                        DirectoryReader reader = readerManager.Acquire();
+                        try
+                        {
+                            // Build into a local first and publish afterwards, so that
+                            // the object initialization does not happen on the field itself.
+                            TaxonomyIndexArrays builtArrays = new TaxonomyIndexArrays(reader);
+                            taxoArrays = builtArrays;
+                        }
+                        finally
+                        {
+                            readerManager.Release(reader);
+                        }
+                    }
+                }
+
+                return taxoArrays;
+            }
+        }
+
+        /// <summary>
+        /// Returns the parent ordinal of the category with the given ordinal.
+        /// </summary>
+        /// <param name="ordinal"> the ordinal of an existing category. </param>
+        /// <returns> the parent ordinal recorded in the parents array. </returns>
+        /// <exception cref="System.IndexOutOfRangeException">
+        /// if <paramref name="ordinal"/> is not smaller than the next ordinal to be assigned.
+        /// </exception>
+        public virtual int GetParent(int ordinal)
+        {
+            EnsureOpen();
+
+            // Enforce that a user can never ask for the parent of a nonexistent
+            // category - even if the parents array was allocated bigger than it
+            // really needs to be.
+            bool ordinalExists = ordinal < nextID;
+            if (!ordinalExists)
+            {
+                throw new System.IndexOutOfRangeException("requested ordinal is bigger than the largest ordinal in the taxonomy");
+            }
+
+            int[] parentOrdinals = TaxoArrays.Parents();
+            Debug.Assert(ordinal < parentOrdinals.Length, "requested ordinal (" + ordinal + "); parents.length (" + parentOrdinals.Length + ") !");
+            return parentOrdinals[ordinal];
+        }
+
+        /// <summary>
+        /// Takes the categories from the given taxonomy directory, and adds the
+        /// missing ones to this taxonomy. Additionally, it fills the given
+        /// <see cref="OrdinalMap"/> with a mapping from the original ordinal to the new
+        /// ordinal.
+        /// </summary>
+        /// <param name="taxoDir"> the directory of the taxonomy to read categories from. </param>
+        /// <param name="map"> receives one mapping per category read from <paramref name="taxoDir"/>. </param>
+        public virtual void AddTaxonomy(Directory taxoDir, OrdinalMap map)
+        {
+            EnsureOpen();
+            DirectoryReader sourceReader = DirectoryReader.Open(taxoDir);
+            try
+            {
+                map.Size = sourceReader.NumDocs;
+
+                int docBase = 0;
+                TermsEnum termsEnum = null;
+                DocsEnum docsEnum = null;
+                foreach (AtomicReaderContext leaf in sourceReader.Leaves)
+                {
+                    AtomicReader leafReader = leaf.AtomicReader;
+                    termsEnum = leafReader.Terms(Consts.FULL).Iterator(termsEnum);
+                    while (termsEnum.Next() != null)
+                    {
+                        string pathString = termsEnum.Term().Utf8ToString();
+                        FacetLabel label = new FacetLabel(FacetsConfig.StringToPath(pathString));
+                        int newOrdinal = AddCategory(label);
+                        docsEnum = termsEnum.Docs(null, docsEnum, DocsEnum.FLAG_NONE);
+                        map.AddMapping(docsEnum.NextDoc() + docBase, newOrdinal);
+                    }
+                    docBase += leafReader.MaxDoc; // no deletions, so we're ok
+                }
+
+                map.AddDone();
+            }
+            finally
+            {
+                sourceReader.Dispose();
+            }
+        }
+
+        /// <summary>
+        /// Mapping from old ordinal to new ordinals, used when merging indexes 
+        /// with separate taxonomies.
+        /// <para> 
+        /// AddTaxonomy() merges another taxonomy into the given taxonomy
+        /// (this). An OrdinalMap is filled for each of the added taxonomies,
+        /// containing the new ordinal (in the merged taxonomy) of each of the
+        /// categories in the old taxonomy.
+        /// </para><para>
+        /// There exist two implementations of OrdinalMap: MemoryOrdinalMap and
+        /// DiskOrdinalMap. As their names suggest, the former keeps the map in
+        /// memory and the latter in a temporary disk file. Because these maps will
+        /// later be needed one by one (to remap the counting lists), not all at the
+        /// same time, it is recommended to put the first taxonomy's map in memory,
+        /// and all the rest on disk (later to be automatically read into memory one
+        /// by one, when needed).
+        /// </para>
+        /// </summary>
+        public interface OrdinalMap
+        {
+            /// <summary>
+            /// Sets the size of the map. This MUST be set before calling AddMapping().
+            /// It is assumed (but not verified) that AddMapping() will then be
+            /// called exactly 'size' times, with different origOrdinals between 0
+            /// and size-1.  
+            /// </summary>
+            int Size { set; }
+
+            /// <summary>
+            /// Records a mapping from an original ordinal to its new ordinal. </summary>
+            void AddMapping(int origOrdinal, int newOrdinal);
+
+            /// <summary>
+            /// Call AddDone() to say that all AddMapping() calls have been done.
+            /// In some implementations this might free some resources. 
+            /// </summary>
+            void AddDone();
+
+            /// <summary>
+            /// Returns the map from the taxonomy's original (consecutive) ordinals
+            /// to the new taxonomy's ordinals. If the map has to be read from disk
+            /// and ordered appropriately, it is done when this property is read.
+            /// It should only be read once, and only when the map is actually
+            /// needed. Reading it will also free all resources that the map might
+            /// be holding (such as temporary disk space), other than the returned int[].
+            /// </summary>
+            int[] Map { get; }
+        }
+
+        /// <summary>
+        /// <see cref="OrdinalMap"/> maintained in memory.
+        /// <para>
+        /// Setting <see cref="Size"/> allocates the backing array once, so that the
+        /// subsequent <see cref="AddMapping"/> calls are plain array stores. Callers
+        /// that never set the size are still supported: the array then grows on
+        /// demand to hold the largest original ordinal seen so far.
+        /// </para>
+        /// </summary>
+        public sealed class MemoryOrdinalMap : OrdinalMap
+        {
+            internal int[] map;
+
+            /// <summary>
+            /// Sole constructor. Starts with an empty map.
+            /// </summary>
+            public MemoryOrdinalMap()
+            {
+                map = new int[0];
+            }
+
+            /// <summary>
+            /// Announces how many ordinals will be mapped and makes sure the backing
+            /// array has at least that many slots. Mappings recorded before this call
+            /// are preserved, and the array is never shrunk.
+            /// </summary>
+            public int Size
+            {
+                set
+                {
+                    if (value > map.Length)
+                    {
+                        Array.Resize(ref map, value);
+                    }
+                }
+            }
+
+            /// <summary>
+            /// Records that <paramref name="origOrdinal"/> maps to <paramref name="newOrdinal"/>.
+            /// </summary>
+            public void AddMapping(int origOrdinal, int newOrdinal)
+            {
+                if (origOrdinal >= map.Length)
+                {
+                    // Fallback for callers that did not announce a large enough size.
+                    Array.Resize(ref map, origOrdinal + 1);
+                }
+                map[origOrdinal] = newOrdinal;
+            }
+
+            /// <summary>
+            /// Nothing to do for the in-memory implementation.
+            /// </summary>
+            public void AddDone()
+            {
+            }
+
+            /// <summary>
+            /// Returns the backing array itself (not a copy), indexed by original ordinal.
+            /// </summary>
+            public int[] Map
+            {
+                get
+                {
+                    return map;
+                }
+            }
+        }
+
+        /// <summary>
+        /// <see cref="OrdinalMap"/> maintained on file system.
+        /// <para>
+        /// The size and every (origOrdinal, newOrdinal) pair are appended to a
+        /// temporary file as ints. Reading <see cref="Map"/> loads the file back into
+        /// an int[], caches it, and deletes the temporary file.
+        /// </para>
+        /// </summary>
+        public sealed class DiskOrdinalMap : OrdinalMap
+        {
+            internal string tmpfile;
+            internal OutputStreamDataOutput @out;
+
+            // Cached result of Map; null until the file has been read back successfully.
+            int[] map = null;
+
+            /// <summary>
+            /// Sole constructor. Opens <paramref name="tmpfile"/> for writing.
+            /// </summary>
+            public DiskOrdinalMap(string tmpfile)
+            {
+                this.tmpfile = tmpfile;
+                // FileMode.Create truncates a pre-existing file; OpenOrCreate would leave
+                // stale bytes behind the newly written data if the old file was longer.
+                var outfs = new FileStream(tmpfile, FileMode.Create, FileAccess.Write);
+                @out = new OutputStreamDataOutput(outfs);
+            }
+
+            /// <summary>
+            /// Appends one (origOrdinal, newOrdinal) pair to the temporary file.
+            /// </summary>
+            public void AddMapping(int origOrdinal, int newOrdinal)
+            {
+                @out.WriteInt(origOrdinal);
+                @out.WriteInt(newOrdinal);
+            }
+
+            /// <summary>
+            /// Writes the map size to the temporary file. <see cref="Map"/> reads this
+            /// value first, so it has to be written before any mapping.
+            /// </summary>
+            public int Size
+            {
+                set
+                {
+                    @out.WriteInt(value);
+                }
+            }
+
+            /// <summary>
+            /// Closes the output. Calling it again is a no-op.
+            /// </summary>
+            public void AddDone()
+            {
+                if (@out != null)
+                {
+                    @out.Dispose();
+                    @out = null;
+                }
+            }
+
+            /// <summary>
+            /// Reads the mapping back from the temporary file on first access, deletes
+            /// the file, and returns the same cached array on every later access.
+            /// </summary>
+            public int[] Map
+            {
+                get
+                {
+                    if (map != null)
+                    {
+                        return map;
+                    }
+                    AddDone(); // in case this wasn't previously called
+
+                    int[] loaded;
+                    // FileMode.Open: a missing file is an error, not something to create.
+                    using (var ifs = new FileStream(tmpfile, FileMode.Open, FileAccess.Read))
+                    {
+                        var @in = new InputStreamDataInput(ifs);
+                        try
+                        {
+                            loaded = new int[@in.ReadInt()];
+                            // NOTE: The current code assumes here that the map is complete,
+                            // i.e., every ordinal gets one and exactly one value. Otherwise,
+                            // we may run into an EOF here, or vice versa, not read everything.
+                            for (int i = 0; i < loaded.Length; i++)
+                            {
+                                int origordinal = @in.ReadInt();
+                                int newordinal = @in.ReadInt();
+                                loaded[origordinal] = newordinal;
+                            }
+                        }
+                        finally
+                        {
+                            // Release the input even when the file is truncated or corrupt.
+                            @in.Dispose();
+                        }
+                    }
+
+                    // Publish only a completely read map, so a failed read is never cached.
+                    map = loaded;
+
+                    // Delete the temporary file, which is no longer needed.
+                    if (File.Exists(tmpfile))
+                    {
+                        File.Delete(tmpfile);
+                    }
+                    return map;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Rolls back changes to the taxonomy writer and closes the instance. After
+        /// this method the instance becomes unusable (calling any of its API methods
+        /// will yield an <see cref="AlreadyClosedException"/>).
+        /// </summary>
+        public virtual void Rollback()
+        {
+            lock (this)
+            {
+                EnsureOpen();
+                indexWriter.Rollback();
+                DoClose();
+            }
+        }
+
+        /// <summary>
+        /// Replaces the current taxonomy with the given one. This method should
+        /// generally be called in conjunction with the
+        /// <see cref="IndexWriter"/> AddIndexes call, to replace both the taxonomy
+        /// as well as the search index content.
+        /// </summary>
+        public virtual void ReplaceTaxonomy(Directory taxoDir)
+        {
+            lock (this)
+            {
+                // replace the taxonomy by doing IW optimized operations
+                indexWriter.DeleteAll();
+                indexWriter.AddIndexes(taxoDir);
+                shouldRefreshReaderManager = true;
+                InitReaderManager(); // ensure that it's initialized
+                RefreshReaderManager();
+                nextID = indexWriter.MaxDoc;
+                taxoArrays = null; // must nullify so that it's re-computed next time it's needed
+
+                // need to clear the cache, so that AddCategory won't accidentally return
+                // old categories that are in the cache.
+                cache.Clear();
+                cacheIsComplete = false;
+                shouldFillCache = true;
+                cacheMisses.Set(0);
+
+                // bump indexEpoch: replacing the taxonomy is just like recreating it
+                ++indexEpoch;
+            }
+        }
+
+        /// <summary>
+        /// Returns the <see cref="Directory"/> of this taxonomy writer. </summary>
+        public virtual Directory Directory
+        {
+            get
+            {
+                return dir;
+            }
+        }
+
+        /// <summary>
+        /// Used by <see cref="DirectoryTaxonomyReader"/> to support NRT.
+        /// <para>
+        /// <b>NOTE:</b> you should not use the obtained <see cref="IndexWriter"/> in any
+        /// way, other than opening an IndexReader on it, or otherwise, the taxonomy
+        /// index may become corrupt!
+        /// </para>
+        /// </summary>
+        internal IndexWriter InternalIndexWriter
+        {
+            get
+            {
+                return indexWriter;
+            }
+        }
+
+        /// <summary>
+        /// Expert: returns current index epoch, if this is a
+        /// near-real-time reader.  Used by
+        /// <see cref="DirectoryTaxonomyReader"/> to support NRT.
+        /// 
+        /// @lucene.internal 
+        /// </summary>
+        public long TaxonomyEpoch
+        {
+            get
+            {
+                return indexEpoch;
+            }
+        }
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/src/Lucene.Net.Facet/Taxonomy/Directory/TaxonomyIndexArrays.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/Taxonomy/Directory/TaxonomyIndexArrays.cs b/src/Lucene.Net.Facet/Taxonomy/Directory/TaxonomyIndexArrays.cs
new file mode 100644
index 0000000..9a99f4a
--- /dev/null
+++ b/src/Lucene.Net.Facet/Taxonomy/Directory/TaxonomyIndexArrays.cs
@@ -0,0 +1,252 @@
+using System;
+using System.Diagnostics;
+
+namespace Lucene.Net.Facet.Taxonomy.Directory
+{
+
+    using CorruptIndexException = Lucene.Net.Index.CorruptIndexException;
+    using DocsAndPositionsEnum = Lucene.Net.Index.DocsAndPositionsEnum;
+    using IndexReader = Lucene.Net.Index.IndexReader;
+    using MultiFields = Lucene.Net.Index.MultiFields;
+    using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+    using ArrayUtil = Lucene.Net.Util.ArrayUtil;
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// A <seealso cref="ParallelTaxonomyArrays"/> that are initialized from the taxonomy
+    /// index.
+    /// 
+    /// @lucene.experimental
+    /// </summary>
+    internal class TaxonomyIndexArrays : ParallelTaxonomyArrays
+    {
+
+        private readonly int[] parents_Renamed;
+
+        // the following two arrays are lazily intialized. note that we only keep a
+        // single boolean member as volatile, instead of declaring the arrays
+        // volatile. the code guarantees that only after the boolean is set to true,
+        // the arrays are returned.
+        private volatile bool initializedChildren = false;
+        private int[] children_Renamed, siblings_Renamed;
+
+        /// <summary>
+        /// Used by <see cref="Add(int, int)"/> after the array grew; wraps the given array without copying it. </summary>
+        private TaxonomyIndexArrays(int[] parents)
+        {
+            this.parents_Renamed = parents;
+        }
+
+        /// <summary>
+        /// Builds the parents array for every document of <paramref name="reader"/>.
+        /// An empty reader yields an empty array and nothing is read.
+        /// </summary>
+        public TaxonomyIndexArrays(IndexReader reader)
+        {
+            parents_Renamed = new int[reader.MaxDoc];
+            if (parents_Renamed.Length == 0)
+            {
+                return;
+            }
+
+            InitParents(reader, 0);
+
+            // Starting Lucene 2.9, following the change LUCENE-1542, we can
+            // no longer reliably read the parent "-1" (see comment in
+            // LuceneTaxonomyWriter.SinglePositionTokenStream). We have no way
+            // to fix this in indexing without breaking backward-compatibility
+            // with existing indexes, so what we'll do instead is just
+            // hard-code the parent of ordinal 0 to be -1, and assume (as is
+            // indeed the case) that no other parent can be -1.
+            parents_Renamed[0] = TaxonomyReader.INVALID_ORDINAL;
+        }
+
+        /// <summary>
+        /// Builds the arrays for <paramref name="reader"/>, reusing the parents already
+        /// known to <paramref name="copyFrom"/> and reading only the remaining ones
+        /// from the index. Children/siblings are carried over as well when
+        /// <paramref name="copyFrom"/> has already computed them.
+        /// </summary>
+        public TaxonomyIndexArrays(IndexReader reader, TaxonomyIndexArrays copyFrom)
+        {
+            Debug.Assert(copyFrom != null);
+
+            // note that the inherited length may be equal to reader.MaxDoc. this is not a bug:
+            // it may be caused if e.g. the taxonomy segments were merged, and so an updated
+            // NRT reader was obtained, even though nothing was changed. this is not very likely
+            // to happen.
+            int[] inherited = copyFrom.Parents();
+            int inheritedCount = inherited.Length;
+
+            parents_Renamed = new int[reader.MaxDoc];
+            Array.Copy(inherited, 0, parents_Renamed, 0, inheritedCount);
+
+            // only the ordinals past the inherited prefix are read from the index
+            InitParents(reader, inheritedCount);
+
+            if (copyFrom.initializedChildren)
+            {
+                InitChildrenSiblings(copyFrom);
+            }
+        }
+
+        /// <summary>
+        /// Allocates and fills the children and siblings arrays, unless that has
+        /// already been done. When <paramref name="copyFrom"/> is given, its arrays are
+        /// copied and only the ordinals beyond its parents array are computed.
+        /// </summary>
+        private void InitChildrenSiblings(TaxonomyIndexArrays copyFrom)
+        {
+            lock (this)
+            {
+                // must do this check once the lock is held !
+                if (initializedChildren)
+                {
+                    return;
+                }
+
+                int size = parents_Renamed.Length;
+                children_Renamed = new int[size];
+                siblings_Renamed = new int[size];
+
+                int firstToCompute = 0;
+                if (copyFrom != null)
+                {
+                    // called from the ctor, after we know copyFrom has initialized children/siblings
+                    int[] knownChildren = copyFrom.Children();
+                    int[] knownSiblings = copyFrom.Siblings();
+                    Array.Copy(knownChildren, 0, children_Renamed, 0, knownChildren.Length);
+                    Array.Copy(knownSiblings, 0, siblings_Renamed, 0, knownSiblings.Length);
+                    firstToCompute = copyFrom.parents_Renamed.Length;
+                }
+                ComputeChildrenSiblings(firstToCompute);
+
+                // set the flag last: the getters return the arrays only after it is true
+                initializedChildren = true;
+            }
+        }
+
+        /// <summary>
+        /// Fills the children and siblings arrays for the ordinals from
+        /// <paramref name="first"/> up to the end of the parents array. The arrays
+        /// must already be allocated with the same length as the parents array.
+        /// An empty parents array is a no-op.
+        /// </summary>
+        private void ComputeChildrenSiblings(int first)
+        {
+            int count = parents_Renamed.Length;
+
+            // reset the youngest child of all ordinals. while this should be done only
+            // for the leaves, we don't know up front which are the leaves, so we reset
+            // all of them.
+            for (int i = first; i < count; i++)
+            {
+                children_Renamed[i] = TaxonomyReader.INVALID_ORDINAL;
+            }
+
+            if (first == 0)
+            {
+                if (count == 0)
+                {
+                    // empty taxonomy: there is no root slot to write to
+                    return;
+                }
+
+                // the root category has no parent, and therefore no siblings
+                siblings_Renamed[0] = TaxonomyReader.INVALID_ORDINAL;
+                first = 1;
+            }
+
+            for (int i = first; i < count; i++)
+            {
+                // note that parents[i] is always < i, so the right-hand-side of
+                // the following line is already set when we get here
+                int parent = parents_Renamed[i];
+                siblings_Renamed[i] = children_Renamed[parent];
+                children_Renamed[parent] = i;
+            }
+        }
+
+        // Reads the parent of every category from 'first' up to reader.MaxDoc into the
+        // parents array; throws CorruptIndexException when a category has no parent data.
+        private void InitParents(IndexReader reader, int first)
+        {
+            int maxDoc = reader.MaxDoc;
+            if (maxDoc == first)
+            {
+                // nothing new to read
+                return;
+            }
+
+            // it's ok to use MultiFields because we only iterate on one posting list.
+            // breaking it to loop over the leaves() only complicates code for no
+            // apparent gain.
+            DocsAndPositionsEnum positions = MultiFields.GetTermPositionsEnum(reader, null, Consts.FIELD_PAYLOADS, Consts.PAYLOAD_PARENT_BYTES_REF, DocsAndPositionsEnum.FLAG_PAYLOADS);
+
+            // shouldn't really happen, if it does, something's wrong
+            if (positions == null || positions.Advance(first) == DocIdSetIterator.NO_MORE_DOCS)
+            {
+                throw new CorruptIndexException("Missing parent data for category " + first);
+            }
+
+            for (int ordinal = first; ordinal < maxDoc; ordinal++)
+            {
+                // the enum has to be positioned on every ordinal in turn
+                if (positions.DocID() != ordinal) // this shouldn't happen
+                {
+                    throw new CorruptIndexException("Missing parent data for category " + ordinal);
+                }
+                if (positions.Freq() == 0) // shouldn't happen
+                {
+                    throw new CorruptIndexException("Missing parent data for category " + ordinal);
+                }
+
+                // the parent ordinal is stored as the position of the posting
+                parents_Renamed[ordinal] = positions.NextPosition();
+
+                if (positions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
+                {
+                    continue;
+                }
+
+                // postings exhausted: fine only if this was the last category
+                if (ordinal + 1 < maxDoc)
+                {
+                    throw new CorruptIndexException("Missing parent data for category " + (ordinal + 1));
+                }
+                break;
+            }
+        }
+
+        /// <summary>
+        /// Records <paramref name="parentOrdinal"/> as the parent of <paramref name="ordinal"/>.
+        /// Returns this instance when the ordinal fits in the current parents array;
+        /// otherwise returns a new instance wrapping a grown copy of the array.
+        /// <para>
+        /// <b>NOTE:</b> you should call this method from a thread-safe code.
+        /// </para>
+        /// </summary>
+        internal virtual TaxonomyIndexArrays Add(int ordinal, int parentOrdinal)
+        {
+            if (ordinal < parents_Renamed.Length)
+            {
+                parents_Renamed[ordinal] = parentOrdinal;
+                return this;
+            }
+
+            // no room: grow a copy and hand it to a fresh instance
+            int[] grown = ArrayUtil.Grow(parents_Renamed, ordinal + 1);
+            grown[ordinal] = parentOrdinal;
+            return new TaxonomyIndexArrays(grown);
+        }
+
+        /// <summary>
+        /// Returns the parents array, where <c>parents[i]</c> denotes the parent of
+        /// category ordinal <c>i</c>. The internal array itself is returned, not a copy.
+        /// </summary>
+        public override int[] Parents()
+        {
+            return parents_Renamed;
+        }
+
+        /// <summary>
+        /// Returns the children array, where <c>children[i]</c> denotes the youngest
+        /// child of category ordinal <c>i</c>. The youngest child is defined as the
+        /// category that was added last to the taxonomy as an immediate child of
+        /// <c>i</c>. The array is computed on first use.
+        /// </summary>
+        public override int[] Children()
+        {
+            if (!initializedChildren)
+            {
+                InitChildrenSiblings(null);
+            }
+
+            // the array is guaranteed to be populated
+            return children_Renamed;
+        }
+
+        /// <summary>
+        /// Returns the siblings array, where <c>siblings[i]</c> denotes the sibling
+        /// of category ordinal <c>i</c>. The sibling is defined as the previous
+        /// youngest child of <c>parents[i]</c>. The array is computed on first use.
+        /// </summary>
+        public override int[] Siblings()
+        {
+            if (!initializedChildren)
+            {
+                InitChildrenSiblings(null);
+            }
+
+            // the array is guaranteed to be populated
+            return siblings_Renamed;
+        }
+
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/src/Lucene.Net.Facet/Taxonomy/DocValuesOrdinalsReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/Taxonomy/DocValuesOrdinalsReader.cs b/src/Lucene.Net.Facet/Taxonomy/DocValuesOrdinalsReader.cs
new file mode 100644
index 0000000..3d50275
--- /dev/null
+++ b/src/Lucene.Net.Facet/Taxonomy/DocValuesOrdinalsReader.cs
@@ -0,0 +1,130 @@
+namespace Lucene.Net.Facet.Taxonomy
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;
+    using BinaryDocValues = Lucene.Net.Index.BinaryDocValues;
+    using DocValues = Lucene.Net.Index.DocValues;
+    using ArrayUtil = Lucene.Net.Util.ArrayUtil;
+    using BytesRef = Lucene.Net.Util.BytesRef;
+    using IntsRef = Lucene.Net.Util.IntsRef;
+
+    /// <summary>
+    /// Decodes ordinals previously indexed into a BinaryDocValues field.
+    /// </summary>
+    public class DocValuesOrdinalsReader : OrdinalsReader
+    {
+        private readonly string field;
+
+        /// <summary>
+        /// Default constructor; reads from <c>FacetsConfig.DEFAULT_INDEX_FIELD_NAME</c>.
+        /// </summary>
+        public DocValuesOrdinalsReader()
+            : this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME)
+        {
+        }
+
+        /// <summary>
+        /// Create this, with the specified indexed field name.
+        /// </summary>
+        public DocValuesOrdinalsReader(string field)
+        {
+            this.field = field;
+        }
+
+        /// <summary>
+        /// Returns a per-segment reader over this field's binary doc values. When the
+        /// segment has no values for the field, <c>DocValues.EMPTY_BINARY</c> is used.
+        /// </summary>
+        public override OrdinalsSegmentReader GetReader(AtomicReaderContext context)
+        {
+            BinaryDocValues segmentValues = context.AtomicReader.GetBinaryDocValues(field) ?? DocValues.EMPTY_BINARY;
+            return new OrdinalsSegmentReaderAnonymousInnerClassHelper(this, segmentValues);
+        }
+
+        // Per-segment reader: fetches a document's bytes and hands them to Decode().
+        private class OrdinalsSegmentReaderAnonymousInnerClassHelper : OrdinalsSegmentReader
+        {
+            private readonly DocValuesOrdinalsReader outerInstance;
+
+            private BinaryDocValues values;
+
+            public OrdinalsSegmentReaderAnonymousInnerClassHelper(DocValuesOrdinalsReader outerInstance, BinaryDocValues values)
+            {
+                this.outerInstance = outerInstance;
+                this.values = values;
+            }
+
+            public override void Get(int docID, IntsRef ordinals)
+            {
+                // a fresh BytesRef per call receives the document's encoded ordinals
+                BytesRef encoded = new BytesRef();
+                values.Get(docID, encoded);
+                outerInstance.Decode(encoded, ordinals);
+            }
+        }
+
+        /// <summary>
+        /// Name of the field the ordinals are read from.
+        /// </summary>
+        public override string IndexFieldName
+        {
+            get { return field; }
+        }
+
+        /// <summary>
+        /// Decodes <paramref name="buf"/> into <paramref name="ordinals"/>, overwriting
+        /// its previous content. Each value is stored as a run of bytes carrying 7 bits
+        /// each, where a byte with the high bit clear ends the value; every decoded
+        /// value is added to the previously decoded ordinal. Subclass &amp; override
+        /// if you change the encoding.
+        /// </summary>
+        protected virtual void Decode(BytesRef buf, IntsRef ordinals)
+        {
+            // grow the buffer up front, even if by a large number of values (buf.Length)
+            // that saves the need to check inside the loop for every decoded value if
+            // the buffer needs to grow.
+            int encodedLength = buf.Length;
+            if (ordinals.Ints.Length < encodedLength)
+            {
+                ordinals.Ints = ArrayUtil.Grow(ordinals.Ints, encodedLength);
+            }
+
+            ordinals.Offset = 0;
+            ordinals.Length = 0;
+
+            // it is better if the decoding is inlined like so, and not e.g.
+            // in a utility method
+            int end = buf.Offset + buf.Length;
+            int accumulator = 0;
+            int previous = 0;
+            for (int pos = buf.Offset; pos < end; pos++)
+            {
+                byte current = buf.Bytes[pos];
+                if ((current & 0x80) != 0)
+                {
+                    // high bit set: more bytes of the same value follow
+                    accumulator = (accumulator << 7) | (current & 0x7F);
+                    continue;
+                }
+
+                // high bit clear: last byte of the value, which is a delta from the previous ordinal
+                int ordinal = ((accumulator << 7) | current) + previous;
+                ordinals.Ints[ordinals.Length] = ordinal;
+                ordinals.Length++;
+                previous = ordinal;
+                accumulator = 0;
+            }
+        }
+    }
+
+}
\ No newline at end of file


Mime
View raw message