Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 1A342200CAD for ; Tue, 6 Jun 2017 02:11:43 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 18FE9160BD4; Tue, 6 Jun 2017 00:11:43 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 1BC85160BF9 for ; Tue, 6 Jun 2017 02:11:39 +0200 (CEST) Received: (qmail 66415 invoked by uid 500); 6 Jun 2017 00:11:39 -0000 Mailing-List: contact commits-help@lucenenet.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: lucene-net-dev@lucenenet.apache.org Delivered-To: mailing list commits@lucenenet.apache.org Received: (qmail 63854 invoked by uid 99); 6 Jun 2017 00:11:35 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 06 Jun 2017 00:11:35 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 9EC07DFBDA; Tue, 6 Jun 2017 00:11:35 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: nightowl888@apache.org To: commits@lucenenet.apache.org Date: Tue, 06 Jun 2017 00:12:14 -0000 Message-Id: <77ab5f3d1ef9424fb2d587541269def2@git.apache.org> In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [41/48] lucenenet git commit: Lucene.Net.Codecs.Lucene40: Fixed XML documentation comment warnings archived-at: Tue, 06 Jun 2017 00:11:43 -0000 Lucene.Net.Codecs.Lucene40: Fixed XML documentation comment warnings Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/27cdd048 Tree: 
http://git-wip-us.apache.org/repos/asf/lucenenet/tree/27cdd048 Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/27cdd048 Branch: refs/heads/master Commit: 27cdd0480ae5d8f4c83ae73557e77fa8d589792c Parents: 3221b63 Author: Shad Storhaug Authored: Mon Jun 5 14:10:17 2017 +0700 Committer: Shad Storhaug Committed: Tue Jun 6 06:58:41 2017 +0700 ---------------------------------------------------------------------- CONTRIBUTING.md | 3 +- src/Lucene.Net/Codecs/Lucene40/BitVector.cs | 67 +++--- src/Lucene.Net/Codecs/Lucene40/Lucene40Codec.cs | 14 +- .../Codecs/Lucene40/Lucene40DocValuesFormat.cs | 158 ++++++------- .../Codecs/Lucene40/Lucene40DocValuesReader.cs | 17 +- .../Codecs/Lucene40/Lucene40FieldInfosFormat.cs | 126 +++++----- .../Codecs/Lucene40/Lucene40FieldInfosReader.cs | 8 +- .../Codecs/Lucene40/Lucene40LiveDocsFormat.cs | 52 ++--- .../Codecs/Lucene40/Lucene40NormsFormat.cs | 18 +- .../Lucene40/Lucene40PostingsBaseFormat.cs | 6 +- .../Codecs/Lucene40/Lucene40PostingsFormat.cs | 232 +++++++++---------- .../Codecs/Lucene40/Lucene40PostingsReader.cs | 11 +- .../Lucene40/Lucene40SegmentInfoFormat.cs | 75 +++--- .../Lucene40/Lucene40SegmentInfoReader.cs | 8 +- .../Lucene40/Lucene40SegmentInfoWriter.cs | 7 +- .../Codecs/Lucene40/Lucene40SkipListReader.cs | 11 +- .../Lucene40/Lucene40StoredFieldsFormat.cs | 96 ++++---- .../Lucene40/Lucene40StoredFieldsReader.cs | 30 +-- .../Lucene40/Lucene40StoredFieldsWriter.cs | 21 +- .../Lucene40/Lucene40TermVectorsFormat.cs | 139 ++++++----- .../Lucene40/Lucene40TermVectorsReader.cs | 23 +- .../Lucene40/Lucene40TermVectorsWriter.cs | 8 +- 22 files changed, 559 insertions(+), 571 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/27cdd048/CONTRIBUTING.md ---------------------------------------------------------------------- diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5f422f8..c8a36fb 100644 --- a/CONTRIBUTING.md +++ 
b/CONTRIBUTING.md @@ -53,8 +53,7 @@ helpers to help with that, see for examples see our [Java style methods to avoid 1. Lucene.Net.Core (project) 1. Codecs.Compressing (namespace) 2. Codecs.Lucene3x (namespace) - 3. Codecs.Lucene40 (namespace) - 4. Util.Packed (namespace) + 3. Util.Packed (namespace) 2. Lucene.Net.Codecs (project) 1. Appending (namespace) 2. BlockTerms (namespace) http://git-wip-us.apache.org/repos/asf/lucenenet/blob/27cdd048/src/Lucene.Net/Codecs/Lucene40/BitVector.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene40/BitVector.cs b/src/Lucene.Net/Codecs/Lucene40/BitVector.cs index 57a7e14..eb1605a 100644 --- a/src/Lucene.Net/Codecs/Lucene40/BitVector.cs +++ b/src/Lucene.Net/Codecs/Lucene40/BitVector.cs @@ -32,16 +32,16 @@ namespace Lucene.Net.Codecs.Lucene40 using IMutableBits = Lucene.Net.Util.IMutableBits; /// - /// Optimized implementation of a vector of bits. this is more-or-less like - /// java.util.BitSet, but also includes the following: - ///
    - ///
  • a count() method, which efficiently computes the number of one bits;
  • - ///
  • optimized read from and write to disk;
  • - ///
  • inlinable get() method;
  • - ///
  • store and load, as bit set or d-gaps, depending on sparseness;
  • - ///
- /// - /// @lucene.internal + /// Optimized implementation of a vector of bits. This is more-or-less like + /// java.util.BitSet, but also includes the following: + /// + /// a count() method, which efficiently computes the number of one bits; + /// optimized read from and write to disk; + /// inlinable get() method; + /// store and load, as bit set or d-gaps, depending on sparseness; + /// + /// + /// @lucene.internal ///
// pkg-private: if this thing is generally useful then it can go back in .util, // but the serialization must be here underneath the codec. @@ -53,7 +53,7 @@ namespace Lucene.Net.Codecs.Lucene40 private int version; /// - /// Constructs a vector capable of holding n bits. + /// Constructs a vector capable of holding bits. public BitVector(int n) { size = n; @@ -88,7 +88,7 @@ namespace Lucene.Net.Codecs.Lucene40 } /// - /// Sets the value of bit to one. + /// Sets the value of to one. public void Set(int bit) { if (bit >= size) @@ -100,8 +100,8 @@ namespace Lucene.Net.Codecs.Lucene40 } /// - /// Sets the value of bit to true, and - /// returns true if bit was already set + /// Sets the value of to true, and + /// returns true if bit was already set. /// public bool GetAndSet(int bit) { @@ -129,7 +129,7 @@ namespace Lucene.Net.Codecs.Lucene40 } /// - /// Sets the value of bit to zero. + /// Sets the value of to zero. public void Clear(int bit) { if (bit >= size) @@ -166,8 +166,8 @@ namespace Lucene.Net.Codecs.Lucene40 } /// - /// Returns true if bit is one and - /// false if it is zero. + /// Returns true if is one and + /// false if it is zero. /// public bool Get(int bit) { @@ -186,8 +186,9 @@ namespace Lucene.Net.Codecs.Lucene40 //} /// - /// Returns the number of bits in this vector. this is also one greater than + /// Returns the number of bits in this vector. This is also one greater than /// the number of the largest valid bit number. + /// /// This is the equivalent of either size() or length() in Lucene. /// public int Length @@ -196,9 +197,9 @@ namespace Lucene.Net.Codecs.Lucene40 } /// - /// Returns the total number of one bits in this vector. this is efficiently - /// computed and cached, so that, if the vector is not changed, no - /// recomputation is done for repeated calls. + /// Returns the total number of one bits in this vector. 
This is efficiently + /// computed and cached, so that, if the vector is not changed, no + /// recomputation is done for repeated calls. /// public int Count() { @@ -257,9 +258,9 @@ namespace Lucene.Net.Codecs.Lucene40 } /// - /// Writes this vector to the file name in Directory - /// d, in a format that can be read by the constructor {@link - /// #BitVector(Directory, String, IOContext)}. + /// Writes this vector to the file in Directory + /// , in a format that can be read by the constructor + /// . /// public void Write(Directory d, string name, IOContext context) { @@ -289,7 +290,7 @@ namespace Lucene.Net.Codecs.Lucene40 } /// - /// Invert all bits + /// Invert all bits. public void InvertAll() { if (count != -1) @@ -322,7 +323,7 @@ namespace Lucene.Net.Codecs.Lucene40 } /// - /// Set all bits + /// Set all bits. public void SetAll() { Arrays.Fill(bits, (byte)0xff); @@ -331,7 +332,7 @@ namespace Lucene.Net.Codecs.Lucene40 } /// - /// Write as a bit set + /// Write as a bit set. private void WriteBits(IndexOutput output) { output.WriteInt32(Length); // write size @@ -340,7 +341,7 @@ namespace Lucene.Net.Codecs.Lucene40 } /// - /// Write as a d-gaps list + /// Write as a d-gaps list. private void WriteClearedDgaps(IndexOutput output) { output.WriteInt32(-1); // mark using d-gaps @@ -412,8 +413,8 @@ namespace Lucene.Net.Codecs.Lucene40 } /// - /// Constructs a bit vector from the file name in Directory - /// d, as written by the method. + /// Constructs a bit vector from the file in Directory + /// , as written by the method. /// public BitVector(Directory d, string name, IOContext context) { @@ -486,7 +487,7 @@ namespace Lucene.Net.Codecs.Lucene40 } /// - /// Read as a bit set + /// Read as a bit set. private void ReadBits(IndexInput input) { count = input.ReadInt32(); // read count @@ -495,7 +496,7 @@ namespace Lucene.Net.Codecs.Lucene40 } /// - /// read as a d-gaps list + /// Read as a d-gaps list. 
private void ReadSetDgaps(IndexInput input) { size = input.ReadInt32(); // (re)read size @@ -513,7 +514,7 @@ namespace Lucene.Net.Codecs.Lucene40 } /// - /// read as a d-gaps cleared bits list + /// Read as a d-gaps cleared bits list. private void ReadClearedDgaps(IndexInput input) { size = input.ReadInt32(); // (re)read size http://git-wip-us.apache.org/repos/asf/lucenenet/blob/27cdd048/src/Lucene.Net/Codecs/Lucene40/Lucene40Codec.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene40/Lucene40Codec.cs b/src/Lucene.Net/Codecs/Lucene40/Lucene40Codec.cs index d0cc900..fd5ce7b 100644 --- a/src/Lucene.Net/Codecs/Lucene40/Lucene40Codec.cs +++ b/src/Lucene.Net/Codecs/Lucene40/Lucene40Codec.cs @@ -23,12 +23,12 @@ namespace Lucene.Net.Codecs.Lucene40 /// /// Implements the Lucene 4.0 index format, with configurable per-field postings formats. - ///

+ /// /// If you want to reuse functionality of this codec in another codec, extend - /// . + /// . + /// + /// See package documentation for file format details. ///

- /// - /// @deprecated Only for reading old 4.0 segments // NOTE: if we make largish changes in a minor release, easier to just make Lucene42Codec or whatever // if they are backwards compatible or smallish we can probably do the backwards in the postingsreader // (it writes a minor version, etc). @@ -113,9 +113,9 @@ namespace Lucene.Net.Codecs.Lucene40 /// /// Returns the postings format that should be used for writing - /// new segments of field. - /// - /// The default implementation always returns "Lucene40" + /// new segments of . + /// + /// The default implementation always returns "Lucene40". /// public virtual PostingsFormat GetPostingsFormatForField(string field) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/27cdd048/src/Lucene.Net/Codecs/Lucene40/Lucene40DocValuesFormat.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene40/Lucene40DocValuesFormat.cs b/src/Lucene.Net/Codecs/Lucene40/Lucene40DocValuesFormat.cs index 5c658b4..93227e5 100644 --- a/src/Lucene.Net/Codecs/Lucene40/Lucene40DocValuesFormat.cs +++ b/src/Lucene.Net/Codecs/Lucene40/Lucene40DocValuesFormat.cs @@ -25,98 +25,98 @@ namespace Lucene.Net.Codecs.Lucene40 /// /// Lucene 4.0 DocValues format. - ///

+ /// /// Files: - ///

    - ///
  • .dv.cfs:
  • - ///
  • .dv.cfe:
  • - ///
+ /// + /// .dv.cfs: compound container () + /// .dv.cfe: compound entries () + /// /// Entries within the compound file: - ///
    - ///
  • <segment>_<fieldNumber>.dat: data values
  • - ///
  • <segment>_<fieldNumber>.idx: index into the .dat for DEREF types
  • - ///
- ///

- /// There are several many types of {@code DocValues} with different encodings. - /// From the perspective of filenames, all types store their values in .dat - /// entries within the compound file. In the case of dereferenced/sorted types, the .dat - /// actually contains only the unique values, and an additional .idx file contains + /// + /// <segment>_<fieldNumber>.dat: data values + /// <segment>_<fieldNumber>.idx: index into the .dat for DEREF types + /// + /// + /// There are several many types of with different encodings. + /// From the perspective of filenames, all types store their values in .dat + /// entries within the compound file. In the case of dereferenced/sorted types, the .dat + /// actually contains only the unique values, and an additional .idx file contains /// pointers to these unique values. - ///

+ /// /// Formats: - ///
    - ///
  • {@code VAR_INTS} .dat --> Header, PackedType, MinValue, - /// DefaultValue, PackedStream
  • - ///
  • {@code FIXED_INTS_8} .dat --> Header, ValueSize, - /// maxdoc
  • - ///
  • {@code FIXED_INTS_16} .dat --> Header, ValueSize, - /// maxdoc
  • - ///
  • {@code FIXED_INTS_32} .dat --> Header, ValueSize, - /// maxdoc
  • - ///
  • {@code FIXED_INTS_64} .dat --> Header, ValueSize, - /// maxdoc
  • - ///
  • {@code FLOAT_32} .dat --> Header, ValueSize, Float32maxdoc
  • - ///
  • {@code FLOAT_64} .dat --> Header, ValueSize, Float64maxdoc
  • - ///
  • {@code BYTES_FIXED_STRAIGHT} .dat --> Header, ValueSize, - /// ( * ValueSize)maxdoc
  • - ///
  • {@code BYTES_VAR_STRAIGHT} .idx --> Header, TotalBytes, Addresses
  • - ///
  • {@code BYTES_VAR_STRAIGHT} .dat --> Header, - /// ( * variable ValueSize)maxdoc
  • - ///
  • {@code BYTES_FIXED_DEREF} .idx --> Header, NumValues, Addresses
  • - ///
  • {@code BYTES_FIXED_DEREF} .dat --> Header, ValueSize, - /// ( * ValueSize)NumValues
  • - ///
  • {@code BYTES_VAR_DEREF} .idx --> Header, TotalVarBytes, Addresses
  • - ///
  • {@code BYTES_VAR_DEREF} .dat --> Header, - /// (LengthPrefix + * variable ValueSize)NumValues
  • - ///
  • {@code BYTES_FIXED_SORTED} .idx --> Header, NumValues, Ordinals
  • - ///
  • {@code BYTES_FIXED_SORTED} .dat --> Header, ValueSize, - /// ( * ValueSize)NumValues
  • - ///
  • {@code BYTES_VAR_SORTED} .idx --> Header, TotalVarBytes, Addresses, Ordinals
  • - ///
  • {@code BYTES_VAR_SORTED} .dat --> Header, - /// ( * variable ValueSize)NumValues
  • - ///
+ /// + /// .dat --> Header, PackedType, MinValue, + /// DefaultValue, PackedStream + /// .dat --> Header, ValueSize, + /// Byte () maxdoc + /// .dat --> Header, ValueSize, + /// Short () maxdoc + /// .dat --> Header, ValueSize, + /// Int32 () maxdoc + /// .dat --> Header, ValueSize, + /// Int64 () maxdoc + /// .dat --> Header, ValueSize, Float32maxdoc + /// .dat --> Header, ValueSize, Float64maxdoc + /// .dat --> Header, ValueSize, + /// (Byte () * ValueSize)maxdoc + /// .idx --> Header, TotalBytes, Addresses + /// .dat --> Header, + /// (Byte () * variable ValueSize)maxdoc + /// .idx --> Header, NumValues, Addresses + /// .dat --> Header, ValueSize, + /// (Byte () * ValueSize)NumValues + /// .idx --> Header, TotalVarBytes, Addresses + /// .dat --> Header, + /// (LengthPrefix + Byte () * variable ValueSize)NumValues + /// .idx --> Header, NumValues, Ordinals + /// .dat --> Header, ValueSize, + /// (Byte () * ValueSize)NumValues + /// .idx --> Header, TotalVarBytes, Addresses, Ordinals + /// .dat --> Header, + /// (Byte () * variable ValueSize)NumValues + /// /// Data Types: - ///
    - ///
  • Header -->
  • - ///
  • PackedType -->
  • - ///
  • MaxAddress, MinValue, DefaultValue -->
  • - ///
  • PackedStream, Addresses, Ordinals -->
  • - ///
  • ValueSize, NumValues -->
  • - ///
  • Float32 --> 32-bit float encoded with - /// then written as
  • - ///
  • Float64 --> 64-bit float encoded with - /// then written as
  • - ///
  • TotalBytes -->
  • - ///
  • TotalVarBytes -->
  • - ///
  • LengthPrefix --> Length of the data value as (maximum - /// of 2 bytes)
  • - ///
+ /// + /// Header --> CodecHeader () + /// PackedType --> Byte () + /// MaxAddress, MinValue, DefaultValue --> Int64 () + /// PackedStream, Addresses, Ordinals --> + /// ValueSize, NumValues --> Int32 () + /// Float32 --> 32-bit float encoded with + /// then written as Int32 () + /// Float64 --> 64-bit float encoded with + /// then written as Int64 () + /// TotalBytes --> VLong () + /// TotalVarBytes --> Int64 () + /// LengthPrefix --> Length of the data value as VInt () (maximum + /// of 2 bytes) + /// /// Notes: - ///
    - ///
  • PackedType is a 0 when compressed, 1 when the stream is written as 64-bit integers.
  • - ///
  • Addresses stores pointers to the actual byte location (indexed by docid). In the VAR_STRAIGHT + /// + /// PackedType is a 0 when compressed, 1 when the stream is written as 64-bit integers. + /// Addresses stores pointers to the actual byte location (indexed by docid). In the VAR_STRAIGHT /// case, each entry can have a different length, so to determine the length, docid+1 is /// retrieved. A sentinel address is written at the end for the VAR_STRAIGHT case, so the Addresses /// stream contains maxdoc+1 indices. For the deduplicated VAR_DEREF case, each length - /// is encoded as a prefix to the data itself as a - /// (maximum of 2 bytes).
  • - ///
  • Ordinals stores the term ID in sorted order (indexed by docid). In the FIXED_SORTED case, + /// is encoded as a prefix to the data itself as a VInt () + /// (maximum of 2 bytes). + /// Ordinals stores the term ID in sorted order (indexed by docid). In the FIXED_SORTED case, /// the address into the .dat can be computed from the ordinal as - /// Header+ValueSize+(ordinal*ValueSize) because the byte length is fixed. + /// Header+ValueSize+(ordinal*ValueSize) because the byte length is fixed. /// In the VAR_SORTED case, there is double indirection (docid -> ordinal -> address), but /// an additional sentinel ordinal+address is always written (so there are NumValues+1 ordinals). To - /// determine the length, ord+1's address is looked up as well.
  • - ///
  • {@code BYTES_VAR_STRAIGHT BYTES_VAR_STRAIGHT} in contrast to other straight - /// variants uses a .idx file to improve lookup perfromance. In contrast to - /// {@code BYTES_VAR_DEREF BYTES_VAR_DEREF} it doesn't apply deduplication of the document values. - ///
  • - ///
- ///

+ /// determine the length, ord+1's address is looked up as well. + /// in contrast to other straight + /// variants uses a .idx file to improve lookup perfromance. In contrast to + /// it doesn't apply deduplication of the document values. + /// + /// + /// /// Limitations: - ///

    - ///
  • Binary doc values can be at most in length. - ///
- /// @deprecated Only for reading old 4.0 and 4.1 segments + /// + /// Binary doc values can be at most in length. + /// + /// [Obsolete("Only for reading old 4.0 and 4.1 segments")] [DocValuesFormatName("Lucene40")] // LUCENENET specific - using DocValuesFormatName attribute to ensure the default name passed from subclasses is the same as this class name public class Lucene40DocValuesFormat : DocValuesFormat http://git-wip-us.apache.org/repos/asf/lucenenet/blob/27cdd048/src/Lucene.Net/Codecs/Lucene40/Lucene40DocValuesReader.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene40/Lucene40DocValuesReader.cs b/src/Lucene.Net/Codecs/Lucene40/Lucene40DocValuesReader.cs index 54d3511..bca9a0c 100644 --- a/src/Lucene.Net/Codecs/Lucene40/Lucene40DocValuesReader.cs +++ b/src/Lucene.Net/Codecs/Lucene40/Lucene40DocValuesReader.cs @@ -42,9 +42,10 @@ namespace Lucene.Net.Codecs.Lucene40 using SortedSetDocValues = Lucene.Net.Index.SortedSetDocValues; /// - /// Reads the 4.0 format of norms/docvalues - /// @lucene.experimental - /// @deprecated Only for reading old 4.0 and 4.1 segments + /// Reads the 4.0 format of norms/docvalues. + /// + /// @lucene.experimental + /// [Obsolete("Only for reading old 4.0 and 4.1 segments")] internal sealed class Lucene40DocValuesReader : DocValuesProducer { @@ -139,7 +140,7 @@ namespace Lucene.Net.Codecs.Lucene40 } /// - /// NOTE: This was loadVarIntsField() in Lucene + /// NOTE: This was loadVarIntsField() in Lucene. /// private NumericDocValues LoadVarInt32sField(FieldInfo field, IndexInput input) { @@ -243,7 +244,7 @@ namespace Lucene.Net.Codecs.Lucene40 } /// - /// NOTE: This was loadShortField() in Lucene + /// NOTE: This was loadShortField() in Lucene. 
/// private NumericDocValues LoadInt16Field(FieldInfo field, IndexInput input) { @@ -279,7 +280,7 @@ namespace Lucene.Net.Codecs.Lucene40 } /// - /// NOTE: This was loadIntField() in Lucene + /// NOTE: This was loadIntField() in Lucene. /// private NumericDocValues LoadInt32Field(FieldInfo field, IndexInput input) { @@ -315,7 +316,7 @@ namespace Lucene.Net.Codecs.Lucene40 } /// - /// NOTE: This was loadLongField() in Lucene + /// NOTE: This was loadLongField() in Lucene. /// private NumericDocValues LoadInt64Field(FieldInfo field, IndexInput input) { @@ -351,7 +352,7 @@ namespace Lucene.Net.Codecs.Lucene40 } /// - /// NOTE: This was loadFloatField() in Lucene + /// NOTE: This was loadFloatField() in Lucene. /// private NumericDocValues LoadSingleField(FieldInfo field, IndexInput input) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/27cdd048/src/Lucene.Net/Codecs/Lucene40/Lucene40FieldInfosFormat.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene40/Lucene40FieldInfosFormat.cs b/src/Lucene.Net/Codecs/Lucene40/Lucene40FieldInfosFormat.cs index a38dc52..49b5008 100644 --- a/src/Lucene.Net/Codecs/Lucene40/Lucene40FieldInfosFormat.cs +++ b/src/Lucene.Net/Codecs/Lucene40/Lucene40FieldInfosFormat.cs @@ -19,76 +19,74 @@ namespace Lucene.Net.Codecs.Lucene40 * limitations under the License. */ - // javadoc - /// /// Lucene 4.0 Field Infos format. - ///

- ///

Field names are stored in the field info file, with suffix .fnm.

- ///

FieldInfos (.fnm) --> Header,FieldsCount, <FieldName,FieldNumber, - /// FieldBits,DocValuesBits,Attributes> FieldsCount

- ///

Data types: - ///

    - ///
  • Header -->
  • - ///
  • FieldsCount -->
  • - ///
  • FieldName -->
  • - ///
  • FieldBits, DocValuesBits -->
  • - ///
  • FieldNumber -->
  • - ///
  • Attributes -->
  • - ///
- ///

+ /// + /// Field names are stored in the field info file, with suffix .fnm. + /// FieldInfos (.fnm) --> Header,FieldsCount, <FieldName,FieldNumber, + /// FieldBits,DocValuesBits,Attributes> FieldsCount + /// Data types: + /// + /// Header --> CodecHeader () + /// FieldsCount --> VInt () + /// FieldName --> String () + /// FieldBits, DocValuesBits --> Byte () + /// FieldNumber --> VInt () + /// Attributes --> IDictionary<String,String> () + /// + /// /// Field Descriptions: - ///
    - ///
  • FieldsCount: the number of fields in this file.
  • - ///
  • FieldName: name of the field as a UTF-8 String.
  • - ///
  • FieldNumber: the field's number. Note that unlike previous versions of + /// + /// FieldsCount: the number of fields in this file. + /// FieldName: name of the field as a UTF-8 String. + /// FieldNumber: the field's number. Note that unlike previous versions of /// Lucene, the fields are not numbered implicitly by their order in the - /// file, instead explicitly.
  • - ///
  • FieldBits: a byte containing field options. - ///
      - ///
    • The low-order bit is one for indexed fields, and zero for non-indexed - /// fields.
    • - ///
    • The second lowest-order bit is one for fields that have term vectors - /// stored, and zero for fields without term vectors.
    • - ///
    • If the third lowest order-bit is set (0x4), offsets are stored into - /// the postings list in addition to positions.
    • - ///
    • Fourth bit is unused.
    • - ///
    • If the fifth lowest-order bit is set (0x10), norms are omitted for the - /// indexed field.
    • - ///
    • If the sixth lowest-order bit is set (0x20), payloads are stored for the - /// indexed field.
    • - ///
    • If the seventh lowest-order bit is set (0x40), term frequencies and - /// positions omitted for the indexed field.
    • - ///
    • If the eighth lowest-order bit is set (0x80), positions are omitted for the - /// indexed field.
    • - ///
    - ///
  • - ///
  • DocValuesBits: a byte containing per-document value types. The type + /// file, instead explicitly. + /// FieldBits: a byte containing field options. + /// + /// The low-order bit is one for indexed fields, and zero for non-indexed + /// fields. + /// The second lowest-order bit is one for fields that have term vectors + /// stored, and zero for fields without term vectors. + /// If the third lowest order-bit is set (0x4), offsets are stored into + /// the postings list in addition to positions. + /// Fourth bit is unused. + /// If the fifth lowest-order bit is set (0x10), norms are omitted for the + /// indexed field. + /// If the sixth lowest-order bit is set (0x20), payloads are stored for the + /// indexed field. + /// If the seventh lowest-order bit is set (0x40), term frequencies and + /// positions omitted for the indexed field. + /// If the eighth lowest-order bit is set (0x80), positions are omitted for the + /// indexed field. + /// + /// + /// DocValuesBits: a byte containing per-document value types. The type /// recorded as two four-bit integers, with the high-order bits representing - /// norms options, and the low-order bits representing - /// {@code DocValues} options. Each four-bit integer can be decoded as such: - ///
      - ///
    • 0: no DocValues for this field.
    • - ///
    • 1: variable-width signed integers. ({@code Type#VAR_INTS VAR_INTS})
    • - ///
    • 2: 32-bit floating point values. ({@code Type#FLOAT_32 FLOAT_32})
    • - ///
    • 3: 64-bit floating point values. ({@code Type#FLOAT_64 FLOAT_64})
    • - ///
    • 4: fixed-length byte array values. ({@code Type#BYTES_FIXED_STRAIGHT BYTES_FIXED_STRAIGHT})
    • - ///
    • 5: fixed-length dereferenced byte array values. ({@code Type#BYTES_FIXED_DEREF BYTES_FIXED_DEREF})
    • - ///
    • 6: variable-length byte array values. ({@code Type#BYTES_VAR_STRAIGHT BYTES_VAR_STRAIGHT})
    • - ///
    • 7: variable-length dereferenced byte array values. ({@code Type#BYTES_VAR_DEREF BYTES_VAR_DEREF})
    • - ///
    • 8: 16-bit signed integers. ({@code Type#FIXED_INTS_16 FIXED_INTS_16})
    • - ///
    • 9: 32-bit signed integers. ({@code Type#FIXED_INTS_32 FIXED_INTS_32})
    • - ///
    • 10: 64-bit signed integers. ({@code Type#FIXED_INTS_64 FIXED_INTS_64})
    • - ///
    • 11: 8-bit signed integers. ({@code Type#FIXED_INTS_8 FIXED_INTS_8})
    • - ///
    • 12: fixed-length sorted byte array values. ({@code Type#BYTES_FIXED_SORTED BYTES_FIXED_SORTED})
    • - ///
    • 13: variable-length sorted byte array values. ({@code Type#BYTES_VAR_SORTED BYTES_VAR_SORTED})
    • - ///
    - ///
  • - ///
  • Attributes: a key-value map of codec-private attributes.
  • - ///
+ /// norms options, and the low-order bits representing + /// options. Each four-bit integer can be decoded as such: + /// + /// 0: no DocValues for this field. + /// 1: variable-width signed integers. () + /// 2: 32-bit floating point values. () + /// 3: 64-bit floating point values. () + /// 4: fixed-length byte array values. () + /// 5: fixed-length dereferenced byte array values. () + /// 6: variable-length byte array values. () + /// 7: variable-length dereferenced byte array values. () + /// 8: 16-bit signed integers. () + /// 9: 32-bit signed integers. () + /// 10: 64-bit signed integers. () + /// 11: 8-bit signed integers. () + /// 12: fixed-length sorted byte array values. () + /// 13: variable-length sorted byte array values. () + /// + /// + /// Attributes: a key-value map of codec-private attributes. + /// /// - /// @lucene.experimental
- /// @deprecated Only for reading old 4.0 and 4.1 segments + /// @lucene.experimental + /// [Obsolete("Only for reading old 4.0 and 4.1 segments")] public class Lucene40FieldInfosFormat : FieldInfosFormat { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/27cdd048/src/Lucene.Net/Codecs/Lucene40/Lucene40FieldInfosReader.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene40/Lucene40FieldInfosReader.cs b/src/Lucene.Net/Codecs/Lucene40/Lucene40FieldInfosReader.cs index 4805a4b..3f14e3f 100644 --- a/src/Lucene.Net/Codecs/Lucene40/Lucene40FieldInfosReader.cs +++ b/src/Lucene.Net/Codecs/Lucene40/Lucene40FieldInfosReader.cs @@ -34,10 +34,10 @@ namespace Lucene.Net.Codecs.Lucene40 /// /// Lucene 4.0 FieldInfos reader. - /// - /// @lucene.experimental - /// - /// @deprecated Only for reading old 4.0 and 4.1 segments + /// + /// @lucene.experimental + /// + /// [Obsolete("Only for reading old 4.0 and 4.1 segments")] internal class Lucene40FieldInfosReader : FieldInfosReader { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/27cdd048/src/Lucene.Net/Codecs/Lucene40/Lucene40LiveDocsFormat.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene40/Lucene40LiveDocsFormat.cs b/src/Lucene.Net/Codecs/Lucene40/Lucene40LiveDocsFormat.cs index cd81003..f26406e 100644 --- a/src/Lucene.Net/Codecs/Lucene40/Lucene40LiveDocsFormat.cs +++ b/src/Lucene.Net/Codecs/Lucene40/Lucene40LiveDocsFormat.cs @@ -31,35 +31,35 @@ namespace Lucene.Net.Codecs.Lucene40 /// /// Lucene 4.0 Live Documents Format. - ///

- ///

The .del file is optional, and only exists when a segment contains - /// deletions.

- ///

Although per-segment, this file is maintained exterior to compound segment - /// files.

- ///

Deletions (.del) --> Format,Header,ByteCount,BitCount, Bits | DGaps (depending - /// on Format)

- ///
    - ///
  • Format,ByteSize,BitCount -->
  • - ///
  • Bits --> <> ByteCount
  • - ///
  • DGaps --> <DGap,NonOnesByte> NonzeroBytesCount
  • - ///
  • DGap -->
  • - ///
  • NonOnesByte -->
  • - ///
  • Header -->
  • - ///
- ///

Format is 1: indicates cleared DGaps.

- ///

ByteCount indicates the number of bytes in Bits. It is typically - /// (SegSize/8)+1.

- ///

BitCount indicates the number of bits that are currently set in Bits.

- ///

Bits contains one bit for each document indexed. When the bit corresponding + /// + /// The .del file is optional, and only exists when a segment contains + /// deletions. + /// Although per-segment, this file is maintained exterior to compound segment + /// files. + /// Deletions (.del) --> Format,Header,ByteCount,BitCount, Bits | DGaps (depending + /// on Format) + /// + /// Format,ByteSize,BitCount --> Uint32 () + /// Bits --> < Byte () > ByteCount + /// DGaps --> <DGap,NonOnesByte> NonzeroBytesCount + /// DGap --> VInt () + /// NonOnesByte --> Byte() + /// Header --> CodecHeader () + /// + /// Format is 1: indicates cleared DGaps. + /// ByteCount indicates the number of bytes in Bits. It is typically + /// (SegSize/8)+1. + /// BitCount indicates the number of bits that are currently set in Bits. + /// Bits contains one bit for each document indexed. When the bit corresponding /// to a document number is cleared, that document is marked as deleted. Bit ordering /// is from least to most significant. Thus, if Bits contains two bytes, 0x00 and - /// 0x02, then document 9 is marked as alive (not deleted).

- ///

DGaps represents sparse bit-vectors more efficiently than Bits. It is made + /// 0x02, then document 9 is marked as alive (not deleted). + /// DGaps represents sparse bit-vectors more efficiently than Bits. It is made /// of DGaps on indexes of nonOnes bytes in Bits, and the nonOnes bytes themselves. - /// The number of nonOnes bytes in Bits (NonOnesBytesCount) is not stored.

- ///

For example, if there are 8000 bits and only bits 10,12,32 are cleared, DGaps - /// would be used:

- ///

(VInt) 1 , (byte) 20 , (VInt) 3 , (Byte) 1

+ /// The number of nonOnes bytes in Bits (NonOnesBytesCount) is not stored. + /// For example, if there are 8000 bits and only bits 10,12,32 are cleared, DGaps + /// would be used: + /// (VInt) 1 , (byte) 20 , (VInt) 3 , (Byte) 1 ///
public class Lucene40LiveDocsFormat : LiveDocsFormat { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/27cdd048/src/Lucene.Net/Codecs/Lucene40/Lucene40NormsFormat.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene40/Lucene40NormsFormat.cs b/src/Lucene.Net/Codecs/Lucene40/Lucene40NormsFormat.cs index 02d97ed..424d63b 100644 --- a/src/Lucene.Net/Codecs/Lucene40/Lucene40NormsFormat.cs +++ b/src/Lucene.Net/Codecs/Lucene40/Lucene40NormsFormat.cs @@ -25,18 +25,18 @@ namespace Lucene.Net.Codecs.Lucene40 /// /// Lucene 4.0 Norms Format. - ///

+ /// /// Files: - ///

    - ///
  • .nrm.cfs:
  • - ///
  • .nrm.cfe:
  • - ///
+ /// + /// .nrm.cfs: compound container () + /// .nrm.cfe: compound entries () + /// /// Norms are implemented as DocValues, so other than file extension, norms are - /// written exactly the same way as . + /// written exactly the same way as . + /// + /// @lucene.experimental ///
- /// - /// @deprecated Only for reading old 4.0 and 4.1 segments + /// [Obsolete("Only for reading old 4.0 and 4.1 segments")] public class Lucene40NormsFormat : NormsFormat { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/27cdd048/src/Lucene.Net/Codecs/Lucene40/Lucene40PostingsBaseFormat.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene40/Lucene40PostingsBaseFormat.cs b/src/Lucene.Net/Codecs/Lucene40/Lucene40PostingsBaseFormat.cs index ecd85b9..8ecde5b 100644 --- a/src/Lucene.Net/Codecs/Lucene40/Lucene40PostingsBaseFormat.cs +++ b/src/Lucene.Net/Codecs/Lucene40/Lucene40PostingsBaseFormat.cs @@ -23,11 +23,9 @@ namespace Lucene.Net.Codecs.Lucene40 using SegmentWriteState = Lucene.Net.Index.SegmentWriteState; /// - /// Provides a and {@link - /// PostingsWriterBase}. + /// Provides a and + /// . /// - /// @deprecated Only for reading old 4.0 segments - // TODO: should these also be named / looked up via SPI? [Obsolete("Only for reading old 4.0 segments")] public sealed class Lucene40PostingsBaseFormat : PostingsBaseFormat http://git-wip-us.apache.org/repos/asf/lucenenet/blob/27cdd048/src/Lucene.Net/Codecs/Lucene40/Lucene40PostingsFormat.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene40/Lucene40PostingsFormat.cs b/src/Lucene.Net/Codecs/Lucene40/Lucene40PostingsFormat.cs index 440003e..bc0829a 100644 --- a/src/Lucene.Net/Codecs/Lucene40/Lucene40PostingsFormat.cs +++ b/src/Lucene.Net/Codecs/Lucene40/Lucene40PostingsFormat.cs @@ -20,193 +20,187 @@ namespace Lucene.Net.Codecs.Lucene40 * limitations under the License. */ - // javadocs - // javadocs - // javadocs using SegmentReadState = Lucene.Net.Index.SegmentReadState; using SegmentWriteState = Lucene.Net.Index.SegmentWriteState; - // javadocs - // javadocs - /// /// Lucene 4.0 Postings format. - ///

+ /// /// Files: - ///

- ///

- ///

+ /// + /// .tim: Term Dictionary + /// .tip: Term Index + /// .frq: Frequencies + /// .prx: Positions + /// + /// + /// /// ///

Term Dictionary

/// - ///

The .tim file contains the list of terms in each + /// The .tim file contains the list of terms in each /// field along with per-term statistics (such as docfreq) /// and pointers to the frequencies, positions and /// skip data in the .frq and .prx files. - /// See for more details on the format. - ///

+ /// See for more details on the format. + /// /// - ///

NOTE: The term dictionary can plug into different postings implementations: + /// NOTE: The term dictionary can plug into different postings implementations: /// the postings writer/reader are actually responsible for encoding - /// and decoding the Postings Metadata and Term Metadata sections described here:

- ///
    - ///
  • Postings Metadata --> Header, SkipInterval, MaxSkipLevels, SkipMinimum
  • - ///
  • Term Metadata --> FreqDelta, SkipDelta?, ProxDelta? - ///
  • Header -->
  • - ///
  • SkipInterval,MaxSkipLevels,SkipMinimum -->
  • - ///
  • SkipDelta,FreqDelta,ProxDelta -->
  • - ///
- ///

Notes:

- ///
    - ///
  • Header is a storing the version information - /// for the postings.
  • - ///
  • SkipInterval is the fraction of TermDocs stored in skip tables. It is used to accelerate - /// . Larger values result in smaller indexes, greater + /// and decoding the Postings Metadata and Term Metadata sections described here: + /// + /// Postings Metadata --> Header, SkipInterval, MaxSkipLevels, SkipMinimum + /// Term Metadata --> FreqDelta, SkipDelta?, ProxDelta? + /// Header --> CodecHeader () + /// SkipInterval,MaxSkipLevels,SkipMinimum --> Uint32 () + /// SkipDelta,FreqDelta,ProxDelta --> VLong () + /// + /// Notes: + /// + /// Header is a CodecHeader () storing the version information + /// for the postings. + /// SkipInterval is the fraction of TermDocs stored in skip tables. It is used to accelerate + /// . Larger values result in smaller indexes, greater /// acceleration, but fewer accelerable cases, while smaller values result in bigger indexes, /// less acceleration (in case of a small value for MaxSkipLevels) and more accelerable cases. - ///
  • - ///
  • MaxSkipLevels is the max. number of skip levels stored for each term in the .frq file. A + /// + /// MaxSkipLevels is the max. number of skip levels stored for each term in the .frq file. A /// low value results in smaller indexes but less acceleration, a larger value results in /// slightly larger indexes but greater acceleration. See format of .frq file for more - /// information about skip levels.
  • - ///
  • SkipMinimum is the minimum document frequency a term must have in order to write any - /// skip data at all.
  • - ///
  • FreqDelta determines the position of this term's TermFreqs within the .frq + /// information about skip levels. + /// SkipMinimum is the minimum document frequency a term must have in order to write any + /// skip data at all. + /// FreqDelta determines the position of this term's TermFreqs within the .frq /// file. In particular, it is the difference between the position of this term's /// data in that file and the position of the previous term's data (or zero, for - /// the first term in the block).
  • - ///
  • ProxDelta determines the position of this term's TermPositions within the + /// the first term in the block). + /// ProxDelta determines the position of this term's TermPositions within the /// .prx file. In particular, it is the difference between the position of this /// term's data in that file and the position of the previous term's data (or zero, /// for the first term in the block). For fields that omit position data, this will - /// be 0 since prox information is not stored.
  • - ///
  • SkipDelta determines the position of this term's SkipData within the .frq + /// be 0 since prox information is not stored. + /// SkipDelta determines the position of this term's SkipData within the .frq /// file. In particular, it is the number of bytes after TermFreqs that the /// SkipData starts. In other words, it is the length of the TermFreq data. - /// SkipDelta is only stored if DocFreq is not smaller than SkipMinimum.
  • - ///
+ /// SkipDelta is only stored if DocFreq is not smaller than SkipMinimum. + /// /// ///

Term Index

- ///

The .tip file contains an index into the term dictionary, so that it can be - /// accessed randomly. See for more details on the format.

+ /// The .tip file contains an index into the term dictionary, so that it can be + /// accessed randomly. See for more details on the format. /// ///

Frequencies

- ///

The .frq file contains the lists of documents which contain each term, along + /// The .frq file contains the lists of documents which contain each term, along /// with the frequency of the term in that document (except when frequencies are - /// omitted: ).

- ///
    - ///
  • FreqFile (.frq) --> Header, <TermFreqs, SkipData?> TermCount
  • - ///
  • Header -->
  • - ///
  • TermFreqs --> <TermFreq> DocFreq
  • - ///
  • TermFreq --> DocDelta[, Freq?]
  • - ///
  • SkipData --> <<SkipLevelLength, SkipLevel> - /// NumSkipLevels-1, SkipLevel> <SkipDatum>
  • - ///
  • SkipLevel --> <SkipDatum> DocFreq/(SkipInterval^(Level + - /// 1))
  • - ///
  • SkipDatum --> - /// DocSkip,PayloadLength?,OffsetLength?,FreqSkip,ProxSkip,SkipChildLevelPointer?
  • - ///
  • DocDelta,Freq,DocSkip,PayloadLength,OffsetLength,FreqSkip,ProxSkip -->
  • - ///
  • SkipChildLevelPointer -->
  • - ///
- ///

TermFreqs are ordered by term (the term is implicit, from the term dictionary).

- ///

TermFreq entries are ordered by increasing document number.

- ///

DocDelta: if frequencies are indexed, this determines both the document + /// omitted: ). + /// + /// FreqFile (.frq) --> Header, <TermFreqs, SkipData?> TermCount + /// Header --> CodecHeader () + /// TermFreqs --> <TermFreq> DocFreq + /// TermFreq --> DocDelta[, Freq?] + /// SkipData --> <<SkipLevelLength, SkipLevel> + /// NumSkipLevels-1, SkipLevel> <SkipDatum> + /// SkipLevel --> <SkipDatum> DocFreq/(SkipInterval^(Level + + /// 1)) + /// SkipDatum --> + /// DocSkip,PayloadLength?,OffsetLength?,FreqSkip,ProxSkip,SkipChildLevelPointer? + /// DocDelta,Freq,DocSkip,PayloadLength,OffsetLength,FreqSkip,ProxSkip --> VInt () + /// SkipChildLevelPointer --> VLong () + /// + /// TermFreqs are ordered by term (the term is implicit, from the term dictionary). + /// TermFreq entries are ordered by increasing document number. + /// DocDelta: if frequencies are indexed, this determines both the document /// number and the frequency. In particular, DocDelta/2 is the difference between /// this document number and the previous document number (or zero when this is the /// first document in a TermFreqs). When DocDelta is odd, the frequency is one. /// When DocDelta is even, the frequency is read as another VInt. If frequencies /// are omitted, DocDelta contains the gap (not multiplied by 2) between document - /// numbers and no frequency information is stored.

- ///

For example, the TermFreqs for a term which occurs once in document seven + /// numbers and no frequency information is stored. + /// For example, the TermFreqs for a term which occurs once in document seven /// and three times in document eleven, with frequencies indexed, would be the - /// following sequence of VInts:

- ///

15, 8, 3

- ///

If frequencies were omitted () it would be this - /// sequence of VInts instead:

- ///

7,4

- ///

DocSkip records the document number before every SkipInterval th + /// following sequence of VInts: + /// 15, 8, 3 + /// If frequencies were omitted () it would be this + /// sequence of VInts instead: + /// 7,4 + /// DocSkip records the document number before every SkipInterval th /// document in TermFreqs. If payloads and offsets are disabled for the term's field, then /// DocSkip represents the difference from the previous value in the sequence. If /// payloads and/or offsets are enabled for the term's field, then DocSkip/2 represents the /// difference from the previous value in the sequence. In this case when /// DocSkip is odd, then PayloadLength and/or OffsetLength are stored indicating the length of - /// the last payload/offset before the SkipIntervalth document in TermPositions.

- ///

PayloadLength indicates the length of the last payload.

- ///

OffsetLength indicates the length of the last offset (endOffset-startOffset).

- ///

+ /// the last payload/offset before the SkipIntervalth document in TermPositions. + /// PayloadLength indicates the length of the last payload. + /// OffsetLength indicates the length of the last offset (endOffset-startOffset). + /// /// FreqSkip and ProxSkip record the position of every SkipInterval th /// entry in FreqFile and ProxFile, respectively. File positions are relative to /// the start of TermFreqs and Positions, to the previous SkipDatum in the - /// sequence.

- ///

For example, if DocFreq=35 and SkipInterval=16, then there are two SkipData + /// sequence. + /// For example, if DocFreq=35 and SkipInterval=16, then there are two SkipData /// entries, containing the 15 th and 31 st document numbers /// in TermFreqs. The first FreqSkip names the number of bytes after the beginning /// of TermFreqs that the 16 th SkipDatum starts, and the second the /// number of bytes after that that the 32 nd starts. The first ProxSkip /// names the number of bytes after the beginning of Positions that the 16 /// th SkipDatum starts, and the second the number of bytes after that - /// that the 32 nd starts.

- ///

Each term can have multiple skip levels. The amount of skip levels for a + /// that the 32 nd starts. + /// Each term can have multiple skip levels. The amount of skip levels for a /// term is NumSkipLevels = Min(MaxSkipLevels, /// floor(log(DocFreq)/log(SkipInterval))). The number of SkipData entries for a /// skip level is DocFreq/(SkipInterval^(Level + 1)), whereas the lowest skip level - /// is Level=0.
+ /// is Level=0. + /// /// Example: SkipInterval = 4, MaxSkipLevels = 2, DocFreq = 35. Then skip level 0 /// has 8 SkipData entries, containing the 3rd, 7th, /// 11th, 15th, 19th, 23rd, /// 27th, and 31st document numbers in TermFreqs. Skip level /// 1 has 2 SkipData entries, containing the 15th and 31st - /// document numbers in TermFreqs.
+ /// document numbers in TermFreqs. + /// /// The SkipData entries on all upper levels > 0 contain a SkipChildLevelPointer /// referencing the corresponding SkipData entry in level-1. In the example, /// entry 15 on level 1 has a pointer to entry 15 on level 0 and entry 31 on level 1 has a /// pointer to entry 31 on level 0. - ///

+ /// /// ///

Positions

- ///

The .prx file contains the lists of positions that each term occurs at + /// The .prx file contains the lists of positions that each term occurs at /// within documents. Note that fields omitting positional data do not store /// anything into this file, and if all fields in the index omit positional data - /// then the .prx file will not exist.

- ///
    - ///
  • ProxFile (.prx) --> Header, <TermPositions> TermCount
  • - ///
  • Header -->
  • - ///
  • TermPositions --> <Positions> DocFreq
  • - ///
  • Positions --> <PositionDelta,PayloadLength?,OffsetDelta?,OffsetLength?,PayloadData?> Freq
  • - ///
  • PositionDelta,OffsetDelta,OffsetLength,PayloadLength -->
  • - ///
  • PayloadData --> PayloadLength
  • - ///
- ///

TermPositions are ordered by term (the term is implicit, from the term dictionary).

- ///

Positions entries are ordered by increasing document number (the document - /// number is implicit from the .frq file).

- ///

PositionDelta is, if payloads are disabled for the term's field, the + /// then the .prx file will not exist. + /// + /// ProxFile (.prx) --> Header, <TermPositions> TermCount + /// Header --> CodecHeader () + /// TermPositions --> <Positions> DocFreq + /// Positions --> <PositionDelta,PayloadLength?,OffsetDelta?,OffsetLength?,PayloadData?> Freq + /// PositionDelta,OffsetDelta,OffsetLength,PayloadLength --> VInt () + /// PayloadData --> byte () PayloadLength + /// + /// TermPositions are ordered by term (the term is implicit, from the term dictionary). + /// Positions entries are ordered by increasing document number (the document + /// number is implicit from the .frq file). + /// PositionDelta is, if payloads are disabled for the term's field, the /// difference between the position of the current occurrence in the document and /// the previous occurrence (or zero, if this is the first occurrence in this /// document). If payloads are enabled for the term's field, then PositionDelta/2 /// is the difference between the current and the previous position. If payloads /// are enabled and PositionDelta is odd, then PayloadLength is stored, indicating - /// the length of the payload at the current term position.

- ///

For example, the TermPositions for a term which occurs as the fourth term in + /// the length of the payload at the current term position. + /// For example, the TermPositions for a term which occurs as the fourth term in /// one document, and as the fifth and ninth term in a subsequent document, would - /// be the following sequence of VInts (payloads disabled):

- ///

4, 5, 4

- ///

PayloadData is metadata associated with the current term position. If + /// be the following sequence of VInts (payloads disabled): + /// 4, 5, 4 + /// PayloadData is metadata associated with the current term position. If /// PayloadLength is stored at the current position, then it indicates the length /// of this payload. If PayloadLength is not stored, then this payload has the same - /// length as the payload at the previous position.

- ///

OffsetDelta/2 is the difference between this position's startOffset from the + /// length as the payload at the previous position. + /// OffsetDelta/2 is the difference between this position's startOffset from the /// previous occurrence (or zero, if this is the first occurrence in this document). /// If OffsetDelta is odd, then the length (endOffset-startOffset) differs from the /// previous occurrence and an OffsetLength follows. Offset data is only written for - /// .

+ /// . ///
- /// @deprecated Only for reading old 4.0 segments - // TODO: this class could be created by wrapping // BlockTreeTermsDict around Lucene40PostingsBaseFormat; ie // we should not duplicate the code from that class here: @@ -215,16 +209,16 @@ namespace Lucene.Net.Codecs.Lucene40 public class Lucene40PostingsFormat : PostingsFormat { /// - /// minimum items (terms or sub-blocks) per block for BlockTree + /// Minimum items (terms or sub-blocks) per block for BlockTree. protected readonly int m_minBlockSize; /// - /// maximum items (terms or sub-blocks) per block for BlockTree + /// Maximum items (terms or sub-blocks) per block for BlockTree. protected readonly int m_maxBlockSize; /// - /// Creates {@code Lucene40PostingsFormat} with default - /// settings. + /// Creates with default + /// settings. /// public Lucene40PostingsFormat() : this(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE) @@ -232,10 +226,10 @@ namespace Lucene.Net.Codecs.Lucene40 } /// - /// Creates {@code Lucene40PostingsFormat} with custom - /// values for {@code minBlockSize} and {@code - /// maxBlockSize} passed to block terms dictionary. - /// + /// Creates with custom + /// values for and + /// passed to block terms dictionary. + /// private Lucene40PostingsFormat(int minBlockSize, int maxBlockSize) : base() { @@ -270,11 +264,11 @@ namespace Lucene.Net.Codecs.Lucene40 } /// - /// Extension of freq postings file + /// Extension of freq postings file. internal static readonly string FREQ_EXTENSION = "frq"; /// - /// Extension of prox postings file + /// Extension of prox postings file. 
internal static readonly string PROX_EXTENSION = "prx"; public override string ToString() http://git-wip-us.apache.org/repos/asf/lucenenet/blob/27cdd048/src/Lucene.Net/Codecs/Lucene40/Lucene40PostingsReader.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene40/Lucene40PostingsReader.cs b/src/Lucene.Net/Codecs/Lucene40/Lucene40PostingsReader.cs index 29516db..12fb35d 100644 --- a/src/Lucene.Net/Codecs/Lucene40/Lucene40PostingsReader.cs +++ b/src/Lucene.Net/Codecs/Lucene40/Lucene40PostingsReader.cs @@ -42,8 +42,7 @@ namespace Lucene.Net.Codecs.Lucene40 /// Concrete class that reads the 4.0 frq/prox /// postings format. /// - /// - /// @deprecated Only for reading old 4.0 segments + /// [Obsolete("Only for reading old 4.0 segments")] public class Lucene40PostingsReader : PostingsReaderBase { @@ -958,8 +957,8 @@ namespace Lucene.Net.Codecs.Lucene40 } /// - /// Returns the payload at this position, or null if no - /// payload was indexed. + /// Returns the payload at this position, or null if no + /// payload was indexed. /// public override BytesRef GetPayload() { @@ -1263,8 +1262,8 @@ namespace Lucene.Net.Codecs.Lucene40 } /// - /// Returns the payload at this position, or null if no - /// payload was indexed. + /// Returns the payload at this position, or null if no + /// payload was indexed. 
/// public override BytesRef GetPayload() { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/27cdd048/src/Lucene.Net/Codecs/Lucene40/Lucene40SegmentInfoFormat.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene40/Lucene40SegmentInfoFormat.cs b/src/Lucene.Net/Codecs/Lucene40/Lucene40SegmentInfoFormat.cs index c3ce3c9..8fb9084 100644 --- a/src/Lucene.Net/Codecs/Lucene40/Lucene40SegmentInfoFormat.cs +++ b/src/Lucene.Net/Codecs/Lucene40/Lucene40SegmentInfoFormat.cs @@ -19,54 +19,49 @@ namespace Lucene.Net.Codecs.Lucene40 * limitations under the License. */ - // javadocs - using SegmentInfo = Lucene.Net.Index.SegmentInfo; // javadocs - - // javadocs - // javadocs + using SegmentInfo = Lucene.Net.Index.SegmentInfo; /// /// Lucene 4.0 Segment info format. - ///

+ /// /// Files: - ///

    - ///
  • .si: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Attributes, Files - ///
- ///

+ /// + /// .si: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Attributes, Files + /// + /// /// Data types: - ///

- ///

    - ///
  • Header -->
  • - ///
  • SegSize -->
  • - ///
  • SegVersion -->
  • - ///
  • Files -->
  • - ///
  • Diagnostics, Attributes -->
  • - ///
  • IsCompoundFile -->
  • - ///
- ///

+ /// + /// + /// Header --> CodecHeader () + /// SegSize --> Int32 () + /// SegVersion --> String () + /// Files --> ISet<String> () + /// Diagnostics, Attributes --> IDictionary<String,String> () + /// IsCompoundFile --> Int8 () + /// + /// /// Field Descriptions: - ///

- ///

    - ///
  • SegVersion is the code version that created the segment.
  • - ///
  • SegSize is the number of documents contained in the segment index.
  • - ///
  • IsCompoundFile records whether the segment is written as a compound file or + /// + /// + /// SegVersion is the code version that created the segment. + /// SegSize is the number of documents contained in the segment index. + /// IsCompoundFile records whether the segment is written as a compound file or /// not. If this is -1, the segment is not a compound file. If it is 1, the segment - /// is a compound file.
  • - ///
  • Checksum contains the CRC32 checksum of all bytes in the segments_N file up - /// until the checksum. this is used to verify integrity of the file on opening the - /// index.
  • - ///
  • The Diagnostics Map is privately written by , as a debugging aid, + /// is a compound file. + /// Checksum contains the CRC32 checksum of all bytes in the segments_N file up + /// until the checksum. This is used to verify integrity of the file on opening the + /// index. + /// The Diagnostics Map is privately written by , as a debugging aid, /// for each segment it creates. It includes metadata like the current Lucene - /// version, OS, Java version, why the segment was created (merge, flush, - /// addIndexes), etc.
  • - ///
  • Attributes: a key-value map of codec-private attributes.
  • - ///
  • Files is a list of files referred to by this segment.
  • - ///
- ///

+ /// version, OS, .NET/Java version, why the segment was created (merge, flush, + /// addIndexes), etc. + /// Attributes: a key-value map of codec-private attributes. + /// Files is a list of files referred to by this segment. + /// + /// + /// @lucene.experimental ///
- /// - /// @deprecated Only for reading old 4.0-4.5 segments, and supporting IndexWriter.addIndexes + /// [Obsolete("Only for reading old 4.0-4.5 segments, and supporting IndexWriter.AddIndexes()")] public class Lucene40SegmentInfoFormat : SegmentInfoFormat { @@ -98,7 +93,7 @@ namespace Lucene.Net.Codecs.Lucene40 } /// - /// File extension used to store . + /// File extension used to store . public readonly static string SI_EXTENSION = "si"; internal readonly static string CODEC_NAME = "Lucene40SegmentInfo"; http://git-wip-us.apache.org/repos/asf/lucenenet/blob/27cdd048/src/Lucene.Net/Codecs/Lucene40/Lucene40SegmentInfoReader.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene40/Lucene40SegmentInfoReader.cs b/src/Lucene.Net/Codecs/Lucene40/Lucene40SegmentInfoReader.cs index aec213d..07b728f 100644 --- a/src/Lucene.Net/Codecs/Lucene40/Lucene40SegmentInfoReader.cs +++ b/src/Lucene.Net/Codecs/Lucene40/Lucene40SegmentInfoReader.cs @@ -29,11 +29,11 @@ namespace Lucene.Net.Codecs.Lucene40 using SegmentInfo = Lucene.Net.Index.SegmentInfo; /// - /// Lucene 4.0 implementation of . + /// Lucene 4.0 implementation of . 
+ /// + /// @lucene.experimental /// - /// - /// @deprecated Only for reading old 4.0-4.5 segments + /// [Obsolete("Only for reading old 4.0-4.5 segments")] public class Lucene40SegmentInfoReader : SegmentInfoReader { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/27cdd048/src/Lucene.Net/Codecs/Lucene40/Lucene40SegmentInfoWriter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene40/Lucene40SegmentInfoWriter.cs b/src/Lucene.Net/Codecs/Lucene40/Lucene40SegmentInfoWriter.cs index a2d2925..ef8807f 100644 --- a/src/Lucene.Net/Codecs/Lucene40/Lucene40SegmentInfoWriter.cs +++ b/src/Lucene.Net/Codecs/Lucene40/Lucene40SegmentInfoWriter.cs @@ -29,10 +29,11 @@ namespace Lucene.Net.Codecs.Lucene40 using SegmentInfo = Lucene.Net.Index.SegmentInfo; /// - /// Lucene 4.0 implementation of . + /// Lucene 4.0 implementation of . + /// + /// @lucene.experimental /// - /// + /// [Obsolete] public class Lucene40SegmentInfoWriter : SegmentInfoWriter { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/27cdd048/src/Lucene.Net/Codecs/Lucene40/Lucene40SkipListReader.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net/Codecs/Lucene40/Lucene40SkipListReader.cs b/src/Lucene.Net/Codecs/Lucene40/Lucene40SkipListReader.cs index cacafe5..1dcec33 100644 --- a/src/Lucene.Net/Codecs/Lucene40/Lucene40SkipListReader.cs +++ b/src/Lucene.Net/Codecs/Lucene40/Lucene40SkipListReader.cs @@ -26,8 +26,7 @@ namespace Lucene.Net.Codecs.Lucene40 /// Implements the skip list reader for the 4.0 posting list format /// that stores positions and payloads. /// - /// - /// @deprecated Only for reading old 4.0 segments + /// [Obsolete("Only for reading old 4.0 segments")] public class Lucene40SkipListReader : MultiLevelSkipListReader { @@ -72,7 +71,7 @@ namespace Lucene.Net.Codecs.Lucene40 /// /// Returns the freq pointer of the doc to which the last call of - /// has skipped. + /// has skipped. 
/// public virtual long FreqPointer { @@ -84,7 +83,7 @@ namespace Lucene.Net.Codecs.Lucene40 /// /// Returns the prox pointer of the doc to which the last call of - /// has skipped. + /// has skipped. /// public virtual long ProxPointer { @@ -96,7 +95,7 @@ namespace Lucene.Net.Codecs.Lucene40 /// /// Returns the payload length of the payload stored just before - /// the doc to which the last call of + /// the doc to which the last call of /// has skipped. /// public virtual int PayloadLength @@ -109,7 +108,7 @@ namespace Lucene.Net.Codecs.Lucene40 /// /// Returns the offset length (endOffset-startOffset) of the position stored just before - /// the doc to which the last call of + /// the doc to which the last call of /// has skipped. /// public virtual int OffsetLength