Return-Path: X-Original-To: apmail-lucenenet-commits-archive@www.apache.org Delivered-To: apmail-lucenenet-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id B7DBA17F84 for ; Sun, 28 Sep 2014 08:50:05 +0000 (UTC) Received: (qmail 29295 invoked by uid 500); 28 Sep 2014 08:50:05 -0000 Delivered-To: apmail-lucenenet-commits-archive@lucenenet.apache.org Received: (qmail 29247 invoked by uid 500); 28 Sep 2014 08:50:05 -0000 Mailing-List: contact commits-help@lucenenet.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: lucene-net-dev@lucenenet.apache.org Delivered-To: mailing list commits@lucenenet.apache.org Received: (qmail 29236 invoked by uid 99); 28 Sep 2014 08:50:05 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 28 Sep 2014 08:50:05 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id 2A7CA9B933C; Sun, 28 Sep 2014 08:50:05 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit From: pnasser@apache.org To: commits@lucenenet.apache.org Date: Sun, 28 Sep 2014 08:50:05 -0000 Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: [01/10] Lucene.Net.Codes/Sep fully ported, work done on SimpleText and Memory as well Repository: lucenenet Updated Branches: refs/heads/master 3b226e8bf -> d852d5b04 http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d852d5b0/src/Lucene.Net.Codecs/Sep/SepSkipListWriter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Codecs/Sep/SepSkipListWriter.cs b/src/Lucene.Net.Codecs/Sep/SepSkipListWriter.cs index b4ba4cf..605f7bd 100644 --- a/src/Lucene.Net.Codecs/Sep/SepSkipListWriter.cs +++ b/src/Lucene.Net.Codecs/Sep/SepSkipListWriter.cs @@ -1,6 +1,4 @@ -package codecs.sep; - -/* +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,184 +15,209 @@ package codecs.sep; * limitations under the License. */ -import java.io.IOException; -import java.util.Arrays; - -import store.IndexOutput; -import codecs.MultiLevelSkipListWriter; -import index.FieldInfo.IndexOptions; - -// TODO: -- skip data should somehow be more local to the -// particular stream (doc, freq, pos, payload) - -/** - * Implements the skip list writer for the default posting list format - * that stores positions and payloads. - * - * @lucene.experimental - */ -class SepSkipListWriter extends MultiLevelSkipListWriter { - private int[] lastSkipDoc; - private int[] lastSkipPayloadLength; - private long[] lastSkipPayloadPointer; - - private IntIndexOutput.Index[] docIndex; - private IntIndexOutput.Index[] freqIndex; - private IntIndexOutput.Index[] posIndex; - - private IntIndexOutput freqOutput; - // TODO: -- private again - IntIndexOutput posOutput; - // TODO: -- private again - IndexOutput payloadOutput; - - private int curDoc; - private bool curStorePayloads; - private int curPayloadLength; - private long curPayloadPointer; - - SepSkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount, - IntIndexOutput freqOutput, - IntIndexOutput docOutput, - IntIndexOutput posOutput, - IndexOutput payloadOutput) - { - super(skipInterval, numberOfSkipLevels, docCount); - - this.freqOutput = freqOutput; - this.posOutput = posOutput; - this.payloadOutput = payloadOutput; - - lastSkipDoc = new int[numberOfSkipLevels]; - lastSkipPayloadLength = new int[numberOfSkipLevels]; - // TODO: -- also cutover normal IndexOutput to use getIndex()? - lastSkipPayloadPointer = new long[numberOfSkipLevels]; - - freqIndex = new IntIndexOutput.Index[numberOfSkipLevels]; - docIndex = new IntIndexOutput.Index[numberOfSkipLevels]; - posIndex = new IntIndexOutput.Index[numberOfSkipLevels]; - - for(int i=0;i DocSkip, FreqSkip, ProxSkip - // DocSkip,FreqSkip,ProxSkip --> VInt - // DocSkip records the document number before every SkipInterval th document in TermFreqs. - // Document numbers are represented as differences from the previous value in the sequence. - // Case 2: current field stores payloads - // SkipDatum --> DocSkip, PayloadLength?, FreqSkip,ProxSkip - // DocSkip,FreqSkip,ProxSkip --> VInt - // PayloadLength --> VInt - // In this case DocSkip/2 is the difference between - // the current and the previous value. If DocSkip - // is odd, then a PayloadLength encoded as VInt follows, - // if DocSkip is even, then it is assumed that the - // current payload length equals the length at the previous - // skip point - - Debug.Assert( indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !curStorePayloads; - - if (curStorePayloads) { - int delta = curDoc - lastSkipDoc[level]; - if (curPayloadLength == lastSkipPayloadLength[level]) { - // the current payload length equals the length at the previous skip point, - // so we don't store the length again - skipBuffer.writeVInt(delta << 1); - } else { - // the payload length is different from the previous one. We shift the DocSkip, - // set the lowest bit and store the current payload length as VInt. - skipBuffer.writeVInt(delta << 1 | 1); - skipBuffer.writeVInt(curPayloadLength); - lastSkipPayloadLength[level] = curPayloadLength; - } - } else { - // current field does not store payloads - skipBuffer.writeVInt(curDoc - lastSkipDoc[level]); - } - - if (indexOptions != IndexOptions.DOCS_ONLY) { - freqIndex[level].mark(); - freqIndex[level].write(skipBuffer, false); - } - docIndex[level].mark(); - docIndex[level].write(skipBuffer, false); - if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { - posIndex[level].mark(); - posIndex[level].write(skipBuffer, false); - if (curStorePayloads) { - skipBuffer.writeVInt((int) (curPayloadPointer - lastSkipPayloadPointer[level])); - } - } - - lastSkipDoc[level] = curDoc; - lastSkipPayloadPointer[level] = curPayloadPointer; - } -} +namespace Lucene.Net.Codecs.Sep +{ + using System.Diagnostics; + using Index; + using Store; + using Support; + + /// + /// Implements the skip list writer for the default posting list format + /// that stores positions and payloads. + /// + /// @lucene.experimental + /// + /// + /// TODO: -- skip data should somehow be more local to the particular stream + /// (doc, freq, pos, payload) + /// + internal class SepSkipListWriter : MultiLevelSkipListWriter + { + private readonly int[] _lastSkipDoc; + private readonly int[] _lastSkipPayloadLength; + private readonly long[] _lastSkipPayloadPointer; + private FieldInfo.IndexOptions _indexOptions; + + private readonly IntIndexOutputIndex[] _docIndex; + private readonly IntIndexOutputIndex[] _freqIndex; + private readonly IntIndexOutputIndex[] _posIndex; + + private readonly IntIndexOutput _freqOutput; + private IntIndexOutput _posOutput; + private IndexOutput _payloadOutput; + + private int _curDoc; + private bool _curStorePayloads; + private int _curPayloadLength; + private long _curPayloadPointer; + + internal SepSkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount, IntIndexOutput freqOutput, + IntIndexOutput docOutput, IntIndexOutput posOutput, IndexOutput payloadOutput) + : base(skipInterval, numberOfSkipLevels, docCount) + { + + _freqOutput = freqOutput; + _posOutput = posOutput; + _payloadOutput = payloadOutput; + + _lastSkipDoc = new int[numberOfSkipLevels]; + _lastSkipPayloadLength = new int[numberOfSkipLevels]; + // TODO: -- also cutover normal IndexOutput to use getIndex()? + _lastSkipPayloadPointer = new long[numberOfSkipLevels]; + + _freqIndex = new IntIndexOutputIndex[numberOfSkipLevels]; + _docIndex = new IntIndexOutputIndex[numberOfSkipLevels]; + _posIndex = new IntIndexOutputIndex[numberOfSkipLevels]; + + for (var i = 0; i < numberOfSkipLevels; i++) + { + if (freqOutput != null) + { + _freqIndex[i] = freqOutput.Index(); + } + _docIndex[i] = docOutput.Index(); + if (posOutput != null) + { + _posIndex[i] = posOutput.Index(); + } + } + } + + internal virtual FieldInfo.IndexOptions IndexOptions + { + set { _indexOptions = value; } + } + + internal virtual IntIndexOutput PosOutput + { + set + { + _posOutput = value; + for (var i = 0; i < NumberOfSkipLevels; i++) + { + _posIndex[i] = value.Index(); + } + } + } + + internal virtual IndexOutput PayloadOutput + { + set { _payloadOutput = value; } + } + + /// + /// Sets the values for the current skip data. + /// Called @ every index interval (every 128th (by default) doc) + /// + internal virtual void SetSkipData(int doc, bool storePayloads, int payloadLength) + { + _curDoc = doc; + _curStorePayloads = storePayloads; + _curPayloadLength = payloadLength; + if (_payloadOutput != null) + { + _curPayloadPointer = _payloadOutput.FilePointer; + } + } + + /// + /// Called @ start of new term + /// + protected internal virtual void ResetSkip(IntIndexOutputIndex topDocIndex, IntIndexOutputIndex topFreqIndex, + IntIndexOutputIndex topPosIndex) + { + base.ResetSkip(); + + Arrays.Fill(_lastSkipDoc, 0); + Arrays.Fill(_lastSkipPayloadLength, -1); // we don't have to write the first length in the skip list + for (int i = 0; i < NumberOfSkipLevels; i++) + { + _docIndex[i].CopyFrom(topDocIndex, true); + if (_freqOutput != null) + { + _freqIndex[i].CopyFrom(topFreqIndex, true); + } + if (_posOutput != null) + { + _posIndex[i].CopyFrom(topPosIndex, true); + } + } + if (_payloadOutput != null) + { + Arrays.Fill(_lastSkipPayloadPointer, _payloadOutput.FilePointer); + } + } + + protected override void WriteSkipData(int level, IndexOutput skipBuffer) + { + // To efficiently store payloads in the posting lists we do not store the length of + // every payload. Instead we omit the length for a payload if the previous payload had + // the same length. + // However, in order to support skipping the payload length at every skip point must be known. + // So we use the same length encoding that we use for the posting lists for the skip data as well: + // Case 1: current field does not store payloads + // SkipDatum --> DocSkip, FreqSkip, ProxSkip + // DocSkip,FreqSkip,ProxSkip --> VInt + // DocSkip records the document number before every SkipInterval th document in TermFreqs. + // Document numbers are represented as differences from the previous value in the sequence. + // Case 2: current field stores payloads + // SkipDatum --> DocSkip, PayloadLength?, FreqSkip,ProxSkip + // DocSkip,FreqSkip,ProxSkip --> VInt + // PayloadLength --> VInt + // In this case DocSkip/2 is the difference between + // the current and the previous value. If DocSkip + // is odd, then a PayloadLength encoded as VInt follows, + // if DocSkip is even, then it is assumed that the + // current payload length equals the length at the previous + // skip point + + Debug.Assert(_indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !_curStorePayloads); + + if (_curStorePayloads) + { + int delta = _curDoc - _lastSkipDoc[level]; + if (_curPayloadLength == _lastSkipPayloadLength[level]) + { + // the current payload length equals the length at the previous skip point, + // so we don't store the length again + skipBuffer.WriteVInt(delta << 1); + } + else + { + // the payload length is different from the previous one. We shift the DocSkip, + // set the lowest bit and store the current payload length as VInt. + skipBuffer.WriteVInt(delta << 1 | 1); + skipBuffer.WriteVInt(_curPayloadLength); + _lastSkipPayloadLength[level] = _curPayloadLength; + } + } + else + { + // current field does not store payloads + skipBuffer.WriteVInt(_curDoc - _lastSkipDoc[level]); + } + + if (_indexOptions != FieldInfo.IndexOptions.DOCS_ONLY) + { + _freqIndex[level].Mark(); + _freqIndex[level].Write(skipBuffer, false); + } + _docIndex[level].Mark(); + _docIndex[level].Write(skipBuffer, false); + if (_indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) + { + _posIndex[level].Mark(); + _posIndex[level].Write(skipBuffer, false); + if (_curStorePayloads) + { + skipBuffer.WriteVInt((int) (_curPayloadPointer - _lastSkipPayloadPointer[level])); + } + } + + _lastSkipDoc[level] = _curDoc; + _lastSkipPayloadPointer[level] = _curPayloadPointer; + } + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d852d5b0/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesReader.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesReader.cs b/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesReader.cs index 64e0b89..4ac5623 100644 --- a/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesReader.cs +++ b/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesReader.cs @@ -258,12 +258,10 @@ namespace Lucene.Net.Codecs.SimpleText private Bits GetBinaryDocsWithField(FieldInfo fieldInfo) { var field = FIELDS[fieldInfo.Name]; - var @in = (IndexInput)DATA.Clone(); - BytesRef scratch = new BytesRef(); + var input = (IndexInput)DATA.Clone(); + var scratch = new BytesRef(); - DecimalFormat decoder = new DecimalFormat(field.Pattern, new DecimalFormatSymbols(Locale.ROOT)); - - return new BitsAnonymousInnerClassHelper2(this, field, @in, scratch, decoder); + return new BitsAnonymousInnerClassHelper2(this, field, input, scratch); } private class BitsAnonymousInnerClassHelper2 : Bits @@ -273,39 +271,32 @@ namespace Lucene.Net.Codecs.SimpleText private readonly OneField _field; private readonly IndexInput _input; private readonly BytesRef _scratch; - private readonly DecimalFormat _decoder; public BitsAnonymousInnerClassHelper2(SimpleTextDocValuesReader outerInstance, OneField field, - IndexInput input, BytesRef scratch, DecimalFormat decoder) + IndexInput input, BytesRef scratch) { _outerInstance = outerInstance; _field = field; _input = input; _scratch = scratch; - _decoder = decoder; } public bool Get(int index) { - _input.Seek(_field.DataStartFilePointer + (9 + _field.Pattern.Length + _field.MaxLength + 2)*index); SimpleTextUtil.ReadLine(_input, _scratch); Debug.Assert(StringHelper.StartsWith(_scratch, SimpleTextDocValuesWriter.LENGTH)); int len; try { - len = - (int) - _decoder.parse(new string(_scratch.bytes, _scratch.offset + SimpleTextDocValuesWriter.LENGTH.length, - _scratch.length - LENGTH.length, StandardCharsets.UTF_8)); + len = int.Parse(_scratch.Bytes.SubList( _scratch.Offset + SimpleTextDocValuesWriter.LENGTH.Length, + _scratch.Length - SimpleTextDocValuesWriter.LENGTH.Length).ToString()); } - catch (ParseException pe) + catch (FormatException ex) { - CorruptIndexException e = - new CorruptIndexException("failed to parse int length (resource=" + _input + ")"); - e.initCause(pe); - throw e; + throw new CorruptIndexException("failed to parse int value (resource=" + _input + ")", ex); } + // skip past bytes var bytes = new sbyte[len]; _input.ReadBytes(bytes, 0, len); http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d852d5b0/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesWriter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesWriter.cs b/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesWriter.cs index 81abb86..c3f8fa4 100644 --- a/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesWriter.cs +++ b/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesWriter.cs @@ -83,51 +83,37 @@ namespace Lucene.Net.Codecs.SimpleText SimpleTextUtil.Write(data, Convert.ToString(minValue), scratch); SimpleTextUtil.WriteNewline(data); - // build up our fixed-width "simple text packed ints" - // format - System.Numerics.BigInteger maxBig = System.Numerics.BigInteger.valueOf(maxValue); - System.Numerics.BigInteger minBig = System.Numerics.BigInteger.valueOf(minValue); - System.Numerics.BigInteger diffBig = maxBig - minBig; - int maxBytesPerValue = diffBig.ToString().Length; - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < maxBytesPerValue; i++) - { - sb.Append('0'); - } + // build up our fixed-width "simple text packed ints" format + System.Numerics.BigInteger maxBig = maxValue; + System.Numerics.BigInteger minBig = minValue; + var diffBig = maxBig - minBig; + var maxBytesPerValue = diffBig.ToString().Length; + var sb = new StringBuilder(); + for (var i = 0; i < maxBytesPerValue; i++) + sb.Append('0'); + // write our pattern to the .dat SimpleTextUtil.Write(data, PATTERN); SimpleTextUtil.Write(data, sb.ToString(), scratch); SimpleTextUtil.WriteNewline(data); -//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': -//ORIGINAL LINE: final String patternString = sb.toString(); - string patternString = sb.ToString(); - -//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': -//ORIGINAL LINE: final java.text.DecimalFormat encoder = new java.text.DecimalFormat(patternString, new java.text.DecimalFormatSymbols(java.util.Locale.ROOT)); + var patternString = sb.ToString(); DecimalFormat encoder = new DecimalFormat(patternString, new DecimalFormatSymbols(Locale.ROOT)); int numDocsWritten = 0; // second pass to write the values - foreach (Number n in values) + foreach (var value in values) { - long value = n == null ? 0 : (long) n; Debug.Assert(value >= minValue); - Number delta = System.Numerics.BigInteger.valueOf(value) - System.Numerics.BigInteger.valueOf(minValue); + + var delta = value - minValue; string s = encoder.format(delta); Debug.Assert(s.Length == patternString.Length); SimpleTextUtil.Write(data, s, scratch); SimpleTextUtil.WriteNewline(data); - if (n == null) - { - SimpleTextUtil.Write(data, "F", scratch); - } - else - { - SimpleTextUtil.Write(data, "T", scratch); - } + SimpleTextUtil.Write(data, n == null ? "F" : "T", scratch); SimpleTextUtil.WriteNewline(data); numDocsWritten++; Debug.Assert(numDocsWritten <= numDocs); @@ -141,10 +127,10 @@ namespace Lucene.Net.Codecs.SimpleText Debug.Assert(FieldSeen(field.Name)); Debug.Assert(field.DocValuesType == FieldInfo.DocValuesType_e.BINARY); - int maxLength = 0; - foreach (BytesRef value in values) + var maxLength = 0; + foreach (var value in values) { - int length = value == null ? 0 : value.Length; + var length = value == null ? 0 : value.Length; maxLength = Math.Max(maxLength, length); } WriteFieldEntry(field, FieldInfo.DocValuesType_e.BINARY); @@ -179,7 +165,7 @@ namespace Lucene.Net.Codecs.SimpleText // because it escapes: if (value != null) { - data.WriteBytes(value.bytes, value.offset, value.length); + data.WriteBytes(value.Bytes, value.Offset, value.Length); } // pad to fit @@ -188,14 +174,7 @@ namespace Lucene.Net.Codecs.SimpleText data.WriteByte((sbyte) ' '); } SimpleTextUtil.WriteNewline(data); - if (value == null) - { - SimpleTextUtil.Write(data, "F", scratch); - } - else - { - SimpleTextUtil.Write(data, "T", scratch); - } + SimpleTextUtil.Write(data, value == null ? "F" : "T", scratch); SimpleTextUtil.WriteNewline(data); numDocsWritten++; } @@ -390,19 +369,18 @@ namespace Lucene.Net.Codecs.SimpleText foreach (var n in docToOrdCount) { sb2.Length = 0; - int count = (int) n; - for (int i = 0; i < count; i++) + var count = (int) n; + for (var i = 0; i < count; i++) { - long ord = (long) ordStream.Next(); + var ord = (long) ordStream.Next(); if (sb2.Length > 0) - { sb2.Append(","); - } + sb2.Append(Convert.ToString(ord)); } // now pad to fit: these are numbers so spaces work well. reader calls trim() - int numPadding = maxOrdListLength - sb2.Length; - for (int i = 0; i < numPadding; i++) + var numPadding = maxOrdListLength - sb2.Length; + for (var i = 0; i < numPadding; i++) { sb2.Append(' '); } @@ -418,7 +396,7 @@ namespace Lucene.Net.Codecs.SimpleText try { Debug.Assert(_fieldsSeen.Count > 0); - // TODO: sheisty to do this here? + // java : sheisty to do this here? SimpleTextUtil.Write(data, END); SimpleTextUtil.WriteNewline(data); SimpleTextUtil.WriteChecksum(data, scratch); http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d852d5b0/src/Lucene.Net.Codecs/SimpleText/SimpleTextSegmentInfoReader.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Codecs/SimpleText/SimpleTextSegmentInfoReader.cs b/src/Lucene.Net.Codecs/SimpleText/SimpleTextSegmentInfoReader.cs index e168e04..01236cd 100644 --- a/src/Lucene.Net.Codecs/SimpleText/SimpleTextSegmentInfoReader.cs +++ b/src/Lucene.Net.Codecs/SimpleText/SimpleTextSegmentInfoReader.cs @@ -44,7 +44,7 @@ namespace Lucene.Net.Codecs.SimpleText public override SegmentInfo Read(Directory directory, string segmentName, IOContext context) { - BytesRef scratch = new BytesRef(); + var scratch = new BytesRef(); string segFileName = IndexFileNames.SegmentFileName(segmentName, "", SimpleTextSegmentInfoFormat.SI_EXTENSION); ChecksumIndexInput input = directory.OpenChecksumInput(segFileName, context); @@ -83,7 +83,7 @@ namespace Lucene.Net.Codecs.SimpleText SimpleTextUtil.ReadLine(input, scratch); Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_NUM_FILES)); int numFiles = Convert.ToInt32(ReadString(SimpleTextSegmentInfoWriter.SI_NUM_FILES.Length, scratch)); - HashSet files = new HashSet(); + var files = new HashSet(); for (int i = 0; i < numFiles; i++) { http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d852d5b0/src/Lucene.Net.Codecs/SimpleText/SimpleTextTermVectorsWriter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Codecs/SimpleText/SimpleTextTermVectorsWriter.cs b/src/Lucene.Net.Codecs/SimpleText/SimpleTextTermVectorsWriter.cs index 4dd1265..b824c15 100644 --- a/src/Lucene.Net.Codecs/SimpleText/SimpleTextTermVectorsWriter.cs +++ b/src/Lucene.Net.Codecs/SimpleText/SimpleTextTermVectorsWriter.cs @@ -58,19 +58,19 @@ namespace Lucene.Net.Codecs.SimpleText internal const string VECTORS_EXTENSION = "vec"; - private readonly Directory directory; - private readonly string segment; + private readonly Directory _directory; + private readonly string _segment; private IndexOutput _output; - private int numDocsWritten = 0; - private readonly BytesRef scratch = new BytesRef(); - private bool offsets; - private bool positions; - private bool payloads; + private int _numDocsWritten; + private readonly BytesRef _scratch = new BytesRef(); + private bool _offsets; + private bool _positions; + private bool _payloads; public SimpleTextTermVectorsWriter(Directory directory, string segment, IOContext context) { - this.directory = directory; - this.segment = segment; + _directory = directory; + _segment = segment; bool success = false; try { @@ -89,13 +89,13 @@ namespace Lucene.Net.Codecs.SimpleText public override void StartDocument(int numVectorFields) { Write(DOC); - Write(Convert.ToString(numDocsWritten)); + Write(Convert.ToString(_numDocsWritten)); NewLine(); Write(NUMFIELDS); Write(Convert.ToString(numVectorFields)); NewLine(); - numDocsWritten++; + _numDocsWritten++; } public override void StartField(FieldInfo info, int numTerms, bool positions, bool offsets, bool payloads) @@ -124,9 +124,9 @@ namespace Lucene.Net.Codecs.SimpleText Write(Convert.ToString(numTerms)); NewLine(); - this.positions = positions; - this.offsets = offsets; - this.payloads = payloads; + _positions = positions; + _offsets = offsets; + _payloads = payloads; } public override void StartTerm(BytesRef term, int freq) @@ -142,15 +142,15 @@ namespace Lucene.Net.Codecs.SimpleText public override void AddPosition(int position, int startOffset, int endOffset, BytesRef payload) { - Debug.Assert(positions || offsets); + Debug.Assert(_positions || _offsets); - if (positions) + if (_positions) { Write(POSITION); Write(Convert.ToString(position)); NewLine(); - if (payloads) + if (_payloads) { Write(PAYLOAD); if (payload != null) @@ -162,7 +162,7 @@ namespace Lucene.Net.Codecs.SimpleText } } - if (offsets) + if (_offsets) { Write(STARTOFFSET); Write(Convert.ToString(startOffset)); @@ -174,7 +174,7 @@ namespace Lucene.Net.Codecs.SimpleText } } - public override void Abort() + public override sealed void Abort() { try { @@ -183,22 +183,22 @@ namespace Lucene.Net.Codecs.SimpleText finally { - IOUtils.DeleteFilesIgnoringExceptions(directory, - IndexFileNames.SegmentFileName(segment, "", VECTORS_EXTENSION)); + IOUtils.DeleteFilesIgnoringExceptions(_directory, + IndexFileNames.SegmentFileName(_segment, "", VECTORS_EXTENSION)); } } public override void Finish(FieldInfos fis, int numDocs) { - if (numDocsWritten != numDocs) + if (_numDocsWritten != numDocs) { throw new Exception("mergeVectors produced an invalid result: mergedDocs is " + numDocs + - " but vec numDocs is " + numDocsWritten + " file=" + _output + + " but vec numDocs is " + _numDocsWritten + " file=" + _output + "; now aborting this merge to prevent index corruption"); } Write(END); NewLine(); - SimpleTextUtil.WriteChecksum(_output, scratch); + SimpleTextUtil.WriteChecksum(_output, _scratch); } protected override void Dispose(bool disposing) @@ -222,7 +222,7 @@ namespace Lucene.Net.Codecs.SimpleText private void Write(string s) { - SimpleTextUtil.Write(_output, s, scratch); + SimpleTextUtil.Write(_output, s, _scratch); } private void Write(BytesRef bytes) http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d852d5b0/src/Lucene.Net.Codecs/StringHelperClass.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Codecs/StringHelperClass.cs b/src/Lucene.Net.Codecs/StringHelperClass.cs index a9ba97a..0e333f3 100644 --- a/src/Lucene.Net.Codecs/StringHelperClass.cs +++ b/src/Lucene.Net.Codecs/StringHelperClass.cs @@ -22,7 +22,9 @@ // This class is used to convert some aspects of the Java String class. //------------------------------------------------------------------------------------------- -namespace Lucene.Net.Codes +using System.Runtime.InteropServices; + +namespace Lucene.Net.Codecs { internal static class StringHelperClass @@ -109,7 +111,7 @@ namespace Lucene.Net.Codes private static sbyte[] GetSBytesForEncoding(System.Text.Encoding encoding, string s) { - sbyte[] sbytes = new sbyte[encoding.GetByteCount(s)]; + var sbytes = new sbyte[encoding.GetByteCount(s)]; encoding.GetBytes(s, 0, s.Length, (byte[]) (object) sbytes, 0); return sbytes; } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d852d5b0/src/Lucene.Net.Core/Codecs/StoredFieldsReader.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Core/Codecs/StoredFieldsReader.cs b/src/Lucene.Net.Core/Codecs/StoredFieldsReader.cs index caa8403..997e530 100644 --- a/src/Lucene.Net.Core/Codecs/StoredFieldsReader.cs +++ b/src/Lucene.Net.Core/Codecs/StoredFieldsReader.cs @@ -1,31 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + using System; namespace Lucene.Net.Codecs { - /// - /// Copyright 2004 The Apache Software Foundation - /// - /// Licensed under the Apache License, Version 2.0 (the "License"); you may not - /// use this file except in compliance with the License. You may obtain a copy of - /// the License at - /// - /// http://www.apache.org/licenses/LICENSE-2.0 - /// - /// Unless required by applicable law or agreed to in writing, software - /// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - /// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - /// License for the specific language governing permissions and limitations under - /// the License. - /// - - using StoredFieldVisitor = Lucene.Net.Index.StoredFieldVisitor; + using StoredFieldVisitor = Index.StoredFieldVisitor; /// /// Codec API for reading stored fields. - ///

- /// You need to implement to - /// read the stored fields for a document, implement (creating - /// clones of any IndexInputs used, etc), and + /// + /// You need to implement visitDocument(int, StoredFieldVisitor) to + /// read the stored fields for a document, implement clone( (creating + /// clones of any IndexInputs used, etc), and close() + /// /// @lucene.experimental ///

public abstract class StoredFieldsReader : ICloneable, IDisposable @@ -58,7 +60,7 @@ namespace Lucene.Net.Codecs /// /// Checks consistency of this reader. - ///

+ /// /// Note that this may be costly in terms of I/O, e.g. /// may involve computing a checksum value against large data files. /// @lucene.internal