From: synhershko@apache.org
To: commits@lucenenet.apache.org
Date: Mon, 15 Sep 2014 22:47:08 -0000
Subject: [10/11] git commit: Skeleton porting of Lucene.Net.Misc

Skeleton porting of Lucene.Net.Misc

Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/674f0cb9
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/674f0cb9
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/674f0cb9
Branch: refs/heads/master
Commit: 674f0cb97dfae0961d5f9622f49f17d891be08bc
Parents: 882f487
Author: Itamar Syn-Hershko
Authored: Tue Sep 16 01:39:08 2014 +0300
Committer: Itamar Syn-Hershko
Committed: Tue Sep 16 01:39:08 2014 +0300

----------------------------------------------------------------------
 src/Lucene.Net.Misc/ByteBuffer.cs | 325 ++
 src/Lucene.Net.Misc/Document/LazyDocument.cs | 226 ++
 .../Index/CompoundFileExtractor.cs | 165 +++
 src/Lucene.Net.Misc/Index/IndexSplitter.cs | 200 ++
 .../Index/MultiPassIndexSplitter.cs | 329 ++
 src/Lucene.Net.Misc/Index/PKIndexSplitter.cs | 220 ++
 .../Index/Sorter/BlockJoinComparatorSource.cs | 321 ++
 .../Sorter/EarlyTerminatingSortingCollector.cs | 147 +++
 src/Lucene.Net.Misc/Index/Sorter/Sorter.cs | 404 +++
 .../Index/Sorter/SortingAtomicReader.cs | 1081 ++++++
 .../Index/Sorter/SortingMergePolicy.cs | 309 +++
 src/Lucene.Net.Misc/Lucene.Net.Misc.csproj | 73 ++
 src/Lucene.Net.Misc/Misc/GetTermInfo.cs | 74 ++
 src/Lucene.Net.Misc/Misc/HighFreqTerms.cs | 230 ++
 src/Lucene.Net.Misc/Misc/IndexMergeTool.cs | 66 ++
 src/Lucene.Net.Misc/Misc/SweetSpotSimilarity.cs | 238 ++
 src/Lucene.Net.Misc/Misc/TermStats.cs | 55 +
 src/Lucene.Net.Misc/Properties/AssemblyInfo.cs | 35 +
 src/Lucene.Net.Misc/Store/NativePosixUtil.cs | 64 ++
 .../Store/NativeUnixDirectory.cs | 527 +++
 src/Lucene.Net.Misc/Store/WindowsDirectory.cs | 181 +++
 src/Lucene.Net.Misc/Util/Fst/ListOfOutputs.cs | 246 ++
 .../Util/Fst/UpToTwoPositiveIntOutputs.cs | 328 ++
 23 files changed, 5844 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/ByteBuffer.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Misc/ByteBuffer.cs b/src/Lucene.Net.Misc/ByteBuffer.cs new file mode 100644 index 0000000..204f7de --- /dev/null +++ b/src/Lucene.Net.Misc/ByteBuffer.cs @@ -0,0 +1,325 @@ +//------------------------------------------------------------------------------------------- +// Copyright © 2007 - 2014 Tangible Software Solutions Inc. +// This class can be used by anyone provided that the copyright notice remains intact. +// +// This class is used to simulate the java.nio.ByteBuffer class in C#. +// +// Instances are only obtainable via the static 'allocate' method. +// +// Some methods are not available: +// Methods which create shared views of the buffer, such as: array, +// asCharBuffer, asDoubleBuffer, asFloatBuffer, asIntBuffer, asLongBuffer, +// asReadOnlyBuffer, asShortBuffer, duplicate, slice, & wrap. +// +// Methods mark, reset, isReadOnly, order, compareTo, arrayOffset, & limit (setter). +//------------------------------------------------------------------------------------------- +public class ByteBuffer +{ + //'Mode' is only used to determine whether to return data length or capacity from the 'limit' method: + private enum Mode + { + Read, + Write + } + private Mode mode; + + private System.IO.MemoryStream stream; + private System.IO.BinaryReader reader; + private System.IO.BinaryWriter writer; + + private ByteBuffer() + { + stream = new System.IO.MemoryStream(); + reader = new System.IO.BinaryReader(stream); + writer = new System.IO.BinaryWriter(stream); + } + + ~ByteBuffer() + { + reader.Close(); + writer.Close(); + stream.Close(); + stream.Dispose(); + } + + public static ByteBuffer allocate(int capacity) + { + ByteBuffer buffer = new ByteBuffer(); + buffer.stream.Capacity = capacity; + buffer.mode = Mode.Write; + return buffer; + } + + public static ByteBuffer allocateDirect(int capacity) + { + //this wrapper class makes no distinction between 'allocate' & 'allocateDirect' + return allocate(capacity); + } + + public int capacity() + { + return stream.Capacity; + } + + public ByteBuffer flip() + { + mode = Mode.Read; + stream.SetLength(stream.Position); + stream.Position = 0; + return this; + } + + public ByteBuffer clear() + { + mode = Mode.Write; + stream.Position = 0; + return this; + } + + public ByteBuffer compact() + { + mode = Mode.Write; + System.IO.MemoryStream newStream = new System.IO.MemoryStream(stream.Capacity); + stream.CopyTo(newStream); + stream = newStream; + return this; + } + + public ByteBuffer rewind() + { + stream.Position = 0; + return this; + } + + public long limit() + { + if (mode == Mode.Write) + return stream.Capacity; + else + return stream.Length; + } + + public long position() + { + return stream.Position; + } + + public ByteBuffer position(long newPosition) + { + stream.Position = newPosition; + return this; + } + + public long remaining() + { + return this.limit() - this.position(); + } + + public bool hasRemaining() + { + return this.remaining() > 0; + } + + public int get() + { + return stream.ReadByte(); + } + + public ByteBuffer get(byte[] dst, int offset, int length) + { + stream.Read(dst, offset, length); + return this; + } + + public ByteBuffer put(byte b) + { + stream.WriteByte(b); + return this; + } + + public ByteBuffer put(byte[] src, int offset, int length) + { + stream.Write(src, offset, length); + return this; + } + + public bool 
Equals(ByteBuffer other) + { + if (other != null && this.remaining() == other.remaining()) + { + long thisOriginalPosition = this.position(); + long otherOriginalPosition = other.position(); + + bool differenceFound = false; + while (stream.Position < stream.Length) + { + if (this.get() != other.get()) + { + differenceFound = true; + break; + } + } + + this.position(thisOriginalPosition); + other.position(otherOriginalPosition); + + return ! differenceFound; + } + else + return false; + } + + //methods using the internal BinaryReader: + public char getChar() + { + return reader.ReadChar(); + } + public char getChar(int index) + { + long originalPosition = stream.Position; + stream.Position = index; + char value = reader.ReadChar(); + stream.Position = originalPosition; + return value; + } + public double getDouble() + { + return reader.ReadDouble(); + } + public double getDouble(int index) + { + long originalPosition = stream.Position; + stream.Position = index; + double value = reader.ReadDouble(); + stream.Position = originalPosition; + return value; + } + public float getFloat() + { + return reader.ReadSingle(); + } + public float getFloat(int index) + { + long originalPosition = stream.Position; + stream.Position = index; + float value = reader.ReadSingle(); + stream.Position = originalPosition; + return value; + } + public int getInt() + { + return reader.ReadInt32(); + } + public int getInt(int index) + { + long originalPosition = stream.Position; + stream.Position = index; + int value = reader.ReadInt32(); + stream.Position = originalPosition; + return value; + } + public long getLong() + { + return reader.ReadInt64(); + } + public long getLong(int index) + { + long originalPosition = stream.Position; + stream.Position = index; + long value = reader.ReadInt64(); + stream.Position = originalPosition; + return value; + } + public short getShort() + { + return reader.ReadInt16(); + } + public short getShort(int index) + { + long originalPosition = stream.Position; + stream.Position = index; + short value = reader.ReadInt16(); + stream.Position = originalPosition; + return value; + } + + //methods using the internal BinaryWriter: + public ByteBuffer putChar(char value) + { + writer.Write(value); + return this; + } + public ByteBuffer putChar(int index, char value) + { + long originalPosition = stream.Position; + stream.Position = index; + writer.Write(value); + stream.Position = originalPosition; + return this; + } + public ByteBuffer putDouble(double value) + { + writer.Write(value); + return this; + } + public ByteBuffer putDouble(int index, double value) + { + long originalPosition = stream.Position; + stream.Position = index; + writer.Write(value); + stream.Position = originalPosition; + return this; + } + public ByteBuffer putFloat(float value) + { + writer.Write(value); + return this; + } + public ByteBuffer putFloat(int index, float value) + { + long originalPosition = stream.Position; + stream.Position = index; + writer.Write(value); + stream.Position = originalPosition; + return this; + } + public ByteBuffer putInt(int value) + { + writer.Write(value); + return this; + } + public ByteBuffer putInt(int index, int value) + { + long originalPosition = stream.Position; + stream.Position = index; + writer.Write(value); + stream.Position = originalPosition; + return this; + } + public ByteBuffer putLong(long value) + { + writer.Write(value); + return this; + } + public ByteBuffer putLong(int index, long value) + { + long originalPosition = stream.Position; + stream.Position = index; 
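+ // absolute-indexed write: seek to 'index', write the value below, then restore the previous
+ // position so the buffer's current position is left unchanged (as with the other indexed puts)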
+ writer.Write(value); + stream.Position = originalPosition; + return this; + } + public ByteBuffer putShort(short value) + { + writer.Write(value); + return this; + } + public ByteBuffer putShort(int index, short value) + { + long originalPosition = stream.Position; + stream.Position = index; + writer.Write(value); + stream.Position = originalPosition; + return this; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Document/LazyDocument.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Misc/Document/LazyDocument.cs b/src/Lucene.Net.Misc/Document/LazyDocument.cs new file mode 100644 index 0000000..6faed94 --- /dev/null +++ b/src/Lucene.Net.Misc/Document/LazyDocument.cs @@ -0,0 +1,226 @@ +using System.Diagnostics; +using System.Collections.Generic; + +namespace org.apache.lucene.document +{ + + /// + /// Copyright 2004 The Apache Software Foundation + /// + /// Licensed under the Apache License, Version 2.0 (the "License"); + /// you may not use this file except in compliance with the License. + /// You may obtain a copy of the License at + /// + /// http://www.apache.org/licenses/LICENSE-2.0 + /// + /// Unless required by applicable law or agreed to in writing, software + /// distributed under the License is distributed on an "AS IS" BASIS, + /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + /// See the License for the specific language governing permissions and + /// limitations under the License. + /// + + using Analyzer = org.apache.lucene.analysis.Analyzer; + using TokenStream = org.apache.lucene.analysis.TokenStream; + using FieldInfo = org.apache.lucene.index.FieldInfo; + using IndexReader = org.apache.lucene.index.IndexReader; + using IndexableField = org.apache.lucene.index.IndexableField; + using IndexableFieldType = org.apache.lucene.index.IndexableFieldType; + using BytesRef = org.apache.lucene.util.BytesRef; + + /// + /// Defers actually loading a field's value until you ask + /// for it. You must not use the returned Field instances + /// after the provided reader has been closed. + /// + public class LazyDocument + { + private readonly IndexReader reader; + private readonly int docID; + + // null until first field is loaded + private Document doc; + + private IDictionary> fields = new Dictionary>(); + private HashSet fieldNames = new HashSet(); + + public LazyDocument(IndexReader reader, int docID) + { + this.reader = reader; + this.docID = docID; + } + + /// + /// Creates an IndexableField whose value will be lazy loaded if and + /// when it is used. + /// + /// NOTE: This method must be called once for each value of the field + /// name specified in sequence that the values exist. This method may not be + /// used to generate multiple, lazy, IndexableField instances refering to + /// the same underlying IndexableField instance. + /// + /// + /// The lazy loading of field values from all instances of IndexableField + /// objects returned by this method are all backed by a single Document + /// per LazyDocument instance. 
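 + ///
 + /// A hypothetical usage sketch (reader, docID and fieldInfo are illustrative placeholders,
 + /// not part of this commit); the underlying document is only loaded when a returned field
 + /// is first accessed:
 + ///
 + ///   var lazyDoc = new LazyDocument(reader, docID);
 + ///   IndexableField field = lazyDoc.getField(fieldInfo); // one call per stored value, in order
 + ///   string value = field.stringValue();                 // triggers the single document load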
+ /// + /// + public virtual IndexableField getField(FieldInfo fieldInfo) + { + + fieldNames.Add(fieldInfo.name); + IList values = fields[fieldInfo.number]; + if (null == values) + { + values = new List<>(); + fields[fieldInfo.number] = values; + } + + LazyField value = new LazyField(this, fieldInfo.name, fieldInfo.number); + values.Add(value); + + lock (this) + { + // edge case: if someone asks this LazyDoc for more LazyFields + // after other LazyFields from the same LazyDoc have been + // actuallized, we need to force the doc to be re-fetched + // so the new LazyFields are also populated. + doc = null; + } + return value; + } + + /// + /// non-private for test only access + /// @lucene.internal + /// + internal virtual Document Document + { + get + { + lock (this) + { + if (doc == null) + { + try + { + doc = reader.document(docID, fieldNames); + } + catch (IOException ioe) + { + throw new IllegalStateException("unable to load document", ioe); + } + } + return doc; + } + } + } + + // :TODO: synchronize to prevent redundent copying? (sync per field name?) + private void fetchRealValues(string name, int fieldNum) + { + Document d = Document; + + IList lazyValues = fields[fieldNum]; + IndexableField[] realValues = d.getFields(name); + + Debug.Assert(realValues.Length <= lazyValues.Count, "More lazy values then real values for field: " + name); + + for (int i = 0; i < lazyValues.Count; i++) + { + LazyField f = lazyValues[i]; + if (null != f) + { + f.realValue = realValues[i]; + } + } + } + + + /// + /// @lucene.internal + /// + public class LazyField : IndexableField + { + private readonly LazyDocument outerInstance; + + internal string name_Renamed; + internal int fieldNum; + internal volatile IndexableField realValue = null; + + internal LazyField(LazyDocument outerInstance, string name, int fieldNum) + { + this.outerInstance = outerInstance; + this.name_Renamed = name; + this.fieldNum = fieldNum; + } + + /// + /// non-private for test only access + /// @lucene.internal + /// + public virtual bool hasBeenLoaded() + { + return null != realValue; + } + + internal virtual IndexableField RealValue + { + get + { + if (null == realValue) + { + outerInstance.fetchRealValues(name_Renamed, fieldNum); + } + Debug.Assert(hasBeenLoaded(), "field value was not lazy loaded"); + Debug.Assert(realValue.name().Equals(name()), "realvalue name != name: " + realValue.name() + " != " + name()); + + return realValue; + } + } + + public override string name() + { + return name_Renamed; + } + + public override float boost() + { + return 1.0f; + } + + public override BytesRef binaryValue() + { + return RealValue.binaryValue(); + } + + public override string stringValue() + { + return RealValue.stringValue(); + } + + public override Reader readerValue() + { + return RealValue.readerValue(); + } + + public override Number numericValue() + { + return RealValue.numericValue(); + } + + public override IndexableFieldType fieldType() + { + return RealValue.fieldType(); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: @Override public org.apache.lucene.analysis.TokenStream tokenStream(org.apache.lucene.analysis.Analyzer analyzer) throws java.io.IOException + public override TokenStream tokenStream(Analyzer analyzer) + { + return RealValue.tokenStream(analyzer); + } + } + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Index/CompoundFileExtractor.cs 
---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Misc/Index/CompoundFileExtractor.cs b/src/Lucene.Net.Misc/Index/CompoundFileExtractor.cs new file mode 100644 index 0000000..855b6f3 --- /dev/null +++ b/src/Lucene.Net.Misc/Index/CompoundFileExtractor.cs @@ -0,0 +1,165 @@ +using System; + +namespace org.apache.lucene.index +{ + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /// + /// Prints the filename and size of each file within a given compound file. + /// Add the -extract flag to extract files to the current working directory. + /// In order to make the extracted version of the index work, you have to copy + /// the segments file from the compound index into the directory where the extracted files are stored. + /// Usage: org.apache.lucene.index.IndexReader [-extract] <cfsfile> + + + using CompoundFileDirectory = org.apache.lucene.store.CompoundFileDirectory; + using Directory = org.apache.lucene.store.Directory; + using FSDirectory = org.apache.lucene.store.FSDirectory; + using IOContext = org.apache.lucene.store.IOContext; + using IndexInput = org.apache.lucene.store.IndexInput; + using ArrayUtil = org.apache.lucene.util.ArrayUtil; + using CommandLineUtil = org.apache.lucene.util.CommandLineUtil; + + /// + /// Command-line tool for extracting sub-files out of a compound file. 
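 + ///
 + /// A hypothetical invocation (the compound file name is illustrative), mirroring the usage
 + /// string printed by Main below:
 + ///
 + ///   CompoundFileExtractor -extract _0.cfs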
+ /// + public class CompoundFileExtractor + { + + public static void Main(string[] args) + { + string filename = null; + bool extract = false; + string dirImpl = null; + + int j = 0; + while (j < args.Length) + { + string arg = args[j]; + if ("-extract".Equals(arg)) + { + extract = true; + } + else if ("-dir-impl".Equals(arg)) + { + if (j == args.Length - 1) + { + Console.WriteLine("ERROR: missing value for -dir-impl option"); + Environment.Exit(1); + } + j++; + dirImpl = args[j]; + } + else if (filename == null) + { + filename = arg; + } + j++; + } + + if (filename == null) + { + Console.WriteLine("Usage: org.apache.lucene.index.CompoundFileExtractor [-extract] [-dir-impl X] "); + return; + } + + Directory dir = null; + CompoundFileDirectory cfr = null; + IOContext context = IOContext.READ; + + try + { + File file = new File(filename); + string dirname = file.AbsoluteFile.Parent; + filename = file.Name; + if (dirImpl == null) + { + dir = FSDirectory.open(new File(dirname)); + } + else + { + dir = CommandLineUtil.newFSDirectory(dirImpl, new File(dirname)); + } + + cfr = new CompoundFileDirectory(dir, filename, IOContext.DEFAULT, false); + + string[] files = cfr.listAll(); + ArrayUtil.timSort(files); // sort the array of filename so that the output is more readable + + for (int i = 0; i < files.Length; ++i) + { + long len = cfr.fileLength(files[i]); + + if (extract) + { + Console.WriteLine("extract " + files[i] + " with " + len + " bytes to local directory..."); + IndexInput ii = cfr.openInput(files[i], context); + + FileOutputStream f = new FileOutputStream(files[i]); + + // read and write with a small buffer, which is more effective than reading byte by byte + sbyte[] buffer = new sbyte[1024]; + int chunk = buffer.Length; + while (len > 0) + { +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final int bufLen = (int) Math.min(chunk, len); + int bufLen = (int) Math.Min(chunk, len); + ii.readBytes(buffer, 0, bufLen); + f.write(buffer, 0, bufLen); + len -= bufLen; + } + + f.close(); + ii.close(); + } + else + { + Console.WriteLine(files[i] + ": " + len + " bytes"); + } + } + } + catch (IOException ioe) + { + Console.WriteLine(ioe.ToString()); + Console.Write(ioe.StackTrace); + } + finally + { + try + { + if (dir != null) + { + dir.close(); + } + if (cfr != null) + { + cfr.close(); + } + } + catch (IOException ioe) + { + Console.WriteLine(ioe.ToString()); + Console.Write(ioe.StackTrace); + } + } + } + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Index/IndexSplitter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Misc/Index/IndexSplitter.cs b/src/Lucene.Net.Misc/Index/IndexSplitter.cs new file mode 100644 index 0000000..a0e9946 --- /dev/null +++ b/src/Lucene.Net.Misc/Index/IndexSplitter.cs @@ -0,0 +1,200 @@ +using System; +using System.Collections.Generic; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +namespace org.apache.lucene.index +{ + + + using FSDirectory = org.apache.lucene.store.FSDirectory; + + /// + /// Command-line tool that enables listing segments in an + /// index, copying specific segments to another index, and + /// deleting segments from an index. + /// + /// This tool does file-level copying of segments files. + /// This means it's unable to split apart a single segment + /// into multiple segments. For example if your index is a + /// single segment, this tool won't help. Also, it does basic + /// file-level copying (using simple + /// File{In,Out}putStream) so it will not work with non + /// FSDirectory Directory impls. + /// + /// @lucene.experimental You can easily + /// accidentally remove segments from your index so be + /// careful! + /// + public class IndexSplitter + { + public SegmentInfos infos; + + internal FSDirectory fsDir; + + internal File dir; + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public static void main(String[] args) throws Exception + public static void Main(string[] args) + { + if (args.Length < 2) + { + Console.Error.WriteLine("Usage: IndexSplitter -l (list the segments and their sizes)"); + Console.Error.WriteLine("IndexSplitter +"); + Console.Error.WriteLine("IndexSplitter -d (delete the following segments)"); + return; + } + File srcDir = new File(args[0]); + IndexSplitter @is = new IndexSplitter(srcDir); + if (!srcDir.exists()) + { + throw new Exception("srcdir:" + srcDir.AbsolutePath + " doesn't exist"); + } + if (args[1].Equals("-l")) + { + @is.listSegments(); + } + else if (args[1].Equals("-d")) + { + IList segs = new List(); + for (int x = 2; x < args.Length; x++) + { + segs.Add(args[x]); + } + @is.remove(segs.ToArray()); + } + else + { + File targetDir = new File(args[1]); + IList segs = new List(); + for (int x = 2; x < args.Length; x++) + { + segs.Add(args[x]); + } + @is.Split(targetDir, segs.ToArray()); + } + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public IndexSplitter(java.io.File dir) throws java.io.IOException + public IndexSplitter(File dir) + { + this.dir = dir; + fsDir = FSDirectory.open(dir); + infos = new SegmentInfos(); + infos.read(fsDir); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void listSegments() throws java.io.IOException + public virtual void listSegments() + { + DecimalFormat formatter = new DecimalFormat("###,###.###", DecimalFormatSymbols.getInstance(Locale.ROOT)); + for (int x = 0; x < infos.size(); x++) + { + SegmentCommitInfo info = infos.info(x); + string sizeStr = formatter.format(info.sizeInBytes()); + Console.WriteLine(info.info.name + " " + sizeStr); + } + } + + private int getIdx(string name) + { + for (int x = 0; x < infos.size(); x++) + { + if (name.Equals(infos.info(x).info.name)) + { + return x; + } + } + return -1; + } + + private SegmentCommitInfo getInfo(string name) + { + for (int x = 0; x < infos.size(); x++) + { + if (name.Equals(infos.info(x).info.name)) + { + return 
infos.info(x); + } + } + return null; + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void remove(String[] segs) throws java.io.IOException + public virtual void remove(string[] segs) + { + foreach (string n in segs) + { + int idx = getIdx(n); + infos.remove(idx); + } + infos.changed(); + infos.commit(fsDir); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void split(java.io.File destDir, String[] segs) throws java.io.IOException + public virtual void Split(File destDir, string[] segs) + { + destDir.mkdirs(); + FSDirectory destFSDir = FSDirectory.open(destDir); + SegmentInfos destInfos = new SegmentInfos(); + destInfos.counter = infos.counter; + foreach (string n in segs) + { + SegmentCommitInfo infoPerCommit = getInfo(n); + SegmentInfo info = infoPerCommit.info; + // Same info just changing the dir: + SegmentInfo newInfo = new SegmentInfo(destFSDir, info.Version, info.name, info.DocCount, info.UseCompoundFile, info.Codec, info.Diagnostics); + destInfos.add(new SegmentCommitInfo(newInfo, infoPerCommit.DelCount, infoPerCommit.DelGen, infoPerCommit.FieldInfosGen)); + // now copy files over + ICollection files = infoPerCommit.files(); + foreach (String srcName in files) + { + File srcFile = new File(dir, srcName); + File destFile = new File(destDir, srcName); + copyFile(srcFile, destFile); + } + } + destInfos.changed(); + destInfos.commit(destFSDir); + // System.out.println("destDir:"+destDir.getAbsolutePath()); + } + + private static readonly sbyte[] copyBuffer = new sbyte[32 * 1024]; + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: private static void copyFile(java.io.File src, java.io.File dst) throws java.io.IOException + private static void copyFile(File src, File dst) + { + InputStream @in = new FileInputStream(src); + OutputStream @out = new FileOutputStream(dst); + int len; + while ((len = @in.read(copyBuffer)) > 0) + { + @out.write(copyBuffer, 0, len); + } + @in.close(); + @out.close(); + } + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Index/MultiPassIndexSplitter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Misc/Index/MultiPassIndexSplitter.cs b/src/Lucene.Net.Misc/Index/MultiPassIndexSplitter.cs new file mode 100644 index 0000000..1e03fed --- /dev/null +++ b/src/Lucene.Net.Misc/Index/MultiPassIndexSplitter.cs @@ -0,0 +1,329 @@ +using System; +using System.Diagnostics; +using System.Collections.Generic; + +namespace org.apache.lucene.index +{ + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + + using OpenMode = org.apache.lucene.index.IndexWriterConfig.OpenMode; + using Directory = org.apache.lucene.store.Directory; + using FSDirectory = org.apache.lucene.store.FSDirectory; + using FixedBitSet = org.apache.lucene.util.FixedBitSet; + using Bits = org.apache.lucene.util.Bits; + using Version = org.apache.lucene.util.Version; + + /// + /// This tool splits input index into multiple equal parts. The method employed + /// here uses where the input data + /// comes from the input index with artificially applied deletes to the document + /// id-s that fall outside the selected partition. + /// Note 1: Deletes are only applied to a buffered list of deleted docs and + /// don't affect the source index - this tool works also with read-only indexes. + /// + /// Note 2: the disadvantage of this tool is that source index needs to be + /// read as many times as there are parts to be created, hence the name of this + /// tool. + /// + /// + /// NOTE: this tool is unaware of documents added + /// atomically via or {@link + /// IndexWriter#updateDocuments}, which means it can easily + /// break up such document groups. + /// + /// + public class MultiPassIndexSplitter + { + + /// + /// Split source index into multiple parts. + /// source index, can have deletions, can have + /// multiple segments (or multiple readers). + /// list of directories where the output parts will be stored. + /// if true, then the source index will be split into equal + /// increasing ranges of document id-s. If false, source document id-s will be + /// assigned in a deterministic round-robin fashion to one of the output splits. + /// If there is a low-level I/O error +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void split(org.apache.lucene.util.Version version, IndexReader in, org.apache.lucene.store.Directory[] outputs, boolean seq) throws java.io.IOException + public virtual void Split(Version version, IndexReader @in, Directory[] outputs, bool seq) + { + if (outputs == null || outputs.Length < 2) + { + throw new IOException("Invalid number of outputs."); + } + if (@in == null || @in.numDocs() < 2) + { + throw new IOException("Not enough documents for splitting"); + } + int numParts = outputs.Length; + // wrap a potentially read-only input + // this way we don't have to preserve original deletions because neither + // deleteDocument(int) or undeleteAll() is applied to the wrapped input index. 
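 + // Each pass below clears the emulated deletions, marks every document outside the current
 + // partition as deleted on the wrapper, and then copies only the remaining live documents
 + // into that part via IndexWriter.addIndexes.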
+ FakeDeleteIndexReader input = new FakeDeleteIndexReader(@in); + int maxDoc = input.maxDoc(); + int partLen = maxDoc / numParts; + for (int i = 0; i < numParts; i++) + { + input.undeleteAll(); + if (seq) // sequential range + { + int lo = partLen * i; + int hi = lo + partLen; + // below range + for (int j = 0; j < lo; j++) + { + input.deleteDocument(j); + } + // above range - last part collects all id-s that remained due to + // integer rounding errors + if (i < numParts - 1) + { + for (int j = hi; j < maxDoc; j++) + { + input.deleteDocument(j); + } + } + } + else + { + // round-robin + for (int j = 0; j < maxDoc; j++) + { + if ((j + numParts - i) % numParts != 0) + { + input.deleteDocument(j); + } + } + } + IndexWriter w = new IndexWriter(outputs[i], new IndexWriterConfig(version, null) + .setOpenMode(OpenMode.CREATE)); + Console.Error.WriteLine("Writing part " + (i + 1) + " ..."); + // pass the subreaders directly, as our wrapper's numDocs/hasDeletetions are not up-to-date +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final java.util.List sr = input.getSequentialSubReaders(); +//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET: + IList sr = input.SequentialSubReaders; + w.addIndexes(sr.ToArray()); // TODO: maybe take List here? + w.close(); + } + Console.Error.WriteLine("Done."); + } + +//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes: +//ORIGINAL LINE: @SuppressWarnings("deprecation") public static void main(String[] args) throws Exception +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: + public static void Main(string[] args) + { + if (args.Length < 5) + { + Console.Error.WriteLine("Usage: MultiPassIndexSplitter -out -num [-seq] [ indexes = new List(); + string outDir = null; + int numParts = -1; + bool seq = false; + for (int i = 0; i < args.Length; i++) + { + if (args[i].Equals("-out")) + { + outDir = args[++i]; + } + else if (args[i].Equals("-num")) + { + numParts = Convert.ToInt32(args[++i]); + } + else if (args[i].Equals("-seq")) + { + seq = true; + } + else + { + File file = new File(args[i]); + if (!file.exists() || !file.Directory) + { + Console.Error.WriteLine("Invalid input path - skipping: " + file); + continue; + } + Directory dir = FSDirectory.open(new File(args[i])); + try + { + if (!DirectoryReader.indexExists(dir)) + { + Console.Error.WriteLine("Invalid input index - skipping: " + file); + continue; + } + } + catch (Exception) + { + Console.Error.WriteLine("Invalid input index - skipping: " + file); + continue; + } + indexes.Add(DirectoryReader.open(dir)); + } + } + if (outDir == null) + { + throw new Exception("Required argument missing: -out outputDir"); + } + if (numParts < 2) + { + throw new Exception("Invalid value of required argument: -num numParts"); + } + if (indexes.Count == 0) + { + throw new Exception("No input indexes to process"); + } + File @out = new File(outDir); + if (!@out.mkdirs()) + { + throw new Exception("Can't create output directory: " + @out); + } + Directory[] dirs = new Directory[numParts]; + for (int i = 0; i < numParts; i++) + { + dirs[i] = FSDirectory.open(new File(@out, "part-" + i)); + } + MultiPassIndexSplitter splitter = new MultiPassIndexSplitter(); + IndexReader input; + if (indexes.Count == 1) + { + input = indexes[0]; + } + else + { + input = new MultiReader(indexes.ToArray()); + } + splitter.Split(Version.LUCENE_CURRENT, input, dirs, seq); + } + + /// + /// 
This class emulates deletions on the underlying index. + /// + private sealed class FakeDeleteIndexReader : BaseCompositeReader + { + + public FakeDeleteIndexReader(IndexReader reader) : base(initSubReaders(reader)) + { + } + + internal static FakeDeleteAtomicIndexReader[] initSubReaders(IndexReader reader) + { +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final java.util.List leaves = reader.leaves(); + IList leaves = reader.leaves(); +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final FakeDeleteAtomicIndexReader[] subs = new FakeDeleteAtomicIndexReader[leaves.size()]; + FakeDeleteAtomicIndexReader[] subs = new FakeDeleteAtomicIndexReader[leaves.Count]; + int i = 0; + foreach (AtomicReaderContext ctx in leaves) + { + subs[i++] = new FakeDeleteAtomicIndexReader(ctx.reader()); + } + return subs; + } + + public void deleteDocument(int docID) + { +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final int i = readerIndex(docID); + int i = readerIndex(docID); + SequentialSubReaders.get(i).deleteDocument(docID - readerBase(i)); + } + + public void undeleteAll() + { + foreach (FakeDeleteAtomicIndexReader r in SequentialSubReaders) + { + r.undeleteAll(); + } + } + + protected internal override void doClose() + { + } + + // no need to override numDocs/hasDeletions, + // as we pass the subreaders directly to IW.addIndexes(). + } + + private sealed class FakeDeleteAtomicIndexReader : FilterAtomicReader + { + internal FixedBitSet liveDocs; + + public FakeDeleteAtomicIndexReader(AtomicReader reader) : base(reader) + { + undeleteAll(); // initialize main bitset + } + + public override int numDocs() + { + return liveDocs.cardinality(); + } + + public void undeleteAll() + { +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final int maxDoc = in.maxDoc(); + int maxDoc = @in.maxDoc(); + liveDocs = new FixedBitSet(@in.maxDoc()); + if (@in.hasDeletions()) + { +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final org.apache.lucene.util.Bits oldLiveDocs = in.getLiveDocs(); + Bits oldLiveDocs = @in.LiveDocs; + Debug.Assert(oldLiveDocs != null); + // this loop is a little bit ineffective, as Bits has no nextSetBit(): + for (int i = 0; i < maxDoc; i++) + { + if (oldLiveDocs.get(i)) + { + liveDocs.set(i); + } + } + } + else + { + // mark all docs as valid + liveDocs.set(0, maxDoc); + } + } + + public void deleteDocument(int n) + { + liveDocs.clear(n); + } + + public override Bits LiveDocs + { + get + { + return liveDocs; + } + } + } + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Index/PKIndexSplitter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Misc/Index/PKIndexSplitter.cs b/src/Lucene.Net.Misc/Index/PKIndexSplitter.cs new file mode 100644 index 0000000..f3e7ed4 --- /dev/null +++ b/src/Lucene.Net.Misc/Index/PKIndexSplitter.cs @@ -0,0 +1,220 @@ +using System.Diagnostics; +using System.Collections.Generic; + +namespace org.apache.lucene.index +{ + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + using OpenMode = org.apache.lucene.index.IndexWriterConfig.OpenMode; + using DocIdSet = org.apache.lucene.search.DocIdSet; + using DocIdSetIterator = org.apache.lucene.search.DocIdSetIterator; + using Filter = org.apache.lucene.search.Filter; + using TermRangeFilter = org.apache.lucene.search.TermRangeFilter; + using Directory = org.apache.lucene.store.Directory; + using Bits = org.apache.lucene.util.Bits; + using FixedBitSet = org.apache.lucene.util.FixedBitSet; + using IOUtils = org.apache.lucene.util.IOUtils; + using Version = org.apache.lucene.util.Version; + + /// + /// Split an index based on a . + /// + + public class PKIndexSplitter + { + private readonly Filter docsInFirstIndex; + private readonly Directory input; + private readonly Directory dir1; + private readonly Directory dir2; + private readonly IndexWriterConfig config1; + private readonly IndexWriterConfig config2; + + /// + /// Split an index based on a . All documents that match the filter + /// are sent to dir1, remaining ones to dir2. + /// + public PKIndexSplitter(Version version, Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex) : this(input, dir1, dir2, docsInFirstIndex, newDefaultConfig(version), newDefaultConfig(version)) + { + } + + private static IndexWriterConfig newDefaultConfig(Version version) + { + return (new IndexWriterConfig(version, null)).setOpenMode(OpenMode.CREATE); + } + + public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex, IndexWriterConfig config1, IndexWriterConfig config2) + { + this.input = input; + this.dir1 = dir1; + this.dir2 = dir2; + this.docsInFirstIndex = docsInFirstIndex; + this.config1 = config1; + this.config2 = config2; + } + + /// + /// Split an index based on a given primary key term + /// and a 'middle' term. If the middle term is present, it's + /// sent to dir2. + /// + public PKIndexSplitter(Version version, Directory input, Directory dir1, Directory dir2, Term midTerm) : this(version, input, dir1, dir2, new TermRangeFilter(midTerm.field(), null, midTerm.bytes(), true, false)) + { + } + + public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Term midTerm, IndexWriterConfig config1, IndexWriterConfig config2) : this(input, dir1, dir2, new TermRangeFilter(midTerm.field(), null, midTerm.bytes(), true, false), config1, config2) + { + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public void split() throws java.io.IOException + public virtual void Split() + { + bool success = false; + DirectoryReader reader = DirectoryReader.open(input); + try + { + // pass an individual config in here since one config can not be reused! 
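 + // The first pass keeps the documents matched by docsInFirstIndex (negateFilter == false) for dir1;
 + // the second pass negates the same filter so dir2 receives all remaining documents.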
+ createIndex(config1, dir1, reader, docsInFirstIndex, false); + createIndex(config2, dir2, reader, docsInFirstIndex, true); + success = true; + } + finally + { + if (success) + { + IOUtils.close(reader); + } + else + { + IOUtils.closeWhileHandlingException(reader); + } + } + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: private void createIndex(IndexWriterConfig config, org.apache.lucene.store.Directory target, IndexReader reader, org.apache.lucene.search.Filter preserveFilter, boolean negateFilter) throws java.io.IOException + private void createIndex(IndexWriterConfig config, Directory target, IndexReader reader, Filter preserveFilter, bool negateFilter) + { + bool success = false; +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final IndexWriter w = new IndexWriter(target, config); + IndexWriter w = new IndexWriter(target, config); + try + { +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final java.util.List leaves = reader.leaves(); + IList leaves = reader.leaves(); +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final IndexReader[] subReaders = new IndexReader[leaves.size()]; + IndexReader[] subReaders = new IndexReader[leaves.Count]; + int i = 0; + foreach (AtomicReaderContext ctx in leaves) + { + subReaders[i++] = new DocumentFilteredAtomicIndexReader(ctx, preserveFilter, negateFilter); + } + w.addIndexes(subReaders); + success = true; + } + finally + { + if (success) + { + IOUtils.close(w); + } + else + { + IOUtils.closeWhileHandlingException(w); + } + } + } + + private class DocumentFilteredAtomicIndexReader : FilterAtomicReader + { + internal readonly Bits liveDocs; + internal readonly int numDocs_Renamed; + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: public DocumentFilteredAtomicIndexReader(AtomicReaderContext context, org.apache.lucene.search.Filter preserveFilter, boolean negateFilter) throws java.io.IOException + public DocumentFilteredAtomicIndexReader(AtomicReaderContext context, Filter preserveFilter, bool negateFilter) : base(context.reader()) + { +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final int maxDoc = in.maxDoc(); + int maxDoc = @in.maxDoc(); +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final org.apache.lucene.util.FixedBitSet bits = new org.apache.lucene.util.FixedBitSet(maxDoc); + FixedBitSet bits = new FixedBitSet(maxDoc); + // ignore livedocs here, as we filter them later: +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final org.apache.lucene.search.DocIdSet docs = preserveFilter.getDocIdSet(context, null); + DocIdSet docs = preserveFilter.getDocIdSet(context, null); + if (docs != null) + { +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final org.apache.lucene.search.DocIdSetIterator it = docs.iterator(); + DocIdSetIterator it = docs.GetEnumerator(); + if (it != null) + { + bits.or(it); + } + } + if (negateFilter) + { + bits.flip(0, maxDoc); + } + + if (@in.hasDeletions()) + { +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final org.apache.lucene.util.Bits oldLiveDocs = in.getLiveDocs(); + Bits oldLiveDocs = @in.LiveDocs; + 
Debug.Assert(oldLiveDocs != null); +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final org.apache.lucene.search.DocIdSetIterator it = bits.iterator(); + DocIdSetIterator it = bits.GetEnumerator(); + for (int i = it.nextDoc(); i < maxDoc; i = it.nextDoc()) + { + if (!oldLiveDocs.get(i)) + { + // we can safely modify the current bit, as the iterator already stepped over it: + bits.clear(i); + } + } + } + + this.liveDocs = bits; + this.numDocs_Renamed = bits.cardinality(); + } + + public override int numDocs() + { + return numDocs_Renamed; + } + + public override Bits LiveDocs + { + get + { + return liveDocs; + } + } + } + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Index/Sorter/BlockJoinComparatorSource.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Misc/Index/Sorter/BlockJoinComparatorSource.cs b/src/Lucene.Net.Misc/Index/Sorter/BlockJoinComparatorSource.cs new file mode 100644 index 0000000..70ad20a --- /dev/null +++ b/src/Lucene.Net.Misc/Index/Sorter/BlockJoinComparatorSource.cs @@ -0,0 +1,321 @@ +using System; + +namespace org.apache.lucene.index.sorter +{ + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using DocIdSet = org.apache.lucene.search.DocIdSet; + using FieldComparator = org.apache.lucene.search.FieldComparator; + using FieldComparatorSource = org.apache.lucene.search.FieldComparatorSource; + using Filter = org.apache.lucene.search.Filter; + using IndexSearcher = org.apache.lucene.search.IndexSearcher; // javadocs + using Query = org.apache.lucene.search.Query; // javadocs + using ScoreDoc = org.apache.lucene.search.ScoreDoc; // javadocs + using Scorer = org.apache.lucene.search.Scorer; + using Sort = org.apache.lucene.search.Sort; + using SortField = org.apache.lucene.search.SortField; + using FixedBitSet = org.apache.lucene.util.FixedBitSet; + + /// + /// Helper class to sort readers that contain blocks of documents. + /// + /// Note that this class is intended to used with , + /// and for other purposes has some limitations: + ///
    + ///
 + /// - Cannot yet be used with
 + /// - Filling sort field values is not yet supported.
+ /// @lucene.experimental + ///
+ ///
+ // TODO: can/should we clean this thing up (e.g. return a proper sort value) + // and move to the join/ module? + public class BlockJoinComparatorSource : FieldComparatorSource + { + internal readonly Filter parentsFilter; + internal readonly Sort parentSort; + internal readonly Sort childSort; + + /// + /// Create a new BlockJoinComparatorSource, sorting only blocks of documents + /// with {@code parentSort} and not reordering children with a block. + /// + /// Filter identifying parent documents + /// Sort for parent documents + public BlockJoinComparatorSource(Filter parentsFilter, Sort parentSort) : this(parentsFilter, parentSort, new Sort(SortField.FIELD_DOC)) + { + } + + /// + /// Create a new BlockJoinComparatorSource, specifying the sort order for both + /// blocks of documents and children within a block. + /// + /// Filter identifying parent documents + /// Sort for parent documents + /// Sort for child documents in the same block + public BlockJoinComparatorSource(Filter parentsFilter, Sort parentSort, Sort childSort) + { + this.parentsFilter = parentsFilter; + this.parentSort = parentSort; + this.childSort = childSort; + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: @Override public org.apache.lucene.search.FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws java.io.IOException + public override FieldComparator newComparator(string fieldname, int numHits, int sortPos, bool reversed) + { + // we keep parallel slots: the parent ids and the child ids +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final int parentSlots[] = new int[numHits]; + int[] parentSlots = new int[numHits]; +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final int childSlots[] = new int[numHits]; + int[] childSlots = new int[numHits]; + + SortField[] parentFields = parentSort.Sort; +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final int parentReverseMul[] = new int[parentFields.length]; + int[] parentReverseMul = new int[parentFields.Length]; +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final org.apache.lucene.search.FieldComparator parentComparators[] = new org.apache.lucene.search.FieldComparator[parentFields.length]; +//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET: + FieldComparator[] parentComparators = new FieldComparator[parentFields.Length]; + for (int i = 0; i < parentFields.Length; i++) + { + parentReverseMul[i] = parentFields[i].Reverse ? - 1 : 1; + parentComparators[i] = parentFields[i].getComparator(1, i); + } + + SortField[] childFields = childSort.Sort; +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final int childReverseMul[] = new int[childFields.length]; + int[] childReverseMul = new int[childFields.Length]; +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final org.apache.lucene.search.FieldComparator childComparators[] = new org.apache.lucene.search.FieldComparator[childFields.length]; +//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET: + FieldComparator[] childComparators = new FieldComparator[childFields.Length]; + for (int i = 0; i < childFields.Length; i++) + { + childReverseMul[i] = childFields[i].Reverse ? 
- 1 : 1; + childComparators[i] = childFields[i].getComparator(1, i); + } + + // NOTE: we could return parent ID as value but really our sort "value" is more complex... + // So we throw UOE for now. At the moment you really should only use this at indexing time. + return new FieldComparatorAnonymousInnerClassHelper(this, parentSlots, childSlots, parentReverseMul, parentComparators, childReverseMul, childComparators); + } + + private class FieldComparatorAnonymousInnerClassHelper : FieldComparator + { + private readonly BlockJoinComparatorSource outerInstance; + + private int[] parentSlots; + private int[] childSlots; + private int[] parentReverseMul; +//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET: +//ORIGINAL LINE: private org.apache.lucene.search.FieldComparator[] parentComparators; + private FieldComparator[] parentComparators; + private int[] childReverseMul; +//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET: +//ORIGINAL LINE: private org.apache.lucene.search.FieldComparator[] childComparators; + private FieldComparator[] childComparators; + + public FieldComparatorAnonymousInnerClassHelper(BlockJoinComparatorSource outerInstance, int[] parentSlots, int[] childSlots, int[] parentReverseMul, FieldComparator[] parentComparators, int[] childReverseMul, FieldComparator[] childComparators) + { + this.outerInstance = outerInstance; + this.parentSlots = parentSlots; + this.childSlots = childSlots; + this.parentReverseMul = parentReverseMul; + this.parentComparators = parentComparators; + this.childReverseMul = childReverseMul; + this.childComparators = childComparators; + } + + internal int bottomParent; + internal int bottomChild; + internal FixedBitSet parentBits; + + public override int compare(int slot1, int slot2) + { + try + { + return compare(childSlots[slot1], parentSlots[slot1], childSlots[slot2], parentSlots[slot2]); + } + catch (IOException e) + { + throw new Exception(e); + } + } + + public override int Bottom + { + set + { + bottomParent = parentSlots[value]; + bottomChild = childSlots[value]; + } + } + + public override int? 
TopValue + { + set + { + // we dont have enough information (the docid is needed) + throw new System.NotSupportedException("this comparator cannot be used with deep paging"); + } + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: @Override public int compareBottom(int doc) throws java.io.IOException + public override int compareBottom(int doc) + { + return compare(bottomChild, bottomParent, doc, parent(doc)); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: @Override public int compareTop(int doc) throws java.io.IOException + public override int compareTop(int doc) + { + // we dont have enough information (the docid is needed) + throw new System.NotSupportedException("this comparator cannot be used with deep paging"); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: @Override public void copy(int slot, int doc) throws java.io.IOException + public override void copy(int slot, int doc) + { + childSlots[slot] = doc; + parentSlots[slot] = parent(doc); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: @Override public org.apache.lucene.search.FieldComparator setNextReader(org.apache.lucene.index.AtomicReaderContext context) throws java.io.IOException + public override FieldComparator setNextReader(AtomicReaderContext context) + { +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final org.apache.lucene.search.DocIdSet parents = parentsFilter.getDocIdSet(context, null); + DocIdSet parents = outerInstance.parentsFilter.getDocIdSet(context, null); + if (parents == null) + { + throw new IllegalStateException("AtomicReader " + context.reader() + " contains no parents!"); + } + if (!(parents is FixedBitSet)) + { + throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents); + } + parentBits = (FixedBitSet) parents; + for (int i = 0; i < parentComparators.Length; i++) + { + parentComparators[i] = parentComparators[i].setNextReader(context); + } + for (int i = 0; i < childComparators.Length; i++) + { + childComparators[i] = childComparators[i].setNextReader(context); + } + return this; + } + + public override int? value(int slot) + { + // really our sort "value" is more complex... 
+ throw new System.NotSupportedException("filling sort field values is not yet supported"); + } + + public override Scorer Scorer + { + set + { + base.Scorer = value; + //JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET: + //ORIGINAL LINE: for (org.apache.lucene.search.FieldComparator comp : parentComparators) + foreach (FieldComparator comp in parentComparators) + { + comp.Scorer = value; + } + //JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET: + //ORIGINAL LINE: for (org.apache.lucene.search.FieldComparator comp : childComparators) + foreach (FieldComparator comp in childComparators) + { + comp.Scorer = value; + } + } + } + + internal virtual int parent(int doc) + { + return parentBits.nextSetBit(doc); + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: int compare(int docID1, int parent1, int docID2, int parent2) throws java.io.IOException + internal virtual int compare(int docID1, int parent1, int docID2, int parent2) + { + if (parent1 == parent2) // both are in the same block + { + if (docID1 == parent1 || docID2 == parent2) + { + // keep parents at the end of blocks + return docID1 - docID2; + } + else + { + return compare(docID1, docID2, childComparators, childReverseMul); + } + } + else + { + int cmp = compare(parent1, parent2, parentComparators, parentReverseMul); + if (cmp == 0) + { + return parent1 - parent2; + } + else + { + return cmp; + } + } + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: int compare(int docID1, int docID2, org.apache.lucene.search.FieldComparator comparators[] , int reverseMul[]) throws java.io.IOException + internal virtual int compare(int docID1, int docID2, FieldComparator[] comparators, int[] reverseMul) + { + for (int i = 0; i < comparators.Length; i++) + { + // TODO: would be better if copy() didnt cause a term lookup in TermOrdVal & co, + // the segments are always the same here... + comparators[i].copy(0, docID1); + comparators[i].Bottom = 0; + int comp = reverseMul[i] * comparators[i].compareBottom(docID2); + if (comp != 0) + { + return comp; + } + } + return 0; // no need to docid tiebreak + } + } + + public override string ToString() + { + return "blockJoin(parentSort=" + parentSort + ",childSort=" + childSort + ")"; + } + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Index/Sorter/EarlyTerminatingSortingCollector.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Misc/Index/Sorter/EarlyTerminatingSortingCollector.cs b/src/Lucene.Net.Misc/Index/Sorter/EarlyTerminatingSortingCollector.cs new file mode 100644 index 0000000..654ba85 --- /dev/null +++ b/src/Lucene.Net.Misc/Index/Sorter/EarlyTerminatingSortingCollector.cs @@ -0,0 +1,147 @@ +namespace org.apache.lucene.index.sorter +{ + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using CollectionTerminatedException = org.apache.lucene.search.CollectionTerminatedException;
+ using Collector = org.apache.lucene.search.Collector;
+ using Scorer = org.apache.lucene.search.Scorer;
+ using Sort = org.apache.lucene.search.Sort;
+ using TopDocsCollector = org.apache.lucene.search.TopDocsCollector;
+ using TotalHitCountCollector = org.apache.lucene.search.TotalHitCountCollector;
+
+ ///
+ /// A {@code Collector} that early terminates collection of documents on a
+ /// per-segment basis, if the segment was sorted according to the given
+ /// {@code Sort}.
+ ///
+ ///
+ /// NOTE: the {@code Collector} detects sorted segments according to
+ /// {@code SortingMergePolicy}, so it's best used in conjunction with it. Also,
+ /// it collects up to a specified {@code numDocsToCollect} from each segment,
+ /// and therefore is mostly suitable for use in conjunction with collectors such as
+ /// {@code TopDocsCollector}, and not e.g. {@code TotalHitCountCollector}.
+ ///
+ ///
+ /// NOTE: If you wrap a {@code TopDocsCollector} that sorts in the same
+ /// order as the index order, the returned {@code TopDocs}
+ /// will be correct. However the total of {@link TopDocsCollector#getTotalHits()
+ /// hit count} will be underestimated since not all matching documents will have
+ /// been collected.
+ ///
+ ///
+ /// NOTE: This {@code Collector} uses the {@code Sort}'s identifier to detect
+ /// whether a segment was sorted with the same {@code Sort}. This has
+ /// two implications:
+ ///
+ /// • if a custom comparator is not implemented correctly and returns
+ /// different identifiers for equivalent instances, this collector will not
+ /// detect sorted segments,
+ /// • if you suddenly change the {@code IndexWriter}'s
+ /// {@code SortingMergePolicy} to sort according to another criterion and if both
+ /// the old and the new {@code Sort}s have the same identifier, this
+ /// {@code Collector} will incorrectly detect sorted segments.
+ /// + /// @lucene.experimental + ///
+ ///
+ public class EarlyTerminatingSortingCollector : Collector + { + /// + /// The wrapped Collector + protected internal readonly Collector @in; + /// + /// Sort used to sort the search results + protected internal readonly Sort sort; + /// + /// Number of documents to collect in each segment + protected internal readonly int numDocsToCollect; + /// + /// Number of documents to collect in the current segment being processed + protected internal int segmentTotalCollect; + /// + /// True if the current segment being processed is sorted by + protected internal bool segmentSorted; + + private int numCollected; + + /// + /// Create a new instance. + /// + /// + /// the collector to wrap + /// + /// the sort you are sorting the search results on + /// + /// the number of documents to collect on each segment. When wrapping + /// a , this number should be the number of + /// hits. + public EarlyTerminatingSortingCollector(Collector @in, Sort sort, int numDocsToCollect) + { + if (numDocsToCollect <= 0) + { + throw new IllegalStateException("numDocsToCollect must always be > 0, got " + segmentTotalCollect); + } + this.@in = @in; + this.sort = sort; + this.numDocsToCollect = numDocsToCollect; + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: @Override public void setScorer(org.apache.lucene.search.Scorer scorer) throws java.io.IOException + public override Scorer Scorer + { + set + { + @in.Scorer = value; + } + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: @Override public void collect(int doc) throws java.io.IOException + public override void collect(int doc) + { + @in.collect(doc); + if (++numCollected >= segmentTotalCollect) + { + throw new CollectionTerminatedException(); + } + } + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: @Override public void setNextReader(org.apache.lucene.index.AtomicReaderContext context) throws java.io.IOException + public override AtomicReaderContext NextReader + { + set + { + @in.NextReader = value; + segmentSorted = SortingMergePolicy.isSorted(value.reader(), sort); + segmentTotalCollect = segmentSorted ? numDocsToCollect : int.MaxValue; + numCollected = 0; + } + } + + public override bool acceptsDocsOutOfOrder() + { + return !segmentSorted && @in.acceptsDocsOutOfOrder(); + } + + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Index/Sorter/Sorter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Misc/Index/Sorter/Sorter.cs b/src/Lucene.Net.Misc/Index/Sorter/Sorter.cs new file mode 100644 index 0000000..f315b9c --- /dev/null +++ b/src/Lucene.Net.Misc/Index/Sorter/Sorter.cs @@ -0,0 +1,404 @@ +using System; +using System.Diagnostics; + +namespace org.apache.lucene.index.sorter +{ + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + using FieldComparator = org.apache.lucene.search.FieldComparator; + using Scorer = org.apache.lucene.search.Scorer; + using Sort = org.apache.lucene.search.Sort; + using SortField = org.apache.lucene.search.SortField; + using TimSorter = org.apache.lucene.util.TimSorter; + using MonotonicAppendingLongBuffer = org.apache.lucene.util.packed.MonotonicAppendingLongBuffer; + + /// + /// Sorts documents of a given index by returning a permutation on the document + /// IDs. + /// @lucene.experimental + /// + internal sealed class Sorter + { + internal readonly Sort sort_Renamed; + + /// + /// Creates a new Sorter to sort the index with {@code sort} + internal Sorter(Sort sort) + { + if (sort.needsScores()) + { + throw new System.ArgumentException("Cannot sort an index with a Sort that refers to the relevance score"); + } + this.sort_Renamed = sort; + } + + /// + /// A permutation of doc IDs. For every document ID between 0 and + /// , oldToNew(newToOld(docID)) must + /// return docID. + /// + internal abstract class DocMap + { + + /// + /// Given a doc ID from the original index, return its ordinal in the + /// sorted index. + /// + internal abstract int oldToNew(int docID); + + /// + /// Given the ordinal of a doc ID, return its doc ID in the original index. + internal abstract int newToOld(int docID); + + /// + /// Return the number of documents in this map. This must be equal to the + /// of the + /// which is sorted. + /// + internal abstract int size(); + } + + /// + /// Check consistency of a , useful for assertions. + internal static bool isConsistent(DocMap docMap) + { +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final int maxDoc = docMap.size(); + int maxDoc = docMap.size(); + for (int i = 0; i < maxDoc; ++i) + { +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final int newID = docMap.oldToNew(i); + int newID = docMap.oldToNew(i); +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final int oldID = docMap.newToOld(newID); + int oldID = docMap.newToOld(newID); + Debug.Assert(newID >= 0 && newID < maxDoc, "doc IDs must be in [0-" + maxDoc + "[, got " + newID); + Debug.Assert(i == oldID, "mapping is inconsistent: " + i + " --oldToNew--> " + newID + " --newToOld--> " + oldID); + if (i != oldID || newID < 0 || newID >= maxDoc) + { + return false; + } + } + return true; + } + + /// + /// A comparator of doc IDs. + internal abstract class DocComparator + { + + /// + /// Compare docID1 against docID2. The contract for the return value is the + /// same as . 
+ /// + public abstract int compare(int docID1, int docID2); + + } + + private sealed class DocValueSorter : TimSorter + { + + internal readonly int[] docs; + internal readonly Sorter.DocComparator comparator; + internal readonly int[] tmp; + + internal DocValueSorter(int[] docs, Sorter.DocComparator comparator) : base(docs.Length / 64) + { + this.docs = docs; + this.comparator = comparator; + tmp = new int[docs.Length / 64]; + } + + protected internal override int compare(int i, int j) + { + return comparator.compare(docs[i], docs[j]); + } + + protected internal override void swap(int i, int j) + { + int tmpDoc = docs[i]; + docs[i] = docs[j]; + docs[j] = tmpDoc; + } + + protected internal override void copy(int src, int dest) + { + docs[dest] = docs[src]; + } + + protected internal override void save(int i, int len) + { + Array.Copy(docs, i, tmp, 0, len); + } + + protected internal override void restore(int i, int j) + { + docs[j] = tmp[i]; + } + + protected internal override int compareSaved(int i, int j) + { + return comparator.compare(tmp[i], docs[j]); + } + } + + /// + /// Computes the old-to-new permutation over the given comparator. +//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET: +//ORIGINAL LINE: private static Sorter.DocMap sort(final int maxDoc, DocComparator comparator) + private static Sorter.DocMap sort(int maxDoc, DocComparator comparator) + { + // check if the index is sorted + bool sorted = true; + for (int i = 1; i < maxDoc; ++i) + { + if (comparator.compare(i - 1, i) > 0) + { + sorted = false; + break; + } + } + if (sorted) + { + return null; + } + + // sort doc IDs +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final int[] docs = new int[maxDoc]; + int[] docs = new int[maxDoc]; + for (int i = 0; i < maxDoc; i++) + { + docs[i] = i; + } + + DocValueSorter sorter = new DocValueSorter(docs, comparator); + // It can be common to sort a reader, add docs, sort it again, ... 
and in + // that case timSort can save a lot of time + sorter.sort(0, docs.Length); // docs is now the newToOld mapping + + // The reason why we use MonotonicAppendingLongBuffer here is that it + // wastes very little memory if the index is in random order but can save + // a lot of memory if the index is already "almost" sorted +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final org.apache.lucene.util.packed.MonotonicAppendingLongBuffer newToOld = new org.apache.lucene.util.packed.MonotonicAppendingLongBuffer(); + MonotonicAppendingLongBuffer newToOld = new MonotonicAppendingLongBuffer(); + for (int i = 0; i < maxDoc; ++i) + { + newToOld.add(docs[i]); + } + newToOld.freeze(); + + for (int i = 0; i < maxDoc; ++i) + { + docs[(int) newToOld.get(i)] = i; + } // docs is now the oldToNew mapping + +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final org.apache.lucene.util.packed.MonotonicAppendingLongBuffer oldToNew = new org.apache.lucene.util.packed.MonotonicAppendingLongBuffer(); + MonotonicAppendingLongBuffer oldToNew = new MonotonicAppendingLongBuffer(); + for (int i = 0; i < maxDoc; ++i) + { + oldToNew.add(docs[i]); + } + oldToNew.freeze(); + + return new DocMapAnonymousInnerClassHelper(maxDoc, newToOld, oldToNew); + } + + private class DocMapAnonymousInnerClassHelper : Sorter.DocMap + { + private int maxDoc; + private MonotonicAppendingLongBuffer newToOld; + private MonotonicAppendingLongBuffer oldToNew; + + public DocMapAnonymousInnerClassHelper(int maxDoc, MonotonicAppendingLongBuffer newToOld, MonotonicAppendingLongBuffer oldToNew) + { + this.maxDoc = maxDoc; + this.newToOld = newToOld; + this.oldToNew = oldToNew; + } + + + public override int oldToNew(int docID) + { + return (int) oldToNew.get(docID); + } + + public override int newToOld(int docID) + { + return (int) newToOld.get(docID); + } + + public override int size() + { + return maxDoc; + } + } + + /// + /// Returns a mapping from the old document ID to its new location in the + /// sorted index. Implementations can use the auxiliary + /// to compute the old-to-new permutation + /// given a list of documents and their corresponding values. + /// + /// A return value of null is allowed and means that + /// reader is already sorted. + /// + /// + /// NOTE: deleted documents are expected to appear in the mapping as + /// well, they will however be marked as deleted in the sorted view. + /// + /// +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: DocMap sort(org.apache.lucene.index.AtomicReader reader) throws java.io.IOException + internal DocMap sort(AtomicReader reader) + { + SortField[] fields = sort_Renamed.Sort; +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final int reverseMul[] = new int[fields.length]; + int[] reverseMul = new int[fields.Length]; +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final org.apache.lucene.search.FieldComparator comparators[] = new org.apache.lucene.search.FieldComparator[fields.length]; +//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET: + FieldComparator[] comparators = new FieldComparator[fields.Length]; + + for (int i = 0; i < fields.Length; i++) + { + reverseMul[i] = fields[i].Reverse ? 
- 1 : 1; + comparators[i] = fields[i].getComparator(1, i); + comparators[i].NextReader = reader.Context; + comparators[i].Scorer = FAKESCORER; + } +//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': +//ORIGINAL LINE: final DocComparator comparator = new DocComparator() + DocComparator comparator = new DocComparatorAnonymousInnerClassHelper(this, reverseMul, comparators); + return sort(reader.maxDoc(), comparator); + } + + private class DocComparatorAnonymousInnerClassHelper : DocComparator + { + private readonly Sorter outerInstance; + + private int[] reverseMul; +//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET: +//ORIGINAL LINE: private org.apache.lucene.search.FieldComparator[] comparators; + private FieldComparator[] comparators; + + public DocComparatorAnonymousInnerClassHelper(Sorter outerInstance, int[] reverseMul, FieldComparator[] comparators) + { + this.outerInstance = outerInstance; + this.reverseMul = reverseMul; + this.comparators = comparators; + } + + public override int compare(int docID1, int docID2) + { + try + { + for (int i = 0; i < comparators.Length; i++) + { + // TODO: would be better if copy() didnt cause a term lookup in TermOrdVal & co, + // the segments are always the same here... + comparators[i].copy(0, docID1); + comparators[i].Bottom = 0; + int comp = reverseMul[i] * comparators[i].compareBottom(docID2); + if (comp != 0) + { + return comp; + } + } + return int.compare(docID1, docID2); // docid order tiebreak + } + catch (IOException e) + { + throw new Exception(e); + } + } + } + + /// + /// Returns the identifier of this . + /// This identifier is similar to and should be + /// chosen so that two instances of this class that sort documents likewise + /// will have the same identifier. On the contrary, this identifier should be + /// different on different . + /// + /// + public string ID + { + get + { + return sort_Renamed.ToString(); + } + } + + public override string ToString() + { + return ID; + } + + internal static readonly Scorer FAKESCORER = new ScorerAnonymousInnerClassHelper(); + + private class ScorerAnonymousInnerClassHelper : Scorer + { + public ScorerAnonymousInnerClassHelper() : base(null) + { + } + + +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: @Override public float score() throws java.io.IOException + public override float score() + { + throw new System.NotSupportedException(); + } +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: @Override public int freq() throws java.io.IOException + public override int freq() + { + throw new System.NotSupportedException(); + } + public override int docID() + { + throw new System.NotSupportedException(); + } +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: @Override public int nextDoc() throws java.io.IOException + public override int nextDoc() + { + throw new System.NotSupportedException(); + } +//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: +//ORIGINAL LINE: @Override public int advance(int target) throws java.io.IOException + public override int advance(int target) + { + throw new System.NotSupportedException(); + } + public override long cost() + { + throw new System.NotSupportedException(); + } + } + + } + +} \ No newline at end of file
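
Usage sketch: once this skeleton compiles, the classes above are intended to be combined the same way as the
Lucene 4.x Java originals. SortingMergePolicy keeps merged segments ordered by a given Sort, and
EarlyTerminatingSortingCollector then stops collecting on such segments after numDocsToCollect hits by throwing
CollectionTerminatedException, which IndexSearcher treats as "this segment is exhausted". The C# below is only an
illustrative sketch; identifiers that do not appear in this diff (TopFieldCollector.Create and its argument list,
the IndexWriterConfig.MergePolicy property, the SortingMergePolicy constructor arguments, SortField.Type, and the
pre-existing matchVersion, analyzer, searcher and query variables) are assumptions about how the port will
eventually expose the Java API.

    // Index time: wrap the existing merge policy so that merged segments stay
    // sorted by "timestamp" (descending). Identifiers not visible in this diff
    // are assumed/hypothetical.
    Sort sort = new Sort(new SortField("timestamp", SortField.Type.LONG, true));
    IndexWriterConfig cfg = new IndexWriterConfig(matchVersion, analyzer);
    cfg.MergePolicy = new SortingMergePolicy(cfg.MergePolicy, sort);
    // ... build an IndexWriter from cfg and add documents ...

    // Search time: collect at most numHits documents per segment. On segments that
    // SortingMergePolicy.isSorted() recognises as sorted by 'sort', the wrapping
    // EarlyTerminatingSortingCollector throws CollectionTerminatedException once
    // numHits documents have been collected.
    int numHits = 10;
    TopFieldCollector topCollector = TopFieldCollector.Create(sort, numHits, true, false, false, false);
    searcher.Search(query, new EarlyTerminatingSortingCollector(topCollector, sort, numHits));
    TopDocs hits = topCollector.TopDocs();   // TotalHits may be underestimated, per the class comment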