Return-Path: X-Original-To: apmail-lucene-java-user-archive@www.apache.org Delivered-To: apmail-lucene-java-user-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 0472E69EB for ; Thu, 9 Jun 2011 11:24:39 +0000 (UTC) Received: (qmail 21007 invoked by uid 500); 9 Jun 2011 11:24:36 -0000 Delivered-To: apmail-lucene-java-user-archive@lucene.apache.org Received: (qmail 20959 invoked by uid 500); 9 Jun 2011 11:24:36 -0000 Mailing-List: contact java-user-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: java-user@lucene.apache.org Delivered-To: mailing list java-user@lucene.apache.org Received: (qmail 20950 invoked by uid 99); 9 Jun 2011 11:24:36 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 09 Jun 2011 11:24:36 +0000 X-ASF-Spam-Status: No, hits=-0.0 required=5.0 tests=SPF_PASS X-Spam-Check-By: apache.org Received-SPF: pass (nike.apache.org: local policy) Received: from [62.213.161.134] (HELO pmx.sirma.bg) (62.213.161.134) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 09 Jun 2011 11:24:28 +0000 X-Virus-Scanned: Sirma Antivirus System Received: from [192.168.128.140] (ivasilev.sirma.int [192.168.128.140]) by pmx.sirma.bg (Sirma mail system) with ESMTP id 322D124005 for ; Thu, 9 Jun 2011 14:24:07 +0300 (EEST) Message-ID: <4DF0AD57.6060102@sirma.bg> Date: Thu, 09 Jun 2011 14:24:07 +0300 From: Ivan Vasilev User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US; rv:1.9.2.17) Gecko/20110414 Thunderbird/3.1.10 MIME-Version: 1.0 To: LUCENE MAIL LIST Subject: Bug fix to contrib/.../IndexSplitter Content-Type: multipart/mixed; boundary="------------070000040909000206070708" X-Virus-Checked: Checked by ClamAV on apache.org --------------070000040909000206070708 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Hi Guys, I would like to fix a class in contrib/misc/src/java/org/apache/lucene/index called IndexSplitter. It has a bug - when splits the segments in separate index the segment descriptor file contains a wrong data - the number (the name) of next segment to generate is 0. Although it can not cause exception in some cases (depends on existing segment names and the number of newly generated ones) in most of cases it do cases Exception. I do not know if I would have rights to submit this fix to Lucene contrib dir but I am attaching the fix and a test that shows the exception when using original class and there is no exception when using fixing class. Cheers, Ivan --------------070000040909000206070708 Content-Type: text/plain; name="TestIndexSplitter.java" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="TestIndexSplitter.java" /** * */ package test; import java.io.File; import java.io.IOException; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexSplitter; import org.apache.lucene.index.IndexSplitterFixed; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; /** * @author ivasilev * */ public class TestIndexSplitter { private static File INDEX_PATH = new File("E:/Temp/ContribIndexSpliter/index"); private static File INDEX_SPLIT_PATH = new File("E:/Temp/ContribIndexSpliter/splitIndex"); private static File INDEX_SPLIT_FIXED_PATH = new File("E:/Temp/ContribIndexSpliter/splitFixedIndex"); public static void main(String[] args) throws IOException { initDirs(); createIndex(); splitIndexWithBothSplitters(); deleteFirstDocAndOptimize(INDEX_SPLIT_FIXED_PATH); readIndex(INDEX_SPLIT_FIXED_PATH); deleteFirstDocAndOptimize(INDEX_SPLIT_PATH); // might throw exception readIndex(INDEX_SPLIT_PATH); // surely throws Exception } private static void initDirs() { initDir(INDEX_PATH); initDir(INDEX_SPLIT_PATH); initDir(INDEX_SPLIT_FIXED_PATH); } private static void initDir(File dir) { if ( ! dir.exists()) { dir.mkdirs(); } for (File currFile : dir.listFiles()) { if (currFile.isFile()) { currFile.delete(); } } } private static void createIndex() throws IOException { IndexWriter iw = null; try { IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_32, new StandardAnalyzer(Version.LUCENE_32)); iwConfig.setOpenMode(OpenMode.CREATE); iw = new IndexWriter(FSDirectory.open(INDEX_PATH), iwConfig); Document doc = new Document(); doc.add(new Field("content", "doc 1", Store.YES, Index.ANALYZED_NO_NORMS)); iw.addDocument(doc); doc = new Document(); doc.add(new Field("content", "doc 2", Store.YES, Index.ANALYZED_NO_NORMS)); iw.addDocument(doc); iw.close(); } finally { if (iw != null) { iw.close(); } } } private static void splitIndexWithBothSplitters() throws IOException { IndexSplitter is = new IndexSplitter(INDEX_PATH); is.split(INDEX_SPLIT_PATH, new String[] { "_0" }); IndexSplitterFixed isf = new IndexSplitterFixed(INDEX_PATH); isf.split(INDEX_SPLIT_FIXED_PATH, new String[] { "_0" }); } private static void deleteFirstDocAndOptimize(File indexDir) throws IOException { IndexReader ir = null; IndexWriter iw = null; try { ir = IndexReader.open(FSDirectory.open(indexDir), false); ir.deleteDocument(0); ir.close(); IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_32, new StandardAnalyzer(Version.LUCENE_32)); iw = new IndexWriter(FSDirectory.open(indexDir), iwConfig); iw.optimize(); } finally { if (ir != null) { ir.close(); } if (iw != null) { iw.close(); } } } private static void readIndex(File indexDir) throws IOException { IndexReader ir = null; try { ir = IndexReader.open(FSDirectory.open(indexDir)); System.out.println(indexDir.getPath() + " index -> ir.numDocs = " + ir.numDocs()); } finally { if (ir != null) { ir.close(); } } } } --------------070000040909000206070708 Content-Type: text/plain; name="IndexSplitterFixed.java" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="IndexSplitterFixed.java" /** * */ package org.apache.lucene.index; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.List; import org.apache.lucene.store.FSDirectory; /** * @author ivasilev * */ public class IndexSplitterFixed extends IndexSplitter { public IndexSplitterFixed(File dir) throws IOException { super(dir); } public void split(File destDir, String[] segs) throws IOException { destDir.mkdirs(); FSDirectory destFSDir = FSDirectory.open(destDir); SegmentInfos destInfos = new SegmentInfos(); destInfos.counter = nextSegmentName(segs); // the fix for (String n : segs) { SegmentInfo info = getInfo(n); destInfos.add(info); // now copy files over List files = info.files(); for (final String srcName : files) { File srcFile = new File(dir, srcName); File destFile = new File(destDir, srcName); copyFile(srcFile, destFile); } } destInfos.changed(); destInfos.commit(destFSDir); // System.out.println("destDir:"+destDir.getAbsolutePath()); } private int nextSegmentName(String ... segs) { int ret = 0; for (String currSeg : segs) { int currSegNum = Integer.parseInt(currSeg.substring(1), 36); if (ret < currSegNum) { ret = currSegNum; } } return ++ret; } // following methods just copyied from IndexSplitter (as there are private) private SegmentInfo getInfo(String name) { for (int x = 0; x < infos.size(); x++) { if (name.equals(infos.info(x).name)) return infos.info(x); } return null; } private static final byte[] copyBuffer = new byte[32*1024]; private static void copyFile(File src, File dst) throws IOException { InputStream in = new FileInputStream(src); OutputStream out = new FileOutputStream(dst); int len; while ((len = in.read(copyBuffer)) > 0) { out.write(copyBuffer, 0, len); } in.close(); out.close(); } } --------------070000040909000206070708 Content-Type: text/plain; charset=us-ascii --------------------------------------------------------------------- To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org For additional commands, e-mail: java-user-help@lucene.apache.org --------------070000040909000206070708--