lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r823153 - in /lucene/java/trunk/contrib: CHANGES.txt misc/src/java/org/apache/lucene/index/IndexSplitter.java misc/src/test/org/apache/lucene/index/TestIndexSplitter.java
Date Thu, 08 Oct 2009 12:50:20 GMT
Author: mikemccand
Date: Thu Oct  8 12:50:19 2009
New Revision: 823153

URL: http://svn.apache.org/viewvc?rev=823153&view=rev
Log:
LUCENE-1959: add IndexSplitter tool to pull segment files out of an index into another

Added:
    lucene/java/trunk/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java   (with props)
    lucene/java/trunk/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java   (with props)
Modified:
    lucene/java/trunk/contrib/CHANGES.txt

Modified: lucene/java/trunk/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/CHANGES.txt?rev=823153&r1=823152&r2=823153&view=diff
==============================================================================
--- lucene/java/trunk/contrib/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/CHANGES.txt Thu Oct  8 12:50:19 2009
@@ -33,6 +33,11 @@
    segment merges to give better search performance in a mixed
    indexing/searching environment.  (John Wang via Mike McCandless)
 
+ * LUCENE-1959: Add IndexSplitter tool, to copy specific segments out
+   of the index into a new index.  It can also list the segments in
+   the index, and delete specified segments.  (Jason Rutherglen via
+   Mike McCandless)
+
 Optimizations
 
 Documentation

Added: lucene/java/trunk/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java?rev=823153&view=auto
==============================================================================
--- lucene/java/trunk/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java (added)
+++ lucene/java/trunk/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java Thu Oct  8 12:50:19 2009
@@ -0,0 +1,163 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.text.DecimalFormat;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.store.FSDirectory;
+
+/**
+ * Command-line tool that enables listing segments in an
+ * index, copying specific segments to another index, and
+ * deleting segments from an index.
+ *
+ * <p>This tool does file-level copying of segments files.
+ * This means it's unable to split apart a single segment
+ * into multiple segments.  For example if your index is
+ * optimized, this tool won't help.  Also, it does basic
+ * file-level copying (using simple
+ * File{In,Out}putStream) so it will not work with non
+ * FSDirectory Directory impls.</p>
+ *
+ * <p><b>NOTE</b>: The tool is experimental and might change
+ * in incompatible ways in the next release.  You can easily
+ * accidentally remove segments from your index so be
+ * careful!
+ */
+public class IndexSplitter {
+  public SegmentInfos infos;
+
+  FSDirectory fsDir;
+
+  File dir;
+
+  /**
+   * @param args
+   */
+  public static void main(String[] args) throws Exception {
+    if (args.length < 2) {
+      System.err
+          .println("Usage: IndexSplitter <srcDir> -l (list the segments and their sizes)");
+      System.err.println("IndexSplitter <srcDir> <destDir> <segments>+");
+      System.err
+          .println("IndexSplitter <srcDir> -d (delete the following segments)");
+      return;
+    }
+    File srcDir = new File(args[0]);
+    IndexSplitter is = new IndexSplitter(srcDir);
+    if (!srcDir.exists()) {
+      throw new Exception("srcdir:" + srcDir.getAbsolutePath()
+          + " doesn't exist");
+    }
+    if (args[1].equals("-l")) {
+      is.listSegments();
+    } else if (args[1].equals("-d")) {
+      List<String> segs = new ArrayList<String>();
+      for (int x = 2; x < args.length; x++) {
+        segs.add(args[x]);
+      }
+      is.remove((String[]) segs.toArray(new String[0]));
+    } else {
+      File targetDir = new File(args[1]);
+      List<String> segs = new ArrayList<String>();
+      for (int x = 2; x < args.length; x++) {
+        segs.add(args[x]);
+      }
+      is.split(targetDir, (String[]) segs.toArray(new String[0]));
+    }
+  }
+
+  public IndexSplitter(File dir) throws IOException {
+    this.dir = dir;
+    fsDir = FSDirectory.open(dir);
+    infos = new SegmentInfos();
+    infos.read(fsDir);
+  }
+
+  public void listSegments() throws IOException {
+    DecimalFormat formatter = new DecimalFormat("###,###.###");
+    for (int x = 0; x < infos.size(); x++) {
+      SegmentInfo info = infos.info(x);
+      String sizeStr = formatter.format(info.sizeInBytes());
+      System.out.println(info.name + " " + sizeStr);
+    }
+  }
+
+  private int getIdx(String name) {
+    for (int x = 0; x < infos.size(); x++) {
+      if (name.equals(infos.info(x).name))
+        return x;
+    }
+    return -1;
+  }
+
+  private SegmentInfo getInfo(String name) {
+    for (int x = 0; x < infos.size(); x++) {
+      if (name.equals(infos.info(x).name))
+        return infos.info(x);
+    }
+    return null;
+  }
+
+  public void remove(String[] segs) throws IOException {
+    for (String n : segs) {
+      int idx = getIdx(n);
+      infos.remove(idx);
+    }
+    infos.commit(fsDir);
+  }
+
+  public void split(File destDir, String[] segs) throws IOException {
+    destDir.mkdirs();
+    FSDirectory destFSDir = FSDirectory.open(destDir);
+    SegmentInfos destInfos = new SegmentInfos();
+    for (String n : segs) {
+      SegmentInfo info = getInfo(n);
+      destInfos.add(info);
+      // now copy files over
+      List files = info.files();
+      for (int x = 0; x < files.size(); x++) {
+        String srcName = (String) files.get(x);
+        File srcFile = new File(dir, srcName);
+        File destFile = new File(destDir, srcName);
+        copyFile(srcFile, destFile);
+      }
+    }
+    destInfos.commit(destFSDir);
+    // System.out.println("destDir:"+destDir.getAbsolutePath());
+  }
+
+  private static void copyFile(File src, File dst) throws IOException {
+    InputStream in = new FileInputStream(src);
+    OutputStream out = new FileOutputStream(dst);
+    byte[] buf = new byte[32*1024];
+    int len;
+    while ((len = in.read(buf)) > 0) {
+      out.write(buf, 0, len);
+    }
+    in.close();
+    out.close();
+  }
+}

Propchange: lucene/java/trunk/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java?rev=823153&view=auto
==============================================================================
--- lucene/java/trunk/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java (added)
+++ lucene/java/trunk/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java Thu Oct  8 12:50:19 2009
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+import java.io.File;
+
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexWriter.MaxFieldLength;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+
+public class TestIndexSplitter extends LuceneTestCase {
+  /**
+   * Builds an index with three commits (100, 50 and 50 documents), then
+   * checks that {@link IndexSplitter} can copy the second segment out through
+   * both the API and the command line, and can delete it from the source.
+   */
+  public void test() throws Exception {
+    String tmpDir = System.getProperty("java.io.tmpdir");
+    File dir = freshDir(tmpDir, "testfilesplitter");
+    File destDir = freshDir(tmpDir, "testfilesplitterdest");
+    FSDirectory fsDir = FSDirectory.open(dir);
+    IndexWriter iw = new IndexWriter(fsDir, new WhitespaceAnalyzer(), true, MaxFieldLength.UNLIMITED);
+    addDocs(iw, 0, 100, "index");
+    addDocs(iw, 100, 150, "index2");
+    addDocs(iw, 150, 200, "index3");
+    IndexReader r = iw.getReader();
+    assertEquals(3, r.getSequentialSubReaders().length);
+    r.close();
+    iw.close();
+    fsDir.close();
+    // we should have 3 segments now; split out the middle one (50 docs)
+    IndexSplitter is = new IndexSplitter(dir);
+    String splitSegName = is.infos.info(1).name;
+    is.split(destDir, new String[] {splitSegName});
+    r = IndexReader.open(FSDirectory.open(destDir), true);
+    assertEquals(50, r.maxDoc());
+    r.close();
+    
+    // now test cmdline
+    File destDir2 = freshDir(tmpDir, "testfilesplitterdest2");
+    IndexSplitter.main(new String[] {dir.getAbsolutePath(), destDir2.getAbsolutePath(), splitSegName});
+    assertEquals(3, destDir2.listFiles().length);
+    r = IndexReader.open(FSDirectory.open(destDir2), true);
+    assertEquals(50, r.maxDoc());
+    r.close();
+    
+    // now remove the copied segment from src
+    IndexSplitter.main(new String[] {dir.getAbsolutePath(), "-d", splitSegName});
+    r = IndexReader.open(FSDirectory.open(dir), true);
+    assertEquals(2, r.getSequentialSubReaders().length);
+    r.close();
+  }
+
+  /** Returns an empty directory <code>name</code> under <code>parent</code>, removing any previous contents. */
+  private static File freshDir(String parent, String name) throws Exception {
+    File d = new File(parent, name);
+    _TestUtil.rmDir(d);
+    d.mkdirs();
+    return d;
+  }
+
+  /** Adds documents numbered <code>from</code> (inclusive) to <code>to</code> (exclusive), then commits. */
+  private static void addDocs(IndexWriter iw, int from, int to, String indexName) throws Exception {
+    for (int x = from; x < to; x++) {
+      Document doc = TestIndexWriterReader.createDocument(x, indexName, 5);
+      iw.addDocument(doc);
+    }
+    iw.commit();
+  }
+}

Propchange: lucene/java/trunk/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message