lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Lance Norskog <goks...@gmail.com>
Subject Re: segment_N file is missed
Date Sat, 19 Jun 2010 06:40:21 GMT
This code is old (2006!) and I've updated it for Lucene 2.9.2 and the
trunk. This version has only been tested with a single CFS file. The
code does not check versions carefully. Here are both versions:

Lucene 2.9.2:
--------------------------------
package org.apache.lucene.index;

import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IndexInput;

/**
 * Recovery utility for an index directory whose segments file is missing.
 * It registers every compound ({@code .cfs}) file found in the directory as
 * a segment and commits a new {@link SegmentInfos} for them.
 *
 * <p>Open questions carried over from the original author: a RAMDirectory
 * could be used to do the fix-up if needed, and it is unverified whether
 * {@link CompoundFileReader} works directly for this purpose.
 */
public class CFSopen {

  /** Index location used by {@link #main} when no path argument is given. */
  private static final String DEFAULT_INDEX_PATH = "/cygwin/tmp/index";

  /**
   * Writes a new segments file that lists every {@code .cfs} file in the
   * given index directory.
   *
   * <p>Each compound file name is expected to look like {@code _<n>.cfs},
   * where {@code <n>} is a number in radix {@link Character#MAX_RADIX}; the
   * first character is skipped and the rest, up to the last dot, is parsed.
   * The segment counter is set to one more than the largest number seen.
   *
   * @param path file-system path of the index directory
   * @return the opened directory; the caller is responsible for closing it
   * @throws IOException if the directory cannot be listed, or if reading a
   *         compound file or committing the segments file fails
   * @throws NumberFormatException if a {@code .cfs} file name does not
   *         contain a parseable segment number
   */
  Directory fixIndex(String path) throws IOException {
    File file = new File(path);
    Directory directory = FSDirectory.getDirectory(file, false);

    boolean success = false;
    try {
      String[] files = file.list(new FilenameFilter() {
        public boolean accept(File dir, String name) {
          return name.endsWith(".cfs");
        }
      });
      // File.list returns null (not an empty array) when the path is not a
      // directory or cannot be read; fail clearly instead of with an NPE.
      if (files == null) {
        throw new IOException("Cannot list index directory: " + file.getAbsolutePath());
      }

      SegmentInfos infos = new SegmentInfos();
      int counter = 0;
      for (int i = 0; i < files.length; i++) {
        String fileName = files[i];
        int dot = fileName.lastIndexOf('.');

        // Skip the leading character and parse the remaining segment number.
        String segmentNumber = fileName.substring(1, dot);
        int segmentInt = Integer.parseInt(segmentNumber, Character.MAX_RADIX);
        counter = Math.max(counter, segmentInt);

        // The segment name is the file name without its extension.
        String segmentName = fileName.substring(0, dot);
        int size = countDocs(directory, fileName);

        SegmentInfo segmentInfo = new SegmentInfo(segmentName, size, directory);
        infos.addElement(segmentInfo);
      }

      // The next segment to be created must get a number above all existing ones.
      infos.counter = ++counter;

      infos.prepareCommit(directory);
      infos.finishCommit(directory);
      success = true;
      return directory;
    } finally {
      if (!success) {
        try {
          directory.close();
        } catch (IOException ignored) {
          // Deliberately ignored so the original failure is the one reported.
        }
      }
    }
  }

  /**
   * Derives a segment's document count from the length of the stored-fields
   * index inside its compound file, always releasing both handles.
   *
   * @param directory directory holding the compound file
   * @param cfsName name of the compound file within {@code directory}
   * @return the {@code .fdx} length divided by 8 (presumably 8 bytes per
   *         document entry -- TODO confirm against the file format)
   * @throws IOException if the compound file or its sub-file cannot be read
   */
  private static int countDocs(Directory directory, String cfsName) throws IOException {
    Directory fileReader = new CompoundFileReader(directory, cfsName);
    try {
      // NOTE(review): the sub-file is looked up by the bare extension ".fdx";
      // confirm the compound file's entries are keyed that way in this
      // Lucene version.
      IndexInput indexStream = fileReader.openInput(".fdx");
      try {
        return (int) (indexStream.length() / 8);
      } finally {
        indexStream.close();
      }
    } finally {
      fileReader.close();
    }
  }

  /**
   * Command-line entry point.
   *
   * @param args optional; {@code args[0]} is the index directory path,
   *        defaulting to {@code /cygwin/tmp/index} when absent
   * @throws IOException if the index cannot be repaired
   */
  public static void main(String[] args) throws IOException {
    String path = args.length > 0 ? args[0] : DEFAULT_INDEX_PATH;
    CFSopen cfsopen = new CFSopen();
    Directory dir = cfsopen.fixIndex(path);
    // Release the directory instead of leaving it open until JVM exit.
    dir.close();
  }

}

---------------------------------------
trunk:
---------------------------------------
package org.apache.lucene.index;

import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;

import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.standard.StandardCodec;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IndexInput;

/**
 * Recovery utility for an index directory whose segments file is missing
 * (trunk variant). It registers every compound ({@code .cfs}) file found in
 * the directory as a segment and commits a new {@link SegmentInfos} for them.
 */
public class CFSopen {

  /** Index location used by {@link #main} when no path argument is given. */
  private static final String DEFAULT_INDEX_PATH = "/cygwin/tmp/index";

  /**
   * Writes a new segments file that lists every {@code .cfs} file in the
   * given index directory.
   *
   * <p>Each compound file name is expected to look like {@code _<n>.cfs},
   * where {@code <n>} is a number in radix {@link Character#MAX_RADIX}; the
   * first character is skipped and the rest, up to the last dot, is parsed.
   * The segment counter is set to one more than the largest number seen.
   * Every segment is registered with a {@link StandardCodec}, because the
   * codec name is not yet read out of the compound files.
   *
   * @param path file-system path of the index directory
   * @return the opened directory; the caller is responsible for closing it
   * @throws IOException if the directory cannot be listed, or if reading a
   *         compound file or committing the segments file fails
   * @throws NumberFormatException if a {@code .cfs} file name does not
   *         contain a parseable segment number
   */
  Directory fixIndex(String path) throws IOException {
    File file = new File(path);
    Directory directory = FSDirectory.open(file);

    boolean success = false;
    try {
      String[] files = file.list(new FilenameFilter() {
        public boolean accept(File dir, String name) {
          return name.endsWith(".cfs");
        }
      });
      // File.list returns null (not an empty array) when the path is not a
      // directory or cannot be read; fail clearly instead of with an NPE.
      if (files == null) {
        throw new IOException("Cannot list index directory: " + file.getAbsolutePath());
      }

      SegmentInfos infos = new SegmentInfos();
      int counter = 0;
      for (int i = 0; i < files.length; i++) {
        String fileName = files[i];
        int dot = fileName.lastIndexOf('.');

        // Skip the leading character and parse the remaining segment number.
        String segmentNumber = fileName.substring(1, dot);
        int segmentInt = Integer.parseInt(segmentNumber, Character.MAX_RADIX);
        counter = Math.max(counter, segmentInt);

        // The segment name is the file name without its extension.
        String segmentName = fileName.substring(0, dot);
        int size = countDocs(directory, fileName);

        // need to get codec name out of the CFS files
        Codec codec = new StandardCodec();
        SegmentInfo segmentInfo = new SegmentInfo(
            segmentName, size, directory, false, -1, null, false, false, codec);
        infos.addElement(segmentInfo);
      }

      // The next segment to be created must get a number above all existing ones.
      infos.counter = ++counter;

      infos.prepareCommit(directory);
      infos.finishCommit(directory);
      success = true;
      return directory;
    } finally {
      if (!success) {
        try {
          directory.close();
        } catch (IOException ignored) {
          // Deliberately ignored so the original failure is the one reported.
        }
      }
    }
  }

  /**
   * Derives a segment's document count from the length of the stored-fields
   * index inside its compound file, always releasing both handles.
   *
   * @param directory directory holding the compound file
   * @param cfsName name of the compound file within {@code directory}
   * @return the {@code .fdx} length divided by 8 (presumably 8 bytes per
   *         document entry -- TODO confirm against the file format)
   * @throws IOException if the compound file or its sub-file cannot be read
   */
  private static int countDocs(Directory directory, String cfsName) throws IOException {
    Directory fileReader = new CompoundFileReader(directory, cfsName);
    try {
      // NOTE(review): the sub-file is looked up by the bare extension ".fdx";
      // confirm the compound file's entries are keyed that way in this
      // Lucene version.
      IndexInput indexStream = fileReader.openInput(".fdx");
      try {
        return (int) (indexStream.length() / 8);
      } finally {
        indexStream.close();
      }
    } finally {
      fileReader.close();
    }
  }

  /**
   * Command-line entry point.
   *
   * @param args optional; {@code args[0]} is the index directory path,
   *        defaulting to {@code /cygwin/tmp/index} when absent
   * @throws IOException if the index cannot be repaired
   */
  public static void main(String[] args) throws IOException {
    String path = args.length > 0 ? args[0] : DEFAULT_INDEX_PATH;
    CFSopen cfsopen = new CFSopen();
    Directory dir = cfsopen.fixIndex(path);
    // Release the directory instead of leaving it open until JVM exit.
    dir.close();
  }

}




On 6/16/10, Michael McCandless <lucene@mikemccandless.com> wrote:
> On Wed, Jun 16, 2010 at 10:38 AM, Yonik Seeley
> <yonik@lucidimagination.com> wrote:
>> On Tue, Jun 15, 2010 at 5:23 AM, Michael McCandless
>> <lucene@mikemccandless.com> wrote:
>>> CheckIndex is not able to recover from this corruption (missing
>>> segments_N file); this would be a nice addition...
>>>
>>> But it sounds like you've worked out a way to write your own segments_N?
>>>
>>> Use oal.store.ChecksumIndexOutput (wraps any other IndexOutput) to
>>> properly write the checksum.
>>>
>>> BTW how did you lose your segments_N file...?
>>
>> Can this also be caused by the new behavior introduced here?
>> https://issues.apache.org/jira/browse/LUCENE-2386
>> If you open a writer, add docs, and then crash before calling commit?
>
> That could be; Maryam is that what happened?
>
> Mike
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
> For additional commands, e-mail: java-user-help@lucene.apache.org
>
>


-- 
Lance Norskog
goksron@gmail.com

---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org


Mime
View raw message