lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Sebastin Naveen" <sebasmt...@gmail.com>
Subject Lucene Compression
Date Wed, 06 Jun 2007 14:19:01 GMT
Hi All,
            I am a Lucene developer. I saw your benchmark on the Lucene website,
http://lucene.apache.org
I have up to 45 GB of records. When I compress the records, the size grows to 80
GB. How can I compress them to 10 GB or less?
Please help me in this regard.

I am sending you the source code that I use:



*public class MediationIndexer {

 public static void main(String[] args) throws Exception{
        String slNo="";
         String fileName="";
         String callType="";
         String callingPartyNumber="";
         String calledPartyNumber="";
         String dateSc="";
         String timeSc="";
         String chargDur="";
         String outgoingRoute="";
         String incomingRoute="";
         String orgCalledNumber="";
         String redirectingNumber="";
         String imsiNumber="";



        File   indexDir = new File("C:/Sample/Mediatio/Index");
            Analyzer analyzer = new StandardAnalyzer();
        IndexWriter indexWriter = new IndexWriter(indexDir,analyzer,true);
           // indexWriter.setUseCompoundFile(true);

        File mediationFiles=new File("C:/mediation files");
        File   fileDir  = new File("C:/mediation files");
            long startTime = new Date().getTime();
        String mediFiles[]=mediationFiles.list();
        for(int j=0;j<mediFiles.length;j++)
        {
        File   file = new File("C:/mediation files"+ "/" +mediFiles[j]);

        //indexDir is the directory that hosts Lucene's index files




        String myFiles[] = file.list();
        System.out.println(myFiles.length);

        for (int i = 0; i <myFiles.length ; i++){

        int recCount = 0;
             try {
                  FileReader fr     = new FileReader(file+"/"+ myFiles[i]);
                  BufferedReader br = new BufferedReader(fr);

        //Add documents to the index

                 *

*                 String record = br.readLine();
                      System.out.println("First:"+record);
                  while (record  != null){

                       System.out.println("Current:"+record);

                       System.out.println(record);

                       String[] afterSplit = record.split(",");
                       for(int p=0;p<1;p++) {

                       slNo = afterSplit[0];
                       fileName= afterSplit[1];
                       callType=  afterSplit[2];
                       callingPartyNumber= afterSplit[3];
                       calledPartyNumber=afterSplit[4];
                       dateSc= afterSplit[5];
                       timeSc=afterSplit[6];
                       chargDur= afterSplit[7];
                       outgoingRoute=afterSplit[8];
                       incomingRoute=afterSplit[9];
                       orgCalledNumber=afterSplit[10];
                       redirectingNumber=afterSplit[11];
                       imsiNumber=afterSplit[12];

                       String contents =
                          new String(callType  + callingPartyNumber  +
calledPartyNumber  + dateSc +
                                      timeSc  + chargDur +  outgoingRoute +
                                      incomingRoute  +
                                     imsiNumber);
                                    recCount++;
                       System.out.println(recCount + ": " + record);
                       System.out.println(recCount + ": " + contents);

                       Document document = new Document();
                       *

*                       document.add(new Field("contents",contents,
Field.Store.YES,Field.Index.TOKENIZED));
                       document.add(new Field("callType",callType,
Field.Store.YES ,Field.Index.NO <http://field.index.no/>));
                       document.add(new
Field("callingPartyNumber",callingPartyNumber,Field.Store.YES,Field.Index.NO<http://field.index.no/>));

                       document.add(new
Field("calledPartyNumber",calledPartyNumber,Field.Store.YES,Field.Index.NO<http://field.index.no/>
));
                       document.add(new Field("dateSc",dateSc,
Field.Store.YES,Field.Index.TOKENIZED));
                       document.add(new Field("timeSc",timeSc,
Field.Store.YES,Field.Index.NO <http://field.index.no/>));
                       document.add (new Field("chargDur",chargDur,
Field.Store.YES,Field.Index.NO <http://field.index.no/>));
                       document.add(new Field("outgoingRoute",outgoingRoute,
Field.Store.YES, Field.Index.NO <http://field.index.no/>));
                       document.add(new Field("incomingRoute",incomingRoute,
Field.Store.YES,Field.Index.NO <http://field.index.no/>));
                       }



                       record = br.readLine();
                       if(record.equalsIgnoreCase("")) {
                           record=null;
                       }


                   }


                  }catch (IOException e) {
                               // catch possible io errors from readLine()
                               e.printStackTrace();
                            }
        }
        }
            long endTime = new Date().getTime();
            System.out.println("It took " + (endTime - startTime)
             + " milliseconds to create an index for the files in the
directory "
             + fileDir.getPath());
        }


}
*


-- 
Regards,
Sebastin Naveen

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message