lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Natarajan.T" <nataraj...@crimsonlogic.co.in>
Subject PDFBox problem.
Date Fri, 23 Jul 2004 15:02:12 GMT
FYI,
 
I am using PDFBox.jar to convert PDF to text.
 
The problem is that at runtime it prints a lot of object messages.
 
How can I avoid this? How should I proceed?
 
import java.io.InputStream;
import java.io.BufferedWriter;
import java.io.IOException;
 
import org.pdfbox.util.PDFTextStripper;
import org.pdfbox.pdfparser.PDFParser;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.pdmodel.PDDocumentInformation;
 
 
/**
 * Converts PDF input to plain text using PDFBox.
 *
 * @author natarajant
 */
public class PDFConverter extends DocumentConverter {

    public PDFConverter() {
    }

    /**
     * Parses the PDF supplied by {@code reader}, extracts its text with
     * {@link PDFTextStripper} and writes that text, followed by a single
     * space, to {@code writer}. The writer is always closed before this
     * method returns, whether or not extraction succeeded.
     *
     * @param reader stream positioned at the start of the PDF content
     * @param writer destination for the extracted text; closed by this method
     * @return {@code true} if the text was extracted and written,
     *         {@code false} if parsing, extraction or writing failed
     * @throws IOException if closing {@code writer} fails
     */
    public boolean extractText(InputStream reader, BufferedWriter writer)
            throws IOException {

        PDDocument pdDoc = null;
        try {
            PDFParser parser = new PDFParser(reader);
            parser.parse();
            pdDoc = parser.getPDDocument();

            PDFTextStripper stripper = new PDFTextStripper();
            String pdftext = stripper.getText(pdDoc);

            writer.write(pdftext + " ");
            return true;
        } catch (Exception err) {
            // Report the failure to the caller through the return value
            // instead of claiming success after an error.
            System.out.println(err.getMessage());
            return false;
        } finally {
            try {
                // Release the parsed document; it was previously leaked.
                if (pdDoc != null) {
                    pdDoc.close();
                }
            } catch (IOException closeErr) {
                // The text has already been written (or the failure already
                // reported), so a close failure is logged rather than
                // overriding the method's result.
                System.out.println(closeErr.getMessage());
            } finally {
                // Close the writer on every path, as the original did.
                writer.close();
            }
        }
    }

}
 

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message