lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Günter Kukies <guenter.kuk...@heuft.com>
Subject Why is document.get("contents") null?
Date Fri, 21 Feb 2003 08:17:14 GMT
Hello,

Why is document.get("contents") null?

Thanks,

Günter


private static void addContent(PortalServlet servlet, Document document, InputStream is, String
documentLocation ) throws IOException {
        try {
            
            PDFParser parser = new PDFParser( is );
            parser.parse();
            
            COSDocument pdfDocument = parser.getDocument();
            
            if( pdfDocument.isEncrypted() ) {
                DecryptDocument decryptor = new DecryptDocument( pdfDocument );
                /*Just try using the default password and move on */
                decryptor.decryptDocument( "" );
            }
            
            /*create a tmp output stream with the size of the content.*/
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            PDFTextStripper stripper = new PDFTextStripper();
            stripper.writeText( pdfDocument, new OutputStreamWriter( out ) );
            
            byte[] contents = out.toByteArray();
            InputStreamReader input = new InputStreamReader( new ByteArrayInputStream( contents
) );
            // Add the tag-stripped contents as a Reader-valued Text field so it will
            // get tokenized and indexed.
            document.add(Field.Text("contents", input ));
            servlet.log("documentstripper: "+stripper.getText(pdfDocument));
            servlet.log("documentLocation: "+documentLocation);
            servlet.log("contents: "+input+" doc: "+document.get("contents"));
            servlet.log("document: "+document);
 
        }
        catch( CryptographyException e ) {
            throw new IOException( "Error decrypting document(" + documentLocation + "): "
+ e );
        }
        catch( InvalidPasswordException e ) {
            throw new IOException( "Error: The document(" + documentLocation + ") is encrypted
and will not be indexed." );
        }
        finally {
            if( is != null ) {
                is.close();
            }
        }
    }

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message