pdfbox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From til...@apache.org
Subject svn commit: r1792512 - in /pdfbox/trunk/examples: pom.xml src/main/java/org/apache/pdfbox/examples/lucene/IndexPDFFiles.java src/main/java/org/apache/pdfbox/examples/lucene/LucenePDFDocument.java
Date Mon, 24 Apr 2017 16:22:24 GMT
Author: tilman
Date: Mon Apr 24 16:22:23 2017
New Revision: 1792512

URL: http://svn.apache.org/viewvc?rev=1792512&view=rev
Log:
PDFBOX-3736: lucene 5.* can use JDK7, lucene 6.* wants JDK8

Modified:
    pdfbox/trunk/examples/pom.xml
    pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/lucene/IndexPDFFiles.java
    pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/lucene/LucenePDFDocument.java

Modified: pdfbox/trunk/examples/pom.xml
URL: http://svn.apache.org/viewvc/pdfbox/trunk/examples/pom.xml?rev=1792512&r1=1792511&r2=1792512&view=diff
==============================================================================
--- pdfbox/trunk/examples/pom.xml (original)
+++ pdfbox/trunk/examples/pom.xml Mon Apr 24 16:22:23 2017
@@ -38,8 +38,8 @@
   <inceptionYear>2002</inceptionYear>
 
   <properties>
-    <lucene.version>4.7.2</lucene.version>
-    <!-- don't update this, because later versions require JDK7 -->
+    <lucene.version>5.5.4</lucene.version>
+    <!-- don't update this, because later versions require JDK8 -->
   </properties>
 
   <dependencies>

Modified: pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/lucene/IndexPDFFiles.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/lucene/IndexPDFFiles.java?rev=1792512&r1=1792511&r2=1792512&view=diff
==============================================================================
--- pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/lucene/IndexPDFFiles.java (original)
+++ pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/lucene/IndexPDFFiles.java Mon Apr 24 16:22:23 2017
@@ -66,19 +66,19 @@ public final class IndexPDFFiles
         boolean create = true;
         for (int i = 0; i < args.length; i++)
         {
-            if ("-index".equals(args[i]))
+            switch (args[i])
             {
-                indexPath = args[i + 1];
-                i++;
-            }
-            else if ("-docs".equals(args[i]))
-            {
-                docsPath = args[i + 1];
-                i++;
-            }
-            else if ("-update".equals(args[i]))
-            {
-                create = false;
+                case "-index":
+                    indexPath = args[i + 1];
+                    i++;
+                    break;
+                case "-docs":
+                    docsPath = args[i + 1];
+                    i++;
+                    break;
+                case "-update":
+                    create = false;
+                    break;
             }
         }
 
@@ -101,9 +101,9 @@ public final class IndexPDFFiles
         {
             System.out.println("Indexing to directory '" + indexPath + "'...");
 
-            Directory dir = FSDirectory.open(new File(indexPath));
-            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
-            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer);
+            Directory dir = FSDirectory.open(new File(indexPath).toPath());
+            Analyzer analyzer = new StandardAnalyzer();
+            IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
 
             if (create)
             {
@@ -123,19 +123,18 @@ public final class IndexPDFFiles
             // size to the JVM (eg add -Xmx512m or -Xmx1g):
             //
             // iwc.setRAMBufferSizeMB(256.0);
-
-            IndexWriter writer = new IndexWriter(dir, iwc);
-            indexDocs(writer, docDir);
-
-            // NOTE: if you want to maximize search performance,
-            // you can optionally call forceMerge here. This can be
-            // a terribly costly operation, so generally it's only
-            // worth it when your index is relatively static (ie
-            // you're done adding documents to it):
-            //
-            // writer.forceMerge(1);
-
-            writer.close();
+            try (IndexWriter writer = new IndexWriter(dir, iwc))
+            {
+                indexDocs(writer, docDir);
+                
+                // NOTE: if you want to maximize search performance,
+                // you can optionally call forceMerge here. This can be
+                // a terribly costly operation, so generally it's only
+                // worth it when your index is relatively static (ie
+                // you're done adding documents to it):
+                //
+                // writer.forceMerge(1);
+            }
 
             Date end = new Date();
             System.out.println(end.getTime() - start.getTime() + " total milliseconds");
@@ -197,7 +196,7 @@ public final class IndexPDFFiles
                 {
 
                     String path = file.getName().toUpperCase();
-                    Document doc = null;
+                    Document doc;
                     if (path.toLowerCase().endsWith(".pdf"))
                     {
                         System.out.println("Indexing PDF document: " + file);

Modified: pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/lucene/LucenePDFDocument.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/lucene/LucenePDFDocument.java?rev=1792512&r1=1792511&r2=1792512&view=diff
==============================================================================
--- pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/lucene/LucenePDFDocument.java (original)
+++ pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/lucene/LucenePDFDocument.java Mon Apr 24 16:22:23 2017
@@ -34,6 +34,7 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexOptions;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.pdmodel.PDDocumentInformation;
 import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
@@ -120,7 +121,7 @@ public class LucenePDFDocument
 
     static
     {
-        TYPE_STORED_NOT_INDEXED.setIndexed(false);
+        TYPE_STORED_NOT_INDEXED.setIndexOptions(IndexOptions.NONE);
         TYPE_STORED_NOT_INDEXED.setStored(true);
         TYPE_STORED_NOT_INDEXED.setTokenized(true);
         TYPE_STORED_NOT_INDEXED.freeze();
@@ -248,19 +249,10 @@ public class LucenePDFDocument
         // tokenized prior to indexing.
         addUnstoredKeywordField(document, "uid", uid);
 
-        FileInputStream input = null;
-        try
+        try (FileInputStream input = new FileInputStream(file))
         {
-            input = new FileInputStream(file);
             addContent(document, input, file.getPath());
         }
-        finally
-        {
-            if (input != null)
-            {
-                input.close();
-            }
-        }
 
         // return the document
 
@@ -295,19 +287,10 @@ public class LucenePDFDocument
         // tokenized prior to indexing.
         addUnstoredKeywordField(document, "uid", uid);
 
-        InputStream input = null;
-        try
+        try (InputStream input = connection.getInputStream())
         {
-            input = connection.getInputStream();
             addContent(document, input, url.toExternalForm());
         }
-        finally
-        {
-            if (input != null)
-            {
-                input.close();
-            }
-        }
 
         // return the document
         return document;
@@ -369,11 +352,8 @@ public class LucenePDFDocument
      */
     private void addContent(Document document, InputStream is, String documentLocation) throws IOException
     {
-        PDDocument pdfDocument = null;
-        try
+        try (PDDocument pdfDocument = PDDocument.load(is))
         {
-            pdfDocument = PDDocument.load(is);
-
             // create a writer where to append the text content.
             StringWriter writer = new StringWriter();
             if (stripper == null)
@@ -418,13 +398,6 @@ public class LucenePDFDocument
             // they didn't suppply a password and the default of "" was wrong.
             throw new IOException("Error: The document(" + documentLocation + ") is encrypted and will not be indexed.", e);
         }
-        finally
-        {
-            if (pdfDocument != null)
-            {
-                pdfDocument.close();
-            }
-        }
     }
 
     /**



Mime
View raw message