lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dor...@apache.org
Subject svn commit: r1418892 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/benchmark/ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/
Date Sun, 09 Dec 2012 11:04:31 GMT
Author: doronc
Date: Sun Dec  9 11:04:31 2012
New Revision: 1418892

URL: http://svn.apache.org/viewvc?rev=1418892&view=rev
Log:
LUCENE-4588: merge from trunk: EnwikiContentSource silently swallows the last wiki doc.

Added:
    lucene/dev/branches/branch_4x/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSourceTest.java
      - copied, changed from r1417788, lucene/dev/trunk/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSourceTest.java
Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/benchmark/   (props changed)
    lucene/dev/branches/branch_4x/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java

Modified: lucene/dev/branches/branch_4x/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java?rev=1418892&r1=1418891&r2=1418892&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java (original)
+++ lucene/dev/branches/branch_4x/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java Sun Dec  9 11:04:31 2012
@@ -53,6 +53,7 @@ public class EnwikiContentSource extends
   private class Parser extends DefaultHandler implements Runnable {
     private Thread t;
     private boolean threadDone;
+    private boolean stopped = false;
     private String[] tuple;
     private NoMoreDataException nmde;
     private StringBuilder contents = new StringBuilder();
@@ -70,31 +71,31 @@ public class EnwikiContentSource extends
       }
       String[] result;
       synchronized(this){
-        while(tuple == null && nmde == null && !threadDone) {
+        while(tuple == null && nmde == null && !threadDone && !stopped) {
           try {
             wait();
           } catch (InterruptedException ie) {
             throw new ThreadInterruptedException(ie);
           }
         }
+        if (tuple != null) {
+          result = tuple;
+          tuple = null;
+          notify();
+          return result;
+        }
         if (nmde != null) {
           // Set to null so we will re-start thread in case
           // we are re-used:
           t = null;
           throw nmde;
         }
-        if (t != null && threadDone) {
-          // The thread has exited yet did not hit end of
-          // data, so this means it hit an exception.  We
-          // throw NoMorDataException here to force
-          // benchmark to stop the current alg:
-          throw new NoMoreDataException();
-        }
-        result = tuple;
-        tuple = null;
-        notify();
+        // The thread has exited yet did not hit end of
+        // data, so this means it hit an exception.  We
+        // throw NoMorDataException here to force
+        // benchmark to stop the current alg:
+        throw new NoMoreDataException();
       }
-      return result;
     }
     
     String time(String original) {
@@ -132,7 +133,7 @@ public class EnwikiContentSource extends
             tmpTuple[BODY] = body.replaceAll("[\t\n]", " ");
             tmpTuple[ID] = id;
             synchronized(this) {
-              while (tuple != null) {
+              while (tuple != null && !stopped) {
                 try {
                   wait();
                 } catch (InterruptedException ie) {
@@ -175,7 +176,7 @@ public class EnwikiContentSource extends
         XMLReader reader = XMLReaderFactory.createXMLReader();
         reader.setContentHandler(this);
         reader.setErrorHandler(this);
-        while(true){
+        while(!stopped){
           final InputStream localFileIS = is;
           try {
             // To work around a bug in XERCES (XERCESJ-1257), we assume the XML is always UTF8, so we simply provide reader.
@@ -186,8 +187,7 @@ public class EnwikiContentSource extends
           } catch (IOException ioe) {
             synchronized(EnwikiContentSource.this) {
               if (localFileIS != is) {
-                // fileIS was closed on us, so, just fall
-                // through
+                // fileIS was closed on us, so, just fall through
               } else
                 // Exception is real
                 throw ioe;
@@ -200,7 +200,7 @@ public class EnwikiContentSource extends
               return;
             } else if (localFileIS == is) {
               // If file is not already re-opened then re-open it now
-              is = StreamUtils.inputStream(file);
+              is = openInputStream();
             }
           }
         }
@@ -238,6 +238,17 @@ public class EnwikiContentSource extends
           // this element should be discarded.
       }
     }
+
+    private void stop() {
+      synchronized (this) {
+        stopped = true;
+        if (tuple != null) {
+          tuple = null;
+          notify();
+        }
+      }
+    }
+
   }
 
   private static final Map<String,Integer> ELEMENTS = new HashMap<String,Integer>();
@@ -284,6 +295,7 @@ public class EnwikiContentSource extends
         is.close();
         is = null;
       }
+      parser.stop();
     }
   }
   
@@ -301,7 +313,12 @@ public class EnwikiContentSource extends
   @Override
   public void resetInputs() throws IOException {
     super.resetInputs();
-    is = StreamUtils.inputStream(file);
+    is = openInputStream();
+  }
+
+  /** Open the input stream. */
+  protected InputStream openInputStream() throws IOException {
+    return StreamUtils.inputStream(file);
   }
   
   @Override
@@ -309,10 +326,9 @@ public class EnwikiContentSource extends
     super.setConfig(config);
     keepImages = config.get("keep.image.only.docs", true);
     String fileName = config.get("docs.file", null);
-    if (fileName == null) {
-      throw new IllegalArgumentException("docs.file must be set");
+    if (fileName != null) {
+      file = new File(fileName).getAbsoluteFile();
     }
-    file = new File(fileName).getAbsoluteFile();
   }
   
 }

Copied: lucene/dev/branches/branch_4x/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSourceTest.java
(from r1417788, lucene/dev/trunk/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSourceTest.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSourceTest.java?p2=lucene/dev/branches/branch_4x/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSourceTest.java&p1=lucene/dev/trunk/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSourceTest.java&r1=1417788&r2=1418892&rev=1418892&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSourceTest.java (original)
+++ lucene/dev/branches/branch_4x/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSourceTest.java Sun Dec  9 11:04:31 2012
@@ -17,14 +17,17 @@ package org.apache.lucene.benchmark.byTa
  * limitations under the License.
  */
 
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.OutputStreamWriter;
 import java.text.ParseException;
 import java.util.Properties;
 
 import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
-import org.junit.Ignore;
 import org.junit.Test;
 
 public class EnwikiContentSourceTest extends LuceneTestCase {
@@ -38,10 +41,9 @@ public class EnwikiContentSourceTest ext
       this.docs = docs;
     }
     
-    @SuppressWarnings("deprecation") // fine for the characters used in this test
     @Override
     protected InputStream openInputStream() throws IOException {
-      return new java.io.StringBufferInputStream(docs);
+      return new ByteArrayInputStream(docs.getBytes(IOUtils.CHARSET_UTF_8));
     }
 
   }



Mime
View raw message