lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dor...@apache.org
Subject svn commit: r550905 - in /lucene/java/trunk/contrib/benchmark: ./ src/java/org/apache/lucene/benchmark/byTask/feeds/ src/java/org/apache/lucene/benchmark/byTask/tasks/ src/test/org/apache/lucene/benchmark/byTask/
Date Tue, 26 Jun 2007 18:27:22 GMT
Author: doronc
Date: Tue Jun 26 11:27:21 2007
New Revision: 550905

URL: http://svn.apache.org/viewvc?view=rev&rev=550905
Log:
LUCENE-940: Multi-threaded issues fixed: SimpleDateFormat; 
            logging for addDoc/deleteDoc tasks;

Modified:
    lucene/java/trunk/contrib/benchmark/CHANGES.txt
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/DeleteDocTask.java
    lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java

Modified: lucene/java/trunk/contrib/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/CHANGES.txt?view=diff&rev=550905&r1=550904&r2=550905
==============================================================================
--- lucene/java/trunk/contrib/benchmark/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/benchmark/CHANGES.txt Tue Jun 26 11:27:21 2007
@@ -4,6 +4,10 @@
 
 $Id:$
 
+6/25/07
+- LUCENE-940: Multi-threaded issues fixed: SimpleDateFormat; 
+  logging for addDoc/deleteDoc tasks. (Doron Cohen)
+
 4/17/07
 - LUCENE-863: Deprecated StandardBenchmarker in favour of byTask code.
   (Otis Gospodnetic)

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java?view=diff&rev=550905&r1=550904&r2=550905
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java Tue Jun 26 11:27:21 2007
@@ -22,9 +22,7 @@
 import java.io.StringReader;
 import java.text.DateFormat;
 import java.text.ParseException;
-import java.text.SimpleDateFormat;
 import java.util.Date;
-import java.util.Locale;
 import java.util.Properties;
 
 /**
@@ -32,11 +30,7 @@
  */
 public class DemoHTMLParser implements org.apache.lucene.benchmark.byTask.feeds.HTMLParser
{
 
-  DateFormat dateFormat;
-  
   public DemoHTMLParser () {
-    dateFormat = new SimpleDateFormat("EEE, dd MMM yyyy kk:mm:ss ",Locale.US);  //Tue, 09 Dec 2003 22:39:08 GMT
-    dateFormat.setLenient(true);
   }
 
   /*
@@ -74,8 +68,11 @@
     return new DocData(name, bodyBuf.toString(), title, props, date);
   }
 
+  /*
+   *  (non-Javadoc)
+   * @see org.apache.lucene.benchmark.byTask.feeds.HTMLParser#parse(java.lang.String, java.util.Date, java.lang.StringBuffer, java.text.DateFormat)
+   */
   public DocData parse(String name, Date date, StringBuffer inputText, DateFormat dateFormat) throws IOException, InterruptedException {
-    // TODO Auto-generated method stub
     return parse(name, date, new StringReader(inputText.toString()), dateFormat);
   }
 

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java?view=diff&rev=550905&r1=550904&r2=550905
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java Tue Jun 26 11:27:21 2007
@@ -39,7 +39,7 @@
  */
 public class ReutersDocMaker extends BasicDocMaker {
 
-  private DateFormat dateFormat;
+  private ThreadLocal dateFormat = new ThreadLocal();
   private File dataDir = null;
   private ArrayList inputFiles = new ArrayList();
   private int nextFile = 0;
@@ -58,11 +58,21 @@
     if (inputFiles.size()==0) {
       throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath());
     }
-    // date format: 30-MAR-1987 14:22:36.87
-    dateFormat = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US);
-    dateFormat.setLenient(true);
   }
 
+  // get/initiate a thread-local simple date format (must do so
+  // because SimpleDateFormat is not thread-safe).
+  protected synchronized DateFormat getDateFormat () {
+    DateFormat df = (DateFormat) dateFormat.get();
+    if (df == null) {
+      // date format: 30-MAR-1987 14:22:36.87
+      df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US);
+      df.setLenient(true);
+      dateFormat.set(df);
+    }
+    return df;
+  }
+  
   protected DocData getNextDocData() throws Exception {
     File f = null;
     String name = null;
@@ -95,7 +105,7 @@
     addBytes(f.length());
 
     
-    Date date = dateFormat.parse(dateStr.trim()); 
+    Date date = getDateFormat().parse(dateStr.trim()); 
     return new DocData(name, bodyBuf.toString(), title, null, date);
   }
 

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java?view=diff&rev=550905&r1=550904&r2=550905
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SimpleDocMaker.java Tue Jun 26 11:27:21 2007
@@ -37,7 +37,10 @@
     "right place at the right time with the right knowledge.";
   
   // return a new docid
-  private synchronized int newdocid() {
+  private synchronized int newdocid() throws NoMoreDataException {
+    if (docID>0 && !forever) {
+      throw new NoMoreDataException();
+    }
     return docID++;
   }
 
@@ -59,11 +62,9 @@
   }
 
   protected DocData getNextDocData() throws NoMoreDataException {
-    if (docID>0 && !forever) {
-      throw new NoMoreDataException();
-    }
+    int id = newdocid();
     addBytes(DOC_TEXT.length());
-    return new DocData("doc"+newdocid(),DOC_TEXT, null, null, null);
+    return new DocData("doc"+id, DOC_TEXT, null, null, null);
   }
 
 }

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java?view=diff&rev=550905&r1=550904&r2=550905
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java Tue Jun 26 11:27:21 2007
@@ -41,7 +41,7 @@
 
   private static final String newline = System.getProperty("line.separator");
   
-  private DateFormat dateFormat [];
+  private ThreadLocal dateFormat = new ThreadLocal();
   private File dataDir = null;
   private ArrayList inputFiles = new ArrayList();
   private int nextFile = 0;
@@ -67,12 +67,6 @@
     if (inputFiles.size()==0) {
       throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath());
     }
-    // date format: 30-MAR-1987 14:22:36.87
-    dateFormat = new SimpleDateFormat[DATE_FORMATS.length];
-    for (int i = 0; i < dateFormat.length; i++) {
-      dateFormat[i] = new SimpleDateFormat(DATE_FORMATS[i],Locale.US);
-      dateFormat[i].setLenient(true);
-    }
  }
 
   private void openNextFile() throws NoMoreDataException, Exception {
@@ -177,17 +171,30 @@
     // this is the next document, so parse it 
     Date date = parseDate(dateStr);
     HTMLParser p = getHtmlParser();
-    DocData docData = p.parse(name, date, sb, dateFormat[0]);
+    DocData docData = p.parse(name, date, sb, getDateFormat(0));
     addBytes(sb.length()); // count char length of parsed html text (larger than the plain doc body text). 
     
     return docData;
   }
 
+  private DateFormat getDateFormat(int n) {
+    DateFormat df[] = (DateFormat[]) dateFormat.get();
+    if (df == null) {
+      df = new SimpleDateFormat[DATE_FORMATS.length];
+      for (int i = 0; i < df.length; i++) {
+        df[i] = new SimpleDateFormat(DATE_FORMATS[i],Locale.US);
+        df[i].setLenient(true);
+      }
+      dateFormat.set(df);
+    }
+    return df[n];
+  }
+
   private Date parseDate(String dateStr) {
     Date date = null;
-    for (int i=0; i<dateFormat.length; i++) {
+    for (int i=0; i<DATE_FORMATS.length; i++) {
       try {
-        date = dateFormat[i].parse(dateStr.trim());
+        date = getDateFormat(i).parse(dateStr.trim());
         return date;
       } catch (ParseException e) {
       }

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java?view=diff&rev=550905&r1=550904&r2=550905
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java Tue Jun 26 11:27:21 2007
@@ -40,8 +40,9 @@
     super(runData);
   }
 
-  private static int logStep = -1;
+  private int logStep = -1;
   private int docSize = 0;
+  int count = 0;
   
   // volatile data passed between setup(), doLogic(), tearDown().
   private Document doc = null;
@@ -64,8 +65,7 @@
    * @see PerfTask#tearDown()
    */
   public void tearDown() throws Exception {
-    DocMaker docMaker = getRunData().getDocMaker();
-    log(docMaker.getCount());
+    log(++count);
     doc = null;
     super.tearDown();
   }
@@ -77,11 +77,11 @@
 
   private void log (int count) {
     if (logStep<0) {
-      // avoid sync although race possible here
+      // init once per instance
       logStep = getRunData().getConfig().get("doc.add.log.step",DEFAULT_ADD_DOC_LOG_STEP);
     }
     if (logStep>0 && (count%logStep)==0) {
-      System.out.println("--> processed (add) "+count+" docs");
+      System.out.println("--> "+Thread.currentThread().getName()+" processed (add) "+count+" docs");
     }
   }
 

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/DeleteDocTask.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/DeleteDocTask.java?view=diff&rev=550905&r1=550904&r2=550905
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/DeleteDocTask.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/DeleteDocTask.java Tue Jun 26 11:27:21 2007
@@ -43,8 +43,8 @@
     super(runData);
   }
 
-  private static int logStep = -1;
-  private static int deleteStep = -1;
+  private int logStep = -1;
+  private int deleteStep = -1;
   private static int numDeleted = 0;
   private static int lastDeleted = -1;
 

Modified: lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?view=diff&rev=550905&r1=550904&r2=550905
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java Tue Jun 26 11:27:21 2007
@@ -123,6 +123,34 @@
     assertEquals("1 docs were added to the index, this is what we expect to find!",1,ir.numDocs());
   }
 
+  /**
+   * Test Parallel Doc Maker logic (for LUCENE-940)
+   */
+  public void testParallelDocMaker() throws Exception {
+    // 1. alg definition (required in every "logic" test)
+    String algLines[] = {
+        "# ----- properties ",
+        "doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker",
+        "doc.add.log.step=2697",
+        "doc.term.vector=false",
+        "doc.maker.forever=false",
+        "directory=FSDirectory",
+        "doc.stored=false",
+        "doc.tokenized=false",
+        "# ----- alg ",
+        "CreateIndex",
+        "[ { AddDoc } : * ] : 4 ",
+        "CloseIndex",
+    };
+    
+    // 2. execute the algorithm  (required in every "logic" test)
+    Benchmark benchmark = execBenchmark(algLines);
+
+    // 3. test number of docs in the index
+    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
+    int ndocsExpected = 21578; // that's how many docs there are in the Reuters collection.
+    assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
+  }
   
   // create the benchmark and execute it. 
   private Benchmark execBenchmark(String[] algLines) throws Exception {



Mime
View raw message