lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tomm...@apache.org
Subject svn commit: r1448204 - in /lucene/dev/trunk/lucene/classification: build.xml src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java
Date Wed, 20 Feb 2013 15:00:54 GMT
Author: tommaso
Date: Wed Feb 20 15:00:53 2013
New Revision: 1448204

URL: http://svn.apache.org/r1448204
Log:
LUCENE-4782 - fixed SNBC docsWithClassSize initialization in case the codec doesn't support
Terms#getDocCount

Modified:
    lucene/dev/trunk/lucene/classification/build.xml
    lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java

Modified: lucene/dev/trunk/lucene/classification/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/classification/build.xml?rev=1448204&r1=1448203&r2=1448204&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/classification/build.xml (original)
+++ lucene/dev/trunk/lucene/classification/build.xml Wed Feb 20 15:00:53 2013
@@ -38,7 +38,7 @@
     <pathelement location="${codecs.jar}"/>
     <path refid="test.base.classpath"/>
   </path>
-
+  <target name="dist-maven" depends="dist-maven-src-java"/>
   <target name="compile-core" depends="jar-queries,jar-analyzers-common,common.compile-core" />
 
   <target name="jar-core" depends="common.jar-core" />

Modified: lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java?rev=1448204&r1=1448203&r2=1448204&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java (original)
+++ lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java Wed Feb 20 15:00:53 2013
@@ -29,6 +29,7 @@ import org.apache.lucene.search.BooleanQ
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TotalHitCountCollector;
+import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.util.BytesRef;
 
 import java.io.IOException;
@@ -69,7 +70,18 @@ public class SimpleNaiveBayesClassifier 
     this.textFieldName = textFieldName;
     this.classFieldName = classFieldName;
     this.analyzer = analyzer;
-    this.docsWithClassSize = MultiFields.getTerms(this.atomicReader, this.classFieldName).getDocCount();
+    this.docsWithClassSize = countDocsWithClass();
+  }
+
+  private int countDocsWithClass() throws IOException {
+    int docCount = MultiFields.getTerms(this.atomicReader, this.classFieldName).getDocCount();
+    if (docCount == -1) { // in case codec doesn't support getDocCount
+      TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
+      indexSearcher.search(new WildcardQuery(new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING))),
+          totalHitCountCollector);
+      docCount = totalHitCountCollector.getTotalHits();
+    }
+    return docCount;
   }
 
   private String[] tokenizeDoc(String doc) throws IOException {



Mime
View raw message