mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gsing...@apache.org
Subject svn commit: r796897 - in /lucene/mahout/trunk: core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
Date Wed, 22 Jul 2009 22:40:55 GMT
Author: gsingers
Date: Wed Jul 22 22:40:55 2009
New Revision: 796897

URL: http://svn.apache.org/viewvc?rev=796897&view=rev
Log:
add verbosity option

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java?rev=796897&r1=796896&r2=796897&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java Wed Jul 22 22:40:55 2009
@@ -47,13 +47,21 @@
     return this.confusionMatrix;
   }
 
-  public void addInstance(String correctLabel, ClassifierResult classifiedResult) {
-    if (correctLabel.equals(classifiedResult.getLabel())) {
+  /**
+   *
+   * @param correctLabel The correct label
+   * @param classifiedResult The classified result
+   * @return whether the instance was correct or not
+   */
+  public boolean addInstance(String correctLabel, ClassifierResult classifiedResult) {
+    boolean result = correctLabel.equals(classifiedResult.getLabel());
+    if (result == true) {
       correctlyClassified++;
     } else {
       incorrectlyClassified++;
     }
     confusionMatrix.addInstance(correctLabel, classifiedResult);
+    return result;
   }
 
   @Override

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java?rev=796897&r1=796896&r2=796897&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java Wed Jul 22 22:40:55 2009
@@ -85,13 +85,14 @@
     Option gramSizeOpt = obuilder.withLongName("gramSize").withRequired(true).withArgument(
             abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create()).
             withDescription("Size of the n-gram").withShortName("ng").create();
-
+    Option verboseOutputOpt = obuilder.withLongName("verbose").withRequired(false).
+            withDescription("Output which values were correctly and incorrectly classified").withShortName("v").create();
     Option typeOpt = obuilder.withLongName("classifierType").withRequired(true).withArgument(
             abuilder.withName("classifierType").withMinimum(1).withMaximum(1).create()).
             withDescription("Type of classifier: bayes|cbayes").withShortName("type").create();
 
     Group group = gbuilder.withName("Options").withOption(analyzerOpt).withOption(defaultCatOpt).withOption(dirOpt).withOption(encodingOpt).withOption(gramSizeOpt).withOption(pathOpt)
-            .withOption(typeOpt).create();
+            .withOption(typeOpt).withOption(verboseOutputOpt).create();
 
     Parser parser = new Parser();
     parser.setGroup(group);
@@ -143,6 +144,7 @@
     if (cmdLine.hasOption(encodingOpt)) {
       encoding = (String) cmdLine.getValue(encodingOpt);
     }
+    boolean verbose = cmdLine.hasOption(verboseOutputOpt);
     //Analyzer analyzer = null;
     //if (cmdLine.hasOption(analyzerOpt)) {
       //String className = (String) cmdLine.getValue(analyzerOpt);
@@ -171,13 +173,15 @@
     ResultAnalyzer resultAnalyzer = new ResultAnalyzer(model.getLabels(), defaultCat);
 
     if (subdirs != null) {
-      for (File subdir : subdirs) {
-
-        String correctLabel = subdir.getName().split(".txt")[0];
+      for (File file : subdirs) {
+        log.info("--------------");
+        log.info("Testing: " + file);
+        String correctLabel = file.getName().split(".txt")[0];
         BufferedReader fileReader = new BufferedReader(new InputStreamReader(
-            new FileInputStream(subdir.getPath()), encoding));
+            new FileInputStream(file.getPath()), encoding));
         try {
           String line;
+          long lineNum = 0;
           while ((line = fileReader.readLine()) != null) {
   
             Map<String, List<String>> document = Model.generateNGrams(line, gramSize);
@@ -186,8 +190,14 @@
               ClassifierResult classifiedLabel = classifier.classify(model,
                   strings.toArray(new String[strings.size()]),
                   defaultCat);
-              resultAnalyzer.addInstance(correctLabel, classifiedLabel);
+              boolean correct = resultAnalyzer.addInstance(correctLabel, classifiedLabel);
+              if (verbose == true){
+                //We have one document per line
+                log.info("Line Number: " + lineNum + " Line(30): " + (line.length() > 30 ? line.substring(0, 30) : line) +
+                        " Expected Label: " + correctLabel + " Classified Label: " + classifiedLabel.getLabel() + " Correct: " + correct);
+              }
             }
+            lineNum++;
           }
           log.info("{}\t{}\t{}/{}", new Object[]{
               correctLabel,



Mime
View raw message