mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jeast...@apache.org
Subject svn commit: r1341963 - /mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/dirichlet/TestL1ModelClustering.java
Date Wed, 23 May 2012 18:07:51 GMT
Author: jeastman
Date: Wed May 23 18:07:50 2012
New Revision: 1341963

URL: http://svn.apache.org/viewvc?rev=1341963&view=rev
Log:
Some improvements to output formatting that was misleading

Modified:
    mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/dirichlet/TestL1ModelClustering.java

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/dirichlet/TestL1ModelClustering.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/dirichlet/TestL1ModelClustering.java?rev=1341963&r1=1341962&r2=1341963&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/dirichlet/TestL1ModelClustering.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/dirichlet/TestL1ModelClustering.java Wed May 23 18:07:50 2012
@@ -131,6 +131,7 @@ public final class TestL1ModelClustering
   private List<Vector> sampleData;
   
   private void getSampleData(String[] docs2) throws IOException {
+    System.out.println();
     sampleData = Lists.newArrayList();
     RAMDirectory directory = new RAMDirectory();
     IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(Version.LUCENE_34), true,
@@ -157,7 +158,8 @@ public final class TestL1ModelClustering
     int i = 0;
     for (Vector vector : iterable) {
       assertNotNull(vector);
-      System.out.println("Vector[" + i++ + "]=" + formatVector(vector));
+      System.out.println(i + ". " + docs2[i++]);
+      System.out.println("\t" + formatVector(vector));
       sampleData.add(vector);
     }
   }
@@ -189,50 +191,30 @@ public final class TestL1ModelClustering
     return buf.toString();
   }
   
-  private void printSamples(Iterable<Cluster[]> result, int significant) {
-    int row = 0;
-    for (Cluster[] r : result) {
-      int sig = 0;
-      for (Cluster model : r) {
-        if (model.getNumObservations() > significant) {
-          sig++;
-        }
-      }
-      System.out.print("sample[" + row++ + "] (" + sig + ")= ");
-      for (Cluster model : r) {
-        if (model.getNumObservations() > significant) {
-          System.out.print(model.asFormatString(null) + ", ");
-        }
-      }
-      System.out.println();
-    }
-    System.out.println();
-  }
-  
   private void printClusters(List<Cluster> models, String[] docs) {
     for (int m = 0; m < models.size(); m++) {
       Cluster model = models.get(m);
-      long count = model.getNumObservations();
-      if (count == 0) {
+      long total = model.getTotalObservations();
+      if (total == 0) {
         continue;
       }
-      System.out.println("Model[" + m + "] had " + count + " hits (!) and " + (sampleData.size() - count)
-          + " misses (? in pdf order) during the last iteration:");
+      System.out.println();
+      System.out.println("Model[" + m + "] had " + total + " observations");
+      System.out.println("pdf           document");
       MapElement[] map = new MapElement[sampleData.size()];
       // sort the samples by pdf
+      double maxPdf = Double.MIN_NORMAL;
       for (int i = 0; i < sampleData.size(); i++) {
         VectorWritable sample = new VectorWritable(sampleData.get(i));
-        map[i] = new MapElement(model.pdf(sample), docs[i]);
+        double pdf = Math.abs(model.pdf(sample));
+        maxPdf = Math.max(maxPdf, pdf);
+        map[i] = new MapElement(pdf, docs[i]);
       }
       Arrays.sort(map);
-      // now find the n=model.count() most likely docs and output them
       for (int i = 0; i < map.length; i++) {
-        if (i < count) {
-          System.out.print("! ");
-        } else {
-          System.out.print("? ");
-        }
-        System.out.println(map[i].doc);
+        Double pdf = map[i].pdf;
+        double norm = pdf / maxPdf;
+        System.out.println(String.format(Locale.ENGLISH, "%.3f", norm) + " " + map[i].doc);
       }
     }
   }



Mime
View raw message