mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From smar...@apache.org
Subject [1/2] mahout git commit: Mahout-1612: NullPointerException happens during JSON output format for clusterdumper
Date Sun, 29 Mar 2015 00:59:21 GMT
Repository: mahout
Updated Branches:
  refs/heads/master f5f54a156 -> 4de8c5862


Mahout-1612: NullPointerException happens during JSON output format for clusterdumper


Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/eeca175c
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/eeca175c
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/eeca175c

Branch: refs/heads/master
Commit: eeca175c3a838a8aaa821cbbcd8d0376bd17939d
Parents: f5f54a1
Author: Suneel Marthi <suneel.marthi@gmail.com>
Authored: Sat Mar 28 19:50:17 2015 -0400
Committer: Suneel Marthi <suneel.marthi@gmail.com>
Committed: Sat Mar 28 19:50:17 2015 -0400

----------------------------------------------------------------------
 CHANGELOG                                       |  2 +
 .../utils/clustering/JsonClusterWriter.java     | 46 ++++++++++++--------
 2 files changed, 29 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mahout/blob/eeca175c/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index d878267..600ed9b 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,8 @@ Mahout Change Log
 
 Release 0.10.0 - unreleased
 
+  MAHOUT-1612: NullPointerException happens during JSON output format for clusterdumper (smarthi, Manoj Awasthi)
+
   MAHOUT-1652: Java 7 update (smarthi)
 
   MAHOUT-1639: Streaming kmeans doesn't properly validate estimatedNumMapClusters -km (smarthi)

http://git-wip-us.apache.org/repos/asf/mahout/blob/eeca175c/integration/src/main/java/org/apache/mahout/utils/clustering/JsonClusterWriter.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/clustering/JsonClusterWriter.java b/integration/src/main/java/org/apache/mahout/utils/clustering/JsonClusterWriter.java
index d5abb35..d564a73 100644
--- a/integration/src/main/java/org/apache/mahout/utils/clustering/JsonClusterWriter.java
+++ b/integration/src/main/java/org/apache/mahout/utils/clustering/JsonClusterWriter.java
@@ -18,13 +18,14 @@ package org.apache.mahout.utils.clustering;
 
 import java.io.IOException;
 import java.io.Writer;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.regex.Pattern;
 
-import com.google.common.collect.Maps;
 import org.apache.mahout.clustering.AbstractCluster;
 import org.apache.mahout.clustering.Cluster;
 import org.apache.mahout.clustering.classify.WeightedPropertyVectorWritable;
@@ -36,8 +37,6 @@ import org.codehaus.jackson.map.ObjectMapper;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import com.google.common.collect.Lists;
-
 /**
  * Dump cluster info to JSON formatted lines. Heavily inspired by
  * ClusterDumperWriter.java and CSVClusterWriter.java
@@ -48,8 +47,7 @@ public class JsonClusterWriter extends AbstractClusterWriter {
   private final int numTopFeatures;
   private final ObjectMapper jxn;
 
-  private static final Logger log = LoggerFactory
-      .getLogger(JsonClusterWriter.class);
+  private static final Logger log = LoggerFactory.getLogger(JsonClusterWriter.class);
   private static final Pattern VEC_PATTERN = Pattern.compile("\\{|\\:|\\,|\\}");
 
   public JsonClusterWriter(Writer writer,
@@ -67,22 +65,32 @@ public class JsonClusterWriter extends AbstractClusterWriter {
    */
   @Override
   public void write(ClusterWritable clusterWritable) throws IOException {
-    Map<String, Object> res = Maps.newHashMap();
+    Map<String, Object> res = new HashMap<>();
 
     // get top terms
-    List<Object> topTerms = getTopFeaturesList(clusterWritable.getValue()
-        .getCenter(), dictionary, numTopFeatures);
-    res.put("top_terms", topTerms);
+    if (dictionary != null) {
+      List<Object> topTerms = getTopFeaturesList(clusterWritable.getValue()
+          .getCenter(), dictionary, numTopFeatures);
+      res.put("top_terms", topTerms);
+    } else {
+      res.put("top_terms", new ArrayList<>());
+    }
 
     // get human-readable cluster representation
     Cluster cluster = clusterWritable.getValue();
-    Map<String,Object> fmtStr = cluster.asJson(dictionary);
     res.put("cluster_id", cluster.getId());
-    res.put("cluster", fmtStr);
 
-    // get points
-    List<Object> points = getPoints(cluster, dictionary);
-    res.put("points", points);
+    if (dictionary != null) {
+      Map<String,Object> fmtStr = cluster.asJson(dictionary);
+      res.put("cluster", fmtStr);
+
+      // get points
+      List<Object> points = getPoints(cluster, dictionary);
+      res.put("points", points);
+    } else {
+      res.put("cluster", new HashMap<>());
+      res.put("points", new ArrayList<>());
+    }
 
     // write JSON
     Writer writer = getWriter();
@@ -97,7 +105,7 @@ public class JsonClusterWriter extends AbstractClusterWriter {
   public List<Object> getTopFeaturesList(Vector vector, String[] dictionary,
       int numTerms) {
 
-    List<TermIndexWeight> vectorTerms = Lists.newArrayList();
+    List<TermIndexWeight> vectorTerms = new ArrayList<>();
 
     for (Vector.Element elt : vector.nonZeroes()) {
       vectorTerms.add(new TermIndexWeight(elt.index(), elt.get()));
@@ -111,7 +119,7 @@ public class JsonClusterWriter extends AbstractClusterWriter {
       }
     });
 
-    List<Object> topTerms = Lists.newLinkedList();
+    List<Object> topTerms = new ArrayList<>();
 
     for (int i = 0; i < vectorTerms.size() && i < numTerms; i++) {
       int index = vectorTerms.get(i).index;
@@ -120,7 +128,7 @@ public class JsonClusterWriter extends AbstractClusterWriter {
         log.error("Dictionary entry missing for {}", index);
         continue;
       }
-      Map<String, Object> term_entry = Maps.newHashMap();
+      Map<String, Object> term_entry = new HashMap<>();
       term_entry.put(dictTerm, vectorTerms.get(i).weight);
       topTerms.add(term_entry);
     }
@@ -134,13 +142,13 @@ public class JsonClusterWriter extends AbstractClusterWriter {
    * @return List<Object>
    */
   public List<Object> getPoints(Cluster cluster, String[] dictionary) {
-    List<Object> vectorObjs = Lists.newLinkedList();
+    List<Object> vectorObjs = new ArrayList<>();
     List<WeightedPropertyVectorWritable> points = getClusterIdToPoints().get(
         cluster.getId());
 
     if (points != null) {
       for (WeightedPropertyVectorWritable point : points) {
-        Map<String, Object> entry = Maps.newHashMap();
+        Map<String, Object> entry = new HashMap<>();
         Vector theVec = point.getVector();
         if (theVec instanceof NamedVector) {
           entry.put("vector_name", ((NamedVector) theVec).getName());


Mime
View raw message