mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From smar...@apache.org
Subject svn commit: r1551019 - in /mahout/trunk: CHANGELOG core/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java core/src/test/java/org/apache/mahout/classifier/df/data/DatasetTest.java
Date Sun, 15 Dec 2013 12:56:19 GMT
Author: smarthi
Date: Sun Dec 15 12:56:19 2013
New Revision: 1551019

URL: http://svn.apache.org/r1551019
Log:
MAHOUT-1378: Running Random Forest with Ignored features fails when loading feature descriptor from JSON file

Modified:
    mahout/trunk/CHANGELOG
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java
    mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/data/DatasetTest.java

Modified: mahout/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1551019&r1=1551018&r2=1551019&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Sun Dec 15 12:56:19 2013
@@ -6,6 +6,8 @@ Release 0.9 - unreleased
 
   MAHOUT-1379: ClusterQualitySummarizer fails with the new T-Digest for clusters with 1 data point (smarthi)
 
+  MAHOUT-1378: Running Random Forest with Ignored features fails when loading feature descriptor from JSON file (Sam Wu via smarthi)
+
   MAHOUT-1370: Vectordump doesn't write to output file in MapReduce Mode (smarthi)
 
   MAHOUT-1368: Convert OnlineSummarizer to use the new TDigest (tdunning)

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java?rev=1551019&r1=1551018&r2=1551019&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java Sun Dec 15 12:56:19 2013
@@ -67,7 +67,6 @@ public class Dataset {
     }
     
     private static Attribute fromString(String from) {
-      
       Attribute toReturn = LABEL;
       if (NUMERICAL.toString().equalsIgnoreCase(from)) {
         toReturn = NUMERICAL;
@@ -238,13 +237,11 @@ public class Dataset {
    */
   private static int countAttributes(Attribute[] attrs) {
     int nbattrs = 0;
-
     for (Attribute attr : attrs) {
       if (!attr.isIgnored()) {
         nbattrs++;
       }
     }
-
     return nbattrs;
   }
 
@@ -320,7 +317,6 @@ public class Dataset {
    * @throws java.io.IOException
    */
   public static Dataset load(Configuration conf, Path path) throws IOException {
-
     FileSystem fs = path.getFileSystem(conf);
     long bytesToRead = fs.getFileStatus(path).getLen();
     byte[] buff = new byte[Long.valueOf(bytesToRead).intValue()];
@@ -340,12 +336,11 @@ public class Dataset {
    * @return some JSON
    */
   public String toJSON() {
-
     List<Map<String, Object>> toWrite = Lists.newLinkedList();
     // attributes does not include ignored columns and it does include the class label
     int ignoredCount = 0;
     for (int i = 0; i < attributes.length + ignored.length; i++) {
-      Map<String, Object> attribute = null;
+      Map<String, Object> attribute;
       int attributesIndex = i - ignoredCount;
       if (ignoredCount < ignored.length && i == ignored[ignoredCount]) {
         // fill in ignored atttribute
@@ -370,10 +365,9 @@ public class Dataset {
   /**
    * De-serialize an instance from a string
    * @param json From which an instance is created
-   * @return A shinny new Dataset
+   * @return A shiny new Dataset
    */
   public static Dataset fromJSON(String json) {
-
     List<Map<String, Object>> fromJSON;
     try {
       fromJSON = OBJECT_MAPPER.readValue(json, new TypeReference<List<Map<String, Object>>>() {});
@@ -397,7 +391,7 @@ public class Dataset {
         if (attribute.get(VALUES) != null) {
           List<String> get = (List<String>) attribute.get(VALUES);
           String[] array = get.toArray(new String[get.size()]);
-          nominalValues[i] = array;
+          nominalValues[i - ignored.size()] = array;
         }
       }
     }
@@ -413,17 +407,15 @@ public class Dataset {
   /**
    * Generate a map to describe an attribute
    * @param type The type
-   * @param values
-   * @param isLabel
-   * @return 
+   * @param values - values
+   * @param isLabel - is a label
+   * @return map of (AttributeTypes, Values)
    */
   private Map<String, Object> getMap(Attribute type, String[] values, boolean isLabel) {
-
     Map<String, Object> attribute = Maps.newHashMap();
     attribute.put(TYPE, type.toString().toLowerCase(Locale.getDefault()));
     attribute.put(VALUES, values);
     attribute.put(LABEL, isLabel);
     return attribute;
   }
-
 }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/data/DatasetTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/data/DatasetTest.java?rev=1551019&r1=1551018&r2=1551019&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/data/DatasetTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/data/DatasetTest.java Sun Dec 15 12:56:19 2013
@@ -16,7 +16,6 @@
  */
 package org.apache.mahout.classifier.df.data;
 
-
 import org.apache.mahout.common.MahoutTestCase;
 import org.junit.Test;
 
@@ -24,7 +23,6 @@ public final class DatasetTest extends M
 
   @Test
   public void jsonEncoding() throws DescriptorException {
-
     String json = "["
             + "{\"values\":null,\"label\":false,\"type\":\"numerical\"},"
             + "{\"values\":[\"foo\",\"bar\"],\"label\":false,\"type\":\"categorical\"},"
@@ -42,7 +40,7 @@ public final class DatasetTest extends M
     assertTrue(to.isNumerical(0));
 
     // from JSON
-    Dataset fromJson = new Dataset().fromJSON(json);
+    Dataset fromJson = Dataset.fromJSON(json);
     assertEquals(3, fromJson.nbAttributes());
     assertEquals(1, fromJson.getIgnored().length);
     assertEquals(2, fromJson.getIgnored()[0]);
@@ -50,6 +48,37 @@ public final class DatasetTest extends M
     
     // read values for a nominal
     assertEquals(0, fromJson.valueOf(1, "foo"));
+  }
+
+  @Test
+  public void jsonEncodingIgnoreFeatures() throws DescriptorException {
+    String json = "["
+        + "{\"values\":null,\"label\":false,\"type\":\"numerical\"},"
+        + "{\"values\":[\"foo\",\"bar\"],\"label\":false,\"type\":\"categorical\"},"
+        + "{\"values\":null,\"label\":false,\"type\":\"ignored\"},"
+        + "{\"values\":[\"Blue\",\"Red\"],\"label\":true,\"type\":\"categorical\"}"
+        + "]";
+    Dataset to = DataLoader.generateDataset("N C I L", false, new String[]{"1 foo 2 Red", "4 bar 5 Blue"});
 
+    // to JSON
+    assertEquals(json, to.toJSON());
+    assertEquals(3, to.nbAttributes());
+    assertEquals(1, to.getIgnored().length);
+    assertEquals(2, to.getIgnored()[0]);
+    assertEquals(2, to.getLabelId());
+    assertTrue(to.isNumerical(0));
+    assertEquals(0, to.valueOf(1, "foo"));
+    assertEquals(0, to.valueOf(2, "Blue"));
+
+    // from JSON
+    Dataset fromJson = Dataset.fromJSON(json);
+    assertEquals(3, fromJson.nbAttributes());
+    assertEquals(1, fromJson.getIgnored().length);
+    assertEquals(2, fromJson.getIgnored()[0]);
+    assertTrue(fromJson.isNumerical(0));
+
+    // read values for a nominal, one before and one after the ignore feature
+    assertEquals(0, fromJson.valueOf(1, "foo"));
+    assertEquals(0, fromJson.valueOf(2, "Blue"));
   }
 }



Mime
View raw message