mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r1198330 - in /mahout/trunk/integration/src: main/java/org/apache/mahout/utils/vectors/arff/ test/java/org/apache/mahout/utils/vectors/arff/
Date Sun, 06 Nov 2011 09:44:54 GMT
Author: srowen
Date: Sun Nov  6 09:44:54 2011
New Revision: 1198330

URL: http://svn.apache.org/viewvc?rev=1198330&view=rev
Log:
MAHOUT-155 DateTestAndLabelFix

Modified:
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFIterator.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFType.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFIterator.java?rev=1198330&r1=1198329&r2=1198330&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFIterator.java
(original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFIterator.java
Sun Nov  6 09:44:54 2011
@@ -29,8 +29,9 @@ import org.apache.mahout.math.Vector;
 
 final class ARFFIterator extends AbstractIterator<Vector> {
 
-  private static final Pattern COMMA_PATTERN = Pattern.compile(",");
-  private static final Pattern SPACE_PATTERN = Pattern.compile(" ");
+  // This pattern will make sure a , inside a string is not a point for split.
+  // Ex: "Arizona" , "0:08 PM, PDT" , 110 will be split considering "0:08 PM, PDT" as one string
+  private static final Pattern COMMA_PATTERN = Pattern.compile(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)");
 
   private final BufferedReader reader;
   private final ARFFModel model;
@@ -63,9 +64,11 @@ final class ARFFIterator extends Abstrac
       String[] splits = COMMA_PATTERN.split(line);
       result = new RandomAccessSparseVector(model.getLabelSize());
       for (String split : splits) {
-        String[] data = SPACE_PATTERN.split(split); // first is index, second is
-        int idx = Integer.parseInt(data[0]);
-        result.setQuick(idx, model.getValue(data[1], idx));
+        split = split.trim();
+        int idIndex = split.indexOf(' ');
+        int idx = Integer.parseInt(split.substring(0, idIndex).trim());
+        String data = split.substring(idIndex).trim();
+        result.setQuick(idx, model.getValue(data, idx));
       }
     } else {
       result = new DenseVector(model.getLabelSize());

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFType.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFType.java?rev=1198330&r1=1198329&r2=1198330&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFType.java
(original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFType.java
Sun Nov  6 09:44:54 2011
@@ -18,9 +18,14 @@
 package org.apache.mahout.utils.vectors.arff;
 
 public enum ARFFType {
-  NUMERIC("numeric"), NOMINAL("{"), DATE("date"), STRING("string");
+
+  NUMERIC("numeric"),
+  NOMINAL("{"),
+  DATE("date"),
+  STRING("string");
   
   private final String indicator;
+  
   ARFFType(String indicator) {
     this.indicator = indicator;
   }
@@ -30,8 +35,7 @@ public enum ARFFType {
   }
   
   public String getLabel(String line) {
-    int idx = line.indexOf(indicator);
-    return line.substring(ARFFModel.ATTRIBUTE.length(),
-      idx).trim();
+    int idx = line.lastIndexOf(indicator);
+    return line.substring(ARFFModel.ATTRIBUTE.length(), idx).trim();
   }
 }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java?rev=1198330&r1=1198329&r2=1198330&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
(original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
Sun Nov  6 09:44:54 2011
@@ -110,7 +110,7 @@ public class ARFFVectorIterable implemen
           type = ARFFType.DATE;
           //TODO: DateFormatter map
           DateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ENGLISH);
-          int idx = lower.indexOf(ARFFType.DATE.getIndicator());
+          int idx = lower.lastIndexOf(ARFFType.DATE.getIndicator());
           String[] split = SPACE_PATTERN.split(line);
           if (split.length >= 4) { //we have a date format
             String formStr = line.substring(idx + ARFFType.DATE.getIndicator().length()).trim();

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java?rev=1198330&r1=1198329&r2=1198330&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
(original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
Sun Nov  6 09:44:54 2011
@@ -122,23 +122,10 @@ public final class Driver {
           });
           
           for (File file : files) {
-            writeFile(outDir, file, maxDocs, model);
+            writeFile(outDir, file, maxDocs, model, dictOut, delimiter);
           }
         } else {
-          writeFile(outDir, input, maxDocs, model);
-        }
-        log.info("Dictionary Output file: {}", dictOut);
-        Map<String,Integer> labels = model.getLabelBindings();
-        Writer writer = Files.newWriter(dictOut, Charsets.UTF_8);
-        try {
-          for (Map.Entry<String,Integer> entry : labels.entrySet()) {
-            writer.write(entry.getKey());
-            writer.write(delimiter);
-            writer.write(String.valueOf(entry.getValue()));
-            writer.write('\n');
-          }
-        } finally {
-          Closeables.closeQuietly(writer);
+          writeFile(outDir, input, maxDocs, model, dictOut, delimiter);
         }
       }
       
@@ -148,7 +135,29 @@ public final class Driver {
     }
   }
   
-  private static void writeFile(String outDir, File file, long maxDocs, ARFFModel arffModel) throws IOException {
+  private static void writeLabelBindings(File dictOut, ARFFModel arffModel, String delimiter) throws IOException {
+    Map<String,Integer> labels = arffModel.getLabelBindings();
+    Writer writer = Files.newWriterSupplier(dictOut, Charsets.UTF_8, true).getOutput();
+    try {
+      writer.write("Label bindings for Relation " + arffModel.getRelation() + "\n");
+      for (Map.Entry<String,Integer> entry : labels.entrySet()) {
+        writer.write(entry.getKey());
+        writer.write(delimiter);
+        writer.write(String.valueOf(entry.getValue()));
+        writer.write('\n');
+      }
+      writer.write('\n');
+    } finally {
+      Closeables.closeQuietly(writer);
+    }
+  }
+  
+  private static void writeFile(String outDir,
+                                File file,
+                                long maxDocs,
+                                ARFFModel arffModel,
+                                File dictOut,
+                                String delimiter) throws IOException {
     log.info("Converting File: {}", file);
     ARFFModel model = new MapBackedARFFModel(arffModel.getWords(), arffModel.getWordCount() + 1, arffModel
         .getNominalMap());
@@ -158,6 +167,7 @@ public final class Driver {
     VectorWriter vectorWriter = getSeqFileWriter(outFile);
     try {
       long numDocs = vectorWriter.write(iteratable, maxDocs);
+      writeLabelBindings(dictOut, model, delimiter);
       log.info("Wrote: {} vectors", numDocs);
     } finally {
       Closeables.closeQuietly(vectorWriter);

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java?rev=1198330&r1=1198329&r2=1198330&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java
(original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java
Sun Nov  6 09:44:54 2011
@@ -35,20 +35,16 @@ public final class ARFFVectorIterableTes
   @Test
   public void testValues() throws Exception {
     StringBuilder builder = new StringBuilder();
-    builder.append("%comments").append('\n').append("@RELATION Mahout").append('\n')
-    .append("@ATTRIBUTE foo numeric").append('\n')
-    .append("@ATTRIBUTE bar numeric").append('\n')
-    .append("@ATTRIBUTE timestamp DATE \"yyyy-MM-dd HH:mm:ss\"").append('\n')
-    .append("@ATTRIBUTE junk string").append('\n')
-    .append("@ATTRIBUTE theNominal {c,b,a}").append('\n')
-    .append("@DATA").append('\n')
-    .append("1,2, \"2009-01-01 5:55:55\", foo, c").append('\n')
-    .append("2,3").append('\n')
-    .append("{0 5,1 23}").append('\n');
+    builder.append("%comments").append('\n').append("@RELATION Mahout").append('\n').append(
+      "@ATTRIBUTE foo numeric").append('\n').append("@ATTRIBUTE bar numeric").append('\n').append(
+      "@ATTRIBUTE timestamp DATE \"yyyy-MM-dd HH:mm:ss\"").append('\n').append("@ATTRIBUTE
junk string")
+        .append('\n').append("@ATTRIBUTE theNominal {c,b,a}").append('\n').append("@DATA").append('\n')
+        .append("1,2, \"2009-01-01 5:55:55\", foo, c").append('\n').append("2,3").append('\n').append(
+          "{0 5,1 23}").append('\n');
     ARFFModel model = new MapBackedARFFModel();
     ARFFVectorIterable iterable = new ARFFVectorIterable(builder.toString(), model);
     assertEquals("Mahout", iterable.getModel().getRelation());
-    Map<String, Integer> bindings = iterable.getModel().getLabelBindings();
+    Map<String,Integer> bindings = iterable.getModel().getLabelBindings();
     assertNotNull(bindings);
     assertEquals(5, bindings.size());
     Iterator<Vector> iter = iterable.iterator();
@@ -109,21 +105,28 @@ public final class ARFFVectorIterableTes
       assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector);
       count++;
     }
+    
+    iterable = new ARFFVectorIterable(NON_NUMERIC_ARFF, model);
+    Iterator<Vector> iter = iterable.iterator();
+    Vector firstVector = iter.next();
+    
+    assertEquals(1.0, firstVector.get(2), 0);
+    
     assertEquals(10, count);
-    Map<String, Map<String, Integer>> nominalMap = iterable.getModel().getNominalMap();
+    Map<String,Map<String,Integer>> nominalMap = iterable.getModel().getNominalMap();
     assertNotNull(nominalMap);
     assertEquals(1, nominalMap.size());
-    Map<String, Integer> noms = nominalMap.get("bar");
+    Map<String,Integer> noms = nominalMap.get("bar");
     assertNotNull("nominals for bar are null", noms);
     assertEquals(2, noms.size());
-    Map<Integer, ARFFType> integerARFFTypeMap = model.getTypeMap();
+    Map<Integer,ARFFType> integerARFFTypeMap = model.getTypeMap();
     assertNotNull("Type map null", integerARFFTypeMap);
     assertEquals(5, integerARFFTypeMap.size());
-    Map<String, Long> words = model.getWords();
+    Map<String,Long> words = model.getWords();
     assertNotNull("words null", words);
     assertEquals(10, words.size());
-    //System.out.println("Words: " + words);
-    Map<Integer, DateFormat> integerDateFormatMap = model.getDateMap();
+    // System.out.println("Words: " + words);
+    Map<Integer,DateFormat> integerDateFormatMap = model.getDateMap();
     assertNotNull("date format null", integerDateFormatMap);
     assertEquals(1, integerDateFormatMap.size());
   }
@@ -131,14 +134,39 @@ public final class ARFFVectorIterableTes
   @Test
   public void testDate() throws Exception {
     MapBackedARFFModel model = new MapBackedARFFModel();
-    ARFFVectorIterable iterable = new ARFFVectorIterable(NON_NUMERIC_ARFF, model);
+    ARFFVectorIterable iterable = new ARFFVectorIterable(DATE_ARFF, model);
     Iterator<Vector> iter = iterable.iterator();
     Vector firstVector = iter.next();
-    assertEquals(1.0, firstVector.get(2),0);
-    DateFormat format = new SimpleDateFormat("yyyy-MM-dd", Locale.ENGLISH);
-    Date date = format.parse("1973-10-23");
+    
+    DateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ENGLISH);
+    Date date = format.parse("2001-07-04T12:08:56");
     long result = date.getTime();
-    assertEquals(result, firstVector.get(4),0);
+    assertEquals(result, firstVector.get(1), 0);
+    
+    format = new SimpleDateFormat("yyyy.MM.dd G 'at' HH:mm:ss z", Locale.ENGLISH);
+    date = format.parse("2001.07.04 AD at 12:08:56 PDT");
+    result = date.getTime();
+    assertEquals(result, firstVector.get(2), 0);
+    
+    format = new SimpleDateFormat("EEE, MMM d, ''yy", Locale.ENGLISH);
+    date = format.parse("Wed, Jul 4, '01,4 0:08 PM, PDT");
+    result = date.getTime();
+    assertEquals(result, firstVector.get(3), 0);
+    
+    format = new SimpleDateFormat("K:mm a, z", Locale.ENGLISH);
+    date = format.parse("0:08 PM, PDT");
+    result = date.getTime();
+    assertEquals(result, firstVector.get(4), 0);
+    
+    format = new SimpleDateFormat("yyyyy.MMMMM.dd GGG hh:mm aaa", Locale.ENGLISH);
+    date = format.parse("02001.July.04 AD 12:08 PM");
+    result = date.getTime();
+    assertEquals(result, firstVector.get(5), 0);
+    
+    format = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss Z", Locale.ENGLISH);
+    date = format.parse("Wed, 4 Jul 2001 12:08:56 -0700");
+    result = date.getTime();
+    assertEquals(result, firstVector.get(6), 0);
     
   }
 
@@ -152,24 +180,23 @@ public final class ARFFVectorIterableTes
       count++;
     }
     assertEquals(10, count);
-    Map<String, Map<String, Integer>> nominalMap = iterable.getModel().getNominalMap();
+    Map<String,Map<String,Integer>> nominalMap = iterable.getModel().getNominalMap();
     assertNotNull(nominalMap);
     assertEquals(1, nominalMap.size());
-    Map<String, Integer> noms = nominalMap.get("bar");
+    Map<String,Integer> noms = nominalMap.get("bar");
     assertNotNull("nominals for bar are null", noms);
     assertEquals(2, noms.size());
-    Map<Integer, ARFFType> integerARFFTypeMap = model.getTypeMap();
+    Map<Integer,ARFFType> integerARFFTypeMap = model.getTypeMap();
     assertNotNull("Type map null", integerARFFTypeMap);
     assertEquals(5, integerARFFTypeMap.size());
-    Map<String, Long> words = model.getWords();
+    Map<String,Long> words = model.getWords();
     assertNotNull("words null", words);
     assertEquals(10, words.size());
-    //System.out.println("Words: " + words);
-    Map<Integer, DateFormat> integerDateFormatMap = model.getDateMap();
+    // System.out.println("Words: " + words);
+    Map<Integer,DateFormat> integerDateFormatMap = model.getDateMap();
     assertNotNull("date format null", integerDateFormatMap);
     assertEquals(1, integerDateFormatMap.size());
-    model = new MapBackedARFFModel(model.getWords(), model.getWordCount(),
-      model.getNominalMap());
+    model = new MapBackedARFFModel(model.getWords(), model.getWordCount(), model.getNominalMap());
     iterable = new ARFFVectorIterable(NON_NUMERIC_ARFF2, model);
     count = 0;
     for (Vector vector : iterable) {
@@ -184,108 +211,90 @@ public final class ARFFVectorIterableTes
     assertEquals(2, noms.size());
   }
   
+  private static final String SAMPLE_DENSE_ARFF = "   % Comments\n" + "   % \n" + "   % Comments go here"
+                                                  + "   % \n" + "   @RELATION Mahout\n" + '\n'
+                                                  + "   @ATTRIBUTE foo  NUMERIC\n"
+                                                  + "   @ATTRIBUTE bar   NUMERIC\n"
+                                                  + "   @ATTRIBUTE hockey  NUMERIC\n"
+                                                  + "   @ATTRIBUTE football   NUMERIC\n" + "  \n" + '\n'
+                                                  + '\n' + "   @DATA\n" + "   23.1,3.23,1.2,0.2\n"
+                                                  + "   2.9,3.0,1.2,0.2\n" + "   2.7,3.2,1.3,0.2\n"
+                                                  + "   2.6,3.1,1.23,0.2\n" + "   23.0,3.6,1.2,0.2\n"
+                                                  + "   23.2,3.9,1.7,0.2\n" + "   2.6,3.2,1.2,0.3\n"
+                                                  + "   23.0,3.2,1.23,0.2\n" + "   2.2,2.9,1.2,0.2\n"
+                                                  + "   2.9,3.1,1.23,0.1\n";
   
-  private static final String SAMPLE_DENSE_ARFF = "   % Comments\n" +
-  "   % \n" +
-  "   % Comments go here" +
-  "   % \n" +
-  "   @RELATION Mahout\n" +
-  '\n' +
-  "   @ATTRIBUTE foo  NUMERIC\n" +
-  "   @ATTRIBUTE bar   NUMERIC\n" +
-  "   @ATTRIBUTE hockey  NUMERIC\n" +
-  "   @ATTRIBUTE football   NUMERIC\n" +
-  "  \n" +
-  '\n' +
-  '\n' +
-  "   @DATA\n" +
-  "   23.1,3.23,1.2,0.2\n" +
-  "   2.9,3.0,1.2,0.2\n" +
-  "   2.7,3.2,1.3,0.2\n" +
-  "   2.6,3.1,1.23,0.2\n" +
-  "   23.0,3.6,1.2,0.2\n" +
-  "   23.2,3.9,1.7,0.2\n" +
-  "   2.6,3.2,1.2,0.3\n" +
-  "   23.0,3.2,1.23,0.2\n" +
-  "   2.2,2.9,1.2,0.2\n" +
-  "   2.9,3.1,1.23,0.1\n";
-  
+  private static final String SAMPLE_SPARSE_ARFF = "   % Comments\n" + "   % \n" + "   % Comments go here"
+                                                   + "   % \n" + "   @RELATION Mahout\n" + '\n'
+                                                   + "   @ATTRIBUTE foo  NUMERIC\n"
+                                                   + "   @ATTRIBUTE bar   NUMERIC\n"
+                                                   + "   @ATTRIBUTE hockey  NUMERIC\n"
+                                                   + "   @ATTRIBUTE football   NUMERIC\n"
+                                                   + "   @ATTRIBUTE tennis   NUMERIC\n" + "  \n" + '\n'
+                                                   + '\n' + "   @DATA\n" + "   {1 23.1,2 3.23,3 1.2,4 0.2}\n"
+                                                   + "   {0 2.9}\n" + "   {0 2.7,2 3.2,3 1.3,4 0.2}\n"
+                                                   + "   {1 2.6,2 3.1,3 1.23,4 0.2}\n"
+                                                   + "   {1 23.0,2 3.6,3 1.2,4 0.2}\n"
+                                                   + "   {0 23.2,1 3.9,3 1.7,4 0.2}\n"
+                                                   + "   {0 2.6,1 3.2,2 1.2,4 0.3}\n"
+                                                   + "   {1 23.0,2 3.2,3 1.23}\n"
+                                                   + "   {1 2.2,2 2.94,3 0.2}\n" + "   {1 2.9,2 3.1}\n";
   
-  private static final String SAMPLE_SPARSE_ARFF = "   % Comments\n" +
-  "   % \n" +
-  "   % Comments go here" +
-  "   % \n" +
-  "   @RELATION Mahout\n" +
-  '\n' +
-  "   @ATTRIBUTE foo  NUMERIC\n" +
-  "   @ATTRIBUTE bar   NUMERIC\n" +
-  "   @ATTRIBUTE hockey  NUMERIC\n" +
-  "   @ATTRIBUTE football   NUMERIC\n" +
-  "   @ATTRIBUTE tennis   NUMERIC\n" +
-  "  \n" +
-  '\n' +
-  '\n' +
-  "   @DATA\n" +
-  "   {1 23.1,2 3.23,3 1.2,4 0.2}\n" +
-  "   {0 2.9}\n" +
-  "   {0 2.7,2 3.2,3 1.3,4 0.2}\n" +
-  "   {1 2.6,2 3.1,3 1.23,4 0.2}\n" +
-  "   {1 23.0,2 3.6,3 1.2,4 0.2}\n" +
-  "   {0 23.2,1 3.9,3 1.7,4 0.2}\n" +
-  "   {0 2.6,1 3.2,2 1.2,4 0.3}\n" +
-  "   {1 23.0,2 3.2,3 1.23}\n" +
-  "   {1 2.2,2 2.94 0.2}\n" +
-  "   {1 2.9,2 3.1}\n";
+  private static final String NON_NUMERIC_ARFF = "   % Comments\n" + "   % \n" + "   % Comments go here"
+                                                 + "   % \n" + "   @RELATION Mahout\n" + '\n'
+                                                 + "   @ATTRIBUTE junk  NUMERIC\n"
+                                                 + "   @ATTRIBUTE foo  NUMERIC\n"
+                                                 + "   @ATTRIBUTE bar   {c,d}\n"
+                                                 + "   @ATTRIBUTE hockey  string\n"
+                                                 + "   @ATTRIBUTE football   date \"yyyy-MM-dd\"\n" + "  \n"
+                                                 + '\n' + '\n' + "   @DATA\n"
+                                                 + "   {2 c,3 gretzky,4 1973-10-23}\n"
+                                                 + "   {1 2.9,2 d,3 orr,4 1973-11-23}\n"
+                                                 + "   {2 c,3 bossy,4 1981-10-23}\n"
+                                                 + "   {1 2.6,2 c,3 lefleur,4 1989-10-23}\n"
+                                                 + "   {3 esposito,4 1973-04-23}\n"
+                                                 + "   {1 23.2,2 d,3 chelios,4 1999-2-23}\n"
+                                                 + "   {3 richard,4 1973-10-12}\n"
+                                                 + "   {3 howe,4 1983-06-23}\n"
+                                                 + "   {0 2.2,2 d,3 messier,4 2008-11-23}\n"
+                                                 + "   {2 c,3 roy,4 1973-10-13}\n";
   
-  private static final String NON_NUMERIC_ARFF = "   % Comments\n" +
-  "   % \n" +
-  "   % Comments go here" +
-  "   % \n" +
-  "   @RELATION Mahout\n" +
-  '\n' +
-  "   @ATTRIBUTE junk  NUMERIC\n" +
-  "   @ATTRIBUTE foo  NUMERIC\n" +
-  "   @ATTRIBUTE bar   {c,d}\n" +
-  "   @ATTRIBUTE hockey  string\n" +
-  "   @ATTRIBUTE football   date \"yyyy-MM-dd\"\n" +
-  "  \n" +
-  '\n' +
-  '\n' +
-  "   @DATA\n" +
-  "   {2 c,3 gretzky,4 1973-10-23}\n" +
-  "   {1 2.9,2 d,3 orr,4 1973-11-23}\n" +
-  "   {2 c,3 bossy,4 1981-10-23}\n" +
-  "   {1 2.6,2 c,3 lefleur,4 1989-10-23}\n" +
-  "   {3 esposito,4 1973-04-23}\n" +
-  "   {1 23.2,2 d,3 chelios,4 1999-2-23}\n" +
-  "   {3 richard,4 1973-10-12}\n" +
-  "   {3 howe,4 1983-06-23}\n" +
-  "   {0 2.2,2 d,3 messier,4 2008-11-23}\n" +
-  "   {2 c,3 roy,4 1973-10-13}\n";
+  private static final String NON_NUMERIC_ARFF2 = "   % Comments\n" + "   % \n" + "   % Comments go here"
+                                                  + "   % \n" + "   @RELATION Mahout\n" + '\n'
+                                                  + "   @ATTRIBUTE junk  NUMERIC\n"
+                                                  + "   @ATTRIBUTE foo  NUMERIC\n"
+                                                  + "   @ATTRIBUTE test   {f,z}\n"
+                                                  + "   @ATTRIBUTE hockey  string\n"
+                                                  + "   @ATTRIBUTE football   date \"yyyy-MM-dd\"\n" + "  \n"
+                                                  + '\n' + '\n' + "   @DATA\n"
+                                                  + "   {2 f,3 gretzky,4 1973-10-23}\n"
+                                                  + "   {1 2.9,2 z,3 orr,4 1973-11-23}\n"
+                                                  + "   {2 f,3 bossy,4 1981-10-23}\n"
+                                                  + "   {1 2.6,2 f,3 lefleur,4 1989-10-23}\n"
+                                                  + "   {3 esposito,4 1973-04-23}\n"
+                                                  + "   {1 23.2,2 z,3 chelios,4 1999-2-23}\n"
+                                                  + "   {3 richard,4 1973-10-12}\n"
+                                                  + "   {3 howe,4 1983-06-23}\n"
+                                                  + "   {0 2.2,2 f,3 messier,4 2008-11-23}\n"
+                                                  + "   {2 f,3 roy,4 1973-10-13}\n";
   
-  private static final String NON_NUMERIC_ARFF2 = "   % Comments\n" +
-  "   % \n" +
-  "   % Comments go here" +
-  "   % \n" +
-  "   @RELATION Mahout\n" +
-  '\n' +
-  "   @ATTRIBUTE junk  NUMERIC\n" +
-  "   @ATTRIBUTE foo  NUMERIC\n" +
-  "   @ATTRIBUTE test   {f,z}\n" +
-  "   @ATTRIBUTE hockey  string\n" +
-  "   @ATTRIBUTE football   date \"yyyy-MM-dd\"\n" +
-  "  \n" +
-  '\n' +
-  '\n' +
-  "   @DATA\n" +
-  "   {2 f,3 gretzky,4 1973-10-23}\n" +
-  "   {1 2.9,2 z,3 orr,4 1973-11-23}\n" +
-  "   {2 f,3 bossy,4 1981-10-23}\n" +
-  "   {1 2.6,2 f,3 lefleur,4 1989-10-23}\n" +
-  "   {3 esposito,4 1973-04-23}\n" +
-  "   {1 23.2,2 z,3 chelios,4 1999-2-23}\n" +
-  "   {3 richard,4 1973-10-12}\n" +
-  "   {3 howe,4 1983-06-23}\n" +
-  "   {0 2.2,2 f,3 messier,4 2008-11-23}\n" +
-  "   {2 f,3 roy,4 1973-10-13}\n";
+  private static final String DATE_ARFF = "   % Comments\n"
+                                          + "   % \n"
+                                          + "   % Comments go here"
+                                          + "   % \n"
+                                          + "   @RELATION MahoutDateTest\n"
+                                          + '\n'
+                                          + "   @ATTRIBUTE junk  NUMERIC\n"
+                                          + "   @ATTRIBUTE date1   \n"
+                                          + "   @ATTRIBUTE date2   date \"yyyy.MM.dd G 'at' HH:mm:ss z\" \n"
+                                          + "   @ATTRIBUTE date3   date \"EEE, MMM d, ''yy\" \n"
+                                          + "   @ATTRIBUTE date4   date \"K:mm a, z\" \n"
+                                          + "   @ATTRIBUTE date5   date \"yyyyy.MMMMM.dd GGG hh:mm aaa\" \n"
+                                          + "   @ATTRIBUTE date6   date \"EEE, d MMM yyyy HH:mm:ss Z\" \n"
+                                          + "  \n"
+                                          + '\n'
+                                          + '\n'
+                                          + "   @DATA\n"
+                                          + "   {0 1,1 \"2001-07-04T12:08:56\",2 \"2001.07.04 AD at 12:08:56 PDT\",3 \"Wed, Jul 4, '01,4 0:08 PM, PDT\",4 \"0:08 PM, PDT\", 5 \"02001.July.04 AD 12:08 PM\" ,6 \"Wed, 4 Jul 2001 12:08:56 -0700\"  }\n"
+                                          + "   {0 2,1 \"2001-08-04T12:09:56\",2 \"2011.07.04 AD at 12:08:56 PDT\",3 \"Mon, Jul 4, '11,4 0:08 PM, PDT\",4 \"0:08 PM, PDT\", 5 \"02001.July.14 AD 12:08 PM\" ,6 \"Mon, 4 Jul 2011 12:08:56 -0700\"  }\n";
 }



Mime
View raw message