mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r708519 - in /lucene/mahout/trunk: core/src/main/java/org/apache/mahout/classifier/ core/src/main/java/org/apache/mahout/classifier/bayes/ core/src/main/java/org/apache/mahout/classifier/bayes/common/ core/src/main/java/org/apache/mahout/cl...
Date Tue, 28 Oct 2008 10:41:39 GMT
Author: srowen
Date: Tue Oct 28 03:41:38 2008
New Revision: 708519

URL: http://svn.apache.org/viewvc?rev=708519&view=rev
Log:
Retired and/or addressed the TODOs I wrote, and changed to use indexOf(char) as appropriate
in places

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesWeightSummerMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansCombiner.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java?rev=708519&r1=708518&r2=708519&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java
Tue Oct 28 03:41:38 2008
@@ -73,7 +73,7 @@
     Writer writer = new OutputStreamWriter(new FileOutputStream(outputFile),
         charset);
     inputDir.listFiles(new FileProcessor(label, analyzer, charset, writer));
-    // TODO srowen asks why call this when return value isn't used?
+    // listFiles() is called here as a way to recursively visit files, actually
     writer.close();
 
   }
@@ -91,15 +91,14 @@
    */
   public static void format(String label, Analyzer analyzer, File input,
       Charset charset, File outDir) throws IOException {
-    if (input.isDirectory() == false) {
+    if (input.isDirectory()) {
+      input.listFiles(new FileProcessor(label, analyzer, charset, outDir));
+    } else {
       Writer writer = new OutputStreamWriter(new FileOutputStream(new File(
           outDir, input.getName())), charset);
       writeFile(label, analyzer, new InputStreamReader(new FileInputStream(
           input), charset), writer);
       writer.close();
-    } else {
-      input.listFiles(new FileProcessor(label, analyzer, charset, outDir));
-      // TODO srowen asks why call this when return value isn't used?
     }
   }
 
@@ -176,7 +175,6 @@
         }
       } else {
         file.listFiles(this);
-        // TODO srowen asks why call this when return value isn't used?
       }
       return false;
     }
@@ -198,8 +196,7 @@
     writer.write('\t'); // edit: Inorder to match Hadoop standard
     // TextInputFormat
     Token token = new Token();
-    CharArraySet seen = new CharArraySet(256, false);
-    // TODO srowen wonders that 'seen' is updated but not used?
+    //CharArraySet seen = new CharArraySet(256, false);
     //long numTokens = 0;
     while ((token = ts.next(token)) != null) {
       char[] termBuffer = token.termBuffer();
@@ -209,7 +206,7 @@
       writer.write(' ');
       char[] tmp = new char[termLen];
       System.arraycopy(termBuffer, 0, tmp, 0, termLen);
-      seen.add(tmp);// do this b/c CharArraySet doesn't allow offsets
+      //seen.add(tmp);// do this b/c CharArraySet doesn't allow offsets
     }
     ///numTokens++;
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java?rev=708519&r1=708518&r2=708519&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java
Tue Oct 28 03:41:38 2008
@@ -98,7 +98,6 @@
           Integer labelInt = label;
           double D_ij = getWeightUnprocessed(labelInt, featureInt);
           double sumLabelWeight = getSumLabelWeight(labelInt);
-          // TODO srowen says sigma_j is unused
           //double sigma_j = getSumFeatureWeight(featureInt);
 
           double numerator = D_ij + alpha_i;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java?rev=708519&r1=708518&r2=708519&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java
Tue Oct 28 03:41:38 2008
@@ -74,7 +74,6 @@
     StringBuilder builder = new StringBuilder(label);
     builder.ensureCapacity(32);// make sure we have a reasonably size buffer to
                                // begin with
-    // TODO: srowen says this var isn't used right now
     //List<String> previousN_1Grams  = Model.generateNGramsWithoutLabel(line, keyLen);
     
     double lengthNormalisation = 0.0;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesWeightSummerMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesWeightSummerMapper.java?rev=708519&r1=708518&r2=708519&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesWeightSummerMapper.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesWeightSummerMapper.java
Tue Oct 28 03:41:38 2008
@@ -46,7 +46,7 @@
       throws IOException {
 
     String labelFeaturePair = key.toString();
-    int i = labelFeaturePair.indexOf(",");
+    int i = labelFeaturePair.indexOf(',');
     
     String label = labelFeaturePair.substring(0,i);
     String feature = labelFeaturePair.substring(i+1);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java?rev=708519&r1=708518&r2=708519&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/io/SequenceFileModelReader.java
Tue Oct 28 03:41:38 2008
@@ -74,7 +74,7 @@
       while (reader.next(key, value)) {
         String keyStr = key.toString();
 
-        int idx = keyStr.indexOf(",");
+        int idx = keyStr.indexOf(',');
         if (idx != -1) {
           model.loadFeatureWeight(keyStr.substring(0, idx), keyStr.substring(idx + 1), value.get());
         }
@@ -200,19 +200,11 @@
       // the key is either _label_ or label,feature
       while (reader.next(key, value)) {
         String keyStr = key.toString();
-        // TODO srowen says we should probably collapse these empty branches?
-        if (keyStr.startsWith("_")) {
-
-        } else if (keyStr.startsWith(",")) {
-
-        } else if (keyStr.startsWith("*")) {
-
-        } else {
-          int idx = keyStr.indexOf(",");
+        if (!keyStr.startsWith("_") && !keyStr.startsWith(",") && !keyStr.startsWith("*")) {
+          int idx = keyStr.indexOf(',');
           if (idx != -1) {
-            // TODO srowen says data is not used?
-            Map<String,Double> data = new HashMap<String,Double>();
-            data.put(keyStr.substring(0, idx), value.get());
+            //Map<String,Double> data = new HashMap<String,Double>();
+            //data.put(keyStr.substring(0, idx), value.get());
             writer.append(new Text(key.toString()), value);
           }
         }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansCombiner.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansCombiner.java?rev=708519&r1=708518&r2=708519&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansCombiner.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansCombiner.java
Tue Oct 28 03:41:38 2008
@@ -36,10 +36,9 @@
     SoftCluster cluster = SoftCluster.decodeCluster(key.toString());
     while (values.hasNext()) {
       String pointInfo = values.next().toString();
-      double pointProb = Double.parseDouble(pointInfo.substring(0, pointInfo
-          .indexOf(":")));
+      double pointProb = Double.parseDouble(pointInfo.substring(0, pointInfo.indexOf(':')));
 
-      String encodedVector = pointInfo.substring(pointInfo.indexOf(":") + 1);
+      String encodedVector = pointInfo.substring(pointInfo.indexOf(':') + 1);
       cluster.addPoint(AbstractVector.decodeVector(encodedVector), pointProb
           * SoftCluster.getM());
     }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java?rev=708519&r1=708518&r2=708519&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java
Tue Oct 28 03:41:38 2008
@@ -47,16 +47,14 @@
         double partialSumPtProb = Double.parseDouble(value.substring(0, ix));
         Vector total = AbstractVector.decodeVector(value.substring(ix + 2));
         cluster.addPoints(partialSumPtProb, total);
-      } catch (Exception e) { 
+      } catch (RuntimeException e) {
         // TODO srowen thinks this should be replaced with a more specific catch, or not use exceptions to control flow
         // Escaped from Combiner. So, let's do that processing too:
         log.info("Escaped from combiner: Key: {} Value: {}", key, value);
-        double pointProb = Double.parseDouble(value.substring(0, value
-            .indexOf(":")));
+        double pointProb = Double.parseDouble(value.substring(0, value.indexOf(':')));
 
-        String encodedVector = value.substring(value.indexOf(":") + 1);
-        cluster.addPoint(AbstractVector.decodeVector(encodedVector), pointProb
-            * SoftCluster.getM());
+        String encodedVector = value.substring(value.indexOf(':') + 1);
+        cluster.addPoint(AbstractVector.decodeVector(encodedVector), pointProb * SoftCluster.getM());
       }
     }
 

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java?rev=708519&r1=708518&r2=708519&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java
(original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java
Tue Oct 28 03:41:38 2008
@@ -89,9 +89,9 @@
 
     DistanceMeasure measure = (DistanceMeasure) cl.newInstance();
     SoftCluster.config(measure, threshold);
-    boolean converged = false;
-    // TODO srowen notes that converged is always false?
-    for (int iter = 0; !converged && iter < numIter; iter++) {
+    //boolean converged = false;
+    //for (int iter = 0; !converged && iter < numIter; iter++) {
+    for (int iter = 0; iter < numIter; iter++) {
       iterateReference(points, clusterList, measure);
     }
     computeCluster(points, clusterList, measure, pointClusterInfo);
@@ -297,16 +297,15 @@
 
       for (String key : mapCollector.getKeys()) {
         //SoftCluster cluster = SoftCluster.decodeCluster(key);
-        // TODO srowen says cluster is not used?
         List<Text> values = mapCollector.getValue(key);
 
         for (Text value : values) {
           String pointInfo = value.toString();
           double pointProb = Double.parseDouble(pointInfo.substring(0,
-              pointInfo.indexOf(":")));
+              pointInfo.indexOf(':')));
 
           String encodedVector = pointInfo
-              .substring(pointInfo.indexOf(":") + 1);
+              .substring(pointInfo.indexOf(':') + 1);
 
           Double val = pointTotalProbMap.get(encodedVector);
           double probVal = 0.0;

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java?rev=708519&r1=708518&r2=708519&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
(original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
Tue Oct 28 03:41:38 2008
@@ -266,7 +266,7 @@
         List<Text> values = collector2.getValue(key);
         assertEquals("too many values", 1, values.size());
         String value = values.get(0).toString();
-        int ix = value.indexOf(",");
+        int ix = value.indexOf(',');
         count += Integer.parseInt(value.substring(0, ix));
         total = total
             .plus(AbstractVector.decodeVector(value.substring(ix + 2)));

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java?rev=708519&r1=708518&r2=708519&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
(original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
Tue Oct 28 03:41:38 2008
@@ -148,16 +148,15 @@
     if (cmdLine.hasOption(encodingOpt)) {
       encoding = (String) cmdLine.getValue(encodingOpt);
     }
-    Analyzer analyzer = null;
-    if (cmdLine.hasOption(analyzerOpt)) {
-      String className = (String) cmdLine.getValue(analyzerOpt);
-      Class clazz = Class.forName(className);
-      analyzer = (Analyzer) clazz.newInstance();
-    }
-    if (analyzer == null) {
-      analyzer = new StandardAnalyzer();
-    }
-    // TODO srowen says analyzer is never used?
+    //Analyzer analyzer = null;
+    //if (cmdLine.hasOption(analyzerOpt)) {
+      //String className = (String) cmdLine.getValue(analyzerOpt);
+      //Class clazz = Class.forName(className);
+      //analyzer = (Analyzer) clazz.newInstance();
+    //}
+    //if (analyzer == null) {
+    //  analyzer = new StandardAnalyzer();
+    //}
     int gramSize = 1;
     if (cmdLine.hasOption(gramSizeOpt)) {
       gramSize = Integer.parseInt((String) cmdLine

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java?rev=708519&r1=708518&r2=708519&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java
(original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java
Tue Oct 28 03:41:38 2008
@@ -70,8 +70,6 @@
         new FileInputStream(dumpFilePath), "UTF-8"));
 
     File dir = new File(outputDirPath);
-    dir.getPath();
-    // TODO srowen asks if the call to getPath() is needed?
 
     String header =
           "<mediawiki xmlns=\"http://www.mediawiki.org/xml/export-0.3/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd\" version=\"0.3\" xml:lang=\"en\">\n"



Mime
View raw message