Return-Path: Delivered-To: apmail-lucene-mahout-commits-archive@locus.apache.org Received: (qmail 36766 invoked from network); 24 Aug 2008 16:11:38 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 24 Aug 2008 16:11:38 -0000 Received: (qmail 17360 invoked by uid 500); 24 Aug 2008 16:11:37 -0000 Delivered-To: apmail-lucene-mahout-commits-archive@lucene.apache.org Received: (qmail 17294 invoked by uid 500); 24 Aug 2008 16:11:36 -0000 Mailing-List: contact mahout-commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: mahout-dev@lucene.apache.org Delivered-To: mailing list mahout-commits@lucene.apache.org Received: (qmail 17285 invoked by uid 99); 24 Aug 2008 16:11:36 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 24 Aug 2008 09:11:36 -0700 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 24 Aug 2008 16:10:47 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id C79292388A15; Sun, 24 Aug 2008 09:10:46 -0700 (PDT) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r688522 [2/2] - in /lucene/mahout/trunk: core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/ core/src/main/java/org/apache/mahout/classifier/ core/src/main/java/org/apache/mahout/classifier/bayes/ core/src/main/java/org/apache/m... 
Date: Sun, 24 Aug 2008 16:10:44 -0000 To: mahout-commits@lucene.apache.org From: srowen@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20080824161046.C79292388A15@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaDriver.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaDriver.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaDriver.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaDriver.java Sun Aug 24 09:10:42 2008 @@ -27,24 +27,30 @@ import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.hadoop.util.GenericsUtil; import org.apache.mahout.classifier.bayes.io.SequenceFileModelReader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.HashMap; import java.util.Map; +import java.io.IOException; /** * Create and run the Bayes Trainer. - * - **/ + */ public class CBayesThetaDriver { + + private static final Logger log = LoggerFactory.getLogger(CBayesThetaDriver.class); + /** * Takes in two arguments: *
<ol>
 * <li>The input {@link org.apache.hadoop.fs.Path} where the input documents live</li>
- * <li>The output {@link org.apache.hadoop.fs.Path} where to write the {@link org.apache.mahout.common.Model} as a {@link org.apache.hadoop.io.SequenceFile}</li>
+ * <li>The output {@link org.apache.hadoop.fs.Path} where to write the {@link org.apache.mahout.common.Model} as a
+ * {@link org.apache.hadoop.io.SequenceFile}</li>
 * </ol>
* @param args The args */ - public static void main(String[] args) { + public static void main(String[] args) throws IOException { String input = args[0]; String output = args[1]; @@ -57,7 +63,7 @@ * @param input the input pathname String * @param output the output pathname String */ - public static void runJob(String input, String output) { + public static void runJob(String input, String output) throws IOException { JobClient client = new JobClient(); JobConf conf = new JobConf(CBayesThetaDriver.class); @@ -77,51 +83,48 @@ conf.setReducerClass(CBayesThetaReducer.class); conf.setOutputFormat(SequenceFileOutputFormat.class); - conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization"); // Dont ever forget this. People should keep track of how hadoop conf parameters and make or break a piece of code - try { - FileSystem dfs = FileSystem.get(conf); - if (dfs.exists(outPath)) - dfs.delete(outPath, true); - - SequenceFileModelReader reader = new SequenceFileModelReader(); - - Path Sigma_kFiles = new Path(output+"/trainer-weights/Sigma_k/*"); - HashMap labelWeightSum= reader.readLabelSums(dfs, Sigma_kFiles, conf); - DefaultStringifier> mapStringifier = new DefaultStringifier>(conf, GenericsUtil.getClass(labelWeightSum)); - String labelWeightSumString = mapStringifier.toString(labelWeightSum); - - System.out.println("Sigma_k for Each Label"); - Map c = mapStringifier.fromString(labelWeightSumString); - System.out.println(c); - conf.set("cnaivebayes.sigma_k", labelWeightSumString); - - - Path sigma_kSigma_jFile = new Path(output+"/trainer-weights/Sigma_kSigma_j/*"); - Float sigma_jSigma_k = reader.readSigma_jSigma_k(dfs, sigma_kSigma_jFile, conf); - DefaultStringifier floatStringifier = new DefaultStringifier(conf, Float.class); - String sigma_jSigma_kString = floatStringifier.toString(sigma_jSigma_k); - - System.out.println("Sigma_kSigma_j for each Label and for each Features"); - Float 
retSigma_jSigma_k = floatStringifier.fromString(sigma_jSigma_kString); - System.out.println(retSigma_jSigma_k); - conf.set("cnaivebayes.sigma_jSigma_k", sigma_jSigma_kString); - - Path vocabCountFile = new Path(output+"/trainer-tfIdf/trainer-vocabCount/*"); - Float vocabCount = reader.readVocabCount(dfs, vocabCountFile, conf); - String vocabCountString = floatStringifier.toString(vocabCount); - - System.out.println("Vocabulary Count"); - conf.set("cnaivebayes.vocabCount", vocabCountString); - Float retvocabCount = floatStringifier.fromString(vocabCountString); - System.out.println(retvocabCount); - - client.setConf(conf); - - JobClient.runJob(conf); - - } catch (Exception e) { - throw new RuntimeException(e); - } - + conf.set("io.serializations", + "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization"); + // Dont ever forget this. People should keep track of how hadoop conf parameters and make or break a piece of code + FileSystem dfs = FileSystem.get(conf); + if (dfs.exists(outPath)) + dfs.delete(outPath, true); + + SequenceFileModelReader reader = new SequenceFileModelReader(); + + Path Sigma_kFiles = new Path(output+"/trainer-weights/Sigma_k/*"); + HashMap labelWeightSum= reader.readLabelSums(dfs, Sigma_kFiles, conf); + DefaultStringifier> mapStringifier = + new DefaultStringifier>(conf, GenericsUtil.getClass(labelWeightSum)); + String labelWeightSumString = mapStringifier.toString(labelWeightSum); + + log.info("Sigma_k for Each Label"); + Map c = mapStringifier.fromString(labelWeightSumString); + log.info("{}", c); + conf.set("cnaivebayes.sigma_k", labelWeightSumString); + + + Path sigma_kSigma_jFile = new Path(output+"/trainer-weights/Sigma_kSigma_j/*"); + Float sigma_jSigma_k = reader.readSigma_jSigma_k(dfs, sigma_kSigma_jFile, conf); + DefaultStringifier floatStringifier = new DefaultStringifier(conf, Float.class); + String sigma_jSigma_kString = floatStringifier.toString(sigma_jSigma_k); + + 
log.info("Sigma_kSigma_j for each Label and for each Features"); + Float retSigma_jSigma_k = floatStringifier.fromString(sigma_jSigma_kString); + log.info("{}", retSigma_jSigma_k); + conf.set("cnaivebayes.sigma_jSigma_k", sigma_jSigma_kString); + + Path vocabCountFile = new Path(output+"/trainer-tfIdf/trainer-vocabCount/*"); + Float vocabCount = reader.readVocabCount(dfs, vocabCountFile, conf); + String vocabCountString = floatStringifier.toString(vocabCount); + + log.info("Vocabulary Count"); + conf.set("cnaivebayes.vocabCount", vocabCountString); + Float retvocabCount = floatStringifier.fromString(vocabCountString); + log.info("{}", retvocabCount); + + client.setConf(conf); + + JobClient.runJob(conf); } } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaMapper.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaMapper.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaMapper.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaMapper.java Sun Aug 24 09:10:42 2008 @@ -26,6 +26,8 @@ import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.util.GenericsUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.HashMap; @@ -34,6 +36,8 @@ public class CBayesThetaMapper extends MapReduceBase implements Mapper { + private static final Logger log = LoggerFactory.getLogger(CBayesThetaMapper.class); + public HashMap labelWeightSum = null; String labelWeightSumString = " "; Float sigma_jSigma_k = 0.0f; @@ -102,8 +106,7 @@ } } catch (IOException ex) { - - ex.printStackTrace(); + log.info(ex.toString(), ex); } } } Modified: 
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerDriver.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerDriver.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerDriver.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerDriver.java Sun Aug 24 09:10:42 2008 @@ -27,15 +27,20 @@ import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.hadoop.util.GenericsUtil; import org.apache.mahout.classifier.bayes.io.SequenceFileModelReader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.HashMap; import java.util.Map; +import java.io.IOException; /** * Create and run the Bayes Trainer. - * - **/ + */ public class CBayesThetaNormalizerDriver { + + private static final Logger log = LoggerFactory.getLogger(CBayesThetaNormalizerDriver.class); + /** * Takes in two arguments: *
<ol>
@@ -44,7 +49,7 @@
 * </ol>
* @param args The args */ - public static void main(String[] args) { + public static void main(String[] args) throws IOException { String input = args[0]; String output = args[1]; @@ -57,7 +62,7 @@ * @param input the input pathname String * @param output the output pathname String */ - public static void runJob(String input, String output) { + public static void runJob(String input, String output) throws IOException { JobClient client = new JobClient(); JobConf conf = new JobConf(CBayesThetaNormalizerDriver.class); @@ -75,52 +80,50 @@ conf.setCombinerClass(CBayesThetaNormalizerReducer.class); conf.setReducerClass(CBayesThetaNormalizerReducer.class); conf.setOutputFormat(SequenceFileOutputFormat.class); - conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization"); // Dont ever forget this. People should keep track of how hadoop conf parameters and make or break a piece of code + conf.set("io.serializations", + "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization"); + // Dont ever forget this. 
People should keep track of how hadoop conf parameters and make or break a piece of code - try { - FileSystem dfs = FileSystem.get(conf); - if (dfs.exists(outPath)) - dfs.delete(outPath, true); - - SequenceFileModelReader reader = new SequenceFileModelReader(); - - Path Sigma_kFiles = new Path(output+"/trainer-weights/Sigma_k/*"); - HashMap labelWeightSum= reader.readLabelSums(dfs, Sigma_kFiles, conf); - DefaultStringifier> mapStringifier = new DefaultStringifier>(conf, GenericsUtil.getClass(labelWeightSum)); - String labelWeightSumString = mapStringifier.toString(labelWeightSum); - - System.out.println("Sigma_k for Each Label"); - Map c = mapStringifier.fromString(labelWeightSumString); - System.out.println(c); - conf.set("cnaivebayes.sigma_k", labelWeightSumString); - - - Path sigma_kSigma_jFile = new Path(output+"/trainer-weights/Sigma_kSigma_j/*"); - Float sigma_jSigma_k = reader.readSigma_jSigma_k(dfs, sigma_kSigma_jFile, conf); - DefaultStringifier floatStringifier = new DefaultStringifier(conf, Float.class); - String sigma_jSigma_kString = floatStringifier.toString(sigma_jSigma_k); - - System.out.println("Sigma_kSigma_j for each Label and for each Features"); - Float retSigma_jSigma_k = floatStringifier.fromString(sigma_jSigma_kString); - System.out.println(retSigma_jSigma_k); - conf.set("cnaivebayes.sigma_jSigma_k", sigma_jSigma_kString); - - Path vocabCountFile = new Path(output+"/trainer-tfIdf/trainer-vocabCount/*"); - Float vocabCount = reader.readVocabCount(dfs, vocabCountFile, conf); - String vocabCountString = floatStringifier.toString(vocabCount); - - System.out.println("Vocabulary Count"); - conf.set("cnaivebayes.vocabCount", vocabCountString); - Float retvocabCount = floatStringifier.fromString(vocabCountString); - System.out.println(retvocabCount); - - client.setConf(conf); - - JobClient.runJob(conf); - - } catch (Exception e) { - throw new RuntimeException(e); - } + FileSystem dfs = FileSystem.get(conf); + if (dfs.exists(outPath)) + 
dfs.delete(outPath, true); + + SequenceFileModelReader reader = new SequenceFileModelReader(); + + Path Sigma_kFiles = new Path(output+"/trainer-weights/Sigma_k/*"); + HashMap labelWeightSum= reader.readLabelSums(dfs, Sigma_kFiles, conf); + DefaultStringifier> mapStringifier = + new DefaultStringifier>(conf, GenericsUtil.getClass(labelWeightSum)); + String labelWeightSumString = mapStringifier.toString(labelWeightSum); + + log.info("Sigma_k for Each Label"); + Map c = mapStringifier.fromString(labelWeightSumString); + log.info("{}", c); + conf.set("cnaivebayes.sigma_k", labelWeightSumString); + + + Path sigma_kSigma_jFile = new Path(output+"/trainer-weights/Sigma_kSigma_j/*"); + Float sigma_jSigma_k = reader.readSigma_jSigma_k(dfs, sigma_kSigma_jFile, conf); + DefaultStringifier floatStringifier = new DefaultStringifier(conf, Float.class); + String sigma_jSigma_kString = floatStringifier.toString(sigma_jSigma_k); + + log.info("Sigma_kSigma_j for each Label and for each Features"); + Float retSigma_jSigma_k = floatStringifier.fromString(sigma_jSigma_kString); + log.info("{}", retSigma_jSigma_k); + conf.set("cnaivebayes.sigma_jSigma_k", sigma_jSigma_kString); + + Path vocabCountFile = new Path(output+"/trainer-tfIdf/trainer-vocabCount/*"); + Float vocabCount = reader.readVocabCount(dfs, vocabCountFile, conf); + String vocabCountString = floatStringifier.toString(vocabCount); + + log.info("Vocabulary Count"); + conf.set("cnaivebayes.vocabCount", vocabCountString); + Float retvocabCount = floatStringifier.fromString(vocabCountString); + log.info("{}", retvocabCount); + + client.setConf(conf); + + JobClient.runJob(conf); } } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerMapper.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerMapper.java?rev=688522&r1=688521&r2=688522&view=diff 
============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerMapper.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerMapper.java Sun Aug 24 09:10:42 2008 @@ -26,6 +26,8 @@ import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.util.GenericsUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.HashMap; @@ -33,6 +35,8 @@ public class CBayesThetaNormalizerMapper extends MapReduceBase implements Mapper { + private static final Logger log = LoggerFactory.getLogger(CBayesThetaNormalizerMapper.class); + public HashMap labelWeightSum = null; String labelWeightSumString = " "; @@ -109,8 +113,7 @@ } } catch (IOException ex) { - - ex.printStackTrace(); + log.warn(ex.toString(), ex); } } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerReducer.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerReducer.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerReducer.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaNormalizerReducer.java Sun Aug 24 09:10:42 2008 @@ -32,9 +32,7 @@ /** * Can also be used as a local Combiner beacuse only two values should be there * inside the values - * */ - public class CBayesThetaNormalizerReducer extends MapReduceBase implements Reducer { @@ -61,7 +59,6 @@ while (values.hasNext()) { weightSumPerLabel += values.next().get(); } - // System.out.println(token + "=>"+ weightSumPerLabel); 
output.collect(key, new FloatWritable(weightSumPerLabel)); } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaReducer.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaReducer.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaReducer.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/cbayes/CBayesThetaReducer.java Sun Aug 24 09:10:42 2008 @@ -22,19 +22,18 @@ import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reducer; import org.apache.hadoop.mapred.Reporter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.Iterator; - /** * Can also be used as a local Combiner beacuse only two values should be there inside the values - * - **/ - + */ public class CBayesThetaReducer extends MapReduceBase implements Reducer { - + private static final Logger log = LoggerFactory.getLogger(CBayesThetaReducer.class); public void reduce(Text key, Iterator values, OutputCollector output, Reporter reporter) throws IOException { //Key is label,word, value is the number of times we've seen this label word per local node. 
Output is the same @@ -47,7 +46,7 @@ } if(numberofValues < 2) return; if(weight<=0.0f) - System.out.println(token + "=>"+ weight); + log.info("{}=>{}", token, weight); output.collect(key, new FloatWritable(weight)); } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java Sun Aug 24 09:10:42 2008 @@ -110,7 +110,11 @@ Class cl = ccl.loadClass(job.get(DISTANCE_MEASURE_KEY)); measure = (DistanceMeasure) cl.newInstance(); measure.configure(job); - } catch (Exception e) { + } catch (ClassNotFoundException e) { + throw new RuntimeException(e); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } catch (InstantiationException e) { throw new RuntimeException(e); } nextCanopyId = 0; Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusteringJob.java Sun Aug 24 09:10:42 2008 @@ -1,5 +1,7 @@ package org.apache.mahout.clustering.canopy; +import java.io.IOException; + /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. 
See the NOTICE file distributed with @@ -26,7 +28,7 @@ /** * @param args */ - public static void main(String[] args) { + public static void main(String[] args) throws IOException { String input = args[0]; String output = args[1]; String measureClassName = args[2]; @@ -45,7 +47,7 @@ * @param t2 the T2 distance threshold */ public static void runJob(String input, String output, - String measureClassName, double t1, double t2) { + String measureClassName, double t1, double t2) throws IOException { CanopyDriver.runJob(input, output + "/canopies", measureClassName, t1, t2); ClusterDriver.runJob(input, output + "/canopies", output, measureClassName, t1, t2); } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java Sun Aug 24 09:10:42 2008 @@ -25,12 +25,14 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.SequenceFileOutputFormat; +import java.io.IOException; + public class CanopyDriver { private CanopyDriver() { } - public static void main(String[] args) { + public static void main(String[] args) throws IOException { String input = args[0]; String output = args[1]; String measureClassName = args[2]; @@ -49,7 +51,7 @@ * @param t2 the T2 distance threshold */ public static void runJob(String input, String output, - String measureClassName, double t1, double t2) { + String measureClassName, double t1, double t2) throws IOException { JobClient client = new JobClient(); JobConf conf = new JobConf( org.apache.mahout.clustering.canopy.CanopyDriver.class); @@ 
-71,14 +73,10 @@ conf.setOutputFormat(SequenceFileOutputFormat.class); client.setConf(conf); - try { - FileSystem dfs = FileSystem.get(conf); - if (dfs.exists(outPath)) - dfs.delete(outPath, true); - JobClient.runJob(conf); - } catch (Exception e) { - throw new RuntimeException(e); - } + FileSystem dfs = FileSystem.get(conf); + if (dfs.exists(outPath)) + dfs.delete(outPath, true); + JobClient.runJob(conf); } } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/ClusterDriver.java Sun Aug 24 09:10:42 2008 @@ -25,12 +25,14 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.lib.IdentityReducer; +import java.io.IOException; + public class ClusterDriver { private ClusterDriver() { } - public static void main(String[] args) { + public static void main(String[] args) throws IOException { String points = args[0]; String canopies = args[1]; String output = args[2]; @@ -51,7 +53,7 @@ * @param t2 the T2 distance threshold */ public static void runJob(String points, String canopies, String output, - String measureClassName, double t1, double t2) { + String measureClassName, double t1, double t2) throws IOException { JobClient client = new JobClient(); JobConf conf = new JobConf( org.apache.mahout.clustering.canopy.ClusterDriver.class); @@ -72,14 +74,10 @@ conf.setReducerClass(IdentityReducer.class); client.setConf(conf); - try { - FileSystem dfs = FileSystem.get(conf); - if (dfs.exists(outPath)) - dfs.delete(outPath, true); - JobClient.runJob(conf); - } catch (Exception e) 
{ - throw new RuntimeException(e); - } + FileSystem dfs = FileSystem.get(conf); + if (dfs.exists(outPath)) + dfs.delete(outPath, true); + JobClient.runJob(conf); } } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java Sun Aug 24 09:10:42 2008 @@ -48,9 +48,9 @@ String clusters = args[1]; String output = args[2]; String measureClass = args[3]; - double convergenceDelta = new Double(args[4]); - int maxIterations = new Integer(args[5]); - int m = new Integer(args[6]); + double convergenceDelta = Double.parseDouble(args[4]); + int maxIterations = Integer.parseInt(args[5]); + int m = Integer.parseInt(args[6]); runJob(input, clusters, output, measureClass, convergenceDelta, maxIterations, 10,m); } @@ -69,34 +69,30 @@ public static void runJob(String input, String clustersIn, String output, String measureClass, double convergenceDelta, int maxIterations, int numMapTasks, int m) { - try { - - boolean converged = false; - int iteration = 0; - String delta = Double.toString(convergenceDelta); - - // iterate until the clusters converge - while (!converged && iteration < maxIterations) { - log.info("Iteration {" + iteration + "}"); - - // point the output to a new directory per iteration - String clustersOut = output + File.separator + "clusters-" + iteration; - converged = runIteration(input, clustersIn, clustersOut, measureClass, - delta, numMapTasks, iteration, m); - - // now point the input to the old output directory - clustersIn = output + 
File.separator + "clusters-" + iteration; - iteration++; - } - - // now actually cluster the points - log.info("Clustering "); - runClustering(input, clustersIn, output + File.separator + "points", - measureClass, delta, numMapTasks, m); - } catch (Exception e) { - throw new RuntimeException(e); + boolean converged = false; + int iteration = 0; + String delta = Double.toString(convergenceDelta); + + // iterate until the clusters converge + while (!converged && iteration < maxIterations) { + log.info("Iteration {" + iteration + "}"); + + // point the output to a new directory per iteration + String clustersOut = output + File.separator + "clusters-" + iteration; + converged = runIteration(input, clustersIn, clustersOut, measureClass, + delta, numMapTasks, iteration, m); + + // now point the input to the old output directory + clustersIn = output + File.separator + "clusters-" + iteration; + iteration++; } + + // now actually cluster the points + log.info("Clustering "); + + runClustering(input, clustersIn, output + File.separator + "points", + measureClass, delta, numMapTasks, m); } /** @@ -142,7 +138,7 @@ JobClient.runJob(conf); FileSystem fs = FileSystem.get(conf); return isConverged(clustersOut, conf, fs); - } catch (Exception e) { + } catch (IOException e) { log.warn(e.toString(), e); return true; } @@ -184,7 +180,7 @@ conf.set(SoftCluster.M_KEY, String.valueOf(m)); try { JobClient.runJob(conf); - } catch (Exception e) { + } catch (IOException e) { log.warn(e.toString(), e); } } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java (original) +++ 
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansJob.java Sun Aug 24 09:10:42 2008 @@ -20,14 +20,19 @@ import org.apache.mahout.clustering.canopy.CanopyDriver; import org.apache.mahout.utils.ManhattanDistanceMeasure; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class FuzzyKMeansJob { + private static final Logger log = LoggerFactory.getLogger(FuzzyKMeansJob.class); + public static void main(String[] args) throws IOException { if (args.length != 9) { - System.out.println("Expected num Arguments: 9 received:" + args.length); + log.warn("Expected num Arguments: 9 received: {}", args.length); printMessage(); + return; } int index = 0; String input = args[index++]; @@ -49,9 +54,7 @@ * Prints Error Message */ private static void printMessage() { - System.out - .println("Usage: inputDir clusterDir OutputDir ConvergenceDelata maxIterations numMapTasks doCanopy"); - System.exit(1); + log.warn("Usage: inputDir clusterDir OutputDir ConvergenceDelata maxIterations numMapTasks doCanopy"); } /** @@ -69,20 +72,17 @@ */ public static void runJob(String input, String clustersIn, String output, String measureClass, double convergenceDelta, int maxIterations, - int numMapTasks, boolean doCanopy, int m) { - try { + int numMapTasks, boolean doCanopy, int m) throws IOException { + + // run canopy to find initial clusters + if (doCanopy) { + CanopyDriver.runJob(input, clustersIn, ManhattanDistanceMeasure.class + .getName(), 100.1, 50.1); - // run canopy to find initial clusters - if (doCanopy) { - CanopyDriver.runJob(input, clustersIn, ManhattanDistanceMeasure.class - .getName(), 100.1, 50.1); - - } - // run fuzzy k -means - FuzzyKMeansDriver.runJob(input, clustersIn, output, measureClass, - convergenceDelta, maxIterations, numMapTasks,m); - } catch (Exception e) { - throw new RuntimeException(e); } + // run fuzzy k -means + FuzzyKMeansDriver.runJob(input, clustersIn, output, measureClass, + convergenceDelta, maxIterations, 
numMapTasks,m); + } } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java Sun Aug 24 09:10:42 2008 @@ -27,10 +27,14 @@ import org.apache.hadoop.mapred.Reporter; import org.apache.mahout.matrix.AbstractVector; import org.apache.mahout.matrix.Vector; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class FuzzyKMeansReducer extends MapReduceBase implements Reducer { + private static final Logger log = LoggerFactory.getLogger(FuzzyKMeansReducer.class); + public void reduce(Text key, Iterator values, OutputCollector output, Reporter reporter) throws IOException { SoftCluster cluster = SoftCluster.decodeCluster(key.toString()); @@ -39,13 +43,13 @@ int ix = value.indexOf(','); try { - double partialSumPtProb = new Double(value.substring(0, ix)); + double partialSumPtProb = Double.parseDouble(value.substring(0, ix)); Vector total = AbstractVector.decodeVector(value.substring(ix + 2)); cluster.addPoints(partialSumPtProb, total); - } catch (Exception e) { + } catch (Exception e) { + // TODO srowen thinks this should be replaced with a more specific catch, or not use exceptions to control flow // Escaped from Combiner. 
So, let's do that processing too: - System.out.println("Escaped from combiner: Key:" + key.toString() - + " Value:" + value); + log.info("Escaped from combiner: Key: {} Value: {}", key, value); double pointProb = Double.parseDouble(value.substring(0, value .indexOf(":"))); @@ -57,8 +61,7 @@ // force convergence calculation cluster.computeConvergence(); - output.collect(new Text(cluster.getIdentifier()), new Text(SoftCluster - .formatCluster(cluster))); + output.collect(new Text(cluster.getIdentifier()), new Text(SoftCluster.formatCluster(cluster))); } @Override Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/SoftCluster.java Sun Aug 24 09:10:42 2008 @@ -115,10 +115,14 @@ Class cl = ccl.loadClass(job.get(DISTANCE_MEASURE_KEY)); measure = (DistanceMeasure) cl.newInstance(); measure.configure(job); - convergenceDelta = new Double(job.get(CLUSTER_CONVERGENCE_KEY)); + convergenceDelta = Double.parseDouble(job.get(CLUSTER_CONVERGENCE_KEY)); nextClusterId = 0; m = Float.parseFloat(job.get(M_KEY)); - } catch (Exception e) { + } catch (ClassNotFoundException e) { + throw new RuntimeException(e); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } catch (InstantiationException e) { throw new RuntimeException(e); } } @@ -217,8 +221,7 @@ denom += Math.pow(clusterDistance / eachCDist, (double) 2 / (m - 1)); } - double val = (double) (1) / denom; - return val; + return 1.0 / denom; } /** Modified: 
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java Sun Aug 24 09:10:42 2008 @@ -106,7 +106,11 @@ measure.configure(job); convergenceDelta = Double.parseDouble(job.get(CLUSTER_CONVERGENCE_KEY)); nextClusterId = 0; - } catch (Exception e) { + } catch (ClassNotFoundException e) { + throw new RuntimeException(e); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } catch (InstantiationException e) { throw new RuntimeException(e); } } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Sun Aug 24 09:10:42 2008 @@ -59,29 +59,25 @@ */ public static void runJob(String input, String clustersIn, String output, String measureClass, double convergenceDelta, int maxIterations) { - try { - // iterate until the clusters converge - boolean converged = false; - int iteration = 0; - String delta = Double.toString(convergenceDelta); - - while (!converged && iteration < maxIterations) { - log.info("Iteration {}", iteration); - // point the output to a new directory per iteration - String clustersOut = 
output + "/clusters-" + iteration; - converged = runIteration(input, clustersIn, clustersOut, measureClass, - delta); - // now point the input to the old output directory - clustersIn = output + "/clusters-" + iteration; - iteration++; - } - // now actually cluster the points - log.info("Clustering "); - runClustering(input, clustersIn, output + "/points", measureClass, + // iterate until the clusters converge + boolean converged = false; + int iteration = 0; + String delta = Double.toString(convergenceDelta); + + while (!converged && iteration < maxIterations) { + log.info("Iteration {}", iteration); + // point the output to a new directory per iteration + String clustersOut = output + "/clusters-" + iteration; + converged = runIteration(input, clustersIn, clustersOut, measureClass, delta); - } catch (Exception e) { - throw new RuntimeException(e); + // now point the input to the old output directory + clustersIn = output + "/clusters-" + iteration; + iteration++; } + // now actually cluster the points + log.info("Clustering "); + runClustering(input, clustersIn, output + "/points", measureClass, + delta); } /** @@ -120,7 +116,7 @@ JobClient.runJob(conf); FileSystem fs = FileSystem.get(conf); return isConverged(clustersOut + "/part-00000", conf, fs); - } catch (Exception e) { + } catch (IOException e) { log.warn(e.toString(), e); return true; } @@ -156,7 +152,7 @@ client.setConf(conf); try { JobClient.runJob(conf); - } catch (Exception e) { + } catch (IOException e) { log.warn(e.toString(), e); } } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansJob.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansJob.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansJob.java (original) +++ 
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansJob.java Sun Aug 24 09:10:42 2008 @@ -20,12 +20,14 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.JobConf; +import java.io.IOException; + public class KMeansJob { private KMeansJob() { } - public static void main(String[] args) { + public static void main(String[] args) throws IOException { String input = args[0]; String clusters = args[1]; String output = args[2]; @@ -48,20 +50,16 @@ * @param maxIterations the maximum number of iterations */ public static void runJob(String input, String clustersIn, String output, - String measureClass, double convergenceDelta, int maxIterations) { - try { - // delete the output directory - JobConf conf = new JobConf(KMeansJob.class); - Path outPath = new Path(output); - FileSystem fs = FileSystem.get(conf); - if (fs.exists(outPath)) { - fs.delete(outPath, true); - } - fs.mkdirs(outPath); - KMeansDriver.runJob(input, clustersIn, output, measureClass, - convergenceDelta, maxIterations); - } catch (Exception e) { - throw new RuntimeException(e); + String measureClass, double convergenceDelta, int maxIterations) throws IOException { + // delete the output directory + JobConf conf = new JobConf(KMeansJob.class); + Path outPath = new Path(output); + FileSystem fs = FileSystem.get(conf); + if (fs.exists(outPath)) { + fs.delete(outPath, true); } + fs.mkdirs(outPath); + KMeansDriver.runJob(input, clustersIn, output, measureClass, + convergenceDelta, maxIterations); } } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java (original) 
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java Sun Aug 24 09:10:42 2008 @@ -90,7 +90,11 @@ Class cl = Class.forName(job.get(DISTANCE_MEASURE_KEY)); measure = (DistanceMeasure) cl.newInstance(); measure.configure(job); - } catch (Exception e) { + } catch (ClassNotFoundException e) { + throw new RuntimeException(e); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } catch (InstantiationException e) { throw new RuntimeException(e); } nextCanopyId = 0; Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java Sun Aug 24 09:10:42 2008 @@ -27,6 +27,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; + public class MeanShiftCanopyDriver { private static final Logger log = LoggerFactory.getLogger(MeanShiftCanopyDriver.class); @@ -81,7 +83,7 @@ client.setConf(conf); try { JobClient.runJob(conf); - } catch (Exception e) { + } catch (IOException e) { log.warn(e.toString(), e); } } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- 
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java Sun Aug 24 09:10:42 2008 @@ -30,7 +30,7 @@ private static final Logger log = LoggerFactory.getLogger(MeanShiftCanopyJob.class); - public static void main(String[] args) { + public static void main(String[] args) throws IOException { String input = args[0]; String output = args[1]; String measureClassName = args[2]; @@ -55,36 +55,32 @@ */ public static void runJob(String input, String output, String measureClassName, double t1, double t2, double convergenceDelta, - int maxIterations) { - try { - // delete the output directory - JobConf conf = new JobConf(MeanShiftCanopyDriver.class); - Path outPath = new Path(output); - FileSystem fs = FileSystem.get(conf); - if (fs.exists(outPath)) { - fs.delete(outPath, true); - } - fs.mkdirs(outPath); - // iterate until the clusters converge - boolean converged = false; - boolean inputIsSequenceFile = false; - int iteration = 0; - String clustersIn = input; - while (!converged && iteration < maxIterations) { - log.info("Iteration {}", iteration); - // point the output to a new directory per iteration - String clustersOut = output + "/canopies-" + iteration; - MeanShiftCanopyDriver.runJob(clustersIn, clustersOut, measureClassName, - t1, t2, convergenceDelta, inputIsSequenceFile); - converged = isConverged(clustersOut + "/part-00000", conf, FileSystem - .get(conf)); - // now point the input to the old output directory - clustersIn = output + "/canopies-" + iteration; - iteration++; - inputIsSequenceFile = true; - } - } catch (Exception e) { - throw new RuntimeException(e); + int maxIterations) throws IOException { + // delete the output directory + JobConf conf = new JobConf(MeanShiftCanopyDriver.class); + Path outPath = new Path(output); + FileSystem fs = FileSystem.get(conf); + if (fs.exists(outPath)) { + 
fs.delete(outPath, true); + } + fs.mkdirs(outPath); + // iterate until the clusters converge + boolean converged = false; + boolean inputIsSequenceFile = false; + int iteration = 0; + String clustersIn = input; + while (!converged && iteration < maxIterations) { + log.info("Iteration {}", iteration); + // point the output to a new directory per iteration + String clustersOut = output + "/canopies-" + iteration; + MeanShiftCanopyDriver.runJob(clustersIn, clustersOut, measureClassName, + t1, t2, convergenceDelta, inputIsSequenceFile); + converged = isConverged(clustersOut + "/part-00000", conf, FileSystem + .get(conf)); + // now point the input to the old output directory + clustersIn = output + "/canopies-" + iteration; + iteration++; + inputIsSequenceFile = true; } } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java Sun Aug 24 09:10:42 2008 @@ -18,6 +18,8 @@ */ import org.apache.mahout.cf.taste.impl.common.FastMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.Collection; @@ -32,6 +34,10 @@ */ public abstract class Model { + private static final Logger log = LoggerFactory.getLogger(Model.class); + + public static final float DEFAULT_PROBABILITY = 0.5f; + protected final List> featureLabelWeights = new ArrayList>(); protected final Map featureList = new FastMap(); @@ -48,9 +54,6 @@ protected final Float alpha_i = 1.0f; // alpha_i can be improved upon for increased smoothing - public static final float DEFAULT_PROBABILITY = 0.5f; - - protected abstract float FeatureWeight(Integer 
label, Integer feature); protected abstract float getWeight(Integer label, Integer feature); @@ -106,32 +109,28 @@ return featureList.get(feature); } - protected void setWeight(String labelString, String featureString, Float weight) - throws Exception { + protected void setWeight(String labelString, String featureString, Float weight) { Integer feature = getFeature(featureString); Integer label = getLabel(labelString); setWeight(label, feature, weight); } - protected void setWeight(Integer label, Integer feature, Float weight) throws Exception { + protected void setWeight(Integer label, Integer feature, Float weight) { if (featureLabelWeights.size() <= feature) { - // System.out.println(feature + "," + featureLabelWeights.size()); - // System.in.read(); - throw new Exception("This should not happen"); - + throw new IllegalStateException("This should not happen"); } featureLabelWeights.get(feature).put(label, new Float(weight)); } - protected void setSumFeatureWeight(Integer feature, float sum) throws Exception { + protected void setSumFeatureWeight(Integer feature, float sum) { if (sumFeatureWeight.size() != feature) - throw new Exception("This should not happen"); + throw new IllegalStateException("This should not happen"); sumFeatureWeight.add(feature, new Float(sum)); } - protected void setSumLabelWeight(Integer label, float sum) throws Exception { + protected void setSumLabelWeight(Integer label, float sum) { if (sumLabelWeight.size() != label) - throw new Exception("This should not happen"); + throw new IllegalStateException("This should not happen"); sumLabelWeight.put(label, new Float(sum)); } @@ -140,7 +139,7 @@ } public void initializeWeightMatrix() { - System.out.println(featureList.size()); + log.info("{}", featureList.size()); for (int i = 0; i < featureList.size(); i++) featureLabelWeights.add(new HashMap(1)); @@ -152,38 +151,21 @@ public void loadFeatureWeight(String labelString, String featureString, float weight) { - try { - setWeight(labelString, 
featureString, weight); - } catch (Exception e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } + setWeight(labelString, featureString, weight); } public void setSumFeatureWeight(String feature, float sum) { - try { - setSumFeatureWeight(getFeature(feature), sum); - } catch (Exception e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } + setSumFeatureWeight(getFeature(feature), sum); } public void setSumLabelWeight(String label, float sum) { - try { - setSumLabelWeight(getLabel(label), sum); - } catch (Exception e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } + setSumLabelWeight(getLabel(label), sum); } public void setThetaNormalizer(String label, float sum) { setThetaNormalizer(getLabel(label), sum); } - - /** * Get the weighted probability of the feature. * @@ -199,8 +181,6 @@ return FeatureWeight(label, feature); } - - public Collection getLabels() { return labelList.keySet(); } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Summarizable.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Summarizable.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Summarizable.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Summarizable.java Sun Aug 24 09:10:42 2008 @@ -21,7 +21,10 @@ * A Summarizable Interface. 
All Classes which implements this has to have a summarize function * which generates a string summary of the data contained in it */ -public interface Summarizable{ - /** @return Summary of the data inside the class */ - public abstract String summarize() throws Exception; +public interface Summarizable { + + /** + * @return Summary of the data inside the class + */ + String summarize(); } \ No newline at end of file Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/EvalMapper.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/EvalMapper.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/EvalMapper.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/EvalMapper.java Sun Aug 24 09:10:42 2008 @@ -44,7 +44,7 @@ /** Parameter used to store the "stringified" evaluator */ public static final String MAHOUT_GA_EVALUATOR = "mahout.ga.evaluator"; - private FitnessEvaluator evaluator = null; + private FitnessEvaluator evaluator = null; @Override public void configure(JobConf job) { @@ -54,7 +54,7 @@ "'MAHOUT_GA_EVALUATOR' job parameter non found"); } - evaluator = (FitnessEvaluator) StringUtils.fromString(evlstr); + evaluator = (FitnessEvaluator) StringUtils.fromString(evlstr); super.configure(job); } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java 
Sun Aug 24 09:10:42 2008 @@ -96,6 +96,7 @@ } @Override + @SuppressWarnings("unchecked") public String asFormatString() { StringBuilder out = new StringBuilder(); out.append("[s").append(cardinality).append(", "); Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java Sun Aug 24 09:10:42 2008 @@ -214,7 +214,11 @@ Vector vector; try { vector = (Vector) Class.forName(vectorClassName).newInstance(); - } catch (Exception e) { + } catch (ClassNotFoundException e) { + throw new RuntimeException(e); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } catch (InstantiationException e) { throw new RuntimeException(e); } vector.readFields(dataInput); Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/ManhattanDistanceMeasure.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/ManhattanDistanceMeasure.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/ManhattanDistanceMeasure.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/ManhattanDistanceMeasure.java Sun Aug 24 09:10:42 2008 @@ -52,10 +52,8 @@ // nothing to do } - - @SuppressWarnings("unchecked") public Collection getParameters() { - return Collections.EMPTY_LIST; + return Collections.emptyList(); } public void createParameters(String prefix, JobConf jobConf) { Modified: 
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedDistanceMeasure.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedDistanceMeasure.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedDistanceMeasure.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedDistanceMeasure.java Sun Aug 24 09:10:42 2008 @@ -28,6 +28,7 @@ import java.io.DataInputStream; import java.io.FileNotFoundException; +import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.List; @@ -42,11 +43,12 @@ private Parameter vectorClass; protected Vector weights; - public void createParameters(String prefix, JobConf jobConf) { parameters = new ArrayList(); - parameters.add(weightsFile = new PathParameter(prefix, "weightsFile", jobConf, null, "Path on DFS to a file containing the weights.")); - parameters.add(vectorClass = new ClassParameter(prefix, "vectorClass", jobConf, DenseVector.class, "Class file specified in parameter weightsFile has been serialized with.")); + weightsFile = new PathParameter(prefix, "weightsFile", jobConf, null, "Path on DFS to a file containing the weights."); + parameters.add(weightsFile); + vectorClass = new ClassParameter(prefix, "vectorClass", jobConf, DenseVector.class, "Class file specified in parameter weightsFile has been serialized with."); + parameters.add(vectorClass); } public Collection getParameters() { @@ -69,7 +71,11 @@ in.close(); this.weights = weights; } - } catch (Exception e) { + } catch (IOException e) { + throw new RuntimeException(e); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } catch (InstantiationException e) { throw new RuntimeException(e); } } Modified: 
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/parameters/AbstractParameter.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/parameters/AbstractParameter.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/parameters/AbstractParameter.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/parameters/AbstractParameter.java Sun Aug 24 09:10:42 2008 @@ -46,10 +46,8 @@ return value.toString(); } - - @SuppressWarnings("unchecked") public Collection getParameters() { - return Collections.EMPTY_LIST; + return Collections.emptyList(); } protected AbstractParameter(Class type, String prefix, String name, JobConf jobConf, T defaultValue, String description) { Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/parameters/ClassParameter.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/parameters/ClassParameter.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/parameters/ClassParameter.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/parameters/ClassParameter.java Sun Aug 24 09:10:42 2008 @@ -11,7 +11,7 @@ public void setStringValue(String stringValue) { try { set(Class.forName(stringValue)); - } catch (Exception e) { + } catch (ClassNotFoundException e) { throw new RuntimeException(e); } } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/parameters/CompositeParameter.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/parameters/CompositeParameter.java?rev=688522&r1=688521&r2=688522&view=diff 
============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/parameters/CompositeParameter.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/parameters/CompositeParameter.java Sun Aug 24 09:10:42 2008 @@ -30,7 +30,11 @@ public void setStringValue(String className) { try { set((T) Class.forName(className).newInstance()); - } catch (Exception e) { + } catch (ClassNotFoundException e) { + throw new RuntimeException(e); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } catch (InstantiationException e) { throw new RuntimeException(e); } } Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesClassifierTest.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesClassifierTest.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesClassifierTest.java (original) +++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesClassifierTest.java Sun Aug 24 09:10:42 2008 @@ -73,8 +73,7 @@ public void test() { BayesClassifier classifier = new BayesClassifier(); ClassifierResult result; - String [] document; - document = new String[]{"aa", "ff"}; + String[] document = new String[]{"aa", "ff"}; result = classifier.classify(model, document, "unknown"); assertTrue("category is null and it shouldn't be", result != null); assertTrue(result + " is not equal to " + "e", result.getLabel().equals("e") == true); @@ -92,8 +91,7 @@ public void testResults() throws Exception { BayesClassifier classifier = new BayesClassifier(); - String [] document; - document = new String[]{"aa", "ff"}; + String[] document = new String[]{"aa", "ff"}; ClassifierResult result = 
classifier.classify(model, document, "unknown"); assertTrue("category is null and it shouldn't be", result != null); System.out.println("Result: " + result); Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/CBayesClassifierTest.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/CBayesClassifierTest.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/CBayesClassifierTest.java (original) +++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/CBayesClassifierTest.java Sun Aug 24 09:10:42 2008 @@ -80,8 +80,7 @@ public void test() { BayesClassifier classifier = new BayesClassifier(); ClassifierResult result; - String [] document; - document = new String[]{"aa", "ff"}; + String[] document = new String[]{"aa", "ff"}; result = classifier.classify(model, document, "unknown"); assertTrue("category is null and it shouldn't be", result != null); assertTrue(result + " is not equal to " + "e", result.getLabel().equals("e") == true); @@ -99,8 +98,7 @@ public void testResults() throws Exception { BayesClassifier classifier = new BayesClassifier(); - String [] document; - document = new String[]{"aa", "ff"}; + String[] document = new String[]{"aa", "ff"}; ClassifierResult result = classifier.classify(model, document, "unknown"); assertTrue("category is null and it shouldn't be", result != null); System.out.println("Result: " + result); Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- 
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java (original) +++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java Sun Aug 24 09:10:42 2008 @@ -190,7 +190,7 @@ public void testCanopyMapperEuclidean() throws Exception { MeanShiftCanopyMapper mapper = new MeanShiftCanopyMapper(); MeanShiftCanopyCombiner combiner = new MeanShiftCanopyCombiner(); - DummyOutputCollector collector = new DummyOutputCollector(); + DummyOutputCollector collector = new DummyOutputCollector(); MeanShiftCanopy.config(euclideanDistanceMeasure, 4, 1, 0.5); // get the initial canopies List canopies = getInitialCanopies(); @@ -206,7 +206,7 @@ // now combine the mapper output MeanShiftCanopy.config(euclideanDistanceMeasure, 4, 1, 0.5); Map> mapData = collector.getData(); - collector = new DummyOutputCollector(); + collector = new DummyOutputCollector(); for (String key : mapData.keySet()) combiner.reduce(new Text(key), mapData.get(key).iterator(), collector, null); @@ -252,7 +252,7 @@ MeanShiftCanopyMapper mapper = new MeanShiftCanopyMapper(); MeanShiftCanopyCombiner combiner = new MeanShiftCanopyCombiner(); MeanShiftCanopyReducer reducer = new MeanShiftCanopyReducer(); - DummyOutputCollector collector = new DummyOutputCollector(); + DummyOutputCollector collector = new DummyOutputCollector(); MeanShiftCanopy.config(euclideanDistanceMeasure, 4, 1, 0.5); // get the initial canopies List canopies = getInitialCanopies(); @@ -275,12 +275,12 @@ // now combine the mapper output MeanShiftCanopy.config(euclideanDistanceMeasure, 4, 1, 0.5); Map> mapData = collector.getData(); - collector = new DummyOutputCollector(); + collector = new DummyOutputCollector(); for (String key : mapData.keySet()) combiner.reduce(new Text(key), mapData.get(key).iterator(), collector, null); // now reduce the combiner output - DummyOutputCollector collector2 = new DummyOutputCollector(); + DummyOutputCollector collector2 = new 
DummyOutputCollector(); reducer.reduce(new Text("canopy"), collector.getValue("canopy").iterator(), collector2, null); Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java?rev=688522&r1=688521&r2=688522&view=diff ============================================================================== --- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java (original) +++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/TrainClassifier.java Sun Aug 24 09:10:42 2008 @@ -24,6 +24,8 @@ import org.apache.commons.cli.ParseException; import org.apache.mahout.classifier.cbayes.CBayesDriver; +import java.io.IOException; + /** * Train the Naive Bayes Complement classifier with improved weighting on the Twenty Newsgroups data (http://people.csail.mit.edu/jrennie/20Newsgroups/20news-18828.tar.gz) * @@ -46,16 +48,16 @@ */ public class TrainClassifier { - public void trainNaiveBayes(String dir, String outputDir, int gramSize){ + public void trainNaiveBayes(String dir, String outputDir, int gramSize) throws IOException { BayesDriver.runJob(dir, outputDir, gramSize); } - public void trainCNaiveBayes(String dir, String outputDir, int gramSize){ + public void trainCNaiveBayes(String dir, String outputDir, int gramSize) throws IOException { CBayesDriver.runJob(dir, outputDir, gramSize); } @SuppressWarnings("static-access") - public static void main(String[] args) throws ParseException { + public static void main(String[] args) throws IOException, ParseException { Options options = new Options(); Option trainOpt = OptionBuilder.withLongOpt("train").withDescription("Train the classifier").create("t"); options.addOption(trainOpt);