Return-Path: X-Original-To: apmail-mahout-commits-archive@www.apache.org Delivered-To: apmail-mahout-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 59F124C4E for ; Mon, 27 Jun 2011 18:47:10 +0000 (UTC) Received: (qmail 56474 invoked by uid 500); 27 Jun 2011 18:47:09 -0000 Delivered-To: apmail-mahout-commits-archive@mahout.apache.org Received: (qmail 56140 invoked by uid 500); 27 Jun 2011 18:47:09 -0000 Mailing-List: contact commits-help@mahout.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@mahout.apache.org Delivered-To: mailing list commits@mahout.apache.org Received: (qmail 56132 invoked by uid 99); 27 Jun 2011 18:47:08 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 27 Jun 2011 18:47:08 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 27 Jun 2011 18:47:06 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id ABFE223888CB; Mon, 27 Jun 2011 18:46:46 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1140264 - in /mahout/trunk: core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ src/conf/ Date: Mon, 27 Jun 2011 18:46:46 -0000 To: commits@mahout.apache.org From: srowen@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20110627184646.ABFE223888CB@eris.apache.org> Author: srowen Date: Mon Jun 27 18:46:46 2011 New Revision: 1140264 URL: http://svn.apache.org/viewvc?rev=1140264&view=rev Log: MAHOUT-734 add HMM command lines Added: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/LossyHmmSerializer.java mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmModel.java mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmTrainer.java mahout/trunk/src/conf/driver.classes.props Added: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java?rev=1140264&view=auto ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java (added) +++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java Mon Jun 27 18:46:46 2011 @@ -0,0 +1,169 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.classifier.sequencelearning.hmm; + +import com.google.common.io.Closeables; +import org.apache.commons.cli2.CommandLine; +import org.apache.commons.cli2.Group; +import org.apache.commons.cli2.Option; +import org.apache.commons.cli2.OptionException; +import org.apache.commons.cli2.builder.ArgumentBuilder; +import org.apache.commons.cli2.builder.DefaultOptionBuilder; +import org.apache.commons.cli2.builder.GroupBuilder; +import org.apache.commons.cli2.commandline.Parser; +import org.apache.mahout.common.CommandLineUtil; + +import java.io.DataOutputStream; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import java.util.Scanner; + +/** + * A class for EM training of HMM from console + */ +public final class BaumWelchTrainer { + + private BaumWelchTrainer() { + } + + public static void main(String[] args) throws IOException { + DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder(); + ArgumentBuilder argumentBuilder = new ArgumentBuilder(); + + Option inputOption = optionBuilder.withLongName("input"). + withDescription("Text file with space-separated integers to train on"). + withShortName("i").withArgument(argumentBuilder.withMaximum(1).withMinimum(1). + withName("path").create()).withRequired(true).create(); + + Option outputOption = optionBuilder.withLongName("output"). + withDescription("Path trained HMM model should be serialized to"). + withShortName("o").withArgument(argumentBuilder.withMaximum(1).withMinimum(1). + withName("path").create()).withRequired(true).create(); + + Option stateNumberOption = optionBuilder.withLongName("nrOfHiddenStates"). + withDescription("Number of hidden states"). + withShortName("nh").withArgument(argumentBuilder.withMaximum(1).withMinimum(1). + withName("number").create()).withRequired(true).create(); + + Option observedStateNumberOption = optionBuilder.withLongName("nrOfObservedStates"). + withDescription("Number of observed states"). + withShortName("no").withArgument(argumentBuilder.withMaximum(1).withMinimum(1). + withName("number").create()).withRequired(true).create(); + + Option epsilonOption = optionBuilder.withLongName("epsilon"). + withDescription("Convergence threshold"). + withShortName("e").withArgument(argumentBuilder.withMaximum(1).withMinimum(1). + withName("number").create()).withRequired(true).create(); + + Option iterationsOption = optionBuilder.withLongName("max-iterations"). + withDescription("Maximum iterations number"). + withShortName("m").withArgument(argumentBuilder.withMaximum(1).withMinimum(1). + withName("number").create()).withRequired(true).create(); + + Group optionGroup = new GroupBuilder().withOption(inputOption). + withOption(outputOption).withOption(stateNumberOption).withOption(observedStateNumberOption). + withOption(epsilonOption).withOption(iterationsOption). + withName("Options").create(); + + try { + Parser parser = new Parser(); + parser.setGroup(optionGroup); + CommandLine commandLine = parser.parse(args); + + String input = (String) commandLine.getValue(inputOption); + String output = (String) commandLine.getValue(outputOption); + + int nrOfHiddenStates = Integer.parseInt((String) commandLine.getValue(stateNumberOption)); + int nrOfObservedStates = Integer.parseInt((String) commandLine.getValue(observedStateNumberOption)); + + double epsilon = Double.parseDouble((String) commandLine.getValue(epsilonOption)); + int maxIterations = Integer.parseInt((String) commandLine.getValue(iterationsOption)); + + //constructing random-generated HMM + HmmModel model = new HmmModel(nrOfHiddenStates, nrOfObservedStates, new Date().getTime()); + List observations = new ArrayList(); + + //reading observations + Scanner scanner = new Scanner(new FileInputStream(input)); + try { + while (scanner.hasNextInt()) { + observations.add(scanner.nextInt()); + } + } finally { + scanner.close(); + } + + int[] observationsArray = new int[observations.size()]; + for (int i = 0; i < observations.size(); ++i) { + observationsArray[i] = observations.get(i); + } + + //training + HmmModel trainedModel = HmmTrainer.trainBaumWelch(model, + observationsArray, epsilon, maxIterations, true); + + //serializing trained model + DataOutputStream stream = new DataOutputStream(new FileOutputStream(output)); + try { + LossyHmmSerializer.serialize(trainedModel, stream); + } finally { + Closeables.closeQuietly(stream); + } + + //printing tranied model + System.out.println("Initial probabilities: "); + for (int i = 0; i < trainedModel.getNrOfHiddenStates(); ++i) + System.out.print(i + " "); + System.out.println(); + for (int i = 0; i < trainedModel.getNrOfHiddenStates(); ++i) + System.out.print(trainedModel.getInitialProbabilities().get(i) + " "); + System.out.println(); + + System.out.println("Transition matrix:"); + System.out.print(" "); + for (int i = 0; i < trainedModel.getNrOfHiddenStates(); ++i) + System.out.print(i + " "); + System.out.println(); + for (int i = 0; i < trainedModel.getNrOfHiddenStates(); ++i) { + System.out.print(i + " "); + for (int j = 0; j < trainedModel.getNrOfHiddenStates(); ++j) { + System.out.print(trainedModel.getTransitionMatrix().get(i, j) + " "); + } + System.out.println(); + } + System.out.println("Emission matrix: "); + System.out.print(" "); + for (int i = 0; i < trainedModel.getNrOfOutputStates(); ++i) + System.out.print(i + " "); + System.out.println(); + for (int i = 0; i < trainedModel.getNrOfHiddenStates(); ++i) { + System.out.print(i + " "); + for (int j = 0; j < trainedModel.getNrOfOutputStates(); ++j) { + System.out.print(trainedModel.getEmissionMatrix().get(i, j) + " "); + } + System.out.println(); + } + } catch (OptionException e) { + CommandLineUtil.printHelp(optionGroup); + } + } +} Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmModel.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmModel.java?rev=1140264&r1=1140263&r2=1140264&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmModel.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmModel.java Mon Jun 27 18:46:46 2011 @@ -78,8 +78,7 @@ public class HmmModel implements Cloneab * Get a copy of this model */ @Override - public HmmModel clone() throws CloneNotSupportedException { - super.clone(); + public HmmModel clone() { HmmModel model = new HmmModel(transitionMatrix.clone(), emissionMatrix.clone(), initialProbabilities.clone()); if (hiddenStateNames != null) { model.hiddenStateNames = HashBiMap.create(hiddenStateNames); Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmTrainer.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmTrainer.java?rev=1140264&r1=1140263&r2=1140264&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmTrainer.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmTrainer.java Mon Jun 27 18:46:46 2011 @@ -220,14 +220,8 @@ public final class HmmTrainer { pseudoCount = pseudoCount == 0 ? Double.MIN_VALUE : pseudoCount; // allocate space for iteration models - HmmModel lastIteration; - HmmModel iteration; - try { - lastIteration = initialModel.clone(); - iteration = initialModel.clone(); - } catch (CloneNotSupportedException e) { - throw new UnknownError("Cloning HmmModels broke. Check for programming errors, changed APIs."); - } + HmmModel lastIteration = initialModel.clone(); + HmmModel iteration = initialModel.clone(); // allocate space for Viterbi path calculation int[] viterbiPath = new int[observedSequence.length]; @@ -301,14 +295,9 @@ public final class HmmTrainer { public static HmmModel trainBaumWelch(HmmModel initialModel, int[] observedSequence, double epsilon, int maxIterations, boolean scaled) { // allocate space for the iterations - HmmModel lastIteration; - HmmModel iteration; - try { - lastIteration = initialModel.clone(); - iteration = initialModel.clone(); - } catch (CloneNotSupportedException e) { - throw new UnknownError("Cloning HmmModels broke. Check for programming errors, changed APIs etc."); - } + HmmModel lastIteration = initialModel.clone(); + HmmModel iteration = initialModel.clone(); + // allocate space for baum-welch factors int hiddenCount = initialModel.getNrOfHiddenStates(); int visibleCount = observedSequence.length; Added: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/LossyHmmSerializer.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/LossyHmmSerializer.java?rev=1140264&view=auto ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/LossyHmmSerializer.java (added) +++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/LossyHmmSerializer.java Mon Jun 27 18:46:46 2011 @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.classifier.sequencelearning.hmm; + +import org.apache.mahout.math.Matrix; +import org.apache.mahout.math.MatrixWritable; +import org.apache.mahout.math.Vector; +import org.apache.mahout.math.VectorWritable; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * Utils for serializing Writable parts of HmmModel (that means without hidden state names and so on) + */ +final class LossyHmmSerializer { + + private LossyHmmSerializer() { + } + + static void serialize(HmmModel model, DataOutput output) throws IOException { + MatrixWritable matrix = new MatrixWritable(model.getEmissionMatrix()); + matrix.write(output); + matrix.set(model.getTransitionMatrix()); + matrix.write(output); + + VectorWritable vector = new VectorWritable(model.getInitialProbabilities()); + vector.write(output); + } + + static HmmModel deserialize(DataInput input) throws IOException { + MatrixWritable matrix = new MatrixWritable(); + matrix.readFields(input); + Matrix emissionMatrix = matrix.get(); + + matrix.readFields(input); + Matrix transitionMatrix = matrix.get(); + + VectorWritable vector = new VectorWritable(); + vector.readFields(input); + Vector initialProbabilities = vector.get(); + + return new HmmModel(transitionMatrix, emissionMatrix, initialProbabilities); + } + +} Added: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java?rev=1140264&view=auto ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java (added) +++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java Mon Jun 27 18:46:46 2011 @@ -0,0 +1,106 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package org.apache.mahout.classifier.sequencelearning.hmm; + +import com.google.common.io.Closeables; +import org.apache.commons.cli2.CommandLine; +import org.apache.commons.cli2.Group; +import org.apache.commons.cli2.Option; +import org.apache.commons.cli2.OptionException; +import org.apache.commons.cli2.builder.ArgumentBuilder; +import org.apache.commons.cli2.builder.DefaultOptionBuilder; +import org.apache.commons.cli2.builder.GroupBuilder; +import org.apache.commons.cli2.commandline.Parser; +import org.apache.mahout.common.CommandLineUtil; + +import java.io.DataInputStream; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.PrintWriter; + +/** + * Command-line tool for generating random sequences by given HMM + */ +public final class RandomSequenceGenerator { + + private RandomSequenceGenerator() { + } + + public static void main(String[] args) throws IOException { + DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder(); + ArgumentBuilder argumentBuilder = new ArgumentBuilder(); + + Option outputOption = optionBuilder.withLongName("output"). + withDescription("Output file with sequence of observed states"). + withShortName("o").withArgument(argumentBuilder.withMaximum(1).withMinimum(1). + withName("path").create()).withRequired(false).create(); + + Option modelOption = optionBuilder.withLongName("model"). + withDescription("Path to serialized HMM model"). + withShortName("m").withArgument(argumentBuilder.withMaximum(1).withMinimum(1). + withName("path").create()).withRequired(true).create(); + + Option lengthOption = optionBuilder.withLongName("length"). + withDescription("Length of generated sequence"). + withShortName("l").withArgument(argumentBuilder.withMaximum(1).withMinimum(1). + withName("number").create()).withRequired(true).create(); + + Group optionGroup = new GroupBuilder(). + withOption(outputOption).withOption(modelOption).withOption(lengthOption). + withName("Options").create(); + + try { + Parser parser = new Parser(); + parser.setGroup(optionGroup); + CommandLine commandLine = parser.parse(args); + + String output = (String) commandLine.getValue(outputOption); + + String modelPath = (String) commandLine.getValue(modelOption); + + int length = Integer.parseInt((String) commandLine.getValue(lengthOption)); + + //reading serialized HMM + DataInputStream modelStream = new DataInputStream(new FileInputStream(modelPath)); + HmmModel model; + try { + model = LossyHmmSerializer.deserialize(modelStream); + } finally { + Closeables.closeQuietly(modelStream); + } + + //generating observations + int[] observations = HmmEvaluator.predict(model, length, System.currentTimeMillis()); + + //writing output + PrintWriter writer = new PrintWriter(new FileOutputStream(output)); + try { + for (int observation : observations) { + writer.print(observation); + writer.print(' '); + } + } finally { + Closeables.closeQuietly(writer); + } + } catch (OptionException e) { + CommandLineUtil.printHelp(optionGroup); + } + } +} Added: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java?rev=1140264&view=auto ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java (added) +++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java Mon Jun 27 18:46:46 2011 @@ -0,0 +1,133 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.classifier.sequencelearning.hmm; + +import com.google.common.io.Closeables; +import org.apache.commons.cli2.CommandLine; +import org.apache.commons.cli2.Group; +import org.apache.commons.cli2.Option; +import org.apache.commons.cli2.OptionException; +import org.apache.commons.cli2.builder.ArgumentBuilder; +import org.apache.commons.cli2.builder.DefaultOptionBuilder; +import org.apache.commons.cli2.builder.GroupBuilder; +import org.apache.commons.cli2.commandline.Parser; +import org.apache.mahout.common.CommandLineUtil; + +import java.io.DataInputStream; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.List; +import java.util.Scanner; + +/** + * Command-line tool for Viterbi evaluating + */ +public final class ViterbiEvaluator { + + private ViterbiEvaluator() { + } + + public static void main(String[] args) throws IOException { + DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder(); + ArgumentBuilder argumentBuilder = new ArgumentBuilder(); + + Option inputOption = optionBuilder.withLongName("input"). + withDescription("Text file with space-separated integers to segment"). + withShortName("i").withArgument(argumentBuilder.withMaximum(1).withMinimum(1). + withName("path").create()).withRequired(true).create(); + + Option outputOption = optionBuilder.withLongName("output"). + withDescription("Output file with decoded sequence of hidden states"). + withShortName("o").withArgument(argumentBuilder.withMaximum(1).withMinimum(1). + withName("path").create()).withRequired(true).create(); + + Option modelOption = optionBuilder.withLongName("model"). + withDescription("Path to serialized HMM model"). + withShortName("m").withArgument(argumentBuilder.withMaximum(1).withMinimum(1). + withName("path").create()).withRequired(true).create(); + + Option likelihoodOption = optionBuilder.withLongName("likelihood"). + withDescription("Compute likelihood of observed sequence"). + withShortName("l").withRequired(false).create(); + + Group optionGroup = new GroupBuilder().withOption(inputOption). + withOption(outputOption).withOption(modelOption).withOption(likelihoodOption). + withName("Options").create(); + + try { + Parser parser = new Parser(); + parser.setGroup(optionGroup); + CommandLine commandLine = parser.parse(args); + + String input = (String) commandLine.getValue(inputOption); + String output = (String) commandLine.getValue(outputOption); + + String modelPath = (String) commandLine.getValue(modelOption); + + boolean computeLikelihood = commandLine.hasOption(likelihoodOption); + + //reading serialized HMM + DataInputStream modelStream = new DataInputStream(new FileInputStream(modelPath)); + HmmModel model; + try { + model = LossyHmmSerializer.deserialize(modelStream); + } finally { + Closeables.closeQuietly(modelStream); + } + + //reading observations + List observations = new ArrayList(); + Scanner scanner = new Scanner(new FileInputStream(input)); + try { + while (scanner.hasNextInt()) { + observations.add(scanner.nextInt()); + } + } finally { + scanner.close(); + } + + int[] observationsArray = new int[observations.size()]; + for (int i = 0; i < observations.size(); ++i) { + observationsArray[i] = observations.get(i); + } + + //decoding + int[] hiddenStates = HmmEvaluator.decode(model, observationsArray, true); + + //writing output + PrintWriter writer = new PrintWriter(new FileOutputStream(output)); + try { + for (int hiddenState : hiddenStates) { + writer.print(hiddenState); + writer.print(' '); + } + } finally { + Closeables.closeQuietly(writer); + } + + if (computeLikelihood) { + System.out.println("Likelihood: " + HmmEvaluator.modelLikelihood(model, observationsArray, true)); + } + } catch (OptionException e) { + CommandLineUtil.printHelp(optionGroup); + } + } +} Modified: mahout/trunk/src/conf/driver.classes.props URL: http://svn.apache.org/viewvc/mahout/trunk/src/conf/driver.classes.props?rev=1140264&r1=1140263&r2=1140264&view=diff ============================================================================== --- mahout/trunk/src/conf/driver.classes.props (original) +++ mahout/trunk/src/conf/driver.classes.props Mon Jun 27 18:46:46 2011 @@ -37,4 +37,7 @@ org.apache.mahout.math.hadoop.stochastic org.apache.mahout.clustering.spectral.eigencuts.EigencutsDriver = eigencuts : Eigencuts spectral clustering org.apache.mahout.clustering.spectral.kmeans.SpectralKMeansDriver = spectralkmeans : Spectral k-means clustering org.apache.mahout.cf.taste.hadoop.als.ParallelALSFactorizationJob = parallelALS : ALS-WR factorization of a rating matrix -org.apache.mahout.cf.taste.hadoop.als.PredictionJob = predictFromFactorization : predict preferences from a factorization of a rating matrix \ No newline at end of file +org.apache.mahout.cf.taste.hadoop.als.PredictionJob = predictFromFactorization : predict preferences from a factorization of a rating matrix +org.apache.mahout.classifier.sequencelearning.hmm.BaumWelchTrainer = baumwelch : Baum-Welch algorithm for unsupervised HMM training +org.apache.mahout.classifier.sequencelearning.hmm.ViterbiEvaluator = viterbi : Viterbi decoding of hidden states from given output states sequence +org.apache.mahout.classifier.sequencelearning.hmm.RandomSequenceGenerator = hmmpredict : Generate random sequence of observations by given HMM