mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From isa...@apache.org
Subject svn commit: r1000807 [1/2] - in /mahout/trunk: core/ core/src/main/java/org/apache/mahout/classifier/sequencelearning/ core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ core/src/test/java/org/apache/mahout/classifier/sequencelearnin...
Date Fri, 24 Sep 2010 11:17:14 GMT
Author: isabel
Date: Fri Sep 24 11:17:13 2010
New Revision: 1000807

URL: http://svn.apache.org/viewvc?rev=1000807&view=rev
Log:
MAHOUT-396 - add HMM support for sequence classification. Thanks to Max
Heimel, Marc Hofer, Qiuyan Xu and Van Long Nguyen for contributing the
patch.

Added:
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmAlgorithms.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmEvaluator.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmModel.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmTrainer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmUtils.java
    mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sequencelearning/
    mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sequencelearning/hmm/
    mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sequencelearning/hmm/HMMAlgorithmsTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sequencelearning/hmm/HMMEvaluatorTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sequencelearning/hmm/HMMModelTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sequencelearning/hmm/HMMTestBase.java
    mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sequencelearning/hmm/HMMTrainerTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sequencelearning/hmm/HMMUtilsTest.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/
    mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/
    mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java
Modified:
    mahout/trunk/core/pom.xml

Modified: mahout/trunk/core/pom.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/core/pom.xml?rev=1000807&r1=1000806&r2=1000807&view=diff
==============================================================================
--- mahout/trunk/core/pom.xml (original)
+++ mahout/trunk/core/pom.xml Fri Sep 24 11:17:13 2010
@@ -263,6 +263,11 @@
       <scope>test</scope>
     </dependency>
 
+    <dependency>
+    	<groupId>commons-collections</groupId>
+    	<artifactId>commons-collections</artifactId>
+    	<version>3.2.1</version>
+    </dependency>
   </dependencies>
 
   <repositories>

Added: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmAlgorithms.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmAlgorithms.java?rev=1000807&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmAlgorithms.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmAlgorithms.java Fri Sep 24 11:17:13 2010
@@ -0,0 +1,311 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.classifier.sequencelearning.hmm;
+
+import org.apache.mahout.math.DenseMatrix;
+import org.apache.mahout.math.Matrix;
+import org.apache.mahout.math.Vector;
+
+/**
+ * Class containing implementations of the three major HMM algorithms: forward,
+ * backward and Viterbi
+ *
+ * @author mheimel
+ */
+public final class HmmAlgorithms {
+
+
+  /**
+   * No public constructors for utility classes.
+   */
+  private HmmAlgorithms() {
+    // nothing to do here really
+  }
+
+  /**
+   * External function to compute a matrix of alpha factors
+   *
+   * @param model        model to run forward algorithm for.
+   * @param observations observation sequence to train on.
+   * @param scaled       Should log-scaled beta factors be computed?
+   * @return matrix of alpha factors.
+   */
+  public static Matrix forwardAlgorithm(HmmModel model, int[] observations,
+                                        boolean scaled) {
+
+    DenseMatrix alpha = new DenseMatrix(observations.length, model
+        .getNrOfHiddenStates());
+
+    forwardAlgorithm(alpha, model, observations, scaled);
+
+    return alpha;
+  }
+
+  /**
+   * Internal function to compute the alpha factors
+   *
+   * @param alpha        matrix to store alpha factors in.
+   * @param model        model to use for alpha factor computation.
+   * @param observations observation sequence seen.
+   * @param scaled       set to true if log-scaled beta factors should be computed.
+   */
+  static void forwardAlgorithm(Matrix alpha, HmmModel model,
+                               int[] observations, boolean scaled) {
+
+    // fetch references to the model parameters
+    Vector ip = model.getInitialProbabilities();
+    Matrix b = model.getEmissionMatrix();
+    Matrix a = model.getTransitionMatrix();
+
+    if (scaled) { // compute log scaled alpha values
+      // Initialization
+      for (int i = 0; i < model.getNrOfHiddenStates(); i++)
+        alpha.setQuick(0, i, Math.log(ip.getQuick(i)
+            * b.getQuick(i, observations[0])));
+
+      // Induction
+      for (int t = 1; t < observations.length; t++) {
+        for (int i = 0; i < model.getNrOfHiddenStates(); i++) {
+          double sum = Double.NEGATIVE_INFINITY; // log(0)
+          for (int j = 0; j < model.getNrOfHiddenStates(); j++) {
+            double tmp = alpha.getQuick(t - 1, j) + Math.log(a.getQuick(j, i));
+            if (tmp > Double.NEGATIVE_INFINITY) // make sure we
+              // handle
+              // log(0) correctly
+              sum = tmp + Math.log(1 + Math.exp(sum - tmp));
+          }
+          alpha.setQuick(t, i, sum + Math.log(b.getQuick(i, observations[t])));
+        }
+      }
+    } else {
+
+      // Initialization
+      for (int i = 0; i < model.getNrOfHiddenStates(); i++)
+        alpha.setQuick(0, i, ip.getQuick(i) * b.getQuick(i, observations[0]));
+
+      // Induction
+      for (int t = 1; t < observations.length; t++) {
+        for (int i = 0; i < model.getNrOfHiddenStates(); i++) {
+          double sum = 0.0;
+          for (int j = 0; j < model.getNrOfHiddenStates(); j++) {
+            sum += alpha.getQuick(t - 1, j) * a.getQuick(j, i);
+          }
+          alpha.setQuick(t, i, sum * b.getQuick(i, observations[t]));
+        }
+      }
+    }
+  }
+
+  /**
+   * External function to compute a matrix of beta factors
+   *
+   * @param model        model to use for estimation.
+   * @param observations observation sequence seen.
+   * @param scaled       Set to true if log-scaled beta factors should be computed.
+   * @return beta factors based on the model and observation sequence.
+   */
+  public static Matrix backwardAlgorithm(HmmModel model, int[] observations,
+                                         boolean scaled) {
+
+    // initialize the matrix
+    DenseMatrix beta = new DenseMatrix(observations.length, model
+        .getNrOfHiddenStates());
+    // compute the beta factors
+    backwardAlgorithm(beta, model, observations, scaled);
+
+    return beta;
+  }
+
+  /**
+   * Internal function to compute the beta factors
+   *
+   * @param beta         Matrix to store resulting factors in.
+   * @param model        model to use for factor estimation.
+   * @param observations sequence of observations to estimate.
+   * @param scaled       set to true to compute log-scaled parameters.
+   */
+  static void backwardAlgorithm(Matrix beta, HmmModel model,
+                                int[] observations, boolean scaled) {
+    // fetch references to the model parameters
+    Matrix b = model.getEmissionMatrix();
+    Matrix a = model.getTransitionMatrix();
+
+    if (scaled) { // compute log-scaled factors
+      // initialization
+      for (int i = 0; i < model.getNrOfHiddenStates(); i++)
+        beta.setQuick(observations.length - 1, i, 0);
+
+      // induction
+      for (int t = observations.length - 2; t >= 0; t--) {
+        for (int i = 0; i < model.getNrOfHiddenStates(); i++) {
+          double sum = Double.NEGATIVE_INFINITY; // log(0)
+          for (int j = 0; j < model.getNrOfHiddenStates(); j++) {
+            double tmp = beta.getQuick(t + 1, j) + Math.log(a.getQuick(i, j))
+                + Math.log(b.getQuick(j, observations[t + 1]));
+            if (tmp > Double.NEGATIVE_INFINITY) // handle log(0)
+              sum = tmp + Math.log(1 + Math.exp(sum - tmp));
+          }
+          beta.setQuick(t, i, sum);
+        }
+      }
+    } else {
+      // initialization
+      for (int i = 0; i < model.getNrOfHiddenStates(); i++)
+        beta.setQuick(observations.length - 1, i, 1);
+      // induction
+      for (int t = observations.length - 2; t >= 0; t--) {
+        for (int i = 0; i < model.getNrOfHiddenStates(); i++) {
+          double sum = 0;
+          for (int j = 0; j < model.getNrOfHiddenStates(); j++)
+            sum += beta.getQuick(t + 1, j) * a.getQuick(i, j)
+                * b.getQuick(j, observations[t + 1]);
+          beta.setQuick(t, i, sum);
+        }
+      }
+    }
+  }
+
+  /**
+   * Viterbi algorithm to compute the most likely hidden sequence for a given
+   * model and observed sequence
+   *
+   * @param model        HmmModel for which the Viterbi path should be computed
+   * @param observations Sequence of observations
+   * @param scaled       Use log-scaled computations, this requires higher computational
+   *                     effort but is numerically more stable for large observation
+   *                     sequences
+   * @return nrOfObservations 1D int array containing the most likely hidden
+   *         sequence
+   */
+  public static int[] viterbiAlgorithm(HmmModel model, int[] observations,
+                                       boolean scaled) {
+
+    // probability that the most probable hidden states ends at state i at
+    // time t
+    double[][] delta = new double[observations.length][model
+        .getNrOfHiddenStates()];
+
+    // previous hidden state in the most probable state leading up to state
+    // i at time t
+    int[][] phi = new int[observations.length - 1][model.getNrOfHiddenStates()];
+
+    // initialize the return array
+    int[] sequence = new int[observations.length];
+
+    viterbiAlgorithm(sequence, delta, phi, model, observations, scaled);
+
+    return sequence;
+  }
+
+  /**
+   * Internal version of the viterbi algorithm, allowing to reuse existing
+   * arrays instead of allocating new ones
+   *
+   * @param sequence     NrOfObservations 1D int array for storing the viterbi sequence
+   * @param delta        NrOfObservations x NrHiddenStates 2D double array for storing the
+   *                     delta factors
+   * @param phi          NrOfObservations-1 x NrHiddenStates 2D int array for storing the
+   *                     phi values
+   * @param model        HmmModel for which the viterbi path should be computed
+   * @param observations Sequence of observations
+   * @param scaled       Use log-scaled computations, this requires higher computational
+   *                     effort but is numerically more stable for large observation
+   *                     sequences
+   */
+  static void viterbiAlgorithm(int[] sequence, double[][] delta, int[][] phi,
+                               HmmModel model, int[] observations, boolean scaled) {
+    // fetch references to the model parameters
+    Vector ip = model.getInitialProbabilities();
+    Matrix b = model.getEmissionMatrix();
+    Matrix a = model.getTransitionMatrix();
+
+    // Initialization
+    if (scaled) {
+      for (int i = 0; i < model.getNrOfHiddenStates(); i++) {
+        delta[0][i] = Math.log(ip.getQuick(i) * b.getQuick(i, observations[0]));
+      }
+    } else {
+
+      for (int i = 0; i < model.getNrOfHiddenStates(); i++) {
+        delta[0][i] = ip.getQuick(i) * b.getQuick(i, observations[0]);
+      }
+    }
+
+    // Induction
+    // iterate over the time
+    if (scaled) {
+      for (int t = 1; t < observations.length; t++) {
+        // iterate over the hidden states
+        for (int i = 0; i < model.getNrOfHiddenStates(); i++) {
+          // find the maximum probability and most likely state
+          // leading up
+          // to this
+          int maxState = 0;
+          double maxProb = delta[t - 1][0] + Math.log(a.getQuick(0, i));
+          for (int j = 1; j < model.getNrOfHiddenStates(); j++) {
+            double prob = delta[t - 1][j] + Math.log(a.getQuick(j, i));
+            if (prob > maxProb) {
+              maxProb = prob;
+              maxState = j;
+            }
+          }
+          delta[t][i] = maxProb + Math.log(b.getQuick(i, observations[t]));
+          phi[t - 1][i] = maxState;
+        }
+      }
+    } else {
+      for (int t = 1; t < observations.length; t++) {
+        // iterate over the hidden states
+        for (int i = 0; i < model.getNrOfHiddenStates(); i++) {
+          // find the maximum probability and most likely state
+          // leading up
+          // to this
+          int maxState = 0;
+          double maxProb = delta[t - 1][0] * a.getQuick(0, i);
+          for (int j = 1; j < model.getNrOfHiddenStates(); j++) {
+            double prob = delta[t - 1][j] * a.getQuick(j, i);
+            if (prob > maxProb) {
+              maxProb = prob;
+              maxState = j;
+            }
+          }
+          delta[t][i] = maxProb * b.getQuick(i, observations[t]);
+          phi[t - 1][i] = maxState;
+        }
+      }
+    }
+
+    // find the most likely end state for initialization
+    double maxProb;
+    if (scaled)
+      maxProb = Double.NEGATIVE_INFINITY;
+    else
+      maxProb = 0;
+    for (int i = 0; i < model.getNrOfHiddenStates(); i++) {
+      if (delta[observations.length - 1][i] > maxProb) {
+        maxProb = delta[observations.length - 1][i];
+        sequence[observations.length - 1] = i;
+      }
+    }
+
+    // now backtrack to find the most likely hidden sequence
+    for (int t = observations.length - 2; t >= 0; t--)
+      sequence[t] = phi[t][sequence[t + 1]];
+  }
+
+}

Added: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmEvaluator.java?rev=1000807&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmEvaluator.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmEvaluator.java Fri Sep 24 11:17:13 2010
@@ -0,0 +1,194 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.classifier.sequencelearning.hmm;
+
+import java.util.Random;
+
+import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.math.Matrix;
+import org.apache.mahout.math.Vector;
+
+/**
+ * The HMMEvaluator class offers several methods to evaluate an HMM Model. The
+ * following use-cases are covered: 1) Generate a sequence of output states from
+ * a given model (prediction). 2) Compute the likelihood that a given model
+ * generated a given sequence of output states (model likelihood). 3) Compute
+ * the most likely hidden sequence for a given model and a given observed
+ * sequence (decoding).
+ *
+ * @author mheimel
+ */
+public final class HmmEvaluator {
+
+  /**
+   * No constructor for utility classes.
+   */
+  private HmmEvaluator() {
+    // Nothing to do here.
+  }
+
+  /**
+   * Predict a sequence of steps output states for the given HMM model
+   *
+   * @param model The Hidden Markov model used to generate the output sequence
+   * @param steps Size of the generated output sequence
+   * @return integer array containing a sequence of steps output state IDs,
+   *         generated by the specified model
+   */
+  public static int[] predict(HmmModel model, int steps) {
+    return predict(model, steps, 0);
+  }
+
+  /**
+   * Predict a sequence of steps output states for the given HMM model using the
+   * given seed for probabilistic experiments
+   *
+   * @param model The Hidden Markov model used to generate the output sequence
+   * @param steps Size of the generated output sequence
+   * @param seed  Seed to initialize the RNG
+   * @return integer array containing a sequence of steps output state IDs,
+   *         generated by the specified model
+   */
+  public static int[] predict(HmmModel model, int steps, long seed) {
+    // create the random number generator
+    Random rand;
+    if (seed == 0)
+      rand = RandomUtils.getRandom();
+    else
+      rand = RandomUtils.getRandom(seed);
+    // fetch the cumulative distributions
+    Vector cip = HmmUtils.getCumulativeInitialProbabilities(model);
+    Matrix ctm = HmmUtils.getCumulativeTransitionMatrix(model);
+    Matrix com = HmmUtils.getCumulativeOutputMatrix(model);
+    // allocate the result IntArrayList
+    int[] result = new int[steps];
+    // choose the initial state
+    int hiddenState = 0;
+
+    double randnr = rand.nextDouble();
+    while (cip.get(hiddenState) < randnr)
+      hiddenState++;
+
+    // now draw steps output states according to the cumulative
+    // distributions
+    for (int step = 0; step < steps; ++step) {
+      // choose output state to given hidden state
+      randnr = rand.nextDouble();
+      int outputState = 0;
+      while (com.get(hiddenState, outputState) < randnr)
+        outputState++;
+      result[step] = outputState;
+      // choose the next hidden state
+      randnr = rand.nextDouble();
+      int nextHiddenState = 0;
+      while (ctm.get(hiddenState, nextHiddenState) < randnr)
+        nextHiddenState++;
+      hiddenState = nextHiddenState;
+    }
+    return result;
+  }
+
+  /**
+   * Returns the likelihood that a given output sequence was produced by the
+   * given model. Internally, this function calls the forward algorithm to
+   * compute the alpha values and then uses the overloaded function to compute
+   * the actual model likelihood.
+   *
+   * @param model          Model to base the likelihood on.
+   * @param outputSequence Sequence to compute likelihood for.
+   * @param scaled         Use log-scaled parameters for computation. This is computationally
+   *                       more expensive, but offers better numerically stability in case of
+   *                       long output sequences
+   * @return Likelihood that the given model produced the given sequence
+   */
+  public static double modelLikelihood(HmmModel model, int[] outputSequence,
+                                       boolean scaled) {
+    return modelLikelihood(HmmAlgorithms.forwardAlgorithm(model,
+        outputSequence, scaled), scaled);
+  }
+
+  /**
+   * Computes the likelihood that a given output sequence was computed by a
+   * given model using the alpha values computed by the forward algorithm.
+   * // TODO I am a bit confused here - where is the output sequence referenced in the comment above in the code?
+   * @param alpha  Matrix of alpha values
+   * @param scaled Set to true if the alpha values are log-scaled.
+   * @return model likelihood.
+   */
+  public static double modelLikelihood(Matrix alpha, boolean scaled) {
+    double likelihood = 0;
+    if (scaled) {
+      for (int i = 0; i < alpha.numCols(); ++i) {
+        likelihood += Math.exp(alpha.getQuick(alpha.numRows() - 1, i));
+      }
+    } else {
+      for (int i = 0; i < alpha.numCols(); ++i) {
+        likelihood += alpha.getQuick(alpha.numRows() - 1, i);
+      }
+    }
+    return likelihood;
+  }
+
+  /**
+   * Computes the likelihood that a given output sequence was computed by a
+   * given model.
+   *
+   * @param model model to compute sequence likelihood for.
+   * @param outputSequence sequence to base computation on.
+   * @param beta beta parameters.
+   * @param scaled     set to true if betas are log-scaled.
+   * @return likelihood of the outputSequence given the model.
+   */
+  public static double modelLikelihood(HmmModel model, int[] outputSequence,
+                                       Matrix beta, boolean scaled) {
+    double likelihood = 0;
+    // fetch the emission probabilities
+    Matrix e = model.getEmissionMatrix();
+    Vector pi = model.getInitialProbabilities();
+    int firstOutput = outputSequence[0];
+    if (scaled) {
+      for (int i = 0; i < model.getNrOfHiddenStates(); ++i) {
+        likelihood += pi.getQuick(i) * Math.exp(beta.getQuick(0, i))
+            * e.getQuick(i, firstOutput);
+      }
+    } else {
+      for (int i = 0; i < model.getNrOfHiddenStates(); ++i) {
+        likelihood += pi.getQuick(i) * beta.getQuick(0, i)
+            * e.getQuick(i, firstOutput);
+      }
+    }
+    return likelihood;
+  }
+
+  /**
+   * Returns the most likely sequence of hidden states for the given model and
+   * observation
+   *
+   * @param model model to use for decoding.
+   * @param observations integer Array containing a sequence of observed state IDs
+   * @param scaled       Use log-scaled computations, this requires higher computational
+   *                     effort but is numerically more stable for large observation
+   *                     sequences
+   * @return integer array containing the most likely sequence of hidden state
+   * IDs
+   */
+  public static int[] decode(HmmModel model, int[] observations, boolean scaled) {
+    return HmmAlgorithms.viterbiAlgorithm(model, observations, scaled);
+  }
+
+}

Added: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmModel.java?rev=1000807&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmModel.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmModel.java Fri Sep 24 11:17:13 2010
@@ -0,0 +1,502 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.classifier.sequencelearning.hmm;
+
+import java.lang.reflect.Type;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.commons.collections.BidiMap;
+import org.apache.commons.collections.bidimap.TreeBidiMap;
+import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.math.DenseMatrix;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.JsonMatrixAdapter;
+import org.apache.mahout.math.JsonVectorAdapter;
+import org.apache.mahout.math.Matrix;
+import org.apache.mahout.math.Vector;
+
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.google.gson.JsonDeserializationContext;
+import com.google.gson.JsonDeserializer;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonPrimitive;
+import com.google.gson.JsonSerializationContext;
+import com.google.gson.JsonSerializer;
+import com.google.gson.reflect.TypeToken;
+
+/**
+ * Main class defining a Hidden Markov Model
+ *
+ * @author mheimel
+ */
+public class HmmModel implements JsonDeserializer<HmmModel>,
+    JsonSerializer<HmmModel>, Cloneable {
+
+  /**
+   * No-args constructed needed for json de-serialization
+   */
+  private HmmModel() {
+    // do nothing
+  }
+
+  /**
+   * Get a copy of this model
+   */
+  public HmmModel clone() throws CloneNotSupportedException {
+    super.clone();
+    HmmModel model = new HmmModel(transitionMatrix.clone(), emissionMatrix
+        .clone(), initialProbabilities.clone());
+    if (hiddenStateNames != null)
+      model.hiddenStateNames = new TreeBidiMap(hiddenStateNames);
+    if (outputStateNames != null)
+      model.outputStateNames = new TreeBidiMap(outputStateNames);
+    return model;
+  }
+
+  /**
+   * Assign the content of another HMM model to this one
+   *
+   * @param model The HmmModel that will be assigned to this one
+   */
+  public void assign(HmmModel model) {
+    this.nrOfHiddenStates = model.nrOfHiddenStates;
+    this.nrOfOutputStates = model.nrOfOutputStates;
+    this.hiddenStateNames = model.hiddenStateNames;
+    this.outputStateNames = model.outputStateNames;
+    // for now clone the matrix/vectors
+    this.initialProbabilities = model.initialProbabilities.clone();
+    this.emissionMatrix = model.emissionMatrix.clone();
+    this.transitionMatrix = model.transitionMatrix.clone();
+  }
+
+  /**
+   * Construct a valid random Hidden-Markov parameter set with the given number
+   * of hidden and output states using a given seed.
+   *
+   * @param nrOfHiddenStates Number of hidden states
+   * @param nrOfOutputStates Number of output states
+   * @param seed             Seed for the random initialization, if set to 0 the current time
+   *                         is used
+   */
+  public HmmModel(int nrOfHiddenStates, int nrOfOutputStates, long seed) {
+    this.nrOfHiddenStates = nrOfHiddenStates;
+    this.nrOfOutputStates = nrOfOutputStates;
+    this.transitionMatrix = new DenseMatrix(nrOfHiddenStates, nrOfHiddenStates);
+    this.emissionMatrix = new DenseMatrix(nrOfHiddenStates, nrOfOutputStates);
+    this.initialProbabilities = new DenseVector(nrOfHiddenStates);
+    // initialize a random, valid parameter set
+    initRandomParameters(seed);
+  }
+
+  /**
+   * Construct a valid random Hidden-Markov parameter set with the given number
+   * of hidden and output states.
+   *
+   * @param nrOfHiddenStates Number of hidden states
+   * @param nrOfOutputStates Number of output states
+   */
+  public HmmModel(int nrOfHiddenStates, int nrOfOutputStates) {
+    this(nrOfHiddenStates, nrOfOutputStates, 0);
+  }
+
+  /**
+   * Generates a Hidden Markov model using the specified parameters
+   *
+   * @param transitionMatrix     transition probabilities.
+   * @param emissionMatrix       emission probabilities.
+   * @param initialProbabilities initial start probabilities.
+   * @throws IllegalArgumentException If the given parameter set is invalid
+   */
+  public HmmModel(Matrix transitionMatrix, Matrix emissionMatrix,
+                  Vector initialProbabilities) {
+    this.nrOfHiddenStates = initialProbabilities.size();
+    this.nrOfOutputStates = emissionMatrix.numCols();
+    this.transitionMatrix = transitionMatrix;
+    this.emissionMatrix = emissionMatrix;
+    this.initialProbabilities = initialProbabilities;
+  }
+
+  /**
+   * Initialize a valid random set of HMM parameters
+   *
+   * @param seed seed to use for Random initialization. Use 0 to use Java-built-in-version.
+   */
+  private void initRandomParameters(long seed) {
+    Random rand;
+    // initialize the random number generator
+    if (seed == 0)
+      rand = RandomUtils.getRandom();
+    else
+      rand = RandomUtils.getRandom(seed);
+    // initialize the initial Probabilities
+    double sum = 0; // used for normalization
+    for (int i = 0; i < nrOfHiddenStates; i++) {
+      double nextRand = rand.nextDouble();
+      initialProbabilities.set(i, nextRand);
+      sum += nextRand;
+    }
+    // "normalize" the vector to generate probabilities
+    initialProbabilities = initialProbabilities.divide(sum);
+
+    // initialize the transition matrix
+    double[] values = new double[nrOfHiddenStates];
+    for (int i = 0; i < nrOfHiddenStates; i++) {
+      sum = 0;
+      for (int j = 0; j < nrOfHiddenStates; j++) {
+        values[j] = rand.nextDouble();
+        sum += values[j];
+      }
+      // normalize the random values to obtain probabilities
+      for (int j = 0; j < nrOfHiddenStates; j++)
+        values[j] /= sum;
+      // set this row of the transition matrix
+      transitionMatrix.set(i, values);
+    }
+
+    // initialize the output matrix
+    values = new double[nrOfOutputStates];
+    for (int i = 0; i < nrOfHiddenStates; i++) {
+      sum = 0;
+      for (int j = 0; j < nrOfOutputStates; j++) {
+        values[j] = rand.nextDouble();
+        sum += values[j];
+      }
+      // normalize the random values to obtain probabilities
+      for (int j = 0; j < nrOfOutputStates; j++)
+        values[j] /= sum;
+      // set this row of the output matrix
+      emissionMatrix.set(i, values);
+    }
+  }
+
+  /**
+   * Number of hidden states
+   */
+  private int nrOfHiddenStates;
+
+  /**
+   * Getter Method for the number of hidden states
+   *
+   * @return Number of hidden states
+   */
+  public int getNrOfHiddenStates() {
+    return nrOfHiddenStates;
+  }
+
+  /**
+   * Number of output states
+   */
+  private int nrOfOutputStates;
+
+  /**
+   * Getter Method for the number of output states
+   *
+   * @return Number of output states
+   */
+  public int getNrOfOutputStates() {
+    return nrOfOutputStates;
+  }
+
+  /**
+   * Transition matrix containing the transition probabilities between hidden
+   * states. TransitionMatrix(i,j) is the probability that we change from hidden
+   * state i to hidden state j In general: P(h(t+1)=h_j | h(t) = h_i) =
+   * transitionMatrix(i,j) Since we have to make sure that each hidden state can
+   * be "left", the following normalization condition has to hold:
+   * sum(transitionMatrix(i,j),j=1..hiddenStates) = 1
+   */
+  private Matrix transitionMatrix;
+
+  /**
+   * Getter function to get the hidden state transition matrix
+   *
+   * @return returns the model's transition matrix.
+   */
+  public Matrix getTransitionMatrix() {
+    return transitionMatrix;
+  }
+
+  /**
+   * Output matrix containing the probabilities that we observe a given output
+   * state given a hidden state. outputMatrix(i,j) is the probability that we
+   * observe output state j if we are in hidden state i Formally: P(o(t)=o_j |
+   * h(t)=h_i) = outputMatrix(i,j) Since we always have an observation for each
+   * hidden state, the following normalization condition has to hold:
+   * sum(outputMatrix(i,j),j=1..outputStates) = 1
+   */
+  private Matrix emissionMatrix;
+
+  /**
+   * Getter function to get the output state probability matrix
+   *
+   * @return returns the models emission matrix.
+   */
+  public Matrix getEmissionMatrix() {
+    return emissionMatrix;
+  }
+
+  /**
+   * Vector containing the initial hidden state probabilities. That is
+   * P(h(0)=h_i) = initialProbabilities(i). Since we are dealing with
+   * probabilities the following normalization condition has to hold:
+   * sum(initialProbabilities(i),i=1..hiddenStates) = 1
+   */
+  private Vector initialProbabilities;
+
+  /**
+   * Getter function to return the vector of initial hidden state probabilities
+   *
+   * @return returns the model's init probabilities.
+   */
+  public Vector getInitialProbabilities() {
+    return initialProbabilities;
+  }
+
+  /**
+   * Bi-Directional Map for storing the hidden state names
+   */
+  private BidiMap hiddenStateNames;
+
+  /**
+   * Getter method for the hidden state Names map
+   *
+   * @return hidden state names.
+   */
+  @SuppressWarnings("unchecked")
+  public Map<String, Integer> getHiddenStateNames() {
+    return hiddenStateNames;
+  }
+
+  /**
+   * Register an array of hidden state Names. We assume that the state name at
+   * position i has the ID i
+   *
+   * @param stateNames names of hidden states.
+   */
+  public void registerHiddenStateNames(String[] stateNames) {
+    if (stateNames != null) {
+      hiddenStateNames = new TreeBidiMap();
+      for (int i = 0; i < stateNames.length; ++i) {
+        hiddenStateNames.put(stateNames[i], i);
+      }
+    }
+  }
+
+  /**
+   * Register a map of hidden state Names/state IDs
+   *
+   * @param stateNames <String,Integer> Map that assigns each state name an integer ID
+   */
+  public void registerHiddenStateNames(Map<String, Integer> stateNames) {
+    if (stateNames != null)
+      hiddenStateNames = new TreeBidiMap(stateNames);
+  }
+
+  /**
+   * Lookup the name for the given hidden state ID
+   *
+   * @param id Integer id of the hidden state
+   * @return String containing the name for the given ID, null if this ID is not
+   *         known or no hidden state names were specified
+   */
+  public String getHiddenStateName(int id) {
+    if (hiddenStateNames == null)
+      return null;
+    return (String) hiddenStateNames.getKey(id);
+  }
+
+  /**
+   * Lookup the ID for the given hidden state name
+   *
+   * @param name Name of the hidden state
+   * @return int containing the ID for the given name, -1 if this name is not
+   *         known or no hidden state names were specified
+   */
+  public int getHiddenStateID(String name) {
+    if (hiddenStateNames == null)
+      return -1;
+    Integer tmp = (Integer) hiddenStateNames.get(name);
+    return (tmp == null) ? -1 : tmp;
+  }
+
+  /**
+   * Bi-directional Map for storing the observed state names
+   */
+  private BidiMap outputStateNames;
+
+  /**
+   * Getter method for the output state Names map
+   *
+   * @return names of output states.
+   */
+  @SuppressWarnings("unchecked")
+  public Map<String, Integer> getOutputStateNames() {
+    return outputStateNames;
+  }
+
+  /**
+   * Register an array of hidden state Names. We assume that the state name at
+   * position i has the ID i
+   *
+   * @param stateNames state names to register.
+   */
+  public void registerOutputStateNames(String[] stateNames) {
+    if (stateNames != null) {
+      outputStateNames = new TreeBidiMap();
+      for (int i = 0; i < stateNames.length; ++i) {
+        outputStateNames.put(stateNames[i], i);
+      }
+    }
+  }
+
+  /**
+   * Register a map of hidden state Names/state IDs
+   *
+   * @param stateNames <String,Integer> Map that assigns each state name an integer ID
+   */
+  public void registerOutputStateNames(Map<String, Integer> stateNames) {
+    if (stateNames != null)
+      outputStateNames = new TreeBidiMap(stateNames);
+  }
+
+  /**
+   * Lookup the name for the given output state id
+   *
+   * @param id Integer id of the output state
+   * @return String containing the name for the given id, null if this id is not
+   *         known or no output state names were specified
+   */
+  public String getOutputStateName(int id) {
+    if (outputStateNames == null)
+      return null;
+    return (String) outputStateNames.getKey(id);
+  }
+
+  /**
+   * Lookup the ID for the given output state name
+   *
+   * @param name Name of the output state
+   * @return int containing the ID for the given name, -1 if this name is not
+   *         known or no output state names were specified
+   */
+  public int getOutputStateID(String name) {
+    if (outputStateNames == null)
+      return -1;
+    Integer tmp = (Integer) outputStateNames.get(name);
+    return (tmp == null) ? -1 : tmp;
+  }
+
+  /**
+   * Encode this HMMmodel as a JSON string
+   *
+   * @return String containing the JSON of this model
+   */
+  public String toJson() {
+    GsonBuilder builder = new GsonBuilder();
+    builder.registerTypeAdapter(HmmModel.class, this);
+    Gson gson = builder.create();
+    return gson.toJson(this);
+  }
+
+  /**
+   * Decode this HmmModel from a JSON string
+   *
+   * @param json String containing JSON representation of this model
+   * @return Initialized model
+   */
+  public static HmmModel fromJson(String json) {
+    GsonBuilder builder = new GsonBuilder();
+    builder.registerTypeAdapter(HmmModel.class, new HmmModel());
+    Gson gson = builder.create();
+    return gson.fromJson(json, HmmModel.class);
+  }
+
+  // CODE USED FOR SERIALIZATION
+
+  private static final String MODEL = "HMMModel";
+  private static final String OUTNAMES = "HMMOutNames";
+  private static final String HIDDENNAMES = "HmmHiddenNames";
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public HmmModel deserialize(JsonElement json, Type type,
+                              JsonDeserializationContext context) {
+    // register the builders for matrix / vector
+    GsonBuilder builder = new GsonBuilder();
+    builder.registerTypeAdapter(Matrix.class, new JsonMatrixAdapter());
+    builder.registerTypeAdapter(Vector.class, new JsonVectorAdapter());
+    Gson gson = builder.create();
+    // now decode the original model
+    JsonObject obj = json.getAsJsonObject();
+    String modelString = obj.get(MODEL).getAsString();
+    HmmModel model = gson.fromJson(modelString, HmmModel.class);
+    // now decode the names
+    JsonElement names = obj.get(HIDDENNAMES);
+    if (names != null) {
+      Map<String, Integer> tmpMap = gson.fromJson(names.getAsString(),
+          new TypeToken<Map<String, Integer>>() {
+          } .getType());
+      model.registerHiddenStateNames(tmpMap);
+    }
+    names = obj.get(OUTNAMES);
+    if (names != null) {
+      Map<String, Integer> tmpMap = gson.fromJson(names.getAsString(),
+          new TypeToken<Map<String, Integer>>() {
+          } .getType());
+      model.registerOutputStateNames(tmpMap);
+    }
+    // return the model
+    return model;
+  }
+
+  @Override
+  public JsonElement serialize(HmmModel model, Type arg1,
+                               JsonSerializationContext arg2) {
+    // we need to make sure that we serialize the bidimaps separately, since
+    // GSON is unable to do this naively
+    BidiMap hiddenNames = model.hiddenStateNames;
+    model.hiddenStateNames = null;
+    BidiMap outNames = model.outputStateNames;
+    model.outputStateNames = null;
+    // now register the builders for matrix / vector
+    GsonBuilder builder = new GsonBuilder();
+    builder.registerTypeAdapter(Matrix.class, new JsonMatrixAdapter());
+    builder.registerTypeAdapter(Vector.class, new JsonVectorAdapter());
+    Gson gson = builder.create();
+    // create a model
+    JsonObject json = new JsonObject();
+    // first, we add the model
+    json.add(MODEL, new JsonPrimitive(gson.toJson(model)));
+    // now, we add the state names
+    if (outNames != null) {
+      json.add(OUTNAMES, new JsonPrimitive(gson.toJson(outNames)));
+    }
+    if (hiddenNames != null) {
+      json.add(HIDDENNAMES, new JsonPrimitive(gson.toJson(hiddenNames)));
+    }
+    // return the model to its original state :)
+    model.hiddenStateNames = hiddenNames;
+    model.outputStateNames = outNames;
+    return json;
+  }
+}

Added: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmTrainer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmTrainer.java?rev=1000807&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmTrainer.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmTrainer.java Fri Sep 24 11:17:13 2010
@@ -0,0 +1,483 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.classifier.sequencelearning.hmm;
+
+import java.util.Collection;
+import java.util.Iterator;
+
+import org.apache.mahout.math.DenseMatrix;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Matrix;
+import org.apache.mahout.math.Vector;
+
+/**
+ * Class containing several algorithms used to train a Hidden Markov Model. The
+ * three main algorithms are: supervised learning, unsupervised Viterbi and
+ * unsupervised Baum-Welch.
+ *
+ * @author mheimel
+ */
+public final class HmmTrainer {
+
+  /**
+   * No public constructor for utility classes.
+   */
+  private HmmTrainer() {
+    // nothing to do here really.
+  }
+
+  /**
+   * Create an supervised initial estimate of an HMM Model based on a sequence
+   * of observed and hidden states.
+   *
+   * @param nrOfHiddenStates The total number of hidden states
+   * @param nrOfOutputStates The total number of output states
+   * @param observedSequence Integer array containing the observed sequence
+   * @param hiddenSequence   Integer array containing the hidden sequence
+   * @param pseudoCount      Value that is assigned to non-occurring transitions to avoid zero
+   *                         probabilities.
+   * @return An initial model using the estimated parameters
+   */
+  public static HmmModel trainSupervised(int nrOfHiddenStates,
+                                         int nrOfOutputStates, int[] observedSequence, int[] hiddenSequence,
+                                         double pseudoCount) {
+    // make sure the pseudo count is not zero
+    pseudoCount = (pseudoCount == 0) ? Double.MIN_VALUE : pseudoCount;
+
+    // initialize the parameters
+    DenseMatrix transitionMatrix = new DenseMatrix(nrOfHiddenStates,
+        nrOfHiddenStates);
+    DenseMatrix emissionMatrix = new DenseMatrix(nrOfHiddenStates,
+        nrOfOutputStates);
+    // assign a small initial probability that is larger than zero, so
+    // unseen states will not get a zero probability
+    transitionMatrix.assign(pseudoCount);
+    emissionMatrix.assign(pseudoCount);
+    // given no prior knowledge, we have to assume that all initial hidden
+    // states are equally likely
+    DenseVector initialProbabilities = new DenseVector(nrOfHiddenStates);
+    initialProbabilities.assign(1.0 / (double) nrOfHiddenStates);
+
+    // now loop over the sequences to count the number of transitions
+    countTransitions(transitionMatrix, emissionMatrix, observedSequence,
+        hiddenSequence);
+
+    // make sure that probabilities are normalized
+    for (int i = 0; i < nrOfHiddenStates; i++) {
+      // compute sum of probabilities for current row of transition matrix
+      double sum = 0;
+      for (int j = 0; j < nrOfHiddenStates; j++)
+        sum += transitionMatrix.getQuick(i, j);
+      // normalize current row of transition matrix
+      for (int j = 0; j < nrOfHiddenStates; j++)
+        transitionMatrix.setQuick(i, j, transitionMatrix.getQuick(i, j) / sum);
+      // compute sum of probabilities for current row of emission matrix
+      sum = 0;
+      for (int j = 0; j < nrOfOutputStates; j++)
+        sum += emissionMatrix.getQuick(i, j);
+      // normalize current row of emission matrix
+      for (int j = 0; j < nrOfOutputStates; j++)
+        emissionMatrix.setQuick(i, j, emissionMatrix.getQuick(i, j) / sum);
+    }
+
+    // return a new model using the parameter estimations
+    return new HmmModel(transitionMatrix, emissionMatrix, initialProbabilities);
+  }
+
+  /**
+   * Function that counts the number of state->state and state->output
+   * transitions for the given observed/hidden sequence.
+   *
+   * @param transitionMatrix transition matrix to use.
+   * @param emissionMatrix emission matrix to use for counting.
+   * @param observedSequence observation sequence to use.
+   * @param hiddenSequence sequence of hidden states to use.
+   */
+  private static void countTransitions(Matrix transitionMatrix,
+                                       Matrix emissionMatrix, int[] observedSequence, int[] hiddenSequence) {
+    emissionMatrix.setQuick(hiddenSequence[0], observedSequence[0],
+        emissionMatrix.getQuick(hiddenSequence[0], observedSequence[0]) + 1);
+    for (int i = 1; i < observedSequence.length; ++i) {
+      transitionMatrix
+          .setQuick(hiddenSequence[i - 1], hiddenSequence[i], transitionMatrix
+              .getQuick(hiddenSequence[i - 1], hiddenSequence[i]) + 1);
+      emissionMatrix.setQuick(hiddenSequence[i], observedSequence[i],
+          emissionMatrix.getQuick(hiddenSequence[i], observedSequence[i]) + 1);
+    }
+  }
+
+  /**
+   * Create an supervised initial estimate of an HMM Model based on a number of
+   * sequences of observed and hidden states.
+   *
+   * @param nrOfHiddenStates The total number of hidden states
+   * @param nrOfOutputStates The total number of output states
+   * @param hiddenSequences Collection of hidden sequences to use for training
+   * @param observedSequences Collection of observed sequences to use for training associated with hidden sequences.
+   * @param pseudoCount      Value that is assigned to non-occurring transitions to avoid zero
+   *                         probabilities.
+   * @return An initial model using the estimated parameters
+   */
+  public static HmmModel trainSupervisedSequence(int nrOfHiddenStates,
+                                                 int nrOfOutputStates, Collection<int[]> hiddenSequences,
+                                                 Collection<int[]> observedSequences, double pseudoCount) {
+
+    // make sure the pseudo count is not zero
+    pseudoCount = (pseudoCount == 0) ? Double.MIN_VALUE : pseudoCount;
+
+    // initialize parameters
+    DenseMatrix transitionMatrix = new DenseMatrix(nrOfHiddenStates,
+        nrOfHiddenStates);
+    DenseMatrix emissionMatrix = new DenseMatrix(nrOfHiddenStates,
+        nrOfOutputStates);
+    DenseVector initialProbabilities = new DenseVector(nrOfHiddenStates);
+
+    // assign pseudo count to avoid zero probabilities
+    transitionMatrix.assign(pseudoCount);
+    emissionMatrix.assign(pseudoCount);
+    initialProbabilities.assign(pseudoCount);
+
+    // now loop over the sequences to count the number of transitions
+    Iterator<int[]> hiddenSequenceIt = hiddenSequences.iterator();
+    Iterator<int[]> observedSequenceIt = observedSequences.iterator();
+    while (hiddenSequenceIt.hasNext() && observedSequenceIt.hasNext()) {
+      // fetch the current set of sequences
+      int[] hiddenSequence = hiddenSequenceIt.next();
+      int[] observedSequence = observedSequenceIt.next();
+      // increase the count for initial probabilities
+      initialProbabilities.setQuick(hiddenSequence[0], initialProbabilities
+          .getQuick(hiddenSequence[0]) + 1);
+      countTransitions(transitionMatrix, emissionMatrix, observedSequence,
+          hiddenSequence);
+    }
+
+    // make sure that probabilities are normalized
+    double isum = 0; // sum of initial probabilities
+    for (int i = 0; i < nrOfHiddenStates; i++) {
+      isum += initialProbabilities.getQuick(i);
+      // compute sum of probabilities for current row of transition matrix
+      double sum = 0;
+      for (int j = 0; j < nrOfHiddenStates; j++)
+        sum += transitionMatrix.getQuick(i, j);
+      // normalize current row of transition matrix
+      for (int j = 0; j < nrOfHiddenStates; j++)
+        transitionMatrix.setQuick(i, j, transitionMatrix.getQuick(i, j) / sum);
+      // compute sum of probabilities for current row of emission matrix
+      sum = 0;
+      for (int j = 0; j < nrOfOutputStates; j++)
+        sum += emissionMatrix.getQuick(i, j);
+      // normalize current row of emission matrix
+      for (int j = 0; j < nrOfOutputStates; j++)
+        emissionMatrix.setQuick(i, j, emissionMatrix.getQuick(i, j) / sum);
+    }
+    // normalize the initial probabilities
+    for (int i = 0; i < nrOfHiddenStates; ++i)
+      initialProbabilities.setQuick(i, initialProbabilities.getQuick(i) / isum);
+
+    // return a new model using the parameter estimates
+    return new HmmModel(transitionMatrix, emissionMatrix, initialProbabilities);
+  }
+
+  /**
+   * Iteratively train the parameters of the given initial model wrt to the
+   * observed sequence using Viterbi training.
+   *
+   * @param initialModel     The initial model that gets iterated
+   * @param observedSequence The sequence of observed states
+   * @param pseudoCount      Value that is assigned to non-occurring transitions to avoid zero
+   *                         probabilities.
+   * @param epsilon          Convergence criteria
+   * @param maxIterations    The maximum number of training iterations
+   * @param scaled           Use Log-scaled implementation, this is computationally more
+   *                         expensive but offers better numerical stability for large observed
+   *                         sequences
+   * @return The iterated model
+   */
+  public static HmmModel trainViterbi(HmmModel initialModel,
+                                      int[] observedSequence, double pseudoCount, double epsilon,
+                                      int maxIterations, boolean scaled) {
+
+    // make sure the pseudo count is not zero
+    pseudoCount = (pseudoCount == 0) ? Double.MIN_VALUE : pseudoCount;
+
+    // allocate space for iteration models
+    HmmModel lastIteration;
+    HmmModel iteration;
+    try {
+      lastIteration = initialModel.clone();
+      iteration = initialModel.clone();
+    } catch (CloneNotSupportedException e) {
+      throw new UnknownError("Cloning HmmModels broke. Check for programming errors, changed APIs.");
+    }
+
+    // allocate space for Viterbi path calculation
+    int[] viterbiPath = new int[observedSequence.length];
+    int[][] phi = new int[observedSequence.length - 1][initialModel
+        .getNrOfHiddenStates()];
+    double[][] delta = new double[observedSequence.length][initialModel
+        .getNrOfHiddenStates()];
+
+    // now run the Viterbi training iteration
+    for (int i = 0; i < maxIterations; ++i) {
+      // compute the Viterbi path
+      HmmAlgorithms.viterbiAlgorithm(viterbiPath, delta, phi, lastIteration,
+          observedSequence, scaled);
+      // Viterbi iteration uses the viterbi path to update
+      // the probabilities
+      Matrix emissionMatrix = iteration.getEmissionMatrix();
+      Matrix transitionMatrix = iteration.getTransitionMatrix();
+
+      // first, assign the pseudo count
+      emissionMatrix.assign(pseudoCount);
+      transitionMatrix.assign(pseudoCount);
+
+      // now count the transitions
+      countTransitions(transitionMatrix, emissionMatrix, observedSequence,
+          viterbiPath);
+
+      // and normalize the probabilities
+      for (int j = 0; j < iteration.getNrOfHiddenStates(); ++j) {
+        double sum = 0;
+        // normalize the rows of the transition matrix
+        for (int k = 0; k < iteration.getNrOfHiddenStates(); ++k)
+          sum += transitionMatrix.getQuick(j, k);
+        for (int k = 0; k < iteration.getNrOfHiddenStates(); ++k)
+          transitionMatrix
+              .setQuick(j, k, transitionMatrix.getQuick(j, k) / sum);
+        // normalize the rows of the emission matrix
+        sum = 0;
+        for (int k = 0; k < iteration.getNrOfOutputStates(); ++k)
+          sum += emissionMatrix.getQuick(j, k);
+        for (int k = 0; k < iteration.getNrOfOutputStates(); ++k)
+          emissionMatrix.setQuick(j, k, emissionMatrix.getQuick(j, k) / sum);
+      }
+      // check for convergence
+      if (checkConvergence(lastIteration, iteration, epsilon))
+        break;
+      // overwrite the last iterated model by the new iteration
+      lastIteration.assign(iteration);
+    }
+    // we are done :)
+    return iteration;
+  }
+
+  /**
+   * Iteratively train the parameters of the given initial model wrt the
+   * observed sequence using Baum-Welch training.
+   *
+   * @param initialModel     The initial model that gets iterated
+   * @param observedSequence The sequence of observed states
+   * @param epsilon          Convergence criteria
+   * @param maxIterations    The maximum number of training iterations
+   * @param scaled           Use log-scaled implementations of forward/backward algorithm. This
+   *                         is computationally more expensive, but offers better numerical
+   *                         stability for long output sequences.
+   * @return The iterated model
+   */
+  public static HmmModel trainBaumWelch(HmmModel initialModel,
+                                        int[] observedSequence, double epsilon, int maxIterations, boolean scaled) {
+    // allocate space for the iterations
+    HmmModel lastIteration;
+    HmmModel iteration;
+    try {
+      lastIteration = initialModel.clone();
+      iteration = initialModel.clone();
+    } catch (CloneNotSupportedException e) {
+      throw new UnknownError("Cloning HmmModels broke. Check for programming errors, changed APIs etc.");
+    }
+    // allocate space for baum-welch factors
+    int hiddenCount = initialModel.getNrOfHiddenStates();
+    int visibleCount = observedSequence.length;
+    DenseMatrix alpha = new DenseMatrix(visibleCount, hiddenCount);
+    DenseMatrix beta = new DenseMatrix(visibleCount, hiddenCount);
+
+    // now run the baum Welch training iteration
+    for (int it = 0; it < maxIterations; ++it) {
+      // fetch emission and transition matrix of current iteration
+      Vector initialProbabilities = iteration.getInitialProbabilities();
+      Matrix emissionMatrix = iteration.getEmissionMatrix();
+      Matrix transitionMatrix = iteration.getTransitionMatrix();
+
+      // compute forward and backward factors
+      HmmAlgorithms.forwardAlgorithm(alpha, iteration, observedSequence, scaled);
+      HmmAlgorithms.backwardAlgorithm(beta, iteration, observedSequence, scaled);
+
+      if (scaled) {
+        logScaledBaumWelch(observedSequence, iteration, alpha, beta);
+      } else {
+        unscaledBaumWelch(observedSequence, iteration, alpha, beta);
+      }
+      // normalize transition/emission probabilities
+      // and normalize the probabilities
+      double isum = 0;
+      for (int j = 0; j < iteration.getNrOfHiddenStates(); ++j) {
+        double sum = 0;
+        // normalize the rows of the transition matrix
+        for (int k = 0; k < iteration.getNrOfHiddenStates(); ++k)
+          sum += transitionMatrix.getQuick(j, k);
+        for (int k = 0; k < iteration.getNrOfHiddenStates(); ++k)
+          transitionMatrix
+              .setQuick(j, k, transitionMatrix.getQuick(j, k) / sum);
+        // normalize the rows of the emission matrix
+        sum = 0;
+        for (int k = 0; k < iteration.getNrOfOutputStates(); ++k)
+          sum += emissionMatrix.getQuick(j, k);
+        for (int k = 0; k < iteration.getNrOfOutputStates(); ++k)
+          emissionMatrix.setQuick(j, k, emissionMatrix.getQuick(j, k) / sum);
+        // normalization parameter for initial probabilities
+        isum += initialProbabilities.getQuick(j);
+      }
+      // normalize initial probabilities
+      for (int i = 0; i < iteration.getNrOfHiddenStates(); ++i) {
+        initialProbabilities.setQuick(i, initialProbabilities.getQuick(i)
+            / isum);
+      }
+      // check for convergence
+      if (checkConvergence(lastIteration, iteration, epsilon))
+        break;
+      // overwrite the last iterated model by the new iteration
+      lastIteration.assign(iteration);
+    }
+    // we are done :)
+    return iteration;
+  }
+
+  private static void unscaledBaumWelch(int[] observedSequence, HmmModel iteration, DenseMatrix alpha, DenseMatrix beta) {
+    Vector initialProbabilities = iteration.getInitialProbabilities();
+    Matrix emissionMatrix = iteration.getEmissionMatrix();
+    Matrix transitionMatrix = iteration.getTransitionMatrix();
+    double modelLikelihood = HmmEvaluator.modelLikelihood(alpha, false);
+
+    for (int i = 0; i < iteration.getNrOfHiddenStates(); ++i) {
+      initialProbabilities.setQuick(i, alpha.getQuick(0, i)
+          * beta.getQuick(0, i));
+    }
+
+    // recompute transition probabilities
+    for (int i = 0; i < iteration.getNrOfHiddenStates(); ++i) {
+      for (int j = 0; j < iteration.getNrOfHiddenStates(); ++j) {
+        double temp = 0;
+        for (int t = 0; t < observedSequence.length - 1; ++t) {
+          temp += alpha.getQuick(t, i)
+              * emissionMatrix.getQuick(j, observedSequence[t + 1])
+              * beta.getQuick(t + 1, j);
+        }
+        transitionMatrix.setQuick(i, j, transitionMatrix.getQuick(i, j)
+            * temp / modelLikelihood);
+      }
+    }
+    // recompute emission probabilities
+    for (int i = 0; i < iteration.getNrOfHiddenStates(); ++i) {
+      for (int j = 0; j < iteration.getNrOfOutputStates(); ++j) {
+        double temp = 0;
+        for (int t = 0; t < observedSequence.length; ++t) {
+          // delta tensor
+          if (observedSequence[t] == j) {
+            temp += alpha.getQuick(t, i) * beta.getQuick(t, i);
+          }
+        }
+        emissionMatrix.setQuick(i, j, temp / modelLikelihood);
+      }
+    }
+  }
+
+  private static void logScaledBaumWelch(int[] observedSequence, HmmModel iteration, DenseMatrix alpha, DenseMatrix beta) {
+    Vector initialProbabilities = iteration.getInitialProbabilities();
+    Matrix emissionMatrix = iteration.getEmissionMatrix();
+    Matrix transitionMatrix = iteration.getTransitionMatrix();
+    double modelLikelihood = HmmEvaluator.modelLikelihood(alpha, true);
+
+    for (int i = 0; i < iteration.getNrOfHiddenStates(); ++i) {
+      initialProbabilities.setQuick(i, Math.exp(alpha.getQuick(0, i) + beta.getQuick(0, i)));
+    }
+
+    // recompute transition probabilities
+    for (int i = 0; i < iteration.getNrOfHiddenStates(); ++i) {
+      for (int j = 0; j < iteration.getNrOfHiddenStates(); ++j) {
+        double sum = Double.NEGATIVE_INFINITY; // log(0)
+        for (int t = 0; t < observedSequence.length - 1; ++t) {
+          double temp = alpha.getQuick(t, i)
+              + Math.log(emissionMatrix.getQuick(j, observedSequence[t + 1]))
+              + beta.getQuick(t + 1, j);
+          if (temp > Double.NEGATIVE_INFINITY) // handle
+            // 0-probabilities
+            sum = temp + Math.log(1 + Math.exp(sum - temp));
+        }
+        transitionMatrix.setQuick(i, j, transitionMatrix.getQuick(i, j)
+            * Math.exp(sum - modelLikelihood));
+      }
+    }
+    // recompute emission probabilities
+    for (int i = 0; i < iteration.getNrOfHiddenStates(); ++i) {
+      for (int j = 0; j < iteration.getNrOfOutputStates(); ++j) {
+        double sum = Double.NEGATIVE_INFINITY; // log(0)
+        for (int t = 0; t < observedSequence.length; ++t) {
+          // delta tensor
+          if (observedSequence[t] == j) {
+            double temp = alpha.getQuick(t, i) + beta.getQuick(t, i);
+            if (temp > Double.NEGATIVE_INFINITY) // handle
+              // 0-probabilities
+              sum = temp + Math.log(1 + Math.exp(sum - temp));
+          }
+        }
+        emissionMatrix.setQuick(i, j, Math.exp(sum - modelLikelihood));
+      }
+    }
+  }
+
+  /**
+   * Check convergence of two HMM models by computing a simple distance between
+   * emission / transition matrices
+   *
+   * @param oldModel Old HMM Model
+   * @param newModel New HMM Model
+   * @param epsilon  Convergence Factor
+   * @return true if training converged to a stable state.
+   */
+  private static boolean checkConvergence(HmmModel oldModel, HmmModel newModel,
+                                          double epsilon) {
+    // check convergence of transitionProbabilities
+    Matrix oldTransitionMatrix = oldModel.getTransitionMatrix();
+    Matrix newTransitionMatrix = newModel.getTransitionMatrix();
+    double diff = 0;
+    for (int i = 0; i < oldModel.getNrOfHiddenStates(); ++i) {
+      for (int j = 0; j < oldModel.getNrOfHiddenStates(); ++j) {
+        double tmp = oldTransitionMatrix.getQuick(i, j)
+            - newTransitionMatrix.getQuick(i, j);
+        diff += tmp * tmp;
+      }
+    }
+    double norm = Math.sqrt(diff);
+    diff = 0;
+    // check convergence of emissionProbabilities
+    Matrix oldEmissionMatrix = oldModel.getEmissionMatrix();
+    Matrix newEmissionMatrix = newModel.getEmissionMatrix();
+    for (int i = 0; i < oldModel.getNrOfHiddenStates(); i++) {
+      for (int j = 0; j < oldModel.getNrOfOutputStates(); j++) {
+
+        double tmp = oldEmissionMatrix.getQuick(i, j)
+            - newEmissionMatrix.getQuick(i, j);
+        diff += tmp * tmp;
+      }
+    }
+    norm += Math.sqrt(diff);
+    // iteration has converged :)
+    return norm < epsilon;
+  }
+
+}

Added: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmUtils.java?rev=1000807&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmUtils.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmUtils.java Fri Sep 24 11:17:13 2010
@@ -0,0 +1,385 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.classifier.sequencelearning.hmm;
+
+import java.util.Collection;
+import java.util.Iterator;
+
+import org.apache.mahout.math.DenseMatrix;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Matrix;
+import org.apache.mahout.math.RandomAccessSparseVector;
+import org.apache.mahout.math.SparseMatrix;
+import org.apache.mahout.math.Vector;
+import org.uncommons.maths.Maths;
+
+/**
+ * A collection of utilities for handling HMMModel objects.
+ *
+ * @author mheimel
+ */
+public final class HmmUtils {
+
+  /**
+   * No public constructor for utility classes.
+   */
+  private HmmUtils() {
+    // nothing to do here really.
+  }
+
+  /**
+   * Compute the cumulative transition probability matrix for the given HMM
+   * model. Matrix where each row i is the cumulative distribution of the
+   * transition probability distribution for hidden state i.
+   *
+   * @param model The HMM model for which the cumulative transition matrix should be
+   *              computed
+   * @return The computed cumulative transition matrix.
+   */
+  public static Matrix getCumulativeTransitionMatrix(HmmModel model) {
+    // fetch the needed parameters from the model
+    int hiddenStates = model.getNrOfHiddenStates();
+    Matrix transitionMatrix = model.getTransitionMatrix();
+    // now compute the cumulative transition matrix
+    Matrix resultMatrix = new DenseMatrix(hiddenStates, hiddenStates);
+    for (int i = 0; i < hiddenStates; ++i) {
+      double sum = 0;
+      for (int j = 0; j < hiddenStates; ++j) {
+        sum += transitionMatrix.get(i, j);
+        resultMatrix.set(i, j, sum);
+      }
+      resultMatrix.set(i, hiddenStates - 1, 1.0);
+      // make sure the last
+      // state has always a
+      // cumulative
+      // probability of
+      // exactly 1.0
+    }
+    return resultMatrix;
+  }
+
+  /**
+   * Compute the cumulative output probability matrix for the given HMM model.
+   * Matrix where each row i is the cumulative distribution of the output
+   * probability distribution for hidden state i.
+   *
+   * @param model The HMM model for which the cumulative output matrix should be
+   *              computed
+   * @return The computed cumulative output matrix.
+   */
+  public static Matrix getCumulativeOutputMatrix(HmmModel model) {
+    // fetch the needed parameters from the model
+    int hiddenStates = model.getNrOfHiddenStates();
+    int outputStates = model.getNrOfOutputStates();
+    Matrix outputMatrix = model.getEmissionMatrix();
+    // now compute the cumulative output matrix
+    Matrix resultMatrix = new DenseMatrix(hiddenStates, outputStates);
+    for (int i = 0; i < hiddenStates; ++i) {
+      double sum = 0;
+      for (int j = 0; j < outputStates; ++j) {
+        sum += outputMatrix.get(i, j);
+        resultMatrix.set(i, j, sum);
+      }
+      resultMatrix.set(i, outputStates - 1, 1.0);
+      // make sure the last
+      // output state has
+      // always a cumulative
+      // probability of 1.0
+    }
+    return resultMatrix;
+  }
+
+  /**
+   * Compute the cumulative distribution of the initial hidden state
+   * probabilities for the given HMM model.
+   *
+   * @param model The HMM model for which the cumulative initial state probabilities
+   *              should be computed
+   * @return The computed cumulative initial state probability vector.
+   */
+  public static Vector getCumulativeInitialProbabilities(HmmModel model) {
+    // fetch the needed parameters from the model
+    int hiddenStates = model.getNrOfHiddenStates();
+    Vector initialProbabilities = model.getInitialProbabilities();
+    // now compute the cumulative output matrix
+    Vector resultVector = new DenseVector(initialProbabilities.size());
+    double sum = 0;
+    for (int i = 0; i < hiddenStates; ++i) {
+      sum += initialProbabilities.get(i);
+      resultVector.set(i, sum);
+    }
+    resultVector.set(hiddenStates - 1, 1.0); // make sure the last initial
+    // hidden state probability
+    // has always a cumulative
+    // probability of 1.0
+    return resultVector;
+  }
+
+  /**
+   * Validates an HMM model set
+   *
+   * @param model model to sanity check.
+   */
+  public static void validate(HmmModel model) {
+    if (model == null) {
+      return; // empty models are valid
+    }
+
+    /*
+     * The number of hidden states is positive.
+     */
+    if (model.getNrOfHiddenStates() <= 0) {
+      throw new IllegalArgumentException(
+          "Error: The number of hidden states has to be greater than 0!");
+    }
+
+    /*
+     * The number of output states is positive.
+     */
+    if (model.getNrOfOutputStates() <= 0) {
+      throw new IllegalArgumentException(
+          "Error: The number of output states has to be greater than 0!");
+    }
+
+    /*
+     * The size of the vector of initial probabilities is equal to the number of
+     * the hidden states. Each initial probability is non-negative. The sum of
+     * initial probabilities is equal to 1.
+     */
+    if (model.getInitialProbabilities() == null) {
+      throw new IllegalArgumentException(
+          "Error: The vector of initial probabilities is not initialized!");
+    }
+    if (model.getInitialProbabilities().size() != model.getNrOfHiddenStates()) {
+      throw new IllegalArgumentException(
+          "Error: The vector of initial probabilities is not initialized!");
+    }
+    double sum = 0;
+    for (int i = 0; i < model.getInitialProbabilities().size(); i++) {
+      if (model.getInitialProbabilities().get(i) < 0) {
+        throw new IllegalArgumentException(
+            "Error: Initial probability of state " + i + " is negative!");
+      }
+      sum += model.getInitialProbabilities().get(i);
+    }
+    if (!Maths.approxEquals(sum, 1, 0.00001)) {
+      throw new IllegalArgumentException(
+          "Error: Initial probabilities do not add up to 1!");
+    }
+
+    /*
+     * The row size of the output matrix is equal to the number of the hidden
+     * states. The column size is equal to the number of output states. Each
+     * probability of the matrix is non-negative. The sum of each row is equal
+     * to 1.
+     */
+    if (model.getEmissionMatrix() == null) {
+      throw new IllegalArgumentException(
+          "Error: The output state matrix is not initialized!");
+    }
+    if (model.getEmissionMatrix().numRows() != model.getNrOfHiddenStates()
+        || model.getEmissionMatrix().numCols() != model.getNrOfOutputStates()) {
+      throw new IllegalArgumentException(
+          "Error: The output state matrix is not of the form nrOfHiddenStates x nrOfOutputStates!");
+    }
+    for (int i = 0; i < model.getEmissionMatrix().numRows(); i++) {
+      sum = 0;
+      for (int j = 0; j < model.getEmissionMatrix().numCols(); j++) {
+        if (model.getEmissionMatrix().get(i, j) < 0) {
+          throw new IllegalArgumentException(
+              "Error: The output state probability from hidden state " + i
+                  + " to output state " + j + " is negative!");
+        }
+        sum += model.getEmissionMatrix().get(i, j);
+      }
+      if (!Maths.approxEquals(sum, 1, 0.00001)) {
+        throw new IllegalArgumentException(
+            "Error: The output state probabilities for hidden state " + i
+                + " don't add up to 1.");
+      }
+    }
+
+    /*
+     * The size of both dimension of the transition matrix is equal to the
+     * number of the hidden states. Each probability of the matrix is
+     * non-negative. The sum of each row in transition matrix is equal to 1.
+     */
+    if (model.getTransitionMatrix() == null) {
+      throw new IllegalArgumentException(
+          "Error: The hidden state matrix is not initialized!");
+    }
+    if (model.getTransitionMatrix().numRows() != model.getNrOfHiddenStates()
+        || model.getTransitionMatrix().numCols() != model.getNrOfHiddenStates()) {
+      throw new IllegalArgumentException(
+          "Error: The output state matrix is not of the form nrOfHiddenStates x nrOfHiddenStates!");
+    }
+    for (int i = 0; i < model.getTransitionMatrix().numRows(); i++) {
+      sum = 0;
+      for (int j = 0; j < model.getTransitionMatrix().numCols(); j++) {
+        if (model.getTransitionMatrix().get(i, j) < 0) {
+          throw new IllegalArgumentException(
+              "Error: The transition probability from hidden state " + i
+                  + " to hidden state " + j + " is negative!");
+        }
+        sum += model.getTransitionMatrix().get(i, j);
+      }
+      if (!Maths.approxEquals(sum, 1, 0.00001)) {
+        throw new IllegalArgumentException(
+            "Error: The transition probabilities for hidden state " + i
+                + " don't add up to 1.");
+      }
+    }
+  }
+
+  /**
+   * Encodes a given collection of state names by the corresponding state IDs
+   * registered in a given model.
+   *
+   * @param model        Model to provide the encoding for
+   * @param sequence     Collection of state names
+   * @param observed     If set, the sequence is encoded as a sequence of observed states,
+   *                     else it is encoded as sequence of hidden states
+   * @param defaultValue The default value in case a state is not known
+   * @return integer array containing the encoded state IDs
+   */
+  public static int[] encodeStateSequence(HmmModel model,
+                                          Collection<String> sequence, boolean observed, int defaultValue) {
+    int[] encoded = new int[sequence.size()];
+    Iterator<String> seqIter = sequence.iterator();
+    for (int i = 0; i < sequence.size(); ++i) {
+      String nextState = seqIter.next();
+      int nextID;
+      if (observed)
+        nextID = model.getOutputStateID(nextState);
+      else
+        nextID = model.getHiddenStateID(nextState);
+      // if the ID is -1, use the default value
+      encoded[i] = (nextID < 0) ? defaultValue : nextID;
+    }
+    return encoded;
+  }
+
+  /**
+   * Decodes a given collection of state IDs into the corresponding state names
+   * registered in a given model.
+   *
+   * @param model        model to use for retrieving state names
+   * @param sequence     int array of state IDs
+   * @param observed     If set, the sequence is encoded as a sequence of observed states,
+   *                     else it is encoded as sequence of hidden states
+   * @param defaultValue The default value in case a state is not known
+   * @return java.util.Vector containing the decoded state names
+   */
+  public static java.util.Vector<String> decodeStateSequence(HmmModel model,
+                                                             int[] sequence, boolean observed, String defaultValue) {
+    java.util.Vector<String> decoded = new java.util.Vector<String>(
+        sequence.length);
+    for (int position : sequence) {
+      String nextState;
+      if (observed)
+        nextState = model.getOutputStateName(position);
+      else
+        nextState = model.getHiddenStateName(position);
+      // if null was returned, use the default value
+      decoded.add(nextState == null ? defaultValue : nextState);
+    }
+    return decoded;
+  }
+
+  /**
+   * Function used to normalize the probabilities of a given HMM model
+   *
+   * @param model model to normalize
+   */
+  public static void normalizeModel(HmmModel model) {
+    Vector ip = model.getInitialProbabilities();
+    Matrix emission = model.getEmissionMatrix();
+    Matrix transition = model.getTransitionMatrix();
+    // check normalization for all probabilities
+    double isum = 0;
+    for (int i = 0; i < model.getNrOfHiddenStates(); ++i) {
+      isum += ip.getQuick(i);
+      double sum = 0;
+      for (int j = 0; j < model.getNrOfHiddenStates(); ++j)
+        sum += transition.getQuick(i, j);
+      if (sum != 1.0) {
+        for (int j = 0; j < model.getNrOfHiddenStates(); ++j)
+          transition.setQuick(i, j, transition.getQuick(i, j) / sum);
+      }
+      sum = 0;
+      for (int j = 0; j < model.getNrOfOutputStates(); ++j)
+        sum += emission.getQuick(i, j);
+      if (sum != 1.0) {
+        for (int j = 0; j < model.getNrOfOutputStates(); ++j)
+          emission.setQuick(i, j, emission.getQuick(i, j) / sum);
+      }
+    }
+    if (isum != 1.0) {
+      for (int i = 0; i < model.getNrOfHiddenStates(); ++i)
+        ip.setQuick(i, ip.getQuick(i) / isum);
+    }
+  }
+
+  /**
+   * Method to reduce the size of an HMMmodel by converting the models
+   * DenseMatrix/DenseVectors to sparse implementations and setting every value
+   * < threshold to 0
+   *
+   * @param model model to truncate
+   * @param threshold minimum value a model entry must have to be retained.
+   * @return Truncated model
+   */
+  public static HmmModel truncateModel(HmmModel model, double threshold) {
+    Vector ip = model.getInitialProbabilities();
+    Matrix em = model.getEmissionMatrix();
+    Matrix tr = model.getTransitionMatrix();
+    // allocate the sparse data structures
+    RandomAccessSparseVector sparseIp = new RandomAccessSparseVector(model
+        .getNrOfHiddenStates());
+    SparseMatrix sparseEm = new SparseMatrix(new int[]{
+        model.getNrOfHiddenStates(), model.getNrOfOutputStates()});
+    SparseMatrix sparseTr = new SparseMatrix(new int[]{
+        model.getNrOfHiddenStates(), model.getNrOfHiddenStates()});
+    // now transfer the values
+    for (int i = 0; i < model.getNrOfHiddenStates(); ++i) {
+      double value = ip.getQuick(i);
+      if (value > threshold)
+        sparseIp.setQuick(i, value);
+      for (int j = 0; j < model.getNrOfHiddenStates(); ++j) {
+        value = tr.getQuick(i, j);
+        if (value > threshold)
+          sparseTr.setQuick(i, j, value);
+      }
+
+      for (int j = 0; j < model.getNrOfOutputStates(); ++j) {
+        value = em.getQuick(i, j);
+        if (value > threshold)
+          sparseEm.setQuick(i, j, value);
+      }
+    }
+    // create a new model
+    HmmModel sparseModel = new HmmModel(sparseTr, sparseEm, sparseIp);
+    // normalize the model
+    HmmUtils.normalizeModel(sparseModel);
+    // register the names
+    sparseModel.registerHiddenStateNames(model.getHiddenStateNames());
+    sparseModel.registerOutputStateNames(model.getOutputStateNames());
+    // and return
+    return sparseModel;
+  }
+}
\ No newline at end of file

Added: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sequencelearning/hmm/HMMAlgorithmsTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sequencelearning/hmm/HMMAlgorithmsTest.java?rev=1000807&view=auto
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sequencelearning/hmm/HMMAlgorithmsTest.java (added)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sequencelearning/hmm/HMMAlgorithmsTest.java Fri Sep 24 11:17:13 2010
@@ -0,0 +1,158 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.classifier.sequencelearning.hmm;
+
+import junit.framework.Assert;
+
+import org.apache.mahout.math.Matrix;
+import org.junit.Test;
+
+public class HMMAlgorithmsTest extends HMMTestBase {
+
+  /**
+   * Test the forward algorithm by comparing the alpha values with the values
+   * obtained from HMM R model. We test the test observation sequence "O1" "O0"
+   * "O2" "O2" "O0" "O0" "O1" by comparing the generated alpha values to the
+   * R-generated "reference".
+   */
+  @Test
+  public void testForwardAlgorithm() {
+    // intialize the expected alpha values
+    double alphaExpectedA[][] = {
+        {0.02, 0.0392, 0.002438, 0.00035456, 0.0011554672, 7.158497e-04,
+            4.614927e-05},
+        {0.01, 0.0054, 0.001824, 0.00069486, 0.0007586904, 2.514137e-04,
+            1.721505e-05},
+        {0.32, 0.0262, 0.002542, 0.00038026, 0.0001360234, 3.002345e-05,
+            9.659608e-05},
+        {0.03, 0.0000, 0.013428, 0.00951084, 0.0000000000, 0.000000e+00,
+            2.428986e-05},};
+    // fetch the alpha matrix using the forward algorithm
+    Matrix alpha = HmmAlgorithms.forwardAlgorithm(model, sequence, false);
+    // first do some basic checking
+    Assert.assertNotNull(alpha);
+    Assert.assertEquals(alpha.numCols(), 4);
+    Assert.assertEquals(alpha.numRows(), 7);
+    // now compare the resulting matrices
+    for (int i = 0; i < 4; ++i)
+      for (int j = 0; j < 7; ++j)
+        Assert.assertEquals(alphaExpectedA[i][j], alpha.get(j, i), 0.00001);
+  }
+
+  @Test
+  public void testLogScaledForwardAlgorithm() {
+    // intialize the expected alpha values
+    double alphaExpectedA[][] = {
+        {0.02, 0.0392, 0.002438, 0.00035456, 0.0011554672, 7.158497e-04,
+            4.614927e-05},
+        {0.01, 0.0054, 0.001824, 0.00069486, 0.0007586904, 2.514137e-04,
+            1.721505e-05},
+        {0.32, 0.0262, 0.002542, 0.00038026, 0.0001360234, 3.002345e-05,
+            9.659608e-05},
+        {0.03, 0.0000, 0.013428, 0.00951084, 0.0000000000, 0.000000e+00,
+            2.428986e-05},};
+    // fetch the alpha matrix using the forward algorithm
+    Matrix alpha = HmmAlgorithms.forwardAlgorithm(model, sequence, true);
+    // first do some basic checking
+    Assert.assertNotNull(alpha);
+    Assert.assertEquals(alpha.numCols(), 4);
+    Assert.assertEquals(alpha.numRows(), 7);
+    // now compare the resulting matrices
+    for (int i = 0; i < 4; ++i)
+      for (int j = 0; j < 7; ++j)
+        Assert.assertEquals(Math.log(alphaExpectedA[i][j]), alpha.get(j, i),
+            0.00001);
+  }
+
+  /**
+   * Test the backward algorithm by comparing the beta values with the values
+   * obtained from HMM R model. We test the following observation sequence "O1"
+   * "O0" "O2" "O2" "O0" "O0" "O1" by comparing the generated beta values to the
+   * R-generated "reference".
+   */
+  @Test
+  public void testBackwardAlgorithm() {
+    // intialize the expected beta values
+    double betaExpectedA[][] = {
+        {0.0015730559, 0.003543656, 0.00738264, 0.040692, 0.0848, 0.17, 1},
+        {0.0017191865, 0.002386795, 0.00923652, 0.052232, 0.1018, 0.17, 1},
+        {0.0003825772, 0.001238558, 0.00259464, 0.012096, 0.0664, 0.66, 1},
+        {0.0004390858, 0.007076994, 0.01063512, 0.013556, 0.0304, 0.17, 1}};
+    // fetch the beta matrix using the backward algorithm
+    Matrix beta = HmmAlgorithms.backwardAlgorithm(model, sequence, false);
+    // first do some basic checking
+    Assert.assertNotNull(beta);
+    Assert.assertEquals(beta.numCols(), 4);
+    Assert.assertEquals(beta.numRows(), 7);
+    // now compare the resulting matrices
+    for (int i = 0; i < 4; ++i)
+      for (int j = 0; j < 7; ++j)
+        Assert.assertEquals(betaExpectedA[i][j], beta.get(j, i), 0.00001);
+  }
+
+  @Test
+  public void testLogScaledBackwardAlgorithm() {
+    // intialize the expected beta values
+    double betaExpectedA[][] = {
+        {0.0015730559, 0.003543656, 0.00738264, 0.040692, 0.0848, 0.17, 1},
+        {0.0017191865, 0.002386795, 0.00923652, 0.052232, 0.1018, 0.17, 1},
+        {0.0003825772, 0.001238558, 0.00259464, 0.012096, 0.0664, 0.66, 1},
+        {0.0004390858, 0.007076994, 0.01063512, 0.013556, 0.0304, 0.17, 1}};
+    // fetch the beta matrix using the backward algorithm
+    Matrix beta = HmmAlgorithms.backwardAlgorithm(model, sequence, true);
+    // first do some basic checking
+    Assert.assertNotNull(beta);
+    Assert.assertEquals(beta.numCols(), 4);
+    Assert.assertEquals(beta.numRows(), 7);
+    // now compare the resulting matrices
+    for (int i = 0; i < 4; ++i)
+      for (int j = 0; j < 7; ++j)
+        Assert.assertEquals(Math.log(betaExpectedA[i][j]), beta.get(j, i),
+            0.00001);
+  }
+
+  @Test
+  public void testViterbiAlgorithm() {
+    // initialize the expected hidden sequence
+    int[] expected = {2, 0, 3, 3, 0, 0, 2};
+    // fetch the viterbi generated sequence
+    int[] computed = HmmAlgorithms.viterbiAlgorithm(model, sequence, false);
+    // first make sure we return the correct size
+    Assert.assertNotNull(computed);
+    Assert.assertEquals(computed.length, sequence.length);
+    // now check the contents
+    for (int i = 0; i < sequence.length; ++i)
+      Assert.assertEquals(expected[i], computed[i]);
+  }
+
+  @Test
+  public void testLogScaledViterbiAlgorithm() {
+    // initialize the expected hidden sequence
+    int[] expected = {2, 0, 3, 3, 0, 0, 2};
+    // fetch the viterbi generated sequence
+    int[] computed = HmmAlgorithms.viterbiAlgorithm(model, sequence, true);
+    // first make sure we return the correct size
+    Assert.assertNotNull(computed);
+    Assert.assertEquals(computed.length, sequence.length);
+    // now check the contents
+    for (int i = 0; i < sequence.length; ++i)
+      Assert.assertEquals(expected[i], computed[i]);
+
+  }
+
+}

Added: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sequencelearning/hmm/HMMEvaluatorTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sequencelearning/hmm/HMMEvaluatorTest.java?rev=1000807&view=auto
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sequencelearning/hmm/HMMEvaluatorTest.java (added)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sequencelearning/hmm/HMMEvaluatorTest.java Fri Sep 24 11:17:13 2010
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.classifier.sequencelearning.hmm;
+
+import junit.framework.Assert;
+
+import org.apache.mahout.math.Matrix;
+import org.junit.Test;
+
+public class HMMEvaluatorTest extends HMMTestBase {
+
+  /**
+   * Test to make sure the computed model likelihood ist valid. Included tests
+   * are: a) forwad == backward likelihood b) model likelihood for test seqeunce
+   * is the expected one from R reference
+   */
+  @Test
+  public void testModelLikelihood() {
+    // compute alpha and beta values
+    Matrix alpha = HmmAlgorithms.forwardAlgorithm(model, sequence, false);
+    Matrix beta = HmmAlgorithms.backwardAlgorithm(model, sequence, false);
+    // now test whether forward == backward likelihood
+    double forwardLikelihood = HmmEvaluator.modelLikelihood(alpha, false);
+    double backwardLikelihood = HmmEvaluator.modelLikelihood(model, sequence,
+        beta, false);
+    Assert.assertEquals(forwardLikelihood, backwardLikelihood, 1e-6);
+    // also make sure that the likelihood matches the expected one
+    Assert.assertEquals(forwardLikelihood, 1.8425e-4, 1e-6);
+  }
+
+  /**
+   * Test to make sure the computed model likelihood ist valid. Included tests
+   * are: a) forwad == backward likelihood b) model likelihood for test seqeunce
+   * is the expected one from R reference
+   */
+  @Test
+  public void testScaledModelLikelihood() {
+    // compute alpha and beta values
+    Matrix alpha = HmmAlgorithms.forwardAlgorithm(model, sequence, true);
+    Matrix beta = HmmAlgorithms.backwardAlgorithm(model, sequence, true);
+    // now test whether forward == backward likelihood
+    double forwardLikelihood = HmmEvaluator.modelLikelihood(alpha, true);
+    double backwardLikelihood = HmmEvaluator.modelLikelihood(model, sequence,
+        beta, true);
+    Assert.assertEquals(forwardLikelihood, backwardLikelihood, 1e-6);
+    // also make sure that the likelihood matches the expected one
+    Assert.assertEquals(forwardLikelihood, 1.8425e-4, 1e-6);
+  }
+
+}



Mime
View raw message