mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From apalu...@apache.org
Subject [2/2] git commit: MAHOUT-1500: H2O Integration (Anand Avati via apalumbo) closes apache/mahout#21
Date Wed, 27 Aug 2014 16:26:18 GMT
MAHOUT-1500: H2O Integration (Anand Avati via apalumbo) closes apache/mahout#21


Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/f870a630
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/f870a630
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/f870a630

Branch: refs/heads/master
Commit: f870a630291bd9d623b32c21f087ba19e69eb1fc
Parents: 74c1074
Author: Andrew Palumbo <ap.dev@outlook.com>
Authored: Wed Aug 27 12:14:37 2014 -0400
Committer: Andrew Palumbo <ap.dev@outlook.com>
Committed: Wed Aug 27 12:14:37 2014 -0400

----------------------------------------------------------------------
 CHANGELOG                                       |   2 +
 bin/mahout                                      |  18 ++
 h2o/README.md                                   |  67 ++++
 h2o/pom.xml                                     | 252 +++++++++++++++
 .../mahout/h2obindings/H2OBlockMatrix.java      | 108 +++++++
 .../apache/mahout/h2obindings/H2OContext.java   |  35 ++
 .../org/apache/mahout/h2obindings/H2OHdfs.java  | 227 +++++++++++++
 .../apache/mahout/h2obindings/H2OHelper.java    | 316 +++++++++++++++++++
 .../apache/mahout/h2obindings/drm/H2OBCast.java |  94 ++++++
 .../apache/mahout/h2obindings/drm/H2ODrm.java   |  36 +++
 .../org/apache/mahout/h2obindings/ops/ABt.java  |  63 ++++
 .../org/apache/mahout/h2obindings/ops/AewB.java |  75 +++++
 .../mahout/h2obindings/ops/AewScalar.java       |  70 ++++
 .../org/apache/mahout/h2obindings/ops/At.java   |  58 ++++
 .../org/apache/mahout/h2obindings/ops/AtA.java  |  63 ++++
 .../org/apache/mahout/h2obindings/ops/AtB.java  |  66 ++++
 .../org/apache/mahout/h2obindings/ops/Atx.java  |  76 +++++
 .../org/apache/mahout/h2obindings/ops/Ax.java   |  61 ++++
 .../apache/mahout/h2obindings/ops/Cbind.java    |  98 ++++++
 .../apache/mahout/h2obindings/ops/MapBlock.java | 105 ++++++
 .../org/apache/mahout/h2obindings/ops/Par.java  |  84 +++++
 .../apache/mahout/h2obindings/ops/Rbind.java    |  82 +++++
 .../apache/mahout/h2obindings/ops/RowRange.java |  88 ++++++
 .../h2obindings/ops/TimesRightMatrix.java       |  97 ++++++
 .../h2obindings/H2ODistributedContext.scala     |  28 ++
 .../apache/mahout/h2obindings/H2OEngine.scala   |  94 ++++++
 .../h2obindings/drm/CheckpointedDrmH2O.scala    |  32 ++
 .../mahout/h2obindings/ops/MapBlockHelper.scala |  62 ++++
 .../org/apache/mahout/h2obindings/package.scala |  24 ++
 .../h2obindings/drm/DrmLikeOpsSuite.scala       |  30 ++
 .../mahout/h2obindings/drm/DrmLikeSuite.scala   |  29 ++
 .../h2obindings/drm/RLikeDrmOpsSuite.scala      |  27 ++
 .../mahout/h2obindings/ops/ABtSuite.scala       |  47 +++
 .../mahout/h2obindings/ops/AewBSuite.scala      |  85 +++++
 .../mahout/h2obindings/ops/AtASuite.scala       |  45 +++
 .../apache/mahout/h2obindings/ops/AtSuite.scala |  43 +++
 .../h2obindings/test/DistributedH2OSuite.scala  |  46 +++
 .../h2obindings/test/LoggerConfiguration.scala  |  13 +
 .../DistributedDecompositionsSuite.scala        |  31 ++
 pom.xml                                         |   2 +
 40 files changed, 2879 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index a0d8507..47518b4 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,8 @@ Mahout Change Log
 
 Release 1.0 - unreleased
 
+  MAHOUT-1500: H2O Integration (Anand Avati via apalumbo)
+
   MAHOUT-1606 - Add rowSums, rowMeans and diagonal extraction operations to distributed matrices (dlyubimov)
 
   MAHOUT-1603: Tweaks for Spark 1.0.x (dlyubimov & pferrel)

http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/bin/mahout
----------------------------------------------------------------------
diff --git a/bin/mahout b/bin/mahout
index d0623f1..27acd9f 100755
--- a/bin/mahout
+++ b/bin/mahout
@@ -92,6 +92,10 @@ if [ "$MAHOUT_CORE" != "" ]; then
   IS_CORE=1
 fi
 
+if [ "$1" == "h2o-node" ]; then
+  H2O=1
+fi
+
 # some directories
 THIS_DIR=`dirname "$THIS"`
 MAHOUT_HOME=`cd "$THIS_DIR/.." ; pwd`
@@ -165,6 +169,15 @@ then
     CLASSPATH=${CLASSPATH}:$f;
   done
 
+  if [ "$H2O" == "1" ]; then
+    for f in $MAHOUT_HOME/mrlegacy/target/mahout-mrlegacy-*.jar; do
+       CLASSPATH=${CLASSPATH}:$f;
+    done
+
+    for f in $MAHOUT_HOME/h2o/target/mahout-h2o-*.jar; do
+       CLASSPATH=${CLASSPATH}:$f;
+    done
+  fi
 
   # add spark-shell -- if we requested shell or other spark CLI driver
   if [ "$SPARK" == "1" ]; then
@@ -209,6 +222,7 @@ else
   #CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/mrlegacy/src/main/resources
   CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/spark/target/classes
   CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/spark-shell/target/classes
+  CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/h2o/target/classes
 fi
 
 # add development dependencies to CLASSPATH
@@ -240,6 +254,10 @@ case "$1" in
     shift
     "$JAVA" $JAVA_HEAP_MAX -classpath "$CLASSPATH" "org.apache.mahout.drivers.ItemSimilarityDriver" "$@"
     ;;
+  (h2o-node)
+    shift
+    "$JAVA" $JAVA_HEAP_MAX -classpath "$CLASSPATH" "water.H2O" -md5skip "$@" -name mah2out
+    ;;
   (*)
 
     # default log directory & file

http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/README.md
----------------------------------------------------------------------
diff --git a/h2o/README.md b/h2o/README.md
new file mode 100644
index 0000000..0aa5ebd
--- /dev/null
+++ b/h2o/README.md
@@ -0,0 +1,67 @@
+# Introduction
+
+This document demonstrates the integration between Mahout (http://mahout.apache.org) and H2O (http://www.h2o.ai). The integration provides an H2O backend to the Mahout algebra DSL (similar to the Spark backend).
+
+## Setup
+
+Since the integration is still in the early stages, the demonstration will be using git and source (rather than a pre-built binary distribution.)
+
+    sh:~$ git clone git://github.com/apache/mahout
+    sh:~$ cd mahout
+    sh:~/mahout$ git checkout -b MAHOUT-1500
+    sh:~/mahout$ git pull git://github.com/avati/mahout MAHOUT-1500
+    sh:~/mahout$ mvn -DskipTests install package
+
+The last step (mvn package) is necessary only because we are working off the source repository and do not yet use binary distributions of either Mahout or H2O.
+
+The integration depends on the h2o-core Maven artifact. This can either be fetched automatically through Sonatype, or can be installed locally from source (run 'gradle install -x test' in http://github.com/0xdata/h2o-dev).
+
+## Test
+
+The integration with H2O can be used in either a local mode (single node) or a clustered mode.
+
+### Simple (single node/local) test
+
+Testing in local mode is pretty straightforward. Just run 'mvn test' as shown below.
+
+    sh:~/mahout$ cd h2o
+    sh:~/mahout/h2o$ mvn test
+    ...
+    ...
+    All tests passed.
+    ...
+    sh:~/mahout/h2o$
+
+### Distributed test
+
+H2O is fundamentally a peer-to-peer system. H2O nodes join together to form a cloud on which high performance distributed math can be executed. Each node joins a cloud of a given name. Multiple clouds can exist on the same network at the same time as long as their names are different. Multiple nodes can exist on the same server as well (even belonging to the same cloud.)
+
+The Mahout H2O integration is fit into this model by having N-1 "worker" nodes and one driver node, all belonging to the same cloud name. The default cloud name used for the integration is "mah2out". Clouds have to be spun up per task/job.
+
+**WARNING**: Some Linux systems have default firewall rules which might block traffic required for the following tests. In order to successfully run the tests you might need to temporarily turn off firewall rules with `sh# iptables -F`
+
+First bring up worker nodes:
+
+    host-1:~/mahout$ ./bin/mahout h2o-node
+    ...
+    .. INFO: Cloud of size 1 formed [/W.X.Y.Z:54321]
+
+Similarly,
+
+    host-2:~/mahout$ ./bin/mahout h2o-node
+    ...
+    .. INFO: Cloud of size 2 formed [/A.B.C.D:54322]
+
+... and so on. For the purpose of testing, multiple (even all) instances can be run on the same system too.
+
+The nodes discover each other over a multicast channel and establish consensus with Paxos. Next, start the driver just like running in local mode.
+
+    host-N:~/mahout/h2o$ mvn test
+    ...
+    .. INFO: Cloud of size 3 formed [/E.F.G.H:54323]
+    ...
+    All tests passed.
+    ...
+    host-N:~/mahout/h2o$
+
+The workers have to be restarted when the driver node terminates (automating this is a future task).
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/pom.xml
----------------------------------------------------------------------
diff --git a/h2o/pom.xml b/h2o/pom.xml
new file mode 100644
index 0000000..1f6791a
--- /dev/null
+++ b/h2o/pom.xml
@@ -0,0 +1,252 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.mahout</groupId>
+    <artifactId>mahout</artifactId>
+    <version>1.0-SNAPSHOT</version>
+    <relativePath>../pom.xml</relativePath>
+  </parent>
+
+  <artifactId>mahout-h2o</artifactId>
+  <name>Mahout H2O backend</name>
+  <description>
+    H2O Backend for Mahout DSL
+  </description>
+
+  <packaging>jar</packaging>
+
+  <repositories>
+    <repository>
+      <id>oss.sonatype.org-releases</id>
+      <url>http://oss.sonatype.org/content/repositories/releases</url>
+      <releases>
+        <enabled>true</enabled>
+      </releases>
+      <snapshots>
+        <enabled>true</enabled>
+      </snapshots>
+    </repository>
+  </repositories>
+
+  <!-- this is needed for scalatest plugin until they publish it to central -->
+  <pluginRepositories>
+    <pluginRepository>
+      <id>sonatype</id>
+      <url>https://oss.sonatype.org/content/groups/public</url>
+      <releases>
+        <enabled>true</enabled>
+      </releases>
+    </pluginRepository>
+  </pluginRepositories>
+
+  <build>
+    <defaultGoal>install</defaultGoal>
+
+    <plugins>
+
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>build-helper-maven-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>add-source</id>
+            <phase>generate-sources</phase>
+            <goals>
+              <goal>add-source</goal>
+            </goals>
+            <configuration>
+              <sources>
+                <source>${project.build.directory}/generated-sources/mahout</source>
+              </sources>
+            </configuration>
+          </execution>
+          <execution>
+            <id>add-test-source</id>
+            <phase>generate-sources</phase>
+            <goals>
+              <goal>add-test-source</goal>
+            </goals>
+            <configuration>
+              <sources>
+                <source>${project.build.directory}/generated-test-sources/mahout</source>
+              </sources>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+
+      <!-- create test jar so other modules can reuse this module's test utility classes. -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <executions>
+          <execution>
+            <goals>
+              <goal>test-jar</goal>
+            </goals>
+            <phase>package</phase>
+          </execution>
+        </executions>
+      </plugin>
+
+      <plugin>
+	<artifactId>maven-assembly-plugin</artifactId>
+	<configuration>
+          <descriptorRefs>
+            <descriptorRef>jar-with-dependencies</descriptorRef>
+          </descriptorRefs>
+          <archive>
+            <manifest>
+              <mainClass>water.H2O</mainClass>
+            </manifest>
+          </archive>
+	</configuration>
+	<executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>single</goal>
+            </goals>
+          </execution>
+	</executions>
+      </plugin>
+
+      <plugin>
+        <artifactId>maven-javadoc-plugin</artifactId>
+      </plugin>
+
+      <plugin>
+        <artifactId>maven-source-plugin</artifactId>
+      </plugin>
+
+      <plugin>
+        <groupId>org.scala-tools</groupId>
+        <artifactId>maven-scala-plugin</artifactId>
+        <executions>
+	  <execution>
+	    <id>scala-compile-first</id>
+	    <phase>process-resources</phase>
+	    <goals>
+	      <goal>add-source</goal>
+	      <goal>compile</goal>
+	    </goals>
+	  </execution>
+          <execution>
+            <goals>
+              <goal>compile</goal>
+              <goal>testCompile</goal>
+            </goals>
+          </execution>
+        </executions>
+        <configuration>
+          <sourceDir>src/main/scala</sourceDir>
+          <jvmArgs>
+            <jvmArg>-Xms64m</jvmArg>
+            <jvmArg>-Xmx1024m</jvmArg>
+          </jvmArgs>
+        </configuration>
+      </plugin>
+
+      <!--this is what scalatest recommends to do to enable scala tests -->
+
+      <!-- disable surefire -->
+      <!--<plugin>-->
+      <!--<groupId>org.apache.maven.plugins</groupId>-->
+      <!--<artifactId>maven-surefire-plugin</artifactId>-->
+      <!--<version>2.7</version>-->
+      <!--<configuration>-->
+      <!--<skipTests>true</skipTests>-->
+      <!--</configuration>-->
+      <!--</plugin>-->
+      <!-- enable scalatest -->
+      <plugin>
+        <groupId>org.scalatest</groupId>
+        <artifactId>scalatest-maven-plugin</artifactId>
+        <version>1.0-M2</version>
+        <configuration>
+          <reportsDirectory>${project.build.directory}/scalatest-reports</reportsDirectory>
+          <junitxml>.</junitxml>
+          <filereports>WDF TestSuite.txt</filereports>
+        </configuration>
+        <executions>
+          <execution>
+            <id>test</id>
+            <goals>
+              <goal>test</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+
+    </plugins>
+  </build>
+
+  <dependencies>
+
+    <dependency>
+      <groupId>org.apache.mahout</groupId>
+      <artifactId>mahout-math-scala_2.10</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+
+    <dependency>
+      <!-- for MatrixWritable and VectorWritable -->
+      <groupId>org.apache.mahout</groupId>
+      <artifactId>mahout-mrlegacy</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+
+   <dependency>
+      <groupId>org.apache.mahout</groupId>
+      <artifactId>mahout-math-scala_2.10</artifactId>
+      <classifier>tests</classifier>
+      <scope>test</scope>
+   </dependency>
+
+    <dependency>
+      <groupId>org.apache.mahout</groupId>
+      <artifactId>mahout-math</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+
+    <!--  3rd-party -->
+
+    <!-- H2O -->
+
+    <dependency>
+      <groupId>ai.h2o</groupId>
+      <artifactId>h2o-core</artifactId>
+      <version>${h2o.version}</version>
+    </dependency>
+
+    <!-- scala stuff -->
+    <dependency>
+      <groupId>org.scalatest</groupId>
+      <artifactId>scalatest_2.10</artifactId>
+      <version>2.0</version>
+      <scope>test</scope>
+    </dependency>
+
+  </dependencies>
+</project>

http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/java/org/apache/mahout/h2obindings/H2OBlockMatrix.java
----------------------------------------------------------------------
diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/H2OBlockMatrix.java b/h2o/src/main/java/org/apache/mahout/h2obindings/H2OBlockMatrix.java
new file mode 100644
index 0000000..27c1d58
--- /dev/null
+++ b/h2o/src/main/java/org/apache/mahout/h2obindings/H2OBlockMatrix.java
@@ -0,0 +1,108 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.mahout.h2obindings;
+
+import org.apache.mahout.math.Matrix;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.AbstractMatrix;
+import org.apache.mahout.math.DenseMatrix;
+import org.apache.mahout.math.SparseMatrix;
+
+import water.fvec.Chunk;
+
+/*
+ * A Matrix implementation to represent a vertical Block of DRM.
+ *
+ * Creation of the matrix is an O(1) operation with negligible
+ * overhead, and will remain so as long as the matrix is only
+ * read from (no modifications).
+ *
+ * On the first modification, create a copy on write Matrix and
+ * all further operations happen on this cow matrix.
+ *
+ * The benefit is, mapBlock() closures which never modify the
+ * input matrix save on the copy overhead.
+ */
+public class H2OBlockMatrix extends AbstractMatrix {
+  Chunk _chks[];
+  Matrix cow; /* Copy on Write */
+
+  public H2OBlockMatrix(Chunk chks[]) {
+    super(chks[0].len(), chks.length);
+    _chks = chks;
+  }
+
+  private void cow() {
+    if (cow != null) {
+      return;
+    }
+
+    if (_chks[0].isSparse()) {
+      cow = new SparseMatrix(_chks[0].len(), _chks.length);
+    } else {
+      cow = new DenseMatrix(_chks[0].len(), _chks.length);
+    }
+
+    for (int c = 0; c < _chks.length; c++) {
+      for (int r = 0; r < _chks[0].len(); r++) {
+        cow.setQuick(r, c, _chks[c].at0(r));
+      }
+    }
+  }
+
+  public void setQuick(int row, int col, double val) {
+    cow();
+    cow.setQuick(row, col, val);
+  }
+
+  public Matrix like(int nrow, int ncol) {
+    if (_chks[0].isSparse()) {
+      return new SparseMatrix(nrow, ncol);
+    } else {
+      return new DenseMatrix(nrow, ncol);
+    }
+  }
+
+  public Matrix like() {
+    if (_chks[0].isSparse()) {
+      return new SparseMatrix(rowSize(), columnSize());
+    } else {
+      return new DenseMatrix(rowSize(), columnSize());
+    }
+  }
+
+  public double getQuick(int row, int col) {
+    if (cow != null) {
+      return cow.getQuick(row, col);
+    } else {
+      return _chks[col].at0(row);
+    }
+  }
+
+  public Matrix assignRow(int row, Vector v) {
+    cow();
+    cow.assignRow(row, v);
+    return cow;
+  }
+
+  public Matrix assignColumn(int col, Vector v) {
+    cow();
+    cow.assignColumn(col, v);
+    return cow;
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/java/org/apache/mahout/h2obindings/H2OContext.java
----------------------------------------------------------------------
diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/H2OContext.java b/h2o/src/main/java/org/apache/mahout/h2obindings/H2OContext.java
new file mode 100644
index 0000000..1307ef8
--- /dev/null
+++ b/h2o/src/main/java/org/apache/mahout/h2obindings/H2OContext.java
@@ -0,0 +1,35 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.mahout.h2obindings;
+
+import water.H2O;
+
+public class H2OContext {
+  String masterURL;
+
+  /* @masterURL should actually be the cloud name (name of cluster) to which
+     all the H2O worker nodes "join into". This is not a hostname or IP address
+     of a server, but a string which all cluster members agree on.
+  */
+  public H2OContext(String _masterURL) {
+    masterURL = _masterURL;
+
+    H2O.main(new String[]{"-md5skip", "-name", _masterURL});
+    H2O.joinOthers();
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHdfs.java
----------------------------------------------------------------------
diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHdfs.java b/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHdfs.java
new file mode 100644
index 0000000..d33cb4c
--- /dev/null
+++ b/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHdfs.java
@@ -0,0 +1,227 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.mahout.h2obindings;
+
+import java.io.IOException;
+import java.io.File;
+import java.net.URI;
+
+import water.fvec.Frame;
+import water.fvec.Vec;
+import water.Futures;
+import water.parser.ValueString;
+import water.util.FrameUtils;
+
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.SequentialAccessSparseVector;
+import org.apache.mahout.math.VectorWritable;
+import org.apache.mahout.h2obindings.drm.H2ODrm;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.util.ReflectionUtils;
+
+
+
+public class H2OHdfs {
+  public static boolean is_seqfile(String filename) {
+    try {
+      String uri = filename;
+      Configuration conf = new Configuration();
+      Path path = new Path(uri);
+      FileSystem fs = FileSystem.get(URI.create(uri), conf);
+      FSDataInputStream fin = fs.open(path);
+      byte seq[] = new byte[3];
+
+      fin.read(seq);
+      fin.close();
+
+      if (seq[0] == 'S' && seq[1] == 'E' && seq[2] == 'Q') {
+        return true;
+      } else {
+        return false;
+      }
+    } catch (java.io.IOException e) {
+      return false;
+    }
+  }
+
+  public static H2ODrm drm_from_file(String filename, int parMin) {
+    try {
+      if (is_seqfile(filename)) {
+        return drm_from_seqfile(filename, parMin);
+      } else {
+        return new H2ODrm(FrameUtils.parseFrame(null,new File(filename)));
+      }
+    } catch (java.io.IOException e) {
+      return null;
+    }
+  }
+
+  public static H2ODrm drm_from_seqfile(String filename, int parMin) {
+    long rows = 0;
+    int cols = 0;
+    Frame frame = null;
+    Vec labels = null;
+
+    SequenceFile.Reader reader = null;
+    try {
+      String uri = filename;
+      Configuration conf = new Configuration();
+      Path path = new Path(uri);
+      FileSystem fs = FileSystem.get(URI.create(uri), conf);
+      Vec.Writer writers[];
+      Vec.Writer labelwriter = null;
+      boolean is_int_key = false, is_long_key = false, is_string_key = false;
+
+      reader = new SequenceFile.Reader(fs, path, conf);
+
+      if (reader.getValueClass() != VectorWritable.class) {
+        System.out.println("ValueClass in file " + filename +
+                           "must be VectorWritable, but found " +
+                           reader.getValueClassName());
+        return null;
+      }
+
+      Writable key = (Writable)
+        ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+      VectorWritable value = (VectorWritable)
+        ReflectionUtils.newInstance(reader.getValueClass(), conf);
+
+      long start = reader.getPosition();
+
+      if (reader.getKeyClass() == Text.class) {
+        is_string_key = true;
+      } else if (reader.getKeyClass() == LongWritable.class) {
+        is_long_key = true;
+      } else {
+        is_int_key = true;
+      }
+
+      while (reader.next(key, value)) {
+        if (cols == 0) {
+          Vector v = value.get();
+          cols = Math.max(v.size(), cols);
+        }
+        if (is_long_key) {
+          rows = Math.max(((LongWritable)(key)).get()+1, rows);
+        }
+        if (is_int_key) {
+          rows = Math.max(((IntWritable)(key)).get()+1, rows);
+        }
+        if (is_string_key) {
+          rows++;
+        }
+      }
+      reader.seek(start);
+
+      frame = H2OHelper.empty_frame(rows, cols, parMin, -1);
+      writers = new Vec.Writer[cols];
+      for (int i = 0; i < writers.length; i++) {
+        writers[i] = frame.vecs()[i].open();
+      }
+
+      if (reader.getKeyClass() == Text.class) {
+        labels = frame.anyVec().makeZero();
+        labelwriter = labels.open();
+      }
+
+      long r = 0;
+      while (reader.next(key, value)) {
+        Vector v = value.get();
+        if (is_long_key) {
+          r = ((LongWritable)(key)).get();
+        }
+        if (is_int_key) {
+          r = ((IntWritable)(key)).get();
+        }
+        for (int c = 0; c < v.size(); c++) {
+          writers[c].set(r, v.getQuick(c));
+        }
+        if (labels != null) {
+          labelwriter.set(r, ((Text)key).toString());
+        }
+        if (is_string_key) {
+          r++;
+        }
+      }
+
+      Futures fus = new Futures();
+      for (Vec.Writer w : writers) {
+        w.close(fus);
+      }
+      if (labelwriter != null) {
+        labelwriter.close(fus);
+      }
+      fus.blockForPending();
+    } catch (java.io.IOException e) {
+      return null;
+    } finally {
+      IOUtils.closeStream(reader);
+    }
+    return new H2ODrm(frame, labels);
+  }
+
+  public static void drm_to_file(String filename, H2ODrm Drm) throws java.io.IOException {
+    Frame frame = Drm.frame;
+    Vec labels = Drm.keys;
+    String uri = filename;
+    Configuration conf = new Configuration();
+    Path path = new Path(uri);
+    FileSystem fs = FileSystem.get(URI.create(uri), conf);
+    SequenceFile.Writer writer = null;
+    boolean is_sparse = H2OHelper.is_sparse(frame);
+    ValueString vstr = new ValueString();
+
+    if (labels != null) {
+      writer = SequenceFile.createWriter(fs, conf, path, Text.class, VectorWritable.class);
+    } else {
+      writer = SequenceFile.createWriter(fs, conf, path, IntWritable.class, VectorWritable.class);
+    }
+
+    for (long r = 0; r < frame.anyVec().length(); r++) {
+      Vector v = null;
+      if (is_sparse) {
+        v = new SequentialAccessSparseVector(frame.numCols());
+      } else {
+        v = new DenseVector(frame.numCols());
+      }
+
+      for (int c = 0; c < frame.numCols(); c++) {
+        v.setQuick(c, frame.vecs()[c].at(r));
+      }
+
+      if (labels != null) {
+        writer.append(new Text(labels.atStr(vstr, r).toString()), new VectorWritable(v));
+      } else {
+        writer.append(new IntWritable((int)r), new VectorWritable(v));
+      }
+    }
+
+    writer.close();
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHelper.java
----------------------------------------------------------------------
diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHelper.java b/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHelper.java
new file mode 100644
index 0000000..1b817fc
--- /dev/null
+++ b/h2o/src/main/java/org/apache/mahout/h2obindings/H2OHelper.java
@@ -0,0 +1,316 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.mahout.h2obindings;
+
+import org.apache.mahout.math.Matrix;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.DenseMatrix;
+import org.apache.mahout.math.SparseMatrix;
+import org.apache.mahout.math.DenseVector;
+
+import water.MRTask;
+import water.Futures;
+import water.fvec.Frame;
+import water.fvec.Vec;
+import water.fvec.Chunk;
+import water.parser.ValueString;
+import water.util.ArrayUtils;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.Arrays;
+
+import org.apache.mahout.h2obindings.drm.H2ODrm;
+
+public class H2OHelper {
+
+  /*
+    Decide whether a Frame should be treated as sparse.
+
+    The sparseLen() of every chunk is summed over the whole Frame with an
+    MRTask; the Frame is treated as sparse when
+    (rows * cols) / (sum + 1) is greater than 32.
+  */
+  public static boolean is_sparse(Frame frame) {
+    /* MRTask adding up sparseLen() of every chunk it visits */
+    class MRTaskNZ extends MRTask<MRTaskNZ> {
+      long sparselen;
+      public void map(Chunk chks[]) {
+        for (int i = 0; i < chks.length; i++) {
+          sparselen += chks[i].sparseLen();
+        }
+      }
+      public void reduce(MRTaskNZ other) {
+        sparselen += other.sparselen;
+      }
+    }
+
+    long cells = frame.numRows() * frame.numCols();
+    long stored = new MRTaskNZ().doAll(frame).sparselen;
+
+    /* The + 1 keeps the divisor non-zero when the summed length is 0 */
+    return (cells / (stored + 1)) > 32;
+  }
+
+  /*
+    Build an in-core Matrix from a DRM.
+
+    The result is a SparseMatrix when is_sparse(frame) holds and a
+    DenseMatrix otherwise. Only cells that are neither NA nor 0.0 are
+    written into the Matrix. When the DRM carries string keys, they are
+    installed as the row label bindings of the Matrix.
+  */
+  public static Matrix matrix_from_drm(H2ODrm drm) {
+    Frame frame = drm.frame;
+    Vec labels = drm.keys;
+    Matrix m = is_sparse(frame)
+      ? new SparseMatrix((int)frame.numRows(), frame.numCols())
+      : new DenseMatrix((int)frame.numRows(), frame.numCols());
+
+    /* Fill the matrix one column (Vec) at a time */
+    Vec vecs[] = frame.vecs();
+    for (int col = 0; col < vecs.length; col++) {
+      Vec v = vecs[col];
+      for (int row = 0; row < frame.numRows(); row++) {
+        if (v.isNA(row)) {
+          continue;
+        }
+        double d = v.at(row);
+        if (d != 0.0) {
+          m.setQuick(row, col, d);
+        }
+      }
+    }
+
+    /* If string keyed, set the strings as row labels */
+    if (labels != null) {
+      Map<String,Integer> bindings = new HashMap<String,Integer>();
+      ValueString vstr = new ValueString();
+      for (long i = 0; i < labels.length(); i++) {
+        String label = labels.atStr(vstr, i).toString();
+        bindings.put(label, (int)i);
+      }
+      m.setRowLabelBindings(bindings);
+    }
+    return m;
+  }
+
+  /* Return the mean of every column as a Vector.
+
+     Each column of the Frame is a Vec; the value used is whatever
+     Vec.mean() reports for that column.
+  */
+  public static Vector colMeans(Frame frame) {
+    double means[] = new double[frame.numCols()];
+    Vec vecs[] = frame.vecs();
+    for (int col = 0; col < means.length; col++) {
+      means[col] = vecs[col].mean();
+    }
+    return new DenseVector(means);
+  }
+
+  /* Calculate Sum of all elements in a column, and
+     return as a Vector
+
+     Run an MRTask Job to add up sums in @_sums
+
+     WARNING: Vulnerable to overflow. No way around it.
+  */
+  public static Vector colSums(Frame frame) {
+    class MRTaskSum extends MRTask<MRTaskSum> {
+      public double sums[];
+      public void map(Chunk chks[]) {
+        sums = new double[chks.length];
+
+        for (int c = 0; c < chks.length; c++) {
+          for (int r = 0; r < chks[c].len(); r++) {
+            sums[c] += chks[c].at0(r);
+          }
+        }
+      }
+      public void reduce(MRTaskSum other) {
+        ArrayUtils.add(sums, other.sums);
+      }
+    }
+    return new DenseVector(new MRTaskSum().doAll(frame).sums);
+  }
+
+
+  /* Calculate Sum of squares of all elements in the Matrix
+
+     WARNING: Vulnerable to overflow. No way around it.
+  */
+  public static double sumSqr(Frame frame) {
+    class MRTaskSumSqr extends MRTask<MRTaskSumSqr> {
+      public double sumSqr;
+      public void map(Chunk chks[]) {
+        for (int c = 0; c < chks.length; c++) {
+          for (int r = 0; r < chks[c].len(); r++) {
+            sumSqr += (chks[c].at0(r) * chks[c].at0(r));
+          }
+        }
+      }
+      public void reduce(MRTaskSumSqr other) {
+        sumSqr += other.sumSqr;
+      }
+    }
+    return new MRTaskSumSqr().doAll(frame).sumSqr;
+  }
+
+  /* Calculate Sum of all elements in a column, and
+     return as a Vector
+
+     Run an MRTask Job to add up sums in @_sums
+
+     WARNING: Vulnerable to overflow. No way around it.
+  */
+  public static Vector nonZeroCnt(Frame frame) {
+    class MRTaskNonZero extends MRTask<MRTaskNonZero> {
+      public double sums[];
+      public void map(Chunk chks[]) {
+        sums = new double[chks.length];
+
+        for (int c = 0; c < chks.length; c++) {
+          for (int r = 0; r < chks[c].len(); r++) {
+            if ((long)chks[c].at0(r) != 0) {
+              sums[c] ++;
+            }
+          }
+        }
+      }
+      public void reduce(MRTaskNonZero other) {
+        ArrayUtils.add(sums, other.sums);
+      }
+    }
+    return new DenseVector(new MRTaskNonZero().doAll(frame).sums);
+  }
+
+  /* Invert a String->Integer map into an Integer->String map.
+     A null input yields null. */
+  private static Map<Integer,String> reverse_map(Map<String,Integer> map) {
+    Map<Integer,String> inverted = null;
+
+    if (map != null) {
+      inverted = new HashMap<Integer,String>();
+      for (Map.Entry<String,Integer> binding : map.entrySet()) {
+        String label = binding.getKey();
+        Integer index = binding.getValue();
+        inverted.put(index, label);
+      }
+    }
+
+    return inverted;
+  }
+
+  /* Pick the number of rows per chunk for a Vec of nrow rows.
+
+     The partition count hint is max(min, exact), or 4 when neither is
+     positive. With exact > 0 the resulting size is returned as is;
+     otherwise it is capped at 1000000, and when min is not positive
+     either it is also raised to at least 1000. ncol is not used.
+  */
+  private static int chunk_size(long nrow, int ncol, int min, int exact) {
+    int parts = Math.max(min, exact);
+    if (parts < 1) {
+      /* XXX: calculate based on cloud size and # of cpu */
+      parts = 4;
+    }
+
+    /* Rows per chunk, rounded up */
+    int size = (int)(((nrow - 1) / parts) + 1);
+    if (exact > 0) {
+      return size;
+    }
+
+    size = Math.min(size, 1000000);
+    if (min > 0) {
+      return size;
+    }
+
+    return Math.max(size, 1000);
+  }
+
+  /* Ingest a Matrix into an H2O Frame. H2O Frame is the "backing"
+     data structure behind CheckpointedDrm. Steps:
+  */
+  public static H2ODrm drm_from_matrix(Matrix m, int min_hint, int exact_hint) {
+    /* First create an empty (0-filled) frame of the required dimensions */
+    Frame frame = empty_frame(m.rowSize(), m.columnSize(), min_hint, exact_hint);
+    Vec labels = null;
+    Vec.Writer writers[] = new Vec.Writer[m.columnSize()];
+    Futures closer = new Futures();
+
+    /* "open" vectors for writing efficiently in bulk */
+    for (int i = 0; i < writers.length; i++) {
+      writers[i] = frame.vecs()[i].open();
+    }
+
+    for (int r = 0; r < m.rowSize(); r++) {
+      for (int c = 0; c < m.columnSize(); c++) {
+        writers[c].set(r, m.getQuick(r, c));
+      }
+    }
+
+    for (int c = 0; c < m.columnSize(); c++) {
+      writers[c].close(closer);
+    }
+
+    /* If string labeled matrix, create aux Vec */
+    Map<String,Integer> map = m.getRowLabelBindings();
+    if (map != null) {
+      /* label vector must be similarly partitioned like the Frame */
+      labels = frame.anyVec().makeZero();
+      Vec.Writer writer = labels.open();
+      Map<Integer,String> rmap = reverse_map(map);
+
+      for (long r = 0; r < m.rowSize(); r++) {
+        writer.set(r, rmap.get(r));
+      }
+
+      writer.close(closer);
+    }
+
+    closer.blockForPending();
+
+    return new H2ODrm(frame, labels);
+  }
+
+  /* Create an nrow x ncol Frame whose Vecs belong to a newly created
+     VectorGroup */
+  public static Frame empty_frame(long nrow, int ncol, int min_hint, int exact_hint) {
+    return empty_frame(nrow, ncol, min_hint, exact_hint, new Vec.VectorGroup());
+  }
+
+  /* Create an nrow x ncol Frame of constant-0 Vecs in VectorGroup vg.
+
+     The rows are split into chunks of chunk_size(...) rows each; espc[]
+     holds the starting row of every chunk, followed by nrow as the
+     final entry. All Vecs are created with the same espc[].
+  */
+  public static Frame empty_frame(long nrow, int ncol, int min_hint, int exact_hint, Vec.VectorGroup vg) {
+    int chunk_sz = chunk_size(nrow, ncol, min_hint, exact_hint);
+    int nchunks = (int)((nrow - 1) / chunk_sz) + 1; /* Final number of Chunks per Vec */
+    long espc[] = new long[nchunks + 1];
+    final Vec[] vecs = new Vec[ncol];
+
+    for (int i = 0; i < nchunks; i++) {
+      /* Multiply as long: an int product would overflow once the
+         starting row exceeds Integer.MAX_VALUE */
+      espc[i] = (long)i * chunk_sz;
+    }
+    espc[nchunks] = nrow;
+
+    for (int i = 0; i < vecs.length; i++) {
+      vecs[i] = Vec.makeCon(0, null, vg, espc);
+    }
+
+    return new Frame(vecs);
+  }
+
+  public static H2ODrm empty_drm(long nrow, int ncol, int min_hint, int exact_hint) {
+    return new H2ODrm(empty_frame(nrow, ncol, min_hint, exact_hint));
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/java/org/apache/mahout/h2obindings/drm/H2OBCast.java
----------------------------------------------------------------------
diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/drm/H2OBCast.java b/h2o/src/main/java/org/apache/mahout/h2obindings/drm/H2OBCast.java
new file mode 100644
index 0000000..7395027
--- /dev/null
+++ b/h2o/src/main/java/org/apache/mahout/h2obindings/drm/H2OBCast.java
@@ -0,0 +1,94 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.mahout.h2obindings.drm;
+
+import org.apache.mahout.math.drm.BCast;
+import org.apache.mahout.math.Matrix;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.MatrixWritable;
+import org.apache.mahout.math.VectorWritable;
+
+import org.apache.hadoop.io.Writable;
+
+import java.io.Serializable;
+import java.io.ByteArrayOutputStream;
+import java.io.ByteArrayInputStream;
+import java.io.ObjectOutputStream;
+import java.io.ObjectInputStream;
+
+/* Handle Matrix and Vector separately so that we can live with
+   just importing MatrixWritable and VectorWritable.
+*/
+
+public class H2OBCast<T> implements BCast<T>, Serializable {
+  /* The broadcast object itself. Being transient it is not Java-serialized;
+     value() rebuilds it from buf when it is null. */
+  transient T obj;
+  /* obj encoded through MatrixWritable or VectorWritable */
+  byte buf[];
+  /* true when buf holds a MatrixWritable, false when it holds a VectorWritable */
+  boolean is_matrix;
+
+  /* Wrap a Matrix or a Vector for broadcasting.
+
+     Throws IllegalArgumentException for any other type, and
+     IllegalStateException if the object cannot be serialized.
+  */
+  public H2OBCast(T o) {
+    obj = o;
+
+    if (o instanceof Matrix) {
+      buf = serialize(new MatrixWritable((Matrix)o));
+      is_matrix = true;
+    } else if (o instanceof Vector) {
+      buf = serialize(new VectorWritable((Vector)o));
+    } else {
+      throw new IllegalArgumentException("Only Matrix or Vector supported for now");
+    }
+  }
+
+  /* Return the broadcast object, decoding it from buf on first use
+     when obj is null */
+  public T value() {
+    if (obj == null) {
+      obj = deserialize(buf);
+    }
+    return obj;
+  }
+
+  /* Encode a Writable into a byte array.
+
+     A failure is rethrown as IllegalStateException instead of returning
+     null, which would only surface later as a NullPointerException in
+     deserialize().
+  */
+  private byte[] serialize(Writable w) {
+    ByteArrayOutputStream bos = new ByteArrayOutputStream();
+    try {
+      ObjectOutputStream oos = new ObjectOutputStream(bos);
+      w.write(oos);
+      oos.close();
+    } catch (java.io.IOException e) {
+      throw new IllegalStateException("Failed to serialize broadcast value", e);
+    }
+    return bos.toByteArray();
+  }
+
+  /* Decode buf back into a Matrix or Vector, as selected by is_matrix.
+
+     A failure is rethrown as IllegalStateException rather than being
+     printed and answered with null.
+  */
+  @SuppressWarnings("unchecked") /* T is Matrix or Vector, enforced by the constructor */
+  private T deserialize(byte buf[]) {
+    T ret = null;
+    ByteArrayInputStream bis = new ByteArrayInputStream(buf);
+    try {
+      ObjectInputStream ois = new ObjectInputStream(bis);
+      if (is_matrix) {
+        MatrixWritable w = new MatrixWritable();
+        w.readFields(ois);
+        ret = (T) w.get();
+      } else {
+        VectorWritable w = new VectorWritable();
+        w.readFields(ois);
+        ret = (T) w.get();
+      }
+    } catch (java.io.IOException e) {
+      throw new IllegalStateException("Failed to deserialize broadcast value", e);
+    }
+    return ret;
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/java/org/apache/mahout/h2obindings/drm/H2ODrm.java
----------------------------------------------------------------------
diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/drm/H2ODrm.java b/h2o/src/main/java/org/apache/mahout/h2obindings/drm/H2ODrm.java
new file mode 100644
index 0000000..058ea0a
--- /dev/null
+++ b/h2o/src/main/java/org/apache/mahout/h2obindings/drm/H2ODrm.java
@@ -0,0 +1,36 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.mahout.h2obindings.drm;
+
+import water.fvec.Frame;
+import water.fvec.Vec;
+
+/* Pairs a Frame with an optional Vec of row keys */
+public class H2ODrm {
+  /* The Frame holding the matrix data */
+  public Frame frame;
+  /* Row keys; null when constructed without keys */
+  public Vec keys;
+
+  /* Create a DRM without row keys */
+  public H2ODrm(Frame m) {
+    this(m, null);
+  }
+
+  /* Create a DRM from a Frame and its row keys */
+  public H2ODrm(Frame m, Vec k) {
+    this.frame = m;
+    this.keys = k;
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/java/org/apache/mahout/h2obindings/ops/ABt.java
----------------------------------------------------------------------
diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/ABt.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/ABt.java
new file mode 100644
index 0000000..d05013f
--- /dev/null
+++ b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/ABt.java
@@ -0,0 +1,63 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.mahout.h2obindings.ops;
+
+import org.apache.mahout.h2obindings.H2OHelper;
+import org.apache.mahout.h2obindings.drm.H2ODrm;
+
+import water.MRTask;
+import water.fvec.Frame;
+import water.fvec.Vec;
+import water.fvec.Chunk;
+import water.fvec.NewChunk;
+
+public class ABt {
+  /* Calculate AB' */
+  public static H2ODrm ABt(H2ODrm drmA, H2ODrm drmB) {
+    Frame A = drmA.frame;
+    Vec keys = drmA.keys;
+    final Frame B = drmB.frame;
+    int out_cols = (int)B.numRows();
+
+    /* The product is produced into ncs[] by an MRTask over A, so it is
+       partitioned like A.
+
+       chks.length == A.numCols() (== B.numCols())
+       ncs.length == out_cols (B.numRows())
+    */
+    Frame product = new MRTask() {
+        public void map(Chunk chks[], NewChunk ncs[]) {
+          int rows = chks[0].len();
+          Vec B_vecs[] = B.vecs();
+
+          for (int c = 0; c < ncs.length; c++) {
+            NewChunk out = ncs[c];
+            for (int r = 0; r < rows; r++) {
+              /* Dot product of row r of this chunk of A with row c of B */
+              double dot = 0;
+              for (int i = 0; i < chks.length; i++) {
+                dot += (chks[i].at0(r) * B_vecs[i].at(c));
+              }
+              out.addNum(dot);
+            }
+          }
+        }
+      }.doAll(out_cols, A).outputFrame(null, null);
+
+    /* The row keys of A are carried over to the product unchanged */
+    return new H2ODrm(product, keys);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AewB.java
----------------------------------------------------------------------
diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AewB.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AewB.java
new file mode 100644
index 0000000..2c590bd
--- /dev/null
+++ b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AewB.java
@@ -0,0 +1,75 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.mahout.h2obindings.ops;
+
+import org.apache.mahout.h2obindings.H2OHelper;
+import org.apache.mahout.h2obindings.drm.H2ODrm;
+
+import water.MRTask;
+import water.fvec.Frame;
+import water.fvec.Vec;
+import water.fvec.Chunk;
+import water.fvec.NewChunk;
+
+public class AewB {
+  /* Element-wise DRM-DRM operation; op is one of "+", "-", "*", "/" */
+  public static H2ODrm AewB(H2ODrm drmA, H2ODrm drmB, final String op) {
+    final Frame A = drmA.frame;
+    final Frame B = drmB.frame;
+    Vec keys = drmA.keys;
+    int out_cols = A.numCols();
+
+    /* The result is produced into ncs[] by an MRTask over A, so it is
+       partitioned like A.
+
+       B may or may not be partitioned like A, but must have the same
+       dimensions as A.
+    */
+    Frame result = new MRTask() {
+        /* Apply op to one pair of cells. A pair of zeros always gives
+           0.0, and so does an op that is not recognized. */
+        private double opfn(String op, double a, double b) {
+          if (a == 0.0 && b == 0.0) {
+            return 0.0;
+          }
+          if (op.equals("+")) {
+            return a + b;
+          }
+          if (op.equals("-")) {
+            return a - b;
+          }
+          if (op.equals("*")) {
+            return a * b;
+          }
+          if (op.equals("/")) {
+            return a / b;
+          }
+          return 0.0;
+        }
+        public void map(Chunk chks[], NewChunk ncs[]) {
+          int rows = chks[0].len();
+          Vec B_vecs[] = B.vecs();
+          /* Global row index of the first row of this chunk; B is
+             addressed by global row */
+          long first = chks[0].start();
+
+          for (int c = 0; c < chks.length; c++) {
+            for (int r = 0; r < rows; r++) {
+              double a = chks[c].at0(r);
+              double b = B_vecs[c].at(first + r);
+              ncs[c].addNum(opfn(op, a, b));
+            }
+          }
+        }
+      }.doAll(out_cols, A).outputFrame(null, null);
+
+    /* The row keys of A are carried over to the result unchanged */
+    return new H2ODrm(result, keys);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AewScalar.java
----------------------------------------------------------------------
diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AewScalar.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AewScalar.java
new file mode 100644
index 0000000..9b6387d
--- /dev/null
+++ b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AewScalar.java
@@ -0,0 +1,70 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.mahout.h2obindings.ops;
+
+import org.apache.mahout.h2obindings.H2OHelper;
+import org.apache.mahout.h2obindings.drm.H2ODrm;
+
+import water.MRTask;
+import water.fvec.Frame;
+import water.fvec.Vec;
+import water.fvec.Chunk;
+import water.fvec.NewChunk;
+
+public class AewScalar {
+  /* Element-wise DRM-scalar operation: every cell a of A becomes
+     (a op s), where op is one of "+", "-", "*", "/" */
+  public static H2ODrm AewScalar(H2ODrm drmA, final double s, final String op) {
+    Frame A = drmA.frame;
+    Vec keys = drmA.keys;
+    int AewScalar_cols = A.numCols();
+
+    /* AewScalar is written into ncs[] with an MRTask on A, and therefore will
+       be similarly partitioned as A.
+    */
+    Frame AewScalar = new MRTask() {
+        /* Apply op to a cell and the scalar. A pair of zeros always gives
+           0.0, and so does an op that is not recognized. */
+        private double opfn(String op, double a, double b) {
+          if (a == 0.0 && b == 0.0) {
+            return 0.0;
+          }
+          if (op.equals("+")) {
+            return a + b;
+          } else if (op.equals("-")) {
+            return a - b;
+          } else if (op.equals("*")) {
+            return a * b;
+          } else if (op.equals("/")) {
+            return a / b;
+          }
+          return 0.0;
+        }
+        public void map(Chunk chks[], NewChunk ncs[]) {
+          int chunk_size = chks[0].len();
+
+          for (int c = 0; c < chks.length; c++) {
+            for (int r = 0; r < chunk_size; r++) {
+              ncs[c].addNum(opfn(op, chks[c].at0(r), s));
+            }
+          }
+        }
+      }.doAll(AewScalar_cols, A).outputFrame(null, null);
+
+    /* Carry forward labels of A blindly into the result */
+    return new H2ODrm(AewScalar, keys);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/java/org/apache/mahout/h2obindings/ops/At.java
----------------------------------------------------------------------
diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/At.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/At.java
new file mode 100644
index 0000000..a6698f0
--- /dev/null
+++ b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/At.java
@@ -0,0 +1,58 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.mahout.h2obindings.ops;
+
+import org.apache.mahout.h2obindings.H2OHelper;
+import org.apache.mahout.h2obindings.drm.H2ODrm;
+
+import water.MRTask;
+import water.fvec.Frame;
+import water.fvec.Vec;
+import water.fvec.Chunk;
+import water.fvec.NewChunk;
+
+public class At {
+  /* Calculate A' (transpose) */
+  public static H2ODrm At(H2ODrm drmA) {
+    final Frame A = drmA.frame;
+    /* The transpose has A.numCols() rows and A.numRows() columns; start
+       from an empty frame of that shape.
+    */
+    Frame transposed = H2OHelper.empty_frame(A.numCols(), (int)A.numRows(), -1, -1);
+
+    /* Run an MRTask over the new frame and fill every cell with the
+       mirrored cell of A.
+    */
+    new MRTask() {
+      public void map(Chunk chks[]) {
+        int rows = chks[0].len();
+        long first = chks[0].start();
+        Vec A_vecs[] = A.vecs();
+
+        for (int c = 0; c < chks.length; c++) {
+          Chunk out = chks[c];
+          for (int r = 0; r < rows; r++) {
+            /* Row (first + r) of the transpose is column (first + r) of A */
+            Vec source = A_vecs[(int)(first + r)];
+            out.set0(r, source.at(c));
+          }
+        }
+      }
+    }.doAll(transposed);
+
+    /* The transpose is NOT partitioned like A, so the labels are dropped */
+    return new H2ODrm(transposed);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AtA.java
----------------------------------------------------------------------
diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AtA.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AtA.java
new file mode 100644
index 0000000..58f21bf
--- /dev/null
+++ b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AtA.java
@@ -0,0 +1,63 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.mahout.h2obindings.ops;
+
+import org.apache.mahout.h2obindings.H2OHelper;
+import org.apache.mahout.h2obindings.drm.H2ODrm;
+
+import water.MRTask;
+import water.fvec.Frame;
+import water.fvec.Vec;
+import water.fvec.Chunk;
+import water.fvec.NewChunk;
+
+public class AtA {
+  /* Calculate A'A */
+  public static H2ODrm AtA(H2ODrm drmA) {
+    final Frame A = drmA.frame;
+    /* The product is A.numCols() x A.numCols(); start from an empty Frame */
+    Frame product = H2OHelper.empty_frame(A.numCols(), A.numCols(), -1, -1);
+
+    /* Run an MRTask over the new Frame and fill every cell with the dot
+       product of two columns of A.
+
+       chks.length == A.numCols()
+    */
+    new MRTask() {
+      public void map(Chunk chks[]) {
+        int rows = chks[0].len();
+        long first = chks[0].start();
+        Vec A_vecs[] = A.vecs();
+        long A_rows = A.numRows();
+
+        for (int c = 0; c < chks.length; c++) {
+          Vec right = A_vecs[c];
+          for (int r = 0; r < rows; r++) {
+            /* Row (first + r) of the product pairs with column (first + r) of A */
+            Vec left = A_vecs[(int)(first + r)];
+            double dot = 0;
+            for (long i = 0; i < A_rows; i++) {
+              dot += (left.at(i) * right.at(i));
+            }
+            chks[c].set0(r, dot);
+          }
+        }
+      }
+    }.doAll(product);
+
+    /* The product is NOT partitioned like A, so the labels are dropped */
+    return new H2ODrm(product);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AtB.java
----------------------------------------------------------------------
diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AtB.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AtB.java
new file mode 100644
index 0000000..3e8cf08
--- /dev/null
+++ b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/AtB.java
@@ -0,0 +1,66 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.mahout.h2obindings.ops;
+
+import org.apache.mahout.h2obindings.H2OHelper;
+import org.apache.mahout.h2obindings.drm.H2ODrm;
+
+import water.MRTask;
+import water.fvec.Frame;
+import water.fvec.Vec;
+import water.fvec.Chunk;
+import water.fvec.NewChunk;
+
+public class AtB {
+  /* Calculate A'B */
+  public static H2ODrm AtB(H2ODrm drmA, H2ODrm drmB) {
+    final Frame A = drmA.frame;
+    final Frame B = drmB.frame;
+
+    /* The product is A.numCols() x B.numCols(); start from an empty frame */
+    Frame product = H2OHelper.empty_frame(A.numCols(), B.numCols(), -1, -1);
+
+    /* Run an MRTask over the new Frame and fill every cell with the dot
+       product of a column of A and a column of B.
+
+       chks.length == B.numCols()
+    */
+    new MRTask() {
+      public void map(Chunk chks[]) {
+        int rows = chks[0].len();
+        long first = chks[0].start();
+        long A_rows = A.numRows();
+        Vec A_vecs[] = A.vecs();
+        Vec B_vecs[] = B.vecs();
+
+        for (int c = 0; c < chks.length; c++) {
+          Vec right = B_vecs[c];
+          for (int r = 0; r < rows; r++) {
+            /* Row (first + r) of the product pairs with column (first + r) of A */
+            Vec left = A_vecs[(int)(first + r)];
+            double dot = 0;
+            for (long i = 0; i < A_rows; i++) {
+              dot += (left.at(i) * right.at(i));
+            }
+            chks[c].set0(r, dot);
+          }
+        }
+      }
+    }.doAll(product);
+
+    /* The product is NOT partitioned like A, so the labels are dropped */
+    return new H2ODrm(product);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Atx.java
----------------------------------------------------------------------
diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Atx.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Atx.java
new file mode 100644
index 0000000..88465e9
--- /dev/null
+++ b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Atx.java
@@ -0,0 +1,76 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.mahout.h2obindings.ops;
+
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Matrix;
+import org.apache.mahout.math.DenseMatrix;
+import org.apache.mahout.h2obindings.H2OHelper;
+import org.apache.mahout.h2obindings.drm.H2OBCast;
+import org.apache.mahout.h2obindings.drm.H2ODrm;
+
+import water.MRTask;
+import water.fvec.Frame;
+import water.fvec.Vec;
+import water.fvec.Chunk;
+import water.fvec.NewChunk;
+import water.util.ArrayUtils;
+
+public class Atx {
+  /*
+    Compute A'x, where x is an in-core Vector.
+
+    Expected: x.size() == A.numRows()
+  */
+  public static H2ODrm Atx(H2ODrm drmA, Vector x) {
+    Frame A = drmA.frame;
+    /* x is wrapped in an H2OBCast and read back with value() inside the task */
+    final H2OBCast<Vector> bx = new H2OBCast<Vector>(x);
+
+    /* Each map() call accumulates the partial sums of its chunk into atx[],
+       and reduce() adds the partial sums of two tasks together.
+
+       atx.length == chks.length == A.numCols()
+    */
+    class MRTaskAtx extends MRTask<MRTaskAtx> {
+      double atx[];
+
+      public void map(Chunk[] chks) {
+        Vector xv = bx.value();
+        long firstRow = chks[0].start();
+        int rowsHere = chks[0].len();
+        int ncols = chks.length;
+
+        atx = new double[ncols];
+        for (int r = 0; r < rowsHere; r++) {
+          /* Element of x paired with this row of A */
+          double weight = xv.getQuick((int)firstRow + r);
+          for (int c = 0; c < ncols; c++) {
+            atx[c] += (chks[c].at0(r) * weight);
+          }
+        }
+      }
+
+      public void reduce(MRTaskAtx other) {
+        ArrayUtils.add(atx, other.atx);
+      }
+    }
+
+    /* Run the task, then place the sums into a one-column in-core Matrix
+       so the existing helper can turn it into a DRM
+    */
+    double[] sums = new MRTaskAtx().doAll(A).atx;
+    Matrix column = new DenseMatrix(A.numCols(), 1);
+    column.assignColumn(0, new DenseVector(sums));
+    return H2OHelper.drm_from_matrix(column, -1, -1);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Ax.java
----------------------------------------------------------------------
diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Ax.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Ax.java
new file mode 100644
index 0000000..2d9c0d0
--- /dev/null
+++ b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Ax.java
@@ -0,0 +1,61 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.mahout.h2obindings.ops;
+
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.h2obindings.H2OHelper;
+import org.apache.mahout.h2obindings.drm.H2OBCast;
+import org.apache.mahout.h2obindings.drm.H2ODrm;
+
+import water.MRTask;
+import water.fvec.Frame;
+import water.fvec.Vec;
+import water.fvec.Chunk;
+import water.fvec.NewChunk;
+
+public class Ax {
+  /*
+    Compute Ax, where x is an in-core Vector.
+
+    Expected: x.size() == A.numCols()
+  */
+  public static H2ODrm Ax(H2ODrm drmA, Vector x) {
+    Frame A = drmA.frame;
+    Vec keys = drmA.keys;
+    /* x is wrapped in an H2OBCast and read back with value() inside the task */
+    final H2OBCast<Vector> bx = new H2OBCast<Vector>(x);
+
+    /* One output value is appended per input row, so the single output
+       column is produced chunk by chunk alongside A.
+
+       chks.length == A.numCols()
+    */
+    MRTask task = new MRTask() {
+        public void map(Chunk[] chks, NewChunk out) {
+          Vector xv = bx.value();
+          int rowsHere = chks[0].len();
+          int ncols = chks.length;
+
+          for (int r = 0; r < rowsHere; r++) {
+            double dot = 0;
+            for (int c = 0; c < ncols; c++) {
+              dot += (chks[c].at0(r) * xv.getQuick(c));
+            }
+            out.addNum(dot);
+          }
+        }
+      };
+    Frame product = task.doAll(1, A).outputFrame(null, null);
+
+    /* Carry the labels of A forward unchanged into Ax */
+    return new H2ODrm(product, keys);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Cbind.java
----------------------------------------------------------------------
diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Cbind.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Cbind.java
new file mode 100644
index 0000000..3871cab
--- /dev/null
+++ b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Cbind.java
@@ -0,0 +1,98 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.mahout.h2obindings.ops;
+
+import water.MRTask;
+import water.fvec.Frame;
+import water.fvec.Vec;
+import water.fvec.Chunk;
+import water.fvec.NewChunk;
+
+import org.apache.mahout.h2obindings.H2OHelper;
+import org.apache.mahout.h2obindings.drm.H2ODrm;
+
+public class Cbind {
+  /*
+    R's cbind like operator, on DrmA and DrmB.
+
+    Picks zip() when both frames report the same Vec group, and join()
+    otherwise.
+  */
+  public static H2ODrm Cbind(H2ODrm drmA, H2ODrm drmB) {
+    Frame fra = drmA.frame;
+    Vec keysa = drmA.keys;
+    Frame frb = drmB.frame;
+    Vec keysb = drmB.keys;
+
+    boolean sameGroup = (fra.anyVec().group() == frb.anyVec().group());
+    if (!sameGroup) {
+      /* Differently partitioned: rebuild B's columns first, then zip() */
+      return join(fra, keysa, frb, keysb);
+    }
+    /* Similarly partitioned: the column Vecs can be combined directly */
+    return zip(fra, keysa, frb, keysb);
+  }
+
+  /*
+    Build a Frame from A's column Vecs followed by B's column Vecs.
+    The Vecs themselves are reused, only the array holding them is new.
+  */
+  private static H2ODrm zip(final Frame fra, final Vec keysa, final Frame frb, final Vec keysb) {
+    Vec[] left = fra.vecs();
+    Vec[] right = frb.vecs();
+
+    /* A's columns first, B's columns right after them */
+    Vec[] combined = new Vec[left.length + right.length];
+    System.arraycopy(left, 0, combined, 0, left.length);
+    System.arraycopy(right, 0, combined, left.length, right.length);
+
+    /* The result takes A's string labels; keysb is not used */
+    return new H2ODrm(new Frame(combined), keysa);
+  }
+
+  /*
+    Copy B into new column Vecs created from one of A's Vecs, then zip()
+    the copies with A.
+  */
+  private static H2ODrm join(final Frame fra, final Vec keysa, final Frame frb, final Vec keysb) {
+    int ncolsB = frb.vecs().length;
+    Vec[] copies = new Vec[ncolsB];
+
+    /* One new Vec per column of B, each made with makeZero() on a Vec of A */
+    for (int i = 0; i < ncolsB; i++) {
+      copies[i] = fra.anyVec().makeZero();
+    }
+
+    /* Overwrite every cell of the new Vecs with the value found at the
+       same global row and column of B (frb)
+    */
+    new MRTask() {
+      public void map(Chunk[] dst) {
+        Vec[] src = frb.vecs();
+        long firstRow = dst[0].start();
+        int rowsHere = dst[0].len();
+
+        for (int c = 0; c < dst.length; c++) {
+          for (int r = 0; r < rowsHere; r++) {
+            dst[c].set0(r, src[c].at(firstRow + r));
+          }
+        }
+      }
+    }.doAll(copies);
+
+    /* The copies can now be zipped with A */
+    return zip(fra, keysa, new Frame(copies), null);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/java/org/apache/mahout/h2obindings/ops/MapBlock.java
----------------------------------------------------------------------
diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/MapBlock.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/MapBlock.java
new file mode 100644
index 0000000..0f901e4
--- /dev/null
+++ b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/MapBlock.java
@@ -0,0 +1,105 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.mahout.h2obindings.ops;
+
+import org.apache.mahout.math.Matrix;
+import org.apache.mahout.h2obindings.H2OBlockMatrix;
+import org.apache.mahout.h2obindings.drm.H2ODrm;
+
+import water.MRTask;
+import water.fvec.Frame;
+import water.fvec.Vec;
+import water.fvec.Chunk;
+import water.fvec.NewChunk;
+
+import java.io.Serializable;
+import java.util.Arrays;
+
+import scala.reflect.ClassTag;
+
+public class MapBlock {
+  /*
+    Run the block map function bmf over every chunk-sized block of drmA.
+
+    ncol is the number of numeric columns in the result; is_r_str tells
+    whether the result carries String keys, in which case one extra
+    output column is requested for them.
+  */
+  public static <K,R> H2ODrm exec(H2ODrm drmA, int ncol, Object bmf, final boolean is_r_str,
+                                  final ClassTag<K> k, final ClassTag<R> r) {
+    Frame A = drmA.frame;
+    Vec keys = drmA.keys;
+
+    class MRTaskBMF extends MRTask<MRTaskBMF> {
+      Serializable bmf;
+      Vec labels;
+
+      MRTaskBMF(Object _bmf, Vec _labels) {
+        /* BlockMapFun is not declared Serializable, so the function arrives
+           here as a plain Object and is cast. The cast relies on the
+           generated Scala closure being tagged Serializable.
+         */
+        bmf = (Serializable)_bmf;
+        labels = _labels;
+      }
+
+      /* Append the cells of out, column by column, to the output chunks */
+      private void deblockify(Matrix out, NewChunk[] ncs) {
+        // assert (out.colSize() == ncs.length)
+        for (int col = 0; col < out.columnSize(); col++) {
+          NewChunk target = ncs[col];
+          for (int row = 0; row < out.rowSize(); row++) {
+            target.addNum(out.getQuick(row, col));
+          }
+        }
+      }
+
+      /*
+        Input:
+        chks.length == A.numCols()
+
+        Output:
+        ncs.length == (A.numCols() + 1) if String keyed
+                      (A.numCols() + 0) if Int or Long keyed
+
+        The Matrix returned by MapBlockHelper.exec() is written into the
+        leading ncs[] elements by deblockify().
+
+        When String keyed, the last ncs[] element is handed to
+        MapBlockHelper.exec() as the chunk for the labels.
+      */
+      public void map(Chunk[] chks, NewChunk[] ncs) {
+        long firstRow = chks[0].start();
+        NewChunk labelChunk = null;
+        if (is_r_str) {
+          labelChunk = ncs[ncs.length - 1];
+        }
+        Matrix block = new H2OBlockMatrix(chks);
+        Matrix mapped = MapBlockHelper.exec(bmf, block, firstRow, labels, labelChunk, k, r);
+        deblockify(mapped, ncs);
+        // assert chks[i]._len == ncs[j]._len
+      }
+    }
+
+    int outputs = is_r_str ? ncol + 1 : ncol;
+    Frame fmap = new MRTaskBMF(bmf, keys).doAll(outputs, A).outputFrame(null, null);
+    Vec vmap = null;
+    if (is_r_str) {
+      /* String keyed output: the Vec at index ncol holds the keys. Split it
+         off into vmap and keep only the first ncol Vecs in the Frame.
+      */
+      Vec[] all = fmap.vecs();
+      vmap = all[ncol];
+      fmap = new Frame(Arrays.copyOfRange(fmap.vecs(), 0, ncol));
+    }
+    return new H2ODrm(fmap, vmap);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Par.java
----------------------------------------------------------------------
diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Par.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Par.java
new file mode 100644
index 0000000..27d6733
--- /dev/null
+++ b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Par.java
@@ -0,0 +1,84 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.mahout.h2obindings.ops;
+
+import water.MRTask;
+import water.fvec.Frame;
+import water.fvec.Vec;
+import water.fvec.Chunk;
+import water.fvec.NewChunk;
+import water.parser.ValueString;
+
+import org.apache.mahout.h2obindings.H2OHelper;
+import org.apache.mahout.h2obindings.drm.H2ODrm;
+
+public class Par {
+  /*
+    Copy drmA into a new Frame created with the requested partitioning
+    hints (min, exact), together with its String keys if it has any.
+  */
+  public static H2ODrm exec(H2ODrm drmA, int min, int exact) {
+    final Frame frin = drmA.frame;
+    final Vec vin = drmA.keys;
+
+    /* Destination frame of the same dimensions, with the new partitioning */
+    Frame frout = H2OHelper.empty_frame(frin.numRows(), frin.numCols(), min, exact);
+    Vec vout;
+
+    if (vin == null) {
+      /* Not String keyed: run an MRTask on the new frame and pull the
+         matching cells in from frin
+      */
+      new MRTask() {
+        public void map(Chunk[] dst) {
+          Vec[] src = frin.vecs();
+          long firstRow = dst[0].start();
+          int rowsHere = dst[0].len();
+
+          for (int c = 0; c < dst.length; c++) {
+            for (int r = 0; r < rowsHere; r++) {
+              dst[c].set0(r, src[c].at(firstRow + r));
+            }
+          }
+        }
+      }.doAll(frout);
+      vout = null;
+    } else {
+      /* String keyed: the same copy, plus one output column that receives
+         the key of every row, so the new key Vec is built chunk by chunk
+         alongside the new Frame.
+
+         The key Vec is the only column of outputFrame(), hence anyVec().
+      */
+      vout = new MRTask() {
+          public void map(Chunk[] dst, NewChunk keysOut) {
+            Vec[] src = frin.vecs();
+            long firstRow = dst[0].start();
+            int rowsHere = dst[0].len();
+            ValueString scratch = new ValueString();
+
+            for (int c = 0; c < dst.length; c++) {
+              for (int r = 0; r < rowsHere; r++) {
+                dst[c].set0(r, src[c].at(firstRow + r));
+              }
+            }
+            for (int r = 0; r < rowsHere; r++) {
+              keysOut.addStr(vin.atStr(scratch, firstRow + r));
+            }
+          }
+        }.doAll(1, frout).outputFrame(null, null).anyVec();
+    }
+
+    return new H2ODrm(frout, vout);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Rbind.java
----------------------------------------------------------------------
diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Rbind.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Rbind.java
new file mode 100644
index 0000000..ed4b0fb
--- /dev/null
+++ b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Rbind.java
@@ -0,0 +1,82 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.mahout.h2obindings.ops;
+
+import water.MRTask;
+import water.fvec.Frame;
+import water.fvec.Vec;
+import water.fvec.Chunk;
+import water.fvec.NewChunk;
+import water.parser.ValueString;
+
+import org.apache.mahout.h2obindings.H2OHelper;
+import org.apache.mahout.h2obindings.drm.H2ODrm;
+
+public class Rbind {
+  /*
+    R's rbind like operator, on DrmA and DrmB.
+
+    Returns a DRM whose first A.numRows() rows are copied from A and whose
+    remaining rows are copied from B. If A is String keyed, the keys of
+    both inputs are copied into a new key Vec in the same row order;
+    otherwise the result has no keys.
+  */
+  public static H2ODrm Rbind(H2ODrm drmA, H2ODrm drmB) {
+    final Frame fra = drmA.frame;
+    final Vec keysa = drmA.keys;
+    final Frame frb = drmB.frame;
+    final Vec keysb = drmB.keys;
+
+    /* Create new frame and copy A's data at the top, and B's data below.
+       Create the frame in the same VectorGroup as A, so A's data does not
+       cross the wire during copy. B's data could potentially cross the wire.
+    */
+    Frame frbind = H2OHelper.empty_frame(fra.numRows() + frb.numRows(), fra.numCols(),
+                                         -1, -1, fra.anyVec().group());
+    Vec keys = null;
+
+    MRTask task = new MRTask() {
+        /* Used when no output column is requested: copy cell data only */
+        public void map(Chunk chks[]) {
+          bind(chks, null);
+        }
+
+        /* Used when one output column is requested for the String keys */
+        public void map(Chunk chks[], NewChunk nc) {
+          bind(chks, nc);
+        }
+
+        /* Fill the rows of this chunk from A or B, and append exactly one
+           key per row to nc (when nc is given and the source has keys)
+        */
+        private void bind(Chunk chks[], NewChunk nc) {
+          Vec A_vecs[] = fra.vecs();
+          Vec B_vecs[] = frb.vecs();
+          long A_rows = fra.numRows();
+          long start = chks[0].start();
+          int chunk_size = chks[0].len();
+          ValueString vstr = new ValueString();
+
+          for (int r = 0; r < chunk_size; r++) {
+            /* Rows below A_rows come from A, the rest from B (re-based to 0) */
+            boolean from_a = (r + start < A_rows);
+            Vec src_vecs[] = from_a ? A_vecs : B_vecs;
+            Vec src_keys = from_a ? keysa : keysb;
+            long src_row = from_a ? (r + start) : (r + start - A_rows);
+
+            for (int c = 0; c < chks.length; c++) {
+              chks[c].set0(r, src_vecs[c].at(src_row));
+            }
+
+            /* The key is added once per row, outside the column loop.
+               NOTE(review): if only one of A and B is String keyed, rows of
+               the unkeyed input get no entry here - confirm callers never
+               mix keyed and unkeyed DRMs.
+            */
+            if (nc != null && src_keys != null) {
+              nc.addStr(src_keys.atStr(vstr, src_row));
+            }
+          }
+        }
+      };
+
+    if (keysa != null) {
+      /* String keyed: request one output column and take it as the new keys */
+      keys = task.doAll(1, frbind).outputFrame(null, null).anyVec();
+    } else {
+      /* Not String keyed: only the cell data needs to be copied */
+      task.doAll(frbind);
+    }
+
+    return new H2ODrm(frbind, keys);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/java/org/apache/mahout/h2obindings/ops/RowRange.java
----------------------------------------------------------------------
diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/RowRange.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/RowRange.java
new file mode 100644
index 0000000..5ce7732
--- /dev/null
+++ b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/RowRange.java
@@ -0,0 +1,88 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.mahout.h2obindings.ops;
+
+import scala.collection.immutable.Range;
+
+import water.MRTask;
+import water.fvec.Frame;
+import water.fvec.Vec;
+import water.fvec.Chunk;
+import water.fvec.NewChunk;
+import water.parser.ValueString;
+
+import org.apache.mahout.h2obindings.drm.H2ODrm;
+
+public class RowRange {
+  /*
+    Filter operation: keep only the rows of drmA whose global row number
+    is contained in R, along with their String keys if present.
+  */
+  public static H2ODrm RowRange(H2ODrm drmA, final Range R) {
+    Frame A = drmA.frame;
+    Vec keys = drmA.keys;
+
+    /* Filtering MRTask on A: a row is appended to the output only if its
+       row number is contained in R
+    */
+    MRTask rowFilter = new MRTask() {
+        public void map(Chunk[] chks, NewChunk[] ncs) {
+          long firstRow = chks[0].start();
+          int rowsHere = chks[0].len();
+
+          /* Cheap test first: does this chunk come anywhere near R? */
+          boolean mayOverlap = (firstRow <= R.end()) && ((firstRow + rowsHere) >= R.start());
+          if (mayOverlap) {
+            /* Copy just the rows that are inside R */
+            for (int r = 0; r < rowsHere; r++) {
+              if (R.contains(firstRow + r)) {
+                for (int c = 0; c < chks.length; c++) {
+                  ncs[c].addNum(chks[c].at0(r));
+                }
+              }
+            }
+          }
+        }
+      };
+    Frame Arr = rowFilter.doAll(A.numCols(), A).outputFrame(null, null);
+
+    Vec Vrr = null;
+    if (keys != null) {
+      /* String keyed DRM: apply the same filter to the single column
+         of Strings
+      */
+      MRTask keyFilter = new MRTask() {
+          public void map(Chunk chk, NewChunk nc) {
+            long firstRow = chk.start();
+            int rowsHere = chk.len();
+            ValueString scratch = new ValueString();
+
+            boolean mayOverlap = (firstRow <= R.end()) && ((firstRow + rowsHere) >= R.start());
+            if (mayOverlap) {
+              for (int r = 0; r < rowsHere; r++) {
+                if (R.contains(firstRow + r)) {
+                  nc.addStr(chk.atStr0(scratch, r));
+                }
+              }
+            }
+          }
+        };
+      Vrr = keyFilter.doAll(1, keys).outputFrame(null, null).anyVec();
+    }
+
+    return new H2ODrm(Arr, Vrr);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f870a630/h2o/src/main/java/org/apache/mahout/h2obindings/ops/TimesRightMatrix.java
----------------------------------------------------------------------
diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/TimesRightMatrix.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/TimesRightMatrix.java
new file mode 100644
index 0000000..364f039
--- /dev/null
+++ b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/TimesRightMatrix.java
@@ -0,0 +1,97 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.mahout.h2obindings.ops;
+
+import org.apache.mahout.math.Matrix;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.DiagonalMatrix;
+import org.apache.mahout.h2obindings.H2OHelper;
+import org.apache.mahout.h2obindings.drm.H2OBCast;
+import org.apache.mahout.h2obindings.drm.H2ODrm;
+
+import water.MRTask;
+import water.fvec.Frame;
+import water.fvec.Vec;
+import water.fvec.Chunk;
+import water.fvec.NewChunk;
+
+public class TimesRightMatrix {
+  /*
+    Multiply drmA on the right with the in-core Matrix B.
+
+    A DiagonalMatrix is handled by scaling columns; any other Matrix goes
+    through the general row-by-column product. A's keys are passed on to
+    the result.
+  */
+  public static H2ODrm TimesRightMatrix(H2ODrm drmA, Matrix B) {
+    Frame A = drmA.frame;
+    Vec keys = drmA.keys;
+
+    Frame AinCoreB = (B instanceof DiagonalMatrix)
+        ? AinCoreB_diagonal(A, B.viewDiagonal())
+        : AinCoreB_common(A, B);
+
+    return new H2ODrm(AinCoreB, keys);
+  }
+
+  /*
+    Multiply Frame A with an in-core diagonal Matrix given by its
+    diagonal Vector d: column c of A is scaled by d[c].
+
+    Expected: A.numCols() == d.size()
+  */
+  private static Frame AinCoreB_diagonal(final Frame A, Vector d) {
+    final H2OBCast<Vector> bd = new H2OBCast<Vector>(d);
+
+    MRTask scaleColumns = new MRTask() {
+      public void map(Chunk[] chks, NewChunk[] ncs) {
+        Vector diag = bd.value();
+        int rowsHere = chks[0].len();
+
+        for (int c = 0; c < ncs.length; c++) {
+          double factor = diag.getQuick(c);
+          for (int r = 0; r < rowsHere; r++) {
+            ncs[c].addNum(chks[c].at0(r) * factor);
+          }
+        }
+      }
+    };
+    return scaleColumns.doAll(d.size(), A).outputFrame(null, null);
+  }
+
+  /*
+    Multiply Frame A with the in-core Matrix b: every output cell is the
+    dot product of a row of A with a column of b.
+
+    Expected: A.numCols() == b.rowSize()
+  */
+  private static Frame AinCoreB_common(final Frame A, Matrix b) {
+    final H2OBCast<Matrix> bb = new H2OBCast<Matrix>(b);
+
+    MRTask multiply = new MRTask() {
+      public void map(Chunk[] chks, NewChunk[] ncs) {
+        Matrix right = bb.value();
+        int rowsHere = chks[0].len();
+        int inner = chks.length;
+
+        for (int r = 0; r < rowsHere; r++) {
+          for (int c = 0; c < ncs.length; c++) {
+            double dot = 0;
+            for (int i = 0; i < inner; i++) {
+              dot += (chks[i].at0(r) * right.getQuick(i, c));
+            }
+            ncs[c].addNum(dot);
+          }
+        }
+      }
+    };
+    return multiply.doAll(b.columnSize(), A).outputFrame(null, null);
+  }
+}


Mime
View raw message