predictionio-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From don...@apache.org
Subject [2/2] incubator-predictionio git commit: [PIO-30] Set up a cross build for Spark 2.0 and Scala 2.11
Date Mon, 20 Mar 2017 17:57:19 GMT
[PIO-30] Set up a cross build for Spark 2.0 and Scala 2.11

Closes #345
Closes #295


Project: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/commit/00779c3d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/tree/00779c3d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-predictionio/diff/00779c3d

Branch: refs/heads/develop
Commit: 00779c3d8e6a7b55306ddc29d4779875ffa3ccba
Parents: bb00324
Author: Chan Lee <chanlee514@gmail.com>
Authored: Mon Mar 20 10:49:24 2017 -0700
Committer: Donald Szeto <donald@apache.org>
Committed: Mon Mar 20 10:49:24 2017 -0700

----------------------------------------------------------------------
 .travis.yml                                     |  66 +++++-
 bin/install.sh                                  |   2 +-
 build.sbt                                       | 153 ++++++++++---
 common/build.sbt                                |   9 +-
 conf/pio-env.sh.template                        |   1 +
 conf/pio-env.sh.travis                          |   5 +-
 conf/pio-vendors.sh                             |  57 +++++
 core/build.sbt                                  |  17 +-
 .../predictionio/workflow/CreateServer.scala    |   3 +-
 data/build.sbt                                  |  17 +-
 .../predictionio/data/api/EventServer.scala     |   5 +-
 .../predictionio/data/view/DataView.scala       |  13 +-
 .../predictionio/data/view/PBatchView.scala     | 204 ++++++++++++++++++
 .../data/SparkVersionDependent.scala            |  30 +++
 .../data/SparkVersionDependent.scala            |  30 +++
 e2/build.sbt                                    |   4 +-
 make-distribution.sh                            |  36 +---
 project/Build.scala                             |  34 ---
 project/PIOBuild.scala                          |  36 ++++
 project/assembly.sbt                            |   2 +-
 project/plugins.sbt                             |   4 +-
 storage/elasticsearch/build.sbt                 |  30 +--
 storage/elasticsearch1/build.sbt                |  18 +-
 storage/hbase/build.sbt                         |  14 +-
 .../predictionio/data/view/PBatchView.scala     | 212 -------------------
 storage/hdfs/build.sbt                          |  20 +-
 storage/jdbc/build.sbt                          |  16 +-
 .../data/storage/jdbc/JDBCPEvents.scala         |  26 ++-
 storage/localfs/build.sbt                       |  16 +-
 tests/Dockerfile                                |  23 +-
 tests/Dockerfile-es1                            |  57 -----
 tests/README.md                                 |   6 +
 tests/after_script.travis.sh                    |   8 +-
 tests/before_script.travis.sh                   |   2 +-
 tests/build-docker.sh                           |  50 -----
 tests/build_docker.sh                           |  57 +++++
 tests/docker-compose-es1.yml                    |  38 ----
 tests/docker-compose.yml                        |   2 +-
 tests/docker-files/env-conf/pio-env-es1.sh      | 103 ---------
 tests/docker-files/env-conf/pio-env.sh          |  15 +-
 tests/docker-files/init.sh                      |   2 +-
 tests/docker-files/set_build_profile.sh         |  31 +++
 .../engines/recommendation-engine/build.sbt     |   8 +-
 .../engines/recommendation-engine/manifest.json |   1 -
 .../recommendation-engine/project/pio-build.sbt |   1 -
 tests/pio_tests/scenarios/quickstart_test.py    |   8 +-
 tests/pio_tests/utils.py                        |   5 +-
 tests/run_docker.sh                             |  19 +-
 tests/script.travis.sh                          |   8 +-
 tests/unit.sh                                   |  19 +-
 tools/build.sbt                                 |  20 +-
 .../predictionio/tools/admin/AdminAPI.scala     |   3 +-
 .../tools/export/EventsToFile.scala             |   6 +-
 53 files changed, 801 insertions(+), 771 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 634d286..8dcc2fa 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -29,9 +29,6 @@ branches:
 
 language: scala
 
-scala:
-  - 2.10.5
-
 jdk:
   - oraclejdk8
 
@@ -44,12 +41,63 @@ cache: false
 
 env:
   matrix:
-    - BUILD_TYPE=Unit METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL
-    - BUILD_TYPE=Integration METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL
-    - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS ES_VERSION=5
-    - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS ES_VERSION=5
-    - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS ES_VERSION=1
-    - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS ES_VERSION=1
+    - BUILD_TYPE=Unit
+      METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL
+      PIO_SCALA_VERSION=2.10.6
+      PIO_SPARK_VERSION=1.6.3
+    - BUILD_TYPE=Integration
+      METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL
+      PIO_SCALA_VERSION=2.10.6
+      PIO_SPARK_VERSION=1.6.3
+    - BUILD_TYPE=Integration
+      METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS
+      PIO_SCALA_VERSION=2.10.6
+      PIO_SPARK_VERSION=1.6.3
+      PIO_ELASTICSEARCH_VERSION=5.2.2
+    - BUILD_TYPE=Integration
+      METADATA_REP=ELASTICSEARCH EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS
+      PIO_SCALA_VERSION=2.10.6
+      PIO_SPARK_VERSION=1.6.3
+      PIO_ELASTICSEARCH_VERSION=5.2.2
+    - BUILD_TYPE=Integration
+      METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS
+      PIO_SCALA_VERSION=2.10.6
+      PIO_SPARK_VERSION=1.6.3
+      PIO_ELASTICSEARCH_VERSION=1.7.3
+    - BUILD_TYPE=Integration
+      METADATA_REP=ELASTICSEARCH EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS
+      PIO_SCALA_VERSION=2.10.6
+      PIO_SPARK_VERSION=1.6.3
+      PIO_ELASTICSEARCH_VERSION=1.7.3
+
+    - BUILD_TYPE=Unit
+      METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL
+      PIO_SCALA_VERSION=2.11.8
+      PIO_SPARK_VERSION=2.1.0
+    - BUILD_TYPE=Integration
+      METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL
+      PIO_SCALA_VERSION=2.11.8
+      PIO_SPARK_VERSION=2.1.0
+    - BUILD_TYPE=Integration
+      METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS
+      PIO_SCALA_VERSION=2.11.8
+      PIO_SPARK_VERSION=2.1.0
+      PIO_ELASTICSEARCH_VERSION=5.2.2
+    - BUILD_TYPE=Integration
+      METADATA_REP=ELASTICSEARCH EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS
+      PIO_SCALA_VERSION=2.11.8
+      PIO_SPARK_VERSION=2.1.0
+      PIO_ELASTICSEARCH_VERSION=5.2.2
+    - BUILD_TYPE=Integration
+      METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS
+      PIO_SCALA_VERSION=2.11.8
+      PIO_SPARK_VERSION=2.1.0
+      PIO_ELASTICSEARCH_VERSION=1.7.3
+    - BUILD_TYPE=Integration
+      METADATA_REP=ELASTICSEARCH EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS
+      PIO_SCALA_VERSION=2.11.8
+      PIO_SPARK_VERSION=2.1.0
+      PIO_ELASTICSEARCH_VERSION=1.7.3
 
 before_install:
   - unset SBT_OPTS JVM_OPTS

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/bin/install.sh
----------------------------------------------------------------------
diff --git a/bin/install.sh b/bin/install.sh
index e485df9..7431b09 100755
--- a/bin/install.sh
+++ b/bin/install.sh
@@ -18,7 +18,7 @@
 #
 
 OS=`uname`
-SPARK_VERSION=1.6.2
+SPARK_VERSION=1.6.3
 # Looks like support for Elasticsearch 2.0 will require 2.0 so deferring
 ELASTICSEARCH_VERSION=1.7.5
 HBASE_VERSION=1.2.2

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/build.sbt
----------------------------------------------------------------------
diff --git a/build.sbt b/build.sbt
index fa5ba29..fc47f6a 100644
--- a/build.sbt
+++ b/build.sbt
@@ -15,28 +15,73 @@
  * limitations under the License.
  */
 
+import PIOBuild._
 import UnidocKeys._
 
+lazy val scalaSparkDepsVersion = Map(
+  "2.10" -> Map(
+    "1.6" -> Map(
+      "akka" -> "2.3.15",
+      "hadoop" -> "2.6.5",
+      "json4s" -> "3.2.10"),
+    "2.0" -> Map(
+      "akka" -> "2.3.16",
+      "hadoop" -> "2.7.3",
+      "json4s" -> "3.2.11"),
+    "2.1" -> Map(
+      "akka" -> "2.3.16",
+      "hadoop" -> "2.7.3",
+      "json4s" -> "3.2.11")),
+  "2.11" -> Map(
+    "1.6" -> Map(
+      "akka" -> "2.3.15",
+      "hadoop" -> "2.6.5",
+      "json4s" -> "3.2.10"),
+    "2.0" -> Map(
+      "akka" -> "2.4.17",
+      "hadoop" -> "2.7.3",
+      "json4s" -> "3.2.11"),
+    "2.1" -> Map(
+      "akka" -> "2.4.17",
+      "hadoop" -> "2.7.3",
+      "json4s" -> "3.2.11")))
+
 name := "apache-predictionio-parent"
 
 version in ThisBuild := "0.11.0-SNAPSHOT"
 
 organization in ThisBuild := "org.apache.predictionio"
 
-scalaVersion in ThisBuild := "2.10.5"
+scalaVersion in ThisBuild := sys.props.getOrElse("scala.version", "2.10.6")
+
+crossScalaVersions in ThisBuild := Seq("2.10.6", "2.11.8")
 
 scalacOptions in ThisBuild ++= Seq("-deprecation", "-unchecked", "-feature")
 
 scalacOptions in (ThisBuild, Test) ++= Seq("-Yrangepos")
-
 fork in (ThisBuild, run) := true
 
 javacOptions in (ThisBuild, compile) ++= Seq("-source", "1.7", "-target", "1.7",
   "-Xlint:deprecation", "-Xlint:unchecked")
 
-json4sVersion in ThisBuild := "3.2.10"
+// Ignore differentiation of Spark patch levels
+sparkVersion in ThisBuild := sys.props.getOrElse("spark.version", "1.6.3")
+
+sparkBinaryVersion in ThisBuild := binaryVersion(sparkVersion.value)
+
+akkaVersion in ThisBuild := sys.props.getOrElse(
+  "akka.version",
+  scalaSparkDepsVersion(scalaBinaryVersion.value)(sparkBinaryVersion.value)("akka"))
+
+lazy val es = sys.props.getOrElse("elasticsearch.version", "1.7.6")
 
-sparkVersion in ThisBuild := "1.6.3"
+elasticsearchVersion in ThisBuild := es
+
+json4sVersion in ThisBuild := scalaSparkDepsVersion(scalaBinaryVersion.value)(sparkBinaryVersion.value)("json4s")
+
+hadoopVersion in ThisBuild := sys.props.getOrElse(
+  "hadoop.version",
+  scalaSparkDepsVersion(scalaBinaryVersion.value)(sparkBinaryVersion.value)("hadoop"))
 
 val pioBuildInfoSettings = buildInfoSettings ++ Seq(
   sourceGenerators in Compile <+= buildInfo,
@@ -45,65 +90,106 @@ val pioBuildInfoSettings = buildInfoSettings ++ Seq(
     version,
     scalaVersion,
     sbtVersion,
-    sparkVersion),
+    sparkVersion,
+    hadoopVersion),
   buildInfoPackage := "org.apache.predictionio.core")
 
+// Used temporarily to modify genjavadoc version to "0.10" until unidoc updates it
+val genjavadocSettings: Seq[sbt.Def.Setting[_]] = Seq(
+  libraryDependencies += compilerPlugin("com.typesafe.genjavadoc" %% "genjavadoc-plugin" % "0.10" cross CrossVersion.full),
+    scalacOptions <+= target map (t => "-P:genjavadoc:out=" + (t / "java")))
+
 val conf = file("conf")
 
 val commonSettings = Seq(
   autoAPIMappings := true,
-  unmanagedClasspath in Test += conf)
+  unmanagedClasspath in Test += conf,
+  unmanagedClasspath in Test += baseDirectory.value.getParentFile / s"storage/jdbc/target/scala-${scalaBinaryVersion.value}/classes")
 
-val common = (project in file("common")).
-  settings(commonSettings: _*).
-  settings(genjavadocSettings: _*)
-
-val data = (project in file("data")).
-  dependsOn(common).
-  settings(commonSettings: _*).
-  settings(genjavadocSettings: _*)
+val commonTestSettings = Seq(
+  libraryDependencies ++= Seq(
+    "org.postgresql"   % "postgresql"  % "9.4-1204-jdbc41" % "test",
+    "org.scalikejdbc" %% "scalikejdbc" % "2.3.5" % "test"))
 
 val dataElasticsearch1 = (project in file("storage/elasticsearch1")).
   settings(commonSettings: _*).
-  settings(genjavadocSettings: _*)
+  settings(genjavadocSettings: _*).
+  settings(publishArtifact := false)
 
 val dataElasticsearch = (project in file("storage/elasticsearch")).
   settings(commonSettings: _*).
-  settings(genjavadocSettings: _*)
+  settings(genjavadocSettings: _*).
+  settings(publishArtifact := false)
 
 val dataHbase = (project in file("storage/hbase")).
   settings(commonSettings: _*).
-  settings(genjavadocSettings: _*)
+  settings(genjavadocSettings: _*).
+  settings(publishArtifact := false)
 
 val dataHdfs = (project in file("storage/hdfs")).
   settings(commonSettings: _*).
-  settings(genjavadocSettings: _*)
+  settings(genjavadocSettings: _*).
+  settings(publishArtifact := false)
 
 val dataJdbc = (project in file("storage/jdbc")).
   settings(commonSettings: _*).
-  settings(genjavadocSettings: _*)
+  settings(genjavadocSettings: _*).
+  settings(publishArtifact := false)
 
 val dataLocalfs = (project in file("storage/localfs")).
   settings(commonSettings: _*).
-  settings(genjavadocSettings: _*)
+  settings(genjavadocSettings: _*).
+  settings(publishArtifact := false)
+
+val common = (project in file("common")).
+  settings(commonSettings: _*).
+  settings(genjavadocSettings: _*).
+  disablePlugins(sbtassembly.AssemblyPlugin)
+
+val data = (project in file("data")).
+  dependsOn(common).
+  settings(commonSettings: _*).
+  settings(commonTestSettings: _*).
+  settings(genjavadocSettings: _*).
+  settings(unmanagedSourceDirectories in Compile +=
+    sourceDirectory.value / s"main/spark-${majorVersion(sparkVersion.value)}").
+  disablePlugins(sbtassembly.AssemblyPlugin)
 
 val core = (project in file("core")).
   dependsOn(data).
   settings(commonSettings: _*).
+  settings(commonTestSettings: _*).
   settings(genjavadocSettings: _*).
   settings(pioBuildInfoSettings: _*).
-  enablePlugins(SbtTwirl)
+  enablePlugins(SbtTwirl).
+  disablePlugins(sbtassembly.AssemblyPlugin)
 
 val tools = (project in file("tools")).
   dependsOn(core).
   dependsOn(data).
   settings(commonSettings: _*).
+  settings(commonTestSettings: _*).
   settings(genjavadocSettings: _*).
-  enablePlugins(SbtTwirl)
+  enablePlugins(SbtTwirl).
+  settings(publishArtifact := false)
 
 val e2 = (project in file("e2")).
   settings(commonSettings: _*).
-  settings(genjavadocSettings: _*)
+  settings(genjavadocSettings: _*).
+  disablePlugins(sbtassembly.AssemblyPlugin)
+
+val dataEs = if (majorVersion(es) == 1) dataElasticsearch1 else dataElasticsearch
+
+val storageSubprojects = Seq(
+    dataEs,
+    dataHbase,
+    dataHdfs,
+    dataJdbc,
+    dataLocalfs)
+
+val storage = (project in file("storage"))
+  .aggregate(storageSubprojects map Project.projectToRef: _*)
+  .disablePlugins(sbtassembly.AssemblyPlugin)
 
 val root = (project in file(".")).
   settings(commonSettings: _*).
@@ -162,18 +248,8 @@ val root = (project in file(".")).
       "docs/javadoc/javadoc-overview.html",
       "-noqualifier",
       "java.lang")).
-  aggregate(
-    common,
-    core,
-    data,
-    dataElasticsearch1,
-    dataElasticsearch,
-    dataHbase,
-    dataHdfs,
-    dataJdbc,
-    dataLocalfs,
-    tools,
-    e2)
+  aggregate(common, core, data, tools, e2).
+  disablePlugins(sbtassembly.AssemblyPlugin)
 
 val pioUnidoc = taskKey[Unit]("Builds PredictionIO ScalaDoc")
 
@@ -232,3 +308,10 @@ parallelExecution := false
 parallelExecution in Global := false
 
 testOptions in Test += Tests.Argument("-oDF")
+
+printBuildInfo := {
+  println(s"PIO_SCALA_VERSION=${scalaVersion.value}")
+  println(s"PIO_SPARK_VERSION=${sparkVersion.value}")
+  println(s"PIO_ELASTICSEARCH_VERSION=${elasticsearchVersion.value}")
+  println(s"PIO_HADOOP_VERSION=${hadoopVersion.value}")
+}

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/common/build.sbt
----------------------------------------------------------------------
diff --git a/common/build.sbt b/common/build.sbt
index e7050d4..5a0fba1 100644
--- a/common/build.sbt
+++ b/common/build.sbt
@@ -15,12 +15,15 @@
  * limitations under the License.
  */
 
+import PIOBuild._
+
 name := "apache-predictionio-common"
 
 libraryDependencies ++= Seq(
-  "io.spray"               %% "spray-can"        % "1.3.2",
-  "io.spray"               %% "spray-routing"    % "1.3.2",
-  "org.spark-project.akka" %% "akka-actor"     % "2.3.4-spark"
+  "io.spray"               %% "spray-can"      % "1.3.3",
+  "io.spray"               %% "spray-routing"  % "1.3.3",
+  "com.typesafe.akka"      %% "akka-actor"     % akkaVersion.value,
+  "com.typesafe.akka"      %% "akka-slf4j"     % akkaVersion.value
 )
 
 pomExtra := childrenPomExtra.value

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/conf/pio-env.sh.template
----------------------------------------------------------------------
diff --git a/conf/pio-env.sh.template b/conf/pio-env.sh.template
index 0d76102..57185ba 100644
--- a/conf/pio-env.sh.template
+++ b/conf/pio-env.sh.template
@@ -24,6 +24,7 @@
 # you need to change these to fit your site.
 
 # SPARK_HOME: Apache Spark is a hard dependency and must be configured.
+# SPARK_HOME=$PIO_HOME/vendors/spark-2.0.2-bin-hadoop2.7
 SPARK_HOME=$PIO_HOME/vendors/spark-1.6.3-bin-hadoop2.6
 
 POSTGRES_JDBC_DRIVER=$PIO_HOME/lib/postgresql-9.4-1204.jdbc41.jar

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/conf/pio-env.sh.travis
----------------------------------------------------------------------
diff --git a/conf/pio-env.sh.travis b/conf/pio-env.sh.travis
index 94b30cd..80e3332 100644
--- a/conf/pio-env.sh.travis
+++ b/conf/pio-env.sh.travis
@@ -24,7 +24,8 @@
 # you need to change these to fit your site.
 
 # SPARK_HOME: Apache Spark is a hard dependency and must be configured.
-SPARK_HOME=$PIO_HOME/vendors/spark-1.3.0-bin-hadoop2.4
+# it is set up in script.travis.sh
+SPARK_HOME=$SPARK_HOME
 
 # Filesystem paths where PredictionIO uses as block storage.
 PIO_FS_BASEDIR=$HOME/.pio_store
@@ -41,7 +42,7 @@ PIO_STORAGE_SOURCES_LOCALFS_TYPE=localfs
 PIO_STORAGE_SOURCES_LOCALFS_PATH=$PIO_FS_BASEDIR/models
 
 PIO_STORAGE_SOURCES_HBASE_TYPE=hbase
-PIO_STORAGE_SOURCES_HBASE_HOME=$PIO_HOME/vendors/hbase-1.0.0
+PIO_STORAGE_SOURCES_HBASE_HOME=$HBASE_HOME
 
 # Storage Data Sources (pgsql)
 PIO_STORAGE_SOURCES_PGSQL_TYPE=jdbc

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/conf/pio-vendors.sh
----------------------------------------------------------------------
diff --git a/conf/pio-vendors.sh b/conf/pio-vendors.sh
new file mode 100644
index 0000000..830b576
--- /dev/null
+++ b/conf/pio-vendors.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# IMPORTANT: PIO_*_VERSION for dependencies must be set before invoking this script.
+# `source tests/docker-files/set_build_profile.sh $BUILD_PROFILE` to get the proper versions
+
+if [ -z "$PIO_SCALA_VERSION" ]; then
+    PIO_SCALA_VERSION="2.10.6"
+fi
+
+if [ -z "$PIO_SPARK_VERSION" ]; then
+    PIO_SPARK_VERSION="1.6.3"
+fi
+
+if [ -z "$PIO_HADOOP_VERSION" ]; then
+    PIO_HADOOP_VERSION="2.6.5"
+fi
+
+if [ -z "$PIO_ELASTICSEARCH_VERSION" ]; then
+    PIO_ELASTICSEARCH_VERSION="1.7.3"
+fi
+
+ES_MAJOR=`echo $PIO_ELASTICSEARCH_VERSION | awk -F. '{print $1}'`
+
+if [ "$ES_MAJOR" = "1" ]; then
+    export ES_IMAGE="elasticsearch"
+    export ES_TAG="1"
+else
+    export ES_IMAGE="docker.elastic.co/elasticsearch/elasticsearch"
+    export ES_TAG="5.2.2"
+fi
+
+PGSQL_JAR=postgresql-9.4-1204.jdbc41.jar
+PGSQL_DOWNLOAD=https://jdbc.postgresql.org/download/${PGSQL_JAR}
+
+HADOOP_MAJOR=`echo $PIO_HADOOP_VERSION | awk -F. '{print $1 "." $2}'`
+SPARK_DIR=spark-${PIO_SPARK_VERSION}-bin-hadoop${HADOOP_MAJOR}
+SPARK_ARCHIVE=${SPARK_DIR}.tgz
+SPARK_DOWNLOAD=http://d3kbcqa49mib13.cloudfront.net/${SPARK_ARCHIVE}
+# ELASTICSEARCH_DOWNLOAD
+#   5.x https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-${PIO_ELASTICSEARCH_VERSION}.tar.gz
+#   1.x https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-${PIO_ELASTICSEARCH_VERSION}.tar.gz

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/core/build.sbt
----------------------------------------------------------------------
diff --git a/core/build.sbt b/core/build.sbt
index bfb8bf3..b95a957 100644
--- a/core/build.sbt
+++ b/core/build.sbt
@@ -15,32 +15,23 @@
  * limitations under the License.
  */
 
+import PIOBuild._
+
 name := "apache-predictionio-core"
 
 libraryDependencies ++= Seq(
-  "com.github.scopt"       %% "scopt"            % "3.3.0",
+  "com.github.scopt"       %% "scopt"            % "3.5.0",
   "com.google.code.gson"    % "gson"             % "2.5",
-  "com.google.guava"        % "guava"            % "18.0",
-  "com.twitter"            %% "chill"            % "0.7.2"
-    exclude("com.esotericsoftware.minlog", "minlog"),
   "com.twitter"            %% "chill-bijection"  % "0.7.2",
   "de.javakaffee"           % "kryo-serializers" % "0.37",
-  "commons-io"              % "commons-io"       % "2.4",
-  "io.spray"               %% "spray-can"        % "1.3.3",
-  "io.spray"               %% "spray-routing"    % "1.3.3",
   "net.jodah"               % "typetools"        % "0.3.1",
   "org.apache.spark"       %% "spark-core"       % sparkVersion.value % "provided",
-  "org.apache.spark"       %% "spark-sql"        % sparkVersion.value % "provided",
-  "org.clapper"            %% "grizzled-slf4j"   % "1.0.2",
-  "org.json4s"             %% "json4s-native"    % json4sVersion.value,
   "org.json4s"             %% "json4s-ext"       % json4sVersion.value,
   "org.scalaj"             %% "scalaj-http"      % "1.1.6",
-  "org.scalatest"          %% "scalatest"        % "2.1.7" % "test",
   "org.slf4j"               % "slf4j-log4j12"    % "1.7.18",
+  "org.scalatest"          %% "scalatest"        % "2.1.7" % "test",
   "org.specs2"             %% "specs2"           % "2.3.13" % "test")
 
-//testOptions := Seq(Tests.Filter(s => Seq("Dev").exists(s.contains(_))))
-
 parallelExecution in Test := false
 
 pomExtra := childrenPomExtra.value

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/core/src/main/scala/org/apache/predictionio/workflow/CreateServer.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/predictionio/workflow/CreateServer.scala b/core/src/main/scala/org/apache/predictionio/workflow/CreateServer.scala
index 6f274bc..31b7831 100644
--- a/core/src/main/scala/org/apache/predictionio/workflow/CreateServer.scala
+++ b/core/src/main/scala/org/apache/predictionio/workflow/CreateServer.scala
@@ -51,7 +51,6 @@ import spray.routing._
 import scala.concurrent.ExecutionContext.Implicits.global
 import scala.concurrent.Future
 import scala.concurrent.duration._
-import scala.concurrent.future
 import scala.language.existentials
 import scala.util.{Failure, Random, Success}
 import scalaj.http.HttpOptions
@@ -548,7 +547,7 @@ class ServerActor[Q, P](
                     "prediction" -> prediction)) ++ queryPrId
                 // At this point args.accessKey should be Some(String).
                 val accessKey = args.accessKey.getOrElse("")
-                val f: Future[Int] = future {
+                val f: Future[Int] = Future {
                   scalaj.http.Http(
                     s"http://${args.eventServerIp}:${args.eventServerPort}/" +
                     s"events.json?accessKey=$accessKey").postData(

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/data/build.sbt
----------------------------------------------------------------------
diff --git a/data/build.sbt b/data/build.sbt
index f5e95b5..ddd085d 100644
--- a/data/build.sbt
+++ b/data/build.sbt
@@ -15,28 +15,21 @@
  * limitations under the License.
  */
 
+import PIOBuild._
+
 name := "apache-predictionio-data"
 
 libraryDependencies ++= Seq(
   "com.github.nscala-time" %% "nscala-time"    % "2.6.0",
-  "commons-codec"           % "commons-codec"  % "1.9",
+  "com.google.guava"        % "guava"          % "19.0",
   "io.spray"               %% "spray-can"      % "1.3.3",
   "io.spray"               %% "spray-routing"  % "1.3.3",
-  "io.spray"               %% "spray-testkit"  % "1.3.3" % "test",
-  "mysql"                   % "mysql-connector-java" % "5.1.37" % "optional",
-  "org.apache.hadoop"       % "hadoop-common"  % "2.6.2"
-    exclude("javax.servlet", "servlet-api"),
-  "org.apache.zookeeper"    % "zookeeper"      % "3.4.7"
-    exclude("org.slf4j", "slf4j-api")
-    exclude("org.slf4j", "slf4j-log4j12"),
-  "org.apache.spark"       %% "spark-core"     % sparkVersion.value % "provided",
   "org.apache.spark"       %% "spark-sql"      % sparkVersion.value % "provided",
   "org.clapper"            %% "grizzled-slf4j" % "1.0.2",
   "org.json4s"             %% "json4s-native"  % json4sVersion.value,
-  "org.json4s"             %% "json4s-ext"     % json4sVersion.value,
+  "org.scalikejdbc"        %% "scalikejdbc"    % "2.3.2",
+  "io.spray"               %% "spray-testkit"  % "1.3.3" % "test",
   "org.scalatest"          %% "scalatest"      % "2.1.7" % "test",
-  "org.slf4j"               % "slf4j-log4j12"  % "1.7.18",
-  "org.spark-project.akka" %% "akka-actor"     % "2.3.4-spark",
   "org.specs2"             %% "specs2"         % "2.3.13" % "test")
 
 parallelExecution in Test := false

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/data/src/main/scala/org/apache/predictionio/data/api/EventServer.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/api/EventServer.scala b/data/src/main/scala/org/apache/predictionio/data/api/EventServer.scala
index 648316e..b4392ff 100644
--- a/data/src/main/scala/org/apache/predictionio/data/api/EventServer.scala
+++ b/data/src/main/scala/org/apache/predictionio/data/api/EventServer.scala
@@ -48,8 +48,7 @@ import spray.httpx.Json4sSupport
 import spray.routing._
 import spray.routing.authentication.Authentication
 
-import scala.concurrent.ExecutionContext
-import scala.concurrent.Future
+import scala.concurrent.{ExecutionContext, Future}
 import scala.util.{Try, Success, Failure}
 
 class  EventServiceActor(
@@ -635,7 +634,7 @@ object EventServer {
 }
 
 object Run {
-  def main(args: Array[String]) {
+  def main(args: Array[String]): Unit = {
     EventServer.createEventServer(EventServerConfig(
       ip = "0.0.0.0",
       port = 7070))

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/data/src/main/scala/org/apache/predictionio/data/view/DataView.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/view/DataView.scala b/data/src/main/scala/org/apache/predictionio/data/view/DataView.scala
index 4866b5d..1c47e10 100644
--- a/data/src/main/scala/org/apache/predictionio/data/view/DataView.scala
+++ b/data/src/main/scala/org/apache/predictionio/data/view/DataView.scala
@@ -20,6 +20,7 @@ package org.apache.predictionio.data.view
 
 import org.apache.predictionio.annotation.Experimental
 import org.apache.predictionio.data.storage.Event
+import org.apache.predictionio.data.SparkVersionDependent
 
 import grizzled.slf4j.Logger
 import org.apache.predictionio.data.store.PEventStore
@@ -27,7 +28,7 @@ import org.apache.predictionio.data.store.PEventStore
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.DataFrame
 import org.apache.spark.sql.SaveMode
-import org.apache.spark.sql.SQLContext
+import org.apache.spark.SparkContext
 import org.joda.time.DateTime
 
 import scala.reflect.ClassTag
@@ -64,11 +65,11 @@ object DataView {
     untilTime: Option[DateTime] = None,
     conversionFunction: Event => Option[E],
     name: String = "",
-    version: String = "")(sqlContext: SQLContext): DataFrame = {
+    version: String = "")(sc: SparkContext): DataFrame = {
 
     @transient lazy val logger = Logger[this.type]
 
-    val sc = sqlContext.sparkContext
+    val sqlSession = SparkVersionDependent.sqlSession(sc)
 
     val beginTime = startTime match {
       case Some(t) => t
@@ -85,7 +86,7 @@ object DataView {
     val baseDir = s"${sys.env("PIO_FS_BASEDIR")}/view"
     val fileName = s"$baseDir/$name-$appName-$hash.parquet"
     try {
-      sqlContext.read.parquet(fileName)
+      sqlSession.read.parquet(fileName)
     } catch {
       case e: java.io.FileNotFoundException =>
         logger.info("Cached copy not found, reading from DB.")
@@ -96,11 +97,11 @@ object DataView {
             startTime = startTime,
             untilTime = Some(endTime))(sc)
           .flatMap((e) => conversionFunction(e))
-        import sqlContext.implicits._ // needed for RDD.toDF()
+        import sqlSession.implicits._ // needed for RDD.toDF()
         val resultDF = result.toDF()
 
         resultDF.write.mode(SaveMode.ErrorIfExists).parquet(fileName)
-        sqlContext.read.parquet(fileName)
+        sqlSession.read.parquet(fileName)
       case e: java.lang.RuntimeException =>
         if (e.toString.contains("is not a Parquet file")) {
           logger.error(s"$fileName does not contain a valid Parquet file. " +

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/data/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala
----------------------------------------------------------------------
diff --git a/data/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala b/data/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala
new file mode 100644
index 0000000..cbdebce
--- /dev/null
+++ b/data/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.view
+
+import org.apache.predictionio.data.storage.{DataMap, Event, EventValidation, Storage}
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+import org.joda.time.DateTime
+import org.json4s.JValue
+
+
+// each JValue data associated with the time it is set
+private[predictionio] case class PropTime(val d: JValue, val t: Long) extends Serializable
+
+private[predictionio] case class SetProp (
+  val fields: Map[String, PropTime],
+  // last set time. Note: fields could be empty with valid set time
+  val t: Long) extends Serializable {
+
+  def ++ (that: SetProp): SetProp = {
+    val commonKeys = fields.keySet.intersect(that.fields.keySet)
+
+    val common: Map[String, PropTime] = commonKeys.map { k =>
+      val thisData = this.fields(k)
+      val thatData = that.fields(k)
+      // only keep the value with latest time
+      val v = if (thisData.t > thatData.t) thisData else thatData
+      (k, v)
+    }.toMap
+
+    val combinedFields = common ++
+      (this.fields -- commonKeys) ++ (that.fields -- commonKeys)
+
+    // keep the latest set time
+    val combinedT = if (this.t > that.t) this.t else that.t
+
+    SetProp(
+      fields = combinedFields,
+      t = combinedT
+    )
+  }
+}
+
+private[predictionio] case class UnsetProp (fields: Map[String, Long]) extends Serializable {
+  def ++ (that: UnsetProp): UnsetProp = {
+    val commonKeys = fields.keySet.intersect(that.fields.keySet)
+
+    val common: Map[String, Long] = commonKeys.map { k =>
+      val thisData = this.fields(k)
+      val thatData = that.fields(k)
+      // only keep the value with latest time
+      val v = if (thisData > thatData) thisData else thatData
+      (k, v)
+    }.toMap
+
+    val combinedFields = common ++
+      (this.fields -- commonKeys) ++ (that.fields -- commonKeys)
+
+    UnsetProp(
+      fields = combinedFields
+    )
+  }
+}
+
+private[predictionio] case class DeleteEntity (t: Long) extends Serializable {
+  def ++ (that: DeleteEntity): DeleteEntity = {
+    if (this.t > that.t) this else that
+  }
+}
+
+private[predictionio] case class EventOp (
+  val setProp: Option[SetProp] = None,
+  val unsetProp: Option[UnsetProp] = None,
+  val deleteEntity: Option[DeleteEntity] = None
+) extends Serializable {
+  // Merges two EventOps; each component present on either side is combined with its own ++.
+  def ++ (that: EventOp): EventOp = {
+    EventOp(
+      setProp = (setProp ++ that.setProp).reduceOption(_ ++ _),
+      unsetProp = (unsetProp ++ that.unsetProp).reduceOption(_ ++ _),
+      deleteEntity = (deleteEntity ++ that.deleteEntity).reduceOption(_ ++ _)
+    )
+  }
+  // Properties left after applying unset/delete; None if never set or deleted at/after last set.
+  def toDataMap(): Option[DataMap] = {
+    setProp.flatMap { set =>
+      // A key can be unset without ever having been set, so look it up safely instead of apply().
+      val unsetKeys: Set[String] = unsetProp.map( unset =>
+        unset.fields.filter{ case (k, v) => set.fields.get(k).exists(p => v >= p.t) }.keySet
+      ).getOrElse(Set())
+
+      val combinedFields = deleteEntity.map { delete =>
+        if (delete.t >= set.t) {
+          None
+        } else {
+          val deleteKeys: Set[String] = set.fields
+            .filter { case (k, PropTime(kv, t)) =>
+              (delete.t >= t)
+            }.keySet
+          Some(set.fields -- unsetKeys -- deleteKeys)
+        }
+      }.getOrElse{
+        Some(set.fields -- unsetKeys)
+      }
+
+      // Note: mapValues() doesn't return concrete Map and causes
+      // NotSerializableException issue. Use map(identity) to work around this.
+      // see https://issues.scala-lang.org/browse/SI-7005
+      combinedFields.map(f => DataMap(f.mapValues(_.d).map(identity)))
+    }
+  }
+
+}
+
+private[predictionio] object EventOp {
+  def apply(e: Event): EventOp = {
+    val t = e.eventTime.getMillis
+    e.event match {
+      case "$set" => {
+        val fields = e.properties.fields.mapValues(jv =>
+          PropTime(jv, t)
+        ).map(identity)
+
+        EventOp(
+          setProp = Some(SetProp(fields = fields, t = t))
+        )
+      }
+      case "$unset" => {
+        val fields = e.properties.fields.mapValues(jv => t).map(identity)
+        EventOp(
+          unsetProp = Some(UnsetProp(fields = fields))
+        )
+      }
+      case "$delete" => {
+        EventOp(
+          deleteEntity = Some(DeleteEntity(t))
+        )
+      }
+      case _ => {
+        EventOp()
+      }
+    }
+  }
+}
+
+@deprecated("Use PEvents or PEventStore instead.", "0.9.2")
+class PBatchView(
+  val appId: Int,
+  val startTime: Option[DateTime],
+  val untilTime: Option[DateTime],
+  val sc: SparkContext) {
+
+  // NOTE: parallel Events DB interface
+  @transient lazy val eventsDb = Storage.getPEvents()
+  // Events fetched lazily for appId using the constructor's startTime/untilTime, any entity.
+  @transient lazy val _events: RDD[Event] =
+    eventsDb.getByAppIdAndTimeAndEntity(
+      appId = appId,
+      startTime = startTime,
+      untilTime = untilTime,
+      entityType = None,
+      entityId = None)(sc)
+
+  // TODO: change to use EventSeq?
+  @transient lazy val events: RDD[Event] = _events
+  // Folds the special events of each entity of entityType into a DataMap keyed by entityId.
+  def aggregateProperties(
+    entityType: String,
+    startTimeOpt: Option[DateTime] = None,
+    untilTimeOpt: Option[DateTime] = None
+  ): RDD[(String, DataMap)] = {
+    // NOTE(review): startTimeOpt and untilTimeOpt are never read below; confirm this is intended.
+    _events
+      .filter( e => ((e.entityType == entityType) &&
+        (EventValidation.isSpecialEvents(e.event))) )
+      .map( e => (e.entityId, EventOp(e) ))
+      .aggregateByKey[EventOp](EventOp())(
+        // within same partition
+        seqOp = { case (u, v) => u ++ v },
+        // across partition
+        combOp = { case (accu, u) => accu ++ u }
+      )
+      .mapValues(_.toDataMap)
+      .filter{ case (k, v) => v.isDefined }
+      .map{ case (k, v) => (k, v.get) }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/data/src/main/spark-1/org/apache/predictionio/data/SparkVersionDependent.scala
----------------------------------------------------------------------
diff --git a/data/src/main/spark-1/org/apache/predictionio/data/SparkVersionDependent.scala b/data/src/main/spark-1/org/apache/predictionio/data/SparkVersionDependent.scala
new file mode 100644
index 0000000..0652e0b
--- /dev/null
+++ b/data/src/main/spark-1/org/apache/predictionio/data/SparkVersionDependent.scala
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data
+
+import org.apache.spark.SparkContext
+import org.apache.spark.sql.SQLContext
+
+object SparkVersionDependent {
+  // Spark 1.x SQL entry point for `sc`; reuses the shared SQLContext instead of allocating one.
+  def sqlSession(sc: SparkContext): SQLContext = {
+    SQLContext.getOrCreate(sc)
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/data/src/main/spark-2/org/apache/predictionio/data/SparkVersionDependent.scala
----------------------------------------------------------------------
diff --git a/data/src/main/spark-2/org/apache/predictionio/data/SparkVersionDependent.scala b/data/src/main/spark-2/org/apache/predictionio/data/SparkVersionDependent.scala
new file mode 100644
index 0000000..3d07bdf
--- /dev/null
+++ b/data/src/main/spark-2/org/apache/predictionio/data/SparkVersionDependent.scala
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data
+
+import org.apache.spark.SparkContext
+import org.apache.spark.sql.SparkSession
+
+object SparkVersionDependent {
+  // Spark 2.x SQL entry point via the session builder. NOTE(review): `sc` is not referenced here.
+  def sqlSession(sc: SparkContext): SparkSession = {
+    SparkSession.builder().getOrCreate()
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/e2/build.sbt
----------------------------------------------------------------------
diff --git a/e2/build.sbt b/e2/build.sbt
index 50de5e5..0774a55 100644
--- a/e2/build.sbt
+++ b/e2/build.sbt
@@ -15,14 +15,14 @@
  * limitations under the License.
  */
 
+import PIOBuild._
+
 name := "apache-predictionio-e2"
 
 parallelExecution in Test := false
 
 libraryDependencies ++= Seq(
-  "org.apache.spark" %% "spark-core" % sparkVersion.value % "provided",
   "org.apache.spark" %% "spark-mllib" % sparkVersion.value % "provided",
-  "org.clapper" %% "grizzled-slf4j" % "1.0.2",
   "org.scalatest" %% "scalatest" % "2.2.5" % "test")
 
 pomExtra := childrenPomExtra.value

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/make-distribution.sh
----------------------------------------------------------------------
diff --git a/make-distribution.sh b/make-distribution.sh
index c360c0e..e92178f 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -21,15 +21,12 @@ set -e
 
 usage ()
 {
-    echo "Usage: $0 [-h|--help] [--with-es=x]"
+    echo "Usage: $0 [-h|--help]"
     echo ""
     echo "  -h|--help    Show usage"
-    echo ""
-    echo "  --with-es=1  Build distribution with Elasticsearch 1 support as default"
-    echo "  --with-es=5  Build distribution with Elasticsearch 5 support as default"
 }
 
-ES_VERSION=1
+JAVA_PROPS=()
 
 for i in "$@"
 do
@@ -39,8 +36,8 @@ case $i in
     shift
     exit
     ;;
-    --with-es=*)
-    ES_VERSION="${i#*=}"
+    -D*)
+    JAVA_PROPS+=("$i")
     shift
     ;;
     *)
@@ -50,23 +47,19 @@ case $i in
 esac
 done
 
-if [ "$ES_VERSION" = "1" ] || [ "$ES_VERSION" = "5" ]
-then
-    echo -e "\033[0;32mBuilding with Elasticsearch $ES_VERSION support as the default choice\033[0m"
-else
-    usage
-    exit 1
-fi
-
 FWDIR="$(cd `dirname $0`; pwd)"
 DISTDIR="${FWDIR}/dist"
 
-VERSION=$(grep version ${FWDIR}/build.sbt | grep ThisBuild | grep -o '".*"' | sed 's/"//g')
+VERSION=$(grep ^version ${FWDIR}/build.sbt | grep ThisBuild | grep -o '".*"' | sed 's/"//g')
 
 echo "Building binary distribution for PredictionIO $VERSION..."
 
 cd ${FWDIR}
-sbt/sbt common/publishLocal data/publishLocal core/publishLocal e2/publishLocal dataElasticsearch1/assembly dataElasticsearch/assembly dataHbase/assembly dataHdfs/assembly dataJdbc/assembly dataLocalfs/assembly tools/assembly
+set -x
+sbt/sbt "${JAVA_PROPS[@]}" clean
+sbt/sbt "${JAVA_PROPS[@]}" printBuildInfo
+sbt/sbt "${JAVA_PROPS[@]}" publishLocal assembly storage/assembly
+set +x
 
 cd ${FWDIR}
 rm -rf ${DISTDIR}
@@ -74,8 +67,8 @@ mkdir -p ${DISTDIR}/bin
 mkdir -p ${DISTDIR}/conf
 mkdir -p ${DISTDIR}/lib
 mkdir -p ${DISTDIR}/lib/spark
-mkdir -p ${DISTDIR}/lib/extra
 mkdir -p ${DISTDIR}/project
+
 mkdir -p ${DISTDIR}/sbt
 
 cp ${FWDIR}/bin/* ${DISTDIR}/bin || :
@@ -85,13 +78,6 @@ cp ${FWDIR}/sbt/sbt ${DISTDIR}/sbt
 cp ${FWDIR}/assembly/*assembly*jar ${DISTDIR}/lib
 cp ${FWDIR}/assembly/spark/*jar ${DISTDIR}/lib/spark
 
-if [ "$ES_VERSION" = "5" ]
-then
-    mv ${DISTDIR}/lib/spark/pio-data-elasticsearch1-assembly-*.jar ${DISTDIR}/lib/extra
-else
-    mv ${DISTDIR}/lib/spark/pio-data-elasticsearch-assembly-*.jar ${DISTDIR}/lib/extra
-fi
-
 rm -f ${DISTDIR}/lib/*javadoc.jar
 rm -f ${DISTDIR}/lib/*sources.jar
 rm -f ${DISTDIR}/conf/pio-env.sh

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/project/Build.scala
----------------------------------------------------------------------
diff --git a/project/Build.scala b/project/Build.scala
deleted file mode 100644
index 885073a..0000000
--- a/project/Build.scala
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import sbt._
-import Keys._
-
-object PIOBuild extends Build {
-  val elasticsearchVersion = SettingKey[String](
-    "elasticsearch-version",
-    "The version of Elasticsearch used for building.")
-  val json4sVersion = SettingKey[String](
-    "json4s-version",
-    "The version of JSON4S used for building.")
-  val sparkVersion = SettingKey[String](
-    "spark-version",
-    "The version of Apache Spark used for building.")
-  val childrenPomExtra = SettingKey[scala.xml.NodeSeq](
-    "children-pom-extra",
-    "Extra POM data for children projects.")
-}

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/project/PIOBuild.scala
----------------------------------------------------------------------
diff --git a/project/PIOBuild.scala b/project/PIOBuild.scala
new file mode 100644
index 0000000..30fca65
--- /dev/null
+++ b/project/PIOBuild.scala
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import sbt._
+
+object PIOBuild {
+  val elasticsearchVersion = settingKey[String]("The version of Elasticsearch used for building")
+  val json4sVersion = settingKey[String]("The version of JSON4S used for building")
+  val sparkVersion = settingKey[String]("The version of Apache Spark used for building")
+  val sparkBinaryVersion = settingKey[String]("The binary version of Apache Spark used for building")
+  val hadoopVersion = settingKey[String]("The version of Apache Hadoop used for building")
+  val akkaVersion = settingKey[String]("The version of Akka used for building")
+
+  val childrenPomExtra = settingKey[scala.xml.NodeSeq]("Extra POM data for children projects")
+  val elasticsearchSparkArtifact = settingKey[String]("Name of Elasticsearch-Spark artifact used for building")
+
+  def binaryVersion(versionString: String): String = versionString.split('.').take(2).mkString(".")
+  def majorVersion(versionString: String): Int = versionString.split('.')(0).toInt
+  def minorVersion(versionString: String): Int = versionString.split('.')(1).toInt
+
+  lazy val printBuildInfo = taskKey[Unit]("Print build information")
+}

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/project/assembly.sbt
----------------------------------------------------------------------
diff --git a/project/assembly.sbt b/project/assembly.sbt
index 49085ee..39c1bb8 100644
--- a/project/assembly.sbt
+++ b/project/assembly.sbt
@@ -1 +1 @@
-addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.1")
+addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.3")

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/project/plugins.sbt
----------------------------------------------------------------------
diff --git a/project/plugins.sbt b/project/plugins.sbt
index 3edaf67..2f21e00 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -2,7 +2,7 @@ addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.3.2")
 
 addSbtPlugin("com.jsuereth" % "sbt-pgp" % "1.0.0")
 
-addSbtPlugin("com.typesafe.sbt" % "sbt-twirl" % "1.0.3")
+addSbtPlugin("com.typesafe.sbt" % "sbt-twirl" % "1.1.1")
 
 addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "1.1")
 
@@ -10,4 +10,4 @@ addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "0.8.0")
 
 resolvers += "sonatype-releases" at "https://oss.sonatype.org/content/repositories/releases/"
 
-addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.1.0")
+addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.3.5")

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/storage/elasticsearch/build.sbt
----------------------------------------------------------------------
diff --git a/storage/elasticsearch/build.sbt b/storage/elasticsearch/build.sbt
index 7d9b2ad..52dc965 100644
--- a/storage/elasticsearch/build.sbt
+++ b/storage/elasticsearch/build.sbt
@@ -15,36 +15,26 @@
  * limitations under the License.
  */
 
+import PIOBuild._
+
 name := "apache-predictionio-data-elasticsearch"
 
-elasticsearchVersion := "5.2.1"
+elasticsearchSparkArtifact := (if (majorVersion(sparkVersion.value) == 2) "elasticsearch-spark-20" else "elasticsearch-spark-13")
 
 libraryDependencies ++= Seq(
   "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided",
-  "org.apache.predictionio" %% "apache-predictionio-data" % version.value % "provided",
-  "org.apache.spark"        %% "spark-core"     % sparkVersion.value % "provided",
-  "org.apache.spark"        %% "spark-sql"      % sparkVersion.value % "provided",
-  "org.elasticsearch.client" % "rest"           % elasticsearchVersion.value,
-  "org.elasticsearch"       %% "elasticsearch-spark-13" % elasticsearchVersion.value
-    exclude("org.apache.spark", "spark-sql_2.10")
-    exclude("org.apache.spark", "spark-streaming_2.10"),
-  "org.elasticsearch"        % "elasticsearch-hadoop-mr" % elasticsearchVersion.value,
-  "org.scalatest"           %% "scalatest"      % "2.1.7" % "test",
-  "org.specs2"              %% "specs2"         % "2.3.13" % "test")
+  "org.apache.spark"        %% "spark-core"               % sparkVersion.value % "provided",
+  "org.elasticsearch.client" % "rest"                     % elasticsearchVersion.value,
+  "org.elasticsearch"       %% elasticsearchSparkArtifact.value % elasticsearchVersion.value
+    exclude("org.apache.spark", "*"),
+  "org.elasticsearch"        % "elasticsearch-hadoop-mr"  % elasticsearchVersion.value,
+  "org.scalatest"           %% "scalatest"                % "2.1.7" % "test")
 
 parallelExecution in Test := false
 
 pomExtra := childrenPomExtra.value
 
-assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false, includeDependency = true)
-
-assemblyMergeStrategy in assembly := {
-  case PathList("META-INF", "LICENSE.txt") => MergeStrategy.concat
-  case PathList("META-INF", "NOTICE.txt")  => MergeStrategy.concat
-  case x =>
-    val oldStrategy = (assemblyMergeStrategy in assembly).value
-    oldStrategy(x)
-}
+assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)
 
 assemblyShadeRules in assembly := Seq(
   ShadeRule.rename("org.apache.http.**" -> "shadeio.data.http.@1").inAll

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/storage/elasticsearch1/build.sbt
----------------------------------------------------------------------
diff --git a/storage/elasticsearch1/build.sbt b/storage/elasticsearch1/build.sbt
index dde7285..bb68b11 100644
--- a/storage/elasticsearch1/build.sbt
+++ b/storage/elasticsearch1/build.sbt
@@ -15,30 +15,20 @@
  * limitations under the License.
  */
 
-name := "apache-predictionio-data-elasticsearch1"
+import PIOBuild._
 
-elasticsearchVersion := "1.7.3"
+name := "apache-predictionio-data-elasticsearch1"
 
 libraryDependencies ++= Seq(
   "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided",
-  "org.apache.predictionio" %% "apache-predictionio-data" % version.value % "provided",
   "org.elasticsearch"        % "elasticsearch"  % elasticsearchVersion.value,
-  "org.scalatest"           %% "scalatest"      % "2.1.7" % "test",
-  "org.specs2"              %% "specs2"         % "2.3.13" % "test")
+  "org.scalatest"           %% "scalatest"      % "2.1.7" % "test")
 
 parallelExecution in Test := false
 
 pomExtra := childrenPomExtra.value
 
-assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false, includeDependency = true)
-
-assemblyMergeStrategy in assembly := {
-  case PathList("META-INF", "LICENSE.txt") => MergeStrategy.concat
-  case PathList("META-INF", "NOTICE.txt")  => MergeStrategy.concat
-  case x =>
-    val oldStrategy = (assemblyMergeStrategy in assembly).value
-    oldStrategy(x)
-}
+assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)
 
 // skip test in assembly
 test in assembly := {}

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/storage/hbase/build.sbt
----------------------------------------------------------------------
diff --git a/storage/hbase/build.sbt b/storage/hbase/build.sbt
index 513e294..19aa126 100644
--- a/storage/hbase/build.sbt
+++ b/storage/hbase/build.sbt
@@ -15,11 +15,12 @@
  * limitations under the License.
  */
 
+import PIOBuild._
+
 name := "apache-predictionio-data-hbase"
 
 libraryDependencies ++= Seq(
   "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided",
-  "org.apache.predictionio" %% "apache-predictionio-data" % version.value % "provided",
   "org.apache.spark"        %% "spark-core"     % sparkVersion.value % "provided",
   "org.apache.hbase"         % "hbase-common"   % "0.98.5-hadoop2",
   "org.apache.hbase"         % "hbase-client"   % "0.98.5-hadoop2"
@@ -32,22 +33,13 @@ libraryDependencies ++= Seq(
     exclude("org.mortbay.jetty", "servlet-api-2.5")
     exclude("org.mortbay.jetty", "jsp-api-2.1")
     exclude("org.mortbay.jetty", "jsp-2.1"),
-  "org.scalatest"           %% "scalatest"      % "2.1.7" % "test",
   "org.specs2"              %% "specs2"         % "2.3.13" % "test")
 
 parallelExecution in Test := false
 
 pomExtra := childrenPomExtra.value
 
-assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false, includeDependency = true)
-
-assemblyMergeStrategy in assembly := {
-  case PathList("META-INF", "LICENSE.txt") => MergeStrategy.concat
-  case PathList("META-INF", "NOTICE.txt")  => MergeStrategy.concat
-  case x =>
-    val oldStrategy = (assemblyMergeStrategy in assembly).value
-    oldStrategy(x)
-}
+assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)
 
 // skip test in assembly
 test in assembly := {}

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/storage/hbase/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala
----------------------------------------------------------------------
diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala
deleted file mode 100644
index b453820..0000000
--- a/storage/hbase/src/main/scala/org/apache/predictionio/data/view/PBatchView.scala
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.predictionio.data.view
-
-import org.apache.predictionio.data.storage.hbase.HBPEvents
-import org.apache.predictionio.data.storage.Event
-import org.apache.predictionio.data.storage.EventValidation
-import org.apache.predictionio.data.storage.DataMap
-import org.apache.predictionio.data.storage.Storage
-
-import org.joda.time.DateTime
-
-import org.json4s.JValue
-
-import org.apache.spark.SparkContext
-import org.apache.spark.SparkContext._
-import org.apache.spark.rdd.RDD
-
-
-// each JValue data associated with the time it is set
-private[predictionio] case class PropTime(val d: JValue, val t: Long) extends Serializable
-
-private[predictionio] case class SetProp (
-  val fields: Map[String, PropTime],
-  // last set time. Note: fields could be empty with valid set time
-  val t: Long) extends Serializable {
-
-  def ++ (that: SetProp): SetProp = {
-    val commonKeys = fields.keySet.intersect(that.fields.keySet)
-
-    val common: Map[String, PropTime] = commonKeys.map { k =>
-      val thisData = this.fields(k)
-      val thatData = that.fields(k)
-      // only keep the value with latest time
-      val v = if (thisData.t > thatData.t) thisData else thatData
-      (k, v)
-    }.toMap
-
-    val combinedFields = common ++
-      (this.fields -- commonKeys) ++ (that.fields -- commonKeys)
-
-    // keep the latest set time
-    val combinedT = if (this.t > that.t) this.t else that.t
-
-    SetProp(
-      fields = combinedFields,
-      t = combinedT
-    )
-  }
-}
-
-private[predictionio] case class UnsetProp (fields: Map[String, Long]) extends Serializable {
-  def ++ (that: UnsetProp): UnsetProp = {
-    val commonKeys = fields.keySet.intersect(that.fields.keySet)
-
-    val common: Map[String, Long] = commonKeys.map { k =>
-      val thisData = this.fields(k)
-      val thatData = that.fields(k)
-      // only keep the value with latest time
-      val v = if (thisData > thatData) thisData else thatData
-      (k, v)
-    }.toMap
-
-    val combinedFields = common ++
-      (this.fields -- commonKeys) ++ (that.fields -- commonKeys)
-
-    UnsetProp(
-      fields = combinedFields
-    )
-  }
-}
-
-private[predictionio] case class DeleteEntity (t: Long) extends Serializable {
-  def ++ (that: DeleteEntity): DeleteEntity = {
-    if (this.t > that.t) this else that
-  }
-}
-
-private[predictionio] case class EventOp (
-  val setProp: Option[SetProp] = None,
-  val unsetProp: Option[UnsetProp] = None,
-  val deleteEntity: Option[DeleteEntity] = None
-) extends Serializable {
-
-  def ++ (that: EventOp): EventOp = {
-    EventOp(
-      setProp = (setProp ++ that.setProp).reduceOption(_ ++ _),
-      unsetProp = (unsetProp ++ that.unsetProp).reduceOption(_ ++ _),
-      deleteEntity = (deleteEntity ++ that.deleteEntity).reduceOption(_ ++ _)
-    )
-  }
-
-  def toDataMap(): Option[DataMap] = {
-    setProp.flatMap { set =>
-
-      val unsetKeys: Set[String] = unsetProp.map( unset =>
-        unset.fields.filter{ case (k, v) => (v >= set.fields(k).t) }.keySet
-      ).getOrElse(Set())
-
-      val combinedFields = deleteEntity.map { delete =>
-        if (delete.t >= set.t) {
-          None
-        } else {
-          val deleteKeys: Set[String] = set.fields
-            .filter { case (k, PropTime(kv, t)) =>
-              (delete.t >= t)
-            }.keySet
-          Some(set.fields -- unsetKeys -- deleteKeys)
-        }
-      }.getOrElse{
-        Some(set.fields -- unsetKeys)
-      }
-
-      // Note: mapValues() doesn't return concrete Map and causes
-      // NotSerializableException issue. Use map(identity) to work around this.
-      // see https://issues.scala-lang.org/browse/SI-7005
-      combinedFields.map(f => DataMap(f.mapValues(_.d).map(identity)))
-    }
-  }
-
-}
-
-private[predictionio] object EventOp {
-  def apply(e: Event): EventOp = {
-    val t = e.eventTime.getMillis
-    e.event match {
-      case "$set" => {
-        val fields = e.properties.fields.mapValues(jv =>
-          PropTime(jv, t)
-        ).map(identity)
-
-        EventOp(
-          setProp = Some(SetProp(fields = fields, t = t))
-        )
-      }
-      case "$unset" => {
-        val fields = e.properties.fields.mapValues(jv => t).map(identity)
-        EventOp(
-          unsetProp = Some(UnsetProp(fields = fields))
-        )
-      }
-      case "$delete" => {
-        EventOp(
-          deleteEntity = Some(DeleteEntity(t))
-        )
-      }
-      case _ => {
-        EventOp()
-      }
-    }
-  }
-}
-
-@deprecated("Use PEvents or PEventStore instead.", "0.9.2")
-class PBatchView(
-  val appId: Int,
-  val startTime: Option[DateTime],
-  val untilTime: Option[DateTime],
-  val sc: SparkContext) {
-
-  // NOTE: parallel Events DB interface
-  @transient lazy val eventsDb = Storage.getPEvents()
-
-  @transient lazy val _events: RDD[Event] =
-    eventsDb.getByAppIdAndTimeAndEntity(
-      appId = appId,
-      startTime = startTime,
-      untilTime = untilTime,
-      entityType = None,
-      entityId = None)(sc)
-
-  // TODO: change to use EventSeq?
-  @transient lazy val events: RDD[Event] = _events
-
-  def aggregateProperties(
-    entityType: String,
-    startTimeOpt: Option[DateTime] = None,
-    untilTimeOpt: Option[DateTime] = None
-  ): RDD[(String, DataMap)] = {
-
-    _events
-      .filter( e => ((e.entityType == entityType) &&
-        (EventValidation.isSpecialEvents(e.event))) )
-      .map( e => (e.entityId, EventOp(e) ))
-      .aggregateByKey[EventOp](EventOp())(
-        // within same partition
-        seqOp = { case (u, v) => u ++ v },
-        // across partition
-        combOp = { case (accu, u) => accu ++ u }
-      )
-      .mapValues(_.toDataMap)
-      .filter{ case (k, v) => v.isDefined }
-      .map{ case (k, v) => (k, v.get) }
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/storage/hdfs/build.sbt
----------------------------------------------------------------------
diff --git a/storage/hdfs/build.sbt b/storage/hdfs/build.sbt
index 7ddc86c..8bae2a8 100644
--- a/storage/hdfs/build.sbt
+++ b/storage/hdfs/build.sbt
@@ -15,27 +15,25 @@
  * limitations under the License.
  */
 
+import PIOBuild._
+
 name := "apache-predictionio-data-hdfs"
 
 libraryDependencies ++= Seq(
-  "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided",
+  "org.apache.hadoop"        % "hadoop-common"            % hadoopVersion.value
+    exclude("commons-beanutils", "*"),
   "org.apache.predictionio" %% "apache-predictionio-data" % version.value % "provided",
-  "org.scalatest"           %% "scalatest"      % "2.1.7" % "test",
-  "org.specs2"              %% "specs2"         % "2.3.13" % "test")
+  "org.scalatest"           %% "scalatest"                % "2.1.7" % "test")
 
 parallelExecution in Test := false
 
 pomExtra := childrenPomExtra.value
 
-assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false, includeDependency = true)
+assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)
 
-assemblyMergeStrategy in assembly := {
-  case PathList("META-INF", "LICENSE.txt") => MergeStrategy.concat
-  case PathList("META-INF", "NOTICE.txt")  => MergeStrategy.concat
-  case x =>
-    val oldStrategy = (assemblyMergeStrategy in assembly).value
-    oldStrategy(x)
-}
+assemblyOption in assembly := (assemblyOption in assembly).value.copy(
+  includeScala = false,
+  excludedJars = (fullClasspath in assembly).value.filter {_.data.getName startsWith "apache-predictionio"})
 
 // skip test in assembly
 test in assembly := {}

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/storage/jdbc/build.sbt
----------------------------------------------------------------------
diff --git a/storage/jdbc/build.sbt b/storage/jdbc/build.sbt
index 2011722..dc98e21 100644
--- a/storage/jdbc/build.sbt
+++ b/storage/jdbc/build.sbt
@@ -15,30 +15,22 @@
  * limitations under the License.
  */
 
+import PIOBuild._
+
 name := "apache-predictionio-data-jdbc"
 
 libraryDependencies ++= Seq(
   "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided",
-  "org.apache.predictionio" %% "apache-predictionio-data" % version.value % "provided",
   "org.apache.spark"        %% "spark-sql"      % sparkVersion.value % "provided",
-  "org.postgresql"           % "postgresql"     % "9.4-1204-jdbc41",
   "org.scalikejdbc"         %% "scalikejdbc"    % "2.3.5",
-  "org.scalatest"           %% "scalatest"      % "2.1.7" % "test",
+  "org.postgresql"           % "postgresql"     % "9.4-1204-jdbc41" % "test",
   "org.specs2"              %% "specs2"         % "2.3.13" % "test")
 
 parallelExecution in Test := false
 
 pomExtra := childrenPomExtra.value
 
-assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false, includeDependency = true)
-
-assemblyMergeStrategy in assembly := {
-  case PathList("META-INF", "LICENSE.txt") => MergeStrategy.concat
-  case PathList("META-INF", "NOTICE.txt")  => MergeStrategy.concat
-  case x =>
-    val oldStrategy = (assemblyMergeStrategy in assembly).value
-    oldStrategy(x)
-}
+assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)
 
 // skip test in assembly
 test in assembly := {}

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCPEvents.scala
----------------------------------------------------------------------
diff --git a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCPEvents.scala b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCPEvents.scala
index 2e6ee83..ff16d5d 100644
--- a/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCPEvents.scala
+++ b/storage/jdbc/src/main/scala/org/apache/predictionio/data/storage/jdbc/JDBCPEvents.scala
@@ -15,16 +15,17 @@
  * limitations under the License.
  */
 
-
 package org.apache.predictionio.data.storage.jdbc
 
 import java.sql.{DriverManager, ResultSet}
 
 import com.github.nscala_time.time.Imports._
-import org.apache.predictionio.data.storage.{DataMap, Event, PEvents, StorageClientConfig}
+import org.apache.predictionio.data.storage.{
+  DataMap, Event, PEvents, StorageClientConfig}
+import org.apache.predictionio.data.SparkVersionDependent
 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.{JdbcRDD, RDD}
-import org.apache.spark.sql.{SQLContext, SaveMode}
+import org.apache.spark.sql.SaveMode
 import org.json4s.JObject
 import org.json4s.native.Serialization
 import scalikejdbc._
@@ -32,6 +33,7 @@ import scalikejdbc._
 /** JDBC implementation of [[PEvents]] */
 class JDBCPEvents(client: String, config: StorageClientConfig, namespace: String) extends PEvents {
   @transient private implicit lazy val formats = org.json4s.DefaultFormats
+
   def find(
     appId: Int,
     channelId: Option[Int] = None,
@@ -42,6 +44,7 @@ class JDBCPEvents(client: String, config: StorageClientConfig, namespace: String
     eventNames: Option[Seq[String]] = None,
     targetEntityType: Option[Option[String]] = None,
     targetEntityId: Option[Option[String]] = None)(sc: SparkContext): RDD[Event] = {
+
     val lower = startTime.map(_.getMillis).getOrElse(0.toLong)
     /** Change the default upper bound from +100 to +1 year because MySQL's
       * FROM_UNIXTIME(t) will return NULL if we use +100 years.
@@ -118,13 +121,12 @@ class JDBCPEvents(client: String, config: StorageClientConfig, namespace: String
   }
 
   def write(events: RDD[Event], appId: Int, channelId: Option[Int])(sc: SparkContext): Unit = {
-    val sqlContext = new SQLContext(sc)
-
-    import sqlContext.implicits._
+    val sqlSession = SparkVersionDependent.sqlSession(sc)
+    import sqlSession.implicits._
 
     val tableName = JDBCUtils.eventTableName(namespace, appId, channelId)
 
-    val eventTableColumns = Seq[String](
+    val eventsColumnNamesInDF = Seq[String](
         "id"
       , "event"
       , "entityType"
@@ -139,11 +141,16 @@ class JDBCPEvents(client: String, config: StorageClientConfig, namespace: String
       , "creationTime"
       , "creationTimeZone")
 
+    // Necessary for handling postgres "case-sensitivity"
+    val eventsColumnNamesInSQL = JDBCUtils.driverType(client) match {
+      case "postgresql" => eventsColumnNamesInDF.map(_.toLowerCase)
+      case _ => eventsColumnNamesInDF
+    }
     val eventDF = events.map(x =>
                               Event(eventId = None, event = x.event, entityType = x.entityType,
                               entityId = x.entityId, targetEntityType = x.targetEntityType,
                               targetEntityId = x.targetEntityId, properties = x.properties,
-                              eventTime = x.eventTime, tags = x.tags, prId= x.prId,
+                              eventTime = x.eventTime, tags = x.tags, prId = x.prId,
                               creationTime = x.eventTime)
                             )
                         .map { event =>
@@ -160,9 +167,8 @@ class JDBCPEvents(client: String, config: StorageClientConfig, namespace: String
         , event.prId
         , new java.sql.Timestamp(event.creationTime.getMillis)
         , event.creationTime.getZone.getID)
-    }.toDF(eventTableColumns:_*)
+    }.toDF(eventsColumnNamesInSQL:_*)
 
-    // spark version 1.4.0 or higher
     val prop = new java.util.Properties
     prop.setProperty("user", config.properties("USERNAME"))
     prop.setProperty("password", config.properties("PASSWORD"))

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/storage/localfs/build.sbt
----------------------------------------------------------------------
diff --git a/storage/localfs/build.sbt b/storage/localfs/build.sbt
index f9e101d..f8c6b8e 100644
--- a/storage/localfs/build.sbt
+++ b/storage/localfs/build.sbt
@@ -15,27 +15,19 @@
  * limitations under the License.
  */
 
+import PIOBuild._
+
 name := "apache-predictionio-data-localfs"
 
 libraryDependencies ++= Seq(
   "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided",
-  "org.apache.predictionio" %% "apache-predictionio-data" % version.value % "provided",
-  "org.scalatest"           %% "scalatest"      % "2.1.7" % "test",
-  "org.specs2"              %% "specs2"         % "2.3.13" % "test")
+  "org.scalatest"           %% "scalatest"      % "2.1.7" % "test")
 
 parallelExecution in Test := false
 
 pomExtra := childrenPomExtra.value
 
-assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false, includeDependency = true)
-
-assemblyMergeStrategy in assembly := {
-  case PathList("META-INF", "LICENSE.txt") => MergeStrategy.concat
-  case PathList("META-INF", "NOTICE.txt")  => MergeStrategy.concat
-  case x =>
-    val oldStrategy = (assemblyMergeStrategy in assembly).value
-    oldStrategy(x)
-}
+assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)
 
 // skip test in assembly
 test in assembly := {}

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/Dockerfile
----------------------------------------------------------------------
diff --git a/tests/Dockerfile b/tests/Dockerfile
index 94f5688..619bdf8 100644
--- a/tests/Dockerfile
+++ b/tests/Dockerfile
@@ -17,16 +17,25 @@
 
 FROM predictionio/pio
 
-ENV SPARK_VERSION 1.6.3
-ENV ELASTICSEARCH_VERSION 5.2.1
-ENV HBASE_VERSION 1.0.0
+ARG SPARK_ARCHIVE
+ARG SPARK_DIR
+ARG PGSQL_JAR
+ARG PIO_SCALA_VERSION
+ARG PIO_SPARK_VERSION
+ARG PIO_ELASTICSEARCH_VERSION
+
+ENV PIO_SCALA_VERSION=$PIO_SCALA_VERSION
+ENV PIO_SPARK_VERSION=$PIO_SPARK_VERSION
+ENV PIO_ELASTICSEARCH_VERSION=$PIO_ELASTICSEARCH_VERSION
+
+ENV PGSQL_JAR=$PGSQL_JAR
 
-ADD docker-files/spark-${SPARK_VERSION}-bin-hadoop2.6.tgz /vendors
 # WORKAROUND: es-hadoop stops on RDD#take(1)
-RUN echo "spark.locality.wait.node 0s" > /vendors/spark-${SPARK_VERSION}-bin-hadoop2.6/conf/spark-defaults.conf
-ENV SPARK_HOME /vendors/spark-${SPARK_VERSION}-bin-hadoop2.6
+ADD docker-files/${SPARK_ARCHIVE} /vendors
+RUN echo "spark.locality.wait.node 0s" > /vendors/${SPARK_DIR}/conf/spark-defaults.conf
+ENV SPARK_HOME /vendors/${SPARK_DIR}
 
-COPY docker-files/postgresql-9.4-1204.jdbc41.jar /drivers/postgresql-9.4-1204.jdbc41.jar
+COPY docker-files/${PGSQL_JAR} /drivers/${PGSQL_JAR}
 COPY docker-files/init.sh init.sh
 COPY docker-files/env-conf/hbase-site.xml ${PIO_HOME}/conf/hbase-site.xml
 COPY docker-files/env-conf/pio-env.sh ${PIO_HOME}/conf/pio-env.sh

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/Dockerfile-es1
----------------------------------------------------------------------
diff --git a/tests/Dockerfile-es1 b/tests/Dockerfile-es1
deleted file mode 100644
index 73d4c36..0000000
--- a/tests/Dockerfile-es1
+++ /dev/null
@@ -1,57 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-FROM predictionio/pio
-
-ENV SPARK_VERSION 1.6.3
-ENV ELASTICSEARCH_VERSION 1.7
-ENV HBASE_VERSION 1.0.0
-
-ADD docker-files/spark-${SPARK_VERSION}-bin-hadoop2.6.tgz /vendors
-# WORKAROUND: es-hadoop stops on RDD#take(1)
-RUN echo "spark.locality.wait.node 0s" > /vendors/spark-${SPARK_VERSION}-bin-hadoop2.6/conf/spark-defaults.conf
-ENV SPARK_HOME /vendors/spark-${SPARK_VERSION}-bin-hadoop2.6
-
-COPY docker-files/postgresql-9.4-1204.jdbc41.jar /drivers/postgresql-9.4-1204.jdbc41.jar
-COPY docker-files/init.sh init.sh
-COPY docker-files/env-conf/hbase-site.xml ${PIO_HOME}/conf/hbase-site.xml
-COPY docker-files/env-conf/pio-env-es1.sh ${PIO_HOME}/conf/pio-env.sh
-COPY docker-files/pgpass /root/.pgpass
-RUN chmod 600 /root/.pgpass
-
-# Python
-RUN pip install python-dateutil
-RUN pip install pytz
-
-# Default repositories setup
-ENV PIO_STORAGE_REPOSITORIES_METADATA_SOURCE PGSQL
-ENV PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE PGSQL
-ENV PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE PGSQL
-
-# JVM settings
-ENV JVM_OPTS '-Dfile.encoding=UTF8 -Xms2048M -Xmx2048M -Xss8M -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=256M'
-
-# Expose relevant ports
-# pio engine
-EXPOSE 8000
-# eventserver
-EXPOSE 7070
-
-ENV SLEEP_TIME 30
-
-ENTRYPOINT ["/init.sh"]
-CMD 'bash'

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/README.md
----------------------------------------------------------------------
diff --git a/tests/README.md b/tests/README.md
index 95b3fdf..236d168 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -34,6 +34,12 @@ To download the image run:
 $ docker pull predictionio/pio-testing
 ```
 
+To build the image use the script:
+```
+$ tests/build_docker.sh
+```
+Building through this script is necessary because it infers the proper versions of the dependencies (e.g. Spark) to include in the image.
+
 The most convenient way to make use of it is to execute ***run_docker.sh*** script passing it the configuration, the path to PredictionIO's repository with archived snapshot and the command to run. When no command is provided it opens a bash shell inside the docker image. Example of usage:
 ```sh
 $ ./run_docker.sh ELASTICSEARCH HBASE LOCALFS \

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/after_script.travis.sh
----------------------------------------------------------------------
diff --git a/tests/after_script.travis.sh b/tests/after_script.travis.sh
index 8cbe2c0..e9d5792 100755
--- a/tests/after_script.travis.sh
+++ b/tests/after_script.travis.sh
@@ -18,12 +18,10 @@
 
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 
+source $DIR/../conf/pio-vendors.sh
+
 # Print a summary of containers used
 docker ps -a
 
 # Clean up used containers
-if [ "$ES_VERSION" = "1" ]; then
-    docker-compose -f $DIR/docker-compose-es1.yml down
-else
-    docker-compose -f $DIR/docker-compose.yml down
-fi
+docker-compose -f $DIR/docker-compose.yml down

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/before_script.travis.sh
----------------------------------------------------------------------
diff --git a/tests/before_script.travis.sh b/tests/before_script.travis.sh
index 9ec7a4a..5889a60 100755
--- a/tests/before_script.travis.sh
+++ b/tests/before_script.travis.sh
@@ -18,4 +18,4 @@
 
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 
-$DIR/build-docker.sh
+$DIR/build_docker.sh

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/build-docker.sh
----------------------------------------------------------------------
diff --git a/tests/build-docker.sh b/tests/build-docker.sh
deleted file mode 100755
index ed43715..0000000
--- a/tests/build-docker.sh
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/bin/bash -ex
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-
-if [ ! -f $DIR/docker-files/spark-1.6.3-bin-hadoop2.6.tgz ]; then
-  wget http://d3kbcqa49mib13.cloudfront.net/spark-1.6.3-bin-hadoop2.6.tgz
-  mv spark-1.6.3-bin-hadoop2.6.tgz $DIR/docker-files/
-fi
-
-if [ ! -f $DIR/docker-files/postgresql-9.4-1204.jdbc41.jar ]; then
-  wget https://jdbc.postgresql.org/download/postgresql-9.4-1204.jdbc41.jar
-  mv postgresql-9.4-1204.jdbc41.jar $DIR/docker-files/
-fi
-
-docker pull predictionio/pio-testing-base
-pushd $DIR/..
-if [ -z "$ES_VERSION" ]; then
-    ./make-distribution.sh
-else
-    ./make-distribution.sh --with-es=$ES_VERSION
-fi
-sbt/sbt clean
-mkdir assembly
-cp dist/lib/*.jar assembly/
-mkdir -p lib/spark
-cp dist/lib/spark/*.jar lib/spark
-rm *.tar.gz
-docker build -t predictionio/pio .
-popd
-
-if [ "$ES_VERSION" = "1" ]; then
-    docker build -t predictionio/pio-testing-es1 -f $DIR/Dockerfile-es1 $DIR
-else
-    docker build -t predictionio/pio-testing $DIR
-fi

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/build_docker.sh
----------------------------------------------------------------------
diff --git a/tests/build_docker.sh b/tests/build_docker.sh
new file mode 100755
index 0000000..af30cb8
--- /dev/null
+++ b/tests/build_docker.sh
@@ -0,0 +1,57 @@
+#!/bin/bash -ex
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+docker pull predictionio/pio-testing-base
+
+pushd $DIR/..
+
+source conf/pio-vendors.sh
+if [ ! -f $DIR/docker-files/${PGSQL_JAR} ]; then
+  wget $PGSQL_DOWNLOAD
+  mv ${PGSQL_JAR} $DIR/docker-files/
+fi
+if [ ! -f $DIR/docker-files/${SPARK_ARCHIVE} ]; then
+  wget $SPARK_DOWNLOAD
+  mv $SPARK_ARCHIVE $DIR/docker-files/
+fi
+
+./make-distribution.sh \
+    -Dscala.version=$PIO_SCALA_VERSION \
+    -Dspark.version=$PIO_SPARK_VERSION \
+    -Dhadoop.version=$PIO_HADOOP_VERSION \
+    -Delasticsearch.version=$PIO_ELASTICSEARCH_VERSION
+sbt/sbt clean storage/clean
+rm -rf assembly
+mkdir assembly
+cp dist/lib/*.jar assembly/
+rm -rf lib/spark
+mkdir -p lib/spark
+cp dist/lib/spark/*.jar lib/spark
+rm *.tar.gz
+docker build -t predictionio/pio .
+popd
+
+docker build -t predictionio/pio-testing $DIR \
+  --build-arg SPARK_ARCHIVE=$SPARK_ARCHIVE \
+  --build-arg SPARK_DIR=$SPARK_DIR \
+  --build-arg PGSQL_JAR=$PGSQL_JAR \
+  --build-arg PIO_SCALA_VERSION=$PIO_SCALA_VERSION \
+  --build-arg PIO_SPARK_VERSION=$PIO_SPARK_VERSION \
+  --build-arg PIO_ELASTICSEARCH_VERSION=$PIO_ELASTICSEARCH_VERSION

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/docker-compose-es1.yml
----------------------------------------------------------------------
diff --git a/tests/docker-compose-es1.yml b/tests/docker-compose-es1.yml
deleted file mode 100644
index 381f41c..0000000
--- a/tests/docker-compose-es1.yml
+++ /dev/null
@@ -1,38 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-version: "2"
-services:
-  elasticsearch:
-    image: elasticsearch:1
-    environment:
-      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
-  hbase:
-    image: harisekhon/hbase:1.0
-  postgres:
-    image: postgres:9
-    environment:
-      POSTGRES_USER: pio
-      POSTGRES_PASSWORD: pio
-      POSTGRES_INITDB_ARGS: --encoding=UTF8
-  pio-testing:
-    image: predictionio/pio-testing-es1:latest
-    depends_on:
-      - elasticsearch
-      - hbase
-      - postgres
-    volumes:
-      - ~/.ivy2:/root/.ivy2
-      - ~/.sbt:/root/.sbt

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/docker-compose.yml
----------------------------------------------------------------------
diff --git a/tests/docker-compose.yml b/tests/docker-compose.yml
index 06126a5..ac1d91d 100644
--- a/tests/docker-compose.yml
+++ b/tests/docker-compose.yml
@@ -16,7 +16,7 @@
 version: "2"
 services:
   elasticsearch:
-    image: docker.elastic.co/elasticsearch/elasticsearch:5.2.2
+    image: ${ES_IMAGE}:${ES_TAG}
     environment:
       - xpack.security.enabled=false
       - "ES_JAVA_OPTS=-Xms512m -Xmx512m"

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/00779c3d/tests/docker-files/env-conf/pio-env-es1.sh
----------------------------------------------------------------------
diff --git a/tests/docker-files/env-conf/pio-env-es1.sh b/tests/docker-files/env-conf/pio-env-es1.sh
deleted file mode 100644
index e1076ba..0000000
--- a/tests/docker-files/env-conf/pio-env-es1.sh
+++ /dev/null
@@ -1,103 +0,0 @@
-#!/usr/bin/env bash
-#
-# Copy this file as pio-env.sh and edit it for your site's configuration.
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# PredictionIO Main Configuration
-#
-# This section controls core behavior of PredictionIO. It is very likely that
-# you need to change these to fit your site.
-
-# SPARK_HOME: Apache Spark is a hard dependency and must be configured.
-SPARK_HOME=$SPARK_HOME
-
-POSTGRES_JDBC_DRIVER=/drivers/postgresql-9.4-1204.jdbc41.jar
-MYSQL_JDBC_DRIVER=
-
-# ES_CONF_DIR: You must configure this if you have advanced configuration for
-#              your Elasticsearch setup.
-# ES_CONF_DIR=/opt/elasticsearch
-
-# HADOOP_CONF_DIR: You must configure this if you intend to run PredictionIO
-#                  with Hadoop 2.
-# HADOOP_CONF_DIR=/opt/hadoop
-
-# HBASE_CONF_DIR: You must configure this if you intend to run PredictionIO
-#                 with HBase on a remote cluster.
-HBASE_CONF_DIR=$PIO_HOME/conf
-
-# Filesystem paths where PredictionIO uses as block storage.
-PIO_FS_BASEDIR=$HOME/.pio_store
-PIO_FS_ENGINESDIR=$PIO_FS_BASEDIR/engines
-PIO_FS_TMPDIR=$PIO_FS_BASEDIR/tmp
-
-# PredictionIO Storage Configuration
-#
-# This section controls programs that make use of PredictionIO's built-in
-# storage facilities. Default values are shown below.
-#
-# For more information on storage configuration please refer to
-# https://docs.prediction.io/system/anotherdatastore/
-
-# Storage Repositories
-
-# Default is to use PostgreSQL
-PIO_STORAGE_REPOSITORIES_METADATA_NAME=pio_meta
-PIO_STORAGE_REPOSITORIES_METADATA_SOURCE=$PIO_STORAGE_REPOSITORIES_METADATA_SOURCE
-
-PIO_STORAGE_REPOSITORIES_EVENTDATA_NAME=pio_event
-PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE=$PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE
-
-PIO_STORAGE_REPOSITORIES_MODELDATA_NAME=pio_model
-PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE=$PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE
-
-# Storage Data Sources
-
-# PostgreSQL Default Settings
-# Please change "pio" to your database name in PIO_STORAGE_SOURCES_PGSQL_URL
-# Please change PIO_STORAGE_SOURCES_PGSQL_USERNAME and
-# PIO_STORAGE_SOURCES_PGSQL_PASSWORD accordingly
-PIO_STORAGE_SOURCES_PGSQL_TYPE=jdbc
-PIO_STORAGE_SOURCES_PGSQL_URL=jdbc:postgresql://postgres/pio
-PIO_STORAGE_SOURCES_PGSQL_USERNAME=pio
-PIO_STORAGE_SOURCES_PGSQL_PASSWORD=pio
-
-# MySQL Example
-# PIO_STORAGE_SOURCES_MYSQL_TYPE=jdbc
-# PIO_STORAGE_SOURCES_MYSQL_URL=jdbc:mysql://localhost/pio
-# PIO_STORAGE_SOURCES_MYSQL_USERNAME=pio
-# PIO_STORAGE_SOURCES_MYSQL_PASSWORD=pio
-
-# Elasticsearch Example
-PIO_STORAGE_SOURCES_ELASTICSEARCH_TYPE=elasticsearch
-#PIO_STORAGE_SOURCES_ELASTICSEARCH_CLUSTERNAME=pio
-PIO_STORAGE_SOURCES_ELASTICSEARCH_HOSTS=elasticsearch
-PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9300
-#PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=$ELASTICSEARCH_HOME
-
-# Local File System Example
-PIO_STORAGE_SOURCES_LOCALFS_TYPE=localfs
-PIO_STORAGE_SOURCES_LOCALFS_PATH=$PIO_FS_BASEDIR/local_models
-
-# HBase Example
-PIO_STORAGE_SOURCES_HBASE_TYPE=hbase
-#PIO_STORAGE_SOURCES_HBASE_HOME=$HBASE_HOME
-
-# HDFS config
-PIO_STORAGE_SOURCES_HDFS_TYPE=hdfs
-PIO_STORAGE_SOURCES_HDFS_PATH=/hdfs_models



Mime
View raw message