predictionio-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From don...@apache.org
Subject [10/50] [abbrv] incubator-predictionio git commit: [PIO-49] Add support for Elasticsearch 5
Date Tue, 25 Apr 2017 22:11:33 GMT
http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/StorageClient.scala
----------------------------------------------------------------------
diff --git a/storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/StorageClient.scala
b/storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/StorageClient.scala
new file mode 100644
index 0000000..b9ec957
--- /dev/null
+++ b/storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/StorageClient.scala
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage.localfs
+
+import java.io.File
+
+import grizzled.slf4j.Logging
+import org.apache.predictionio.data.storage.BaseStorageClient
+import org.apache.predictionio.data.storage.StorageClientConfig
+import org.apache.predictionio.data.storage.StorageClientException
+
+class StorageClient(val config: StorageClientConfig) extends BaseStorageClient
+    with Logging {
+  override val prefix = "LocalFS"
+  val f = new File(
+    config.properties.getOrElse("PATH", config.properties("HOSTS")))
+  if (f.exists) {
+    if (!f.isDirectory) throw new StorageClientException(
+      s"${f} already exists but it is not a directory!",
+      null)
+    if (!f.canWrite) throw new StorageClientException(
+      s"${f} already exists but it is not writable!",
+      null)
+  } else {
+    if (!f.mkdirs) throw new StorageClientException(
+      s"${f} does not exist and automatic creation failed!",
+      null)
+  }
+  val client = f
+}

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/package.scala
----------------------------------------------------------------------
diff --git a/storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/package.scala
b/storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/package.scala
new file mode 100644
index 0000000..554ab26
--- /dev/null
+++ b/storage/localfs/src/main/scala/org/apache/predictionio/data/storage/localfs/package.scala
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.predictionio.data.storage
+
+/** Local file system implementation of storage traits, supporting model data only
+  *
+  * @group Implementation
+  */
+package object localfs {}

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/storage/localfs/src/test/resources/application.conf
----------------------------------------------------------------------
diff --git a/storage/localfs/src/test/resources/application.conf b/storage/localfs/src/test/resources/application.conf
new file mode 100644
index 0000000..eecae44
--- /dev/null
+++ b/storage/localfs/src/test/resources/application.conf
@@ -0,0 +1,28 @@
+org.apache.predictionio.data.storage {
+  sources {
+    mongodb {
+      type = mongodb
+      hosts = [localhost]
+      ports = [27017]
+    }
+    elasticsearch {
+      type = elasticsearch
+      hosts = [localhost]
+      ports = [9300]
+    }
+  }
+  repositories {
+    # This section is a dummy that exists only to make storage happy.
+    # The actual tests bypass these repository settings completely.
+    # Please refer to StorageTestUtils.scala.
+    settings {
+      name = "test_predictionio"
+      source = mongodb
+    }
+
+    appdata {
+      name = "test_predictionio_appdata"
+      source = mongodb
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tests/Dockerfile
----------------------------------------------------------------------
diff --git a/tests/Dockerfile b/tests/Dockerfile
index d1d048a..94f5688 100644
--- a/tests/Dockerfile
+++ b/tests/Dockerfile
@@ -17,11 +17,13 @@
 
 FROM predictionio/pio
 
-ENV SPARK_VERSION 1.4.0
-ENV ELASTICSEARCH_VERSION 1.4.4
+ENV SPARK_VERSION 1.6.3
+ENV ELASTICSEARCH_VERSION 5.2.1
 ENV HBASE_VERSION 1.0.0
 
 ADD docker-files/spark-${SPARK_VERSION}-bin-hadoop2.6.tgz /vendors
+# WORKAROUND: es-hadoop stops on RDD#take(1)
+RUN echo "spark.locality.wait.node 0s" > /vendors/spark-${SPARK_VERSION}-bin-hadoop2.6/conf/spark-defaults.conf
 ENV SPARK_HOME /vendors/spark-${SPARK_VERSION}-bin-hadoop2.6
 
 COPY docker-files/postgresql-9.4-1204.jdbc41.jar /drivers/postgresql-9.4-1204.jdbc41.jar
@@ -31,6 +33,10 @@ COPY docker-files/env-conf/pio-env.sh ${PIO_HOME}/conf/pio-env.sh
 COPY docker-files/pgpass /root/.pgpass
 RUN chmod 600 /root/.pgpass
 
+# Python packages imported by the integration tests (dateutil, pytz)
+RUN pip install python-dateutil
+RUN pip install pytz
+
 # Default repositories setup
 ENV PIO_STORAGE_REPOSITORIES_METADATA_SOURCE PGSQL
 ENV PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE PGSQL

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tests/build-docker.sh
----------------------------------------------------------------------
diff --git a/tests/build-docker.sh b/tests/build-docker.sh
index dd95168..459b929 100755
--- a/tests/build-docker.sh
+++ b/tests/build-docker.sh
@@ -17,9 +17,9 @@
 
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 
-if [ ! -f $DIR/docker-files/spark-1.4.0-bin-hadoop2.6.tgz ]; then
-  wget http://d3kbcqa49mib13.cloudfront.net/spark-1.4.0-bin-hadoop2.6.tgz
-  mv spark-1.4.0-bin-hadoop2.6.tgz $DIR/docker-files/
+if [ ! -f $DIR/docker-files/spark-1.6.3-bin-hadoop2.6.tgz ]; then
+  wget http://d3kbcqa49mib13.cloudfront.net/spark-1.6.3-bin-hadoop2.6.tgz
+  mv spark-1.6.3-bin-hadoop2.6.tgz $DIR/docker-files/
 fi
 
 if [ ! -f $DIR/docker-files/postgresql-9.4-1204.jdbc41.jar ]; then
@@ -33,6 +33,8 @@ pushd $DIR/..
 sbt/sbt clean
 mkdir assembly
 cp dist/lib/*.jar assembly/
+mkdir -p lib/spark
+cp dist/lib/spark/*.jar lib/spark
 docker build -t predictionio/pio .
 popd
 docker build -t predictionio/pio-testing $DIR

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tests/docker-compose.yml
----------------------------------------------------------------------
diff --git a/tests/docker-compose.yml b/tests/docker-compose.yml
index 3939a0b..b556f7b 100644
--- a/tests/docker-compose.yml
+++ b/tests/docker-compose.yml
@@ -16,7 +16,7 @@
 version: "2"
 services:
   elasticsearch:
-    image: elasticsearch:1-alpine
+    image: elasticsearch:5-alpine
   hbase:
     image: harisekhon/hbase:1.0
   postgres:

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tests/docker-files/env-conf/pio-env.sh
----------------------------------------------------------------------
diff --git a/tests/docker-files/env-conf/pio-env.sh b/tests/docker-files/env-conf/pio-env.sh
index e1076ba..0acf3a7 100644
--- a/tests/docker-files/env-conf/pio-env.sh
+++ b/tests/docker-files/env-conf/pio-env.sh
@@ -87,7 +87,8 @@ PIO_STORAGE_SOURCES_PGSQL_PASSWORD=pio
 PIO_STORAGE_SOURCES_ELASTICSEARCH_TYPE=elasticsearch
 #PIO_STORAGE_SOURCES_ELASTICSEARCH_CLUSTERNAME=pio
 PIO_STORAGE_SOURCES_ELASTICSEARCH_HOSTS=elasticsearch
-PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9300
+PIO_STORAGE_SOURCES_ELASTICSEARCH_SCHEMES=http
+PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9200
 #PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=$ELASTICSEARCH_HOME
 
 # Local File System Example

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tests/pio_tests/scenarios/eventserver_test.py
----------------------------------------------------------------------
diff --git a/tests/pio_tests/scenarios/eventserver_test.py b/tests/pio_tests/scenarios/eventserver_test.py
index c09e815..6f29876 100644
--- a/tests/pio_tests/scenarios/eventserver_test.py
+++ b/tests/pio_tests/scenarios/eventserver_test.py
@@ -19,6 +19,8 @@ import unittest
 import requests
 import json
 import argparse
+import dateutil.parser
+import pytz
 from subprocess import Popen
 from utils import AppEngine, pjoin
 from pio_tests.integration import BaseTestCase, AppContext
@@ -155,7 +157,8 @@ class EventserverTest(BaseTestCase):
       'reversed': 'true' }
     r = self.app.get_events(params=params)
     self.assertEqual(5, len(r.json()))
-    self.assertEqual('2014-11-05T09:39:45.618-08:00', r.json()[0]['eventTime'])
+    event_time = dateutil.parser.parse(r.json()[0]['eventTime']).astimezone(pytz.utc)
+    self.assertEqual('2014-11-05 17:39:45.618000+00:00', str(event_time))
 
   def tearDown(self):
     self.log.info("Deleting all app data")

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tests/run_docker.sh
----------------------------------------------------------------------
diff --git a/tests/run_docker.sh b/tests/run_docker.sh
index 9f28d1c..fe07957 100755
--- a/tests/run_docker.sh
+++ b/tests/run_docker.sh
@@ -19,7 +19,7 @@
 USAGE=$"Usage: run_docker <meta> <event> <model> <command>
   Where:
     meta         = [PGSQL,ELASTICSEARCH]
-    event        = [PGSQL,HBASE]
+    event        = [PGSQL,HBASE,ELASTICSEARCH]
     model        = [PGSQL,LOCALFS,HDFS]
     command      = command to run in the container"
 
@@ -30,7 +30,7 @@ fi
 META="$1"
 shift
 
-if ! [[ "$1" =~ ^(PGSQL|HBASE)$ ]]; then
+if ! [[ "$1" =~ ^(PGSQL|HBASE|ELASTICSEARCH)$ ]]; then
   echo "$USAGE"
   exit 1
 fi

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tools/src/main/scala/org/apache/predictionio/tools/Common.scala
----------------------------------------------------------------------
diff --git a/tools/src/main/scala/org/apache/predictionio/tools/Common.scala b/tools/src/main/scala/org/apache/predictionio/tools/Common.scala
index 6c56615..7d04c07 100644
--- a/tools/src/main/scala/org/apache/predictionio/tools/Common.scala
+++ b/tools/src/main/scala/org/apache/predictionio/tools/Common.scala
@@ -102,6 +102,13 @@ object Common extends EitherLogging {
     if (targetFiles.size > 0) targetFiles else libFiles
   }
 
+  def jarFilesForSpark(pioHome: String): Array[File] = {
+    def jarFilesAt(path: File): Array[File] = path.listFiles filter {
+      _.getName.toLowerCase.endsWith(".jar")
+    }
+    jarFilesAt(new File(pioHome, "lib/spark"))
+  }
+
   def coreAssembly(pioHome: String): Expected[File] = {
     val core = s"pio-assembly-${BuildInfo.version}.jar"
     val coreDir =

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tools/src/main/scala/org/apache/predictionio/tools/Runner.scala
----------------------------------------------------------------------
diff --git a/tools/src/main/scala/org/apache/predictionio/tools/Runner.scala b/tools/src/main/scala/org/apache/predictionio/tools/Runner.scala
index 4f5a176..662dbbf 100644
--- a/tools/src/main/scala/org/apache/predictionio/tools/Runner.scala
+++ b/tools/src/main/scala/org/apache/predictionio/tools/Runner.scala
@@ -160,7 +160,8 @@ object Runner extends EitherLogging {
     val sparkSubmitCommand =
       Seq(Seq(sparkHome, "bin", "spark-submit").mkString(File.separator))
 
-    val sparkSubmitJarsList = WorkflowUtils.thirdPartyJars ++ deployedJars
+    val sparkSubmitJarsList = WorkflowUtils.thirdPartyJars ++ deployedJars ++
+      Common.jarFilesForSpark(pioHome).map(_.toURI)
     val sparkSubmitJars = if (sparkSubmitJarsList.nonEmpty) {
       Seq("--jars", sparkSubmitJarsList.map(_.toString).mkString(","))
     } else {

http://git-wip-us.apache.org/repos/asf/incubator-predictionio/blob/d78b3cbe/tools/src/main/scala/org/apache/predictionio/tools/commands/Engine.scala
----------------------------------------------------------------------
diff --git a/tools/src/main/scala/org/apache/predictionio/tools/commands/Engine.scala b/tools/src/main/scala/org/apache/predictionio/tools/commands/Engine.scala
index 4656457..69a3924 100644
--- a/tools/src/main/scala/org/apache/predictionio/tools/commands/Engine.scala
+++ b/tools/src/main/scala/org/apache/predictionio/tools/commands/Engine.scala
@@ -297,7 +297,9 @@ object Engine extends EitherLogging {
     val extraFiles = WorkflowUtils.thirdPartyConfFiles
     val jarFiles = jarFilesForScala(engineDirPath)
     jarFiles foreach { f => info(s"Found JAR: ${f.getName}") }
-    val allJarFiles = jarFiles.map(_.getCanonicalPath)
+    val jarPluginFiles = jarFilesForSpark(pioHome)
+    jarPluginFiles foreach { f => info(s"Found JAR: ${f.getName}") }
+    val allJarFiles = jarFiles.map(_.getCanonicalPath) ++ jarPluginFiles.map(_.getCanonicalPath)
 
     val cmd = s"${getSparkHome(sparkArgs.sparkHome)}/bin/spark-submit --jars " +
       s"${allJarFiles.mkString(",")} " +


Mime
View raw message