spark-commits mailing list archives

From mch...@apache.org
Subject spark git commit: [SPARK-25957][K8S] Make building alternate language binding docker images optional
Date Wed, 21 Nov 2018 23:51:43 GMT
Repository: spark
Updated Branches:
  refs/heads/master 4aa9ccbde -> 9b48107f9


[SPARK-25957][K8S] Make building alternate language binding docker images optional

## What changes were proposed in this pull request?
`bin/docker-image-tool.sh` tries to build all docker images (JVM, PySpark,
and SparkR) by default, but not all Spark distributions are built with
SparkR, so the script fails on such distros.

With this change, building the alternate language binding docker images (PySpark
and SparkR) becomes optional. Users must specify the Dockerfile for each of those
language bindings with the -p and -R flags, respectively, to build the binding docker images.

## How was this patch tested?

Tested the following scenarios:

* `bin/docker-image-tool.sh -r <repo> -t <tag> build` --> Builds only the JVM docker image (default behavior)

* `bin/docker-image-tool.sh -r <repo> -t <tag> -p kubernetes/dockerfiles/spark/bindings/python/Dockerfile build` --> Builds both the JVM and PySpark docker images

* `bin/docker-image-tool.sh -r <repo> -t <tag> -p kubernetes/dockerfiles/spark/bindings/python/Dockerfile -R kubernetes/dockerfiles/spark/bindings/R/Dockerfile build` --> Builds the JVM, PySpark, and SparkR docker images
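
For illustration, a typical opt-in workflow under the new behavior might look like the
following (a sketch only; `docker.io/myrepo` and `v2.3.0` are placeholder repository and
tag values taken from the usage examples in the diff below):

```bash
# Build the JVM image plus the PySpark image (opted in via -p);
# the SparkR image is skipped because -R is not passed.
./bin/docker-image-tool.sh -r docker.io/myrepo -t v2.3.0 \
  -p kubernetes/dockerfiles/spark/bindings/python/Dockerfile build

# Push whatever was built; docker_push now skips images that do not exist locally.
./bin/docker-image-tool.sh -r docker.io/myrepo -t v2.3.0 push
```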

Author: Nagaram Prasad Addepally <ram@cloudera.com>

Closes #23053 from ramaddepally/SPARK-25957.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9b48107f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9b48107f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9b48107f

Branch: refs/heads/master
Commit: 9b48107f9c84631e0ddaf0f2223296a3cbc16f83
Parents: 4aa9ccb
Author: Nagaram Prasad Addepally <ram@cloudera.com>
Authored: Wed Nov 21 15:51:37 2018 -0800
Committer: mcheah <mcheah@palantir.com>
Committed: Wed Nov 21 15:51:37 2018 -0800

----------------------------------------------------------------------
 bin/docker-image-tool.sh                        | 63 ++++++++++++--------
 docs/running-on-kubernetes.md                   | 12 ++++
 .../scripts/setup-integration-test-env.sh       | 12 +++-
 3 files changed, 59 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/9b48107f/bin/docker-image-tool.sh
----------------------------------------------------------------------
diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh
index aa5d847..e51201a 100755
--- a/bin/docker-image-tool.sh
+++ b/bin/docker-image-tool.sh
@@ -41,6 +41,18 @@ function image_ref {
   echo "$image"
 }
 
+function docker_push {
+  local image_name="$1"
+  if [ ! -z $(docker images -q "$(image_ref ${image_name})") ]; then
+    docker push "$(image_ref ${image_name})"
+    if [ $? -ne 0 ]; then
+      error "Failed to push $image_name Docker image."
+    fi
+  else
+    echo "$(image_ref ${image_name}) image not found. Skipping push for this image."
+  fi
+}
+
 function build {
   local BUILD_ARGS
   local IMG_PATH
@@ -92,8 +104,8 @@ function build {
     base_img=$(image_ref spark)
   )
   local BASEDOCKERFILE=${BASEDOCKERFILE:-"$IMG_PATH/spark/Dockerfile"}
-  local PYDOCKERFILE=${PYDOCKERFILE:-"$IMG_PATH/spark/bindings/python/Dockerfile"}
-  local RDOCKERFILE=${RDOCKERFILE:-"$IMG_PATH/spark/bindings/R/Dockerfile"}
+  local PYDOCKERFILE=${PYDOCKERFILE:-false}
+  local RDOCKERFILE=${RDOCKERFILE:-false}
 
   docker build $NOCACHEARG "${BUILD_ARGS[@]}" \
     -t $(image_ref spark) \
@@ -102,33 +114,29 @@ function build {
     error "Failed to build Spark JVM Docker image, please refer to Docker build output for
details."
   fi
 
-  docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
-    -t $(image_ref spark-py) \
-    -f "$PYDOCKERFILE" .
+  if [ "${PYDOCKERFILE}" != "false" ]; then
+    docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
+      -t $(image_ref spark-py) \
+      -f "$PYDOCKERFILE" .
+      if [ $? -ne 0 ]; then
+        error "Failed to build PySpark Docker image, please refer to Docker build output
for details."
+      fi
+  fi
+
+  if [ "${RDOCKERFILE}" != "false" ]; then
+    docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
+      -t $(image_ref spark-r) \
+      -f "$RDOCKERFILE" .
     if [ $? -ne 0 ]; then
-      error "Failed to build PySpark Docker image, please refer to Docker build output for
details."
+      error "Failed to build SparkR Docker image, please refer to Docker build output for
details."
     fi
-  docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
-    -t $(image_ref spark-r) \
-    -f "$RDOCKERFILE" .
-  if [ $? -ne 0 ]; then
-    error "Failed to build SparkR Docker image, please refer to Docker build output for details."
   fi
 }
 
 function push {
-  docker push "$(image_ref spark)"
-  if [ $? -ne 0 ]; then
-    error "Failed to push Spark JVM Docker image."
-  fi
-  docker push "$(image_ref spark-py)"
-  if [ $? -ne 0 ]; then
-    error "Failed to push PySpark Docker image."
-  fi
-  docker push "$(image_ref spark-r)"
-  if [ $? -ne 0 ]; then
-    error "Failed to push SparkR Docker image."
-  fi
+  docker_push "spark"
+  docker_push "spark-py"
+  docker_push "spark-r"
 }
 
 function usage {
@@ -143,8 +151,10 @@ Commands:
 
 Options:
  -f file               Dockerfile to build for JVM based Jobs. By default builds the Dockerfile shipped with Spark.
-  -p file               Dockerfile to build for PySpark Jobs. Builds Python dependencies and ships with Spark.
-  -R file               Dockerfile to build for SparkR Jobs. Builds R dependencies and ships with Spark.
+  -p file               (Optional) Dockerfile to build for PySpark Jobs. Builds Python dependencies and ships with Spark.
+                        Skips building PySpark docker image if not specified.
+  -R file               (Optional) Dockerfile to build for SparkR Jobs. Builds R dependencies and ships with Spark.
+                        Skips building SparkR docker image if not specified.
   -r repo               Repository address.
   -t tag                Tag to apply to the built image, or to identify the image to be pushed.
   -m                    Use minikube's Docker daemon.
@@ -164,6 +174,9 @@ Examples:
   - Build image in minikube with tag "testing"
     $0 -m -t testing build
 
+  - Build PySpark docker image
+    $0 -r docker.io/myrepo -t v2.3.0 -p kubernetes/dockerfiles/spark/bindings/python/Dockerfile build
+
   - Build and push image with tag "v2.3.0" to docker.io/myrepo
     $0 -r docker.io/myrepo -t v2.3.0 build
     $0 -r docker.io/myrepo -t v2.3.0 push

http://git-wip-us.apache.org/repos/asf/spark/blob/9b48107f/docs/running-on-kubernetes.md
----------------------------------------------------------------------
diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md
index a7b6fd1..a9d4488 100644
--- a/docs/running-on-kubernetes.md
+++ b/docs/running-on-kubernetes.md
@@ -88,6 +88,18 @@ $ ./bin/docker-image-tool.sh -r <repo> -t my-tag build
 $ ./bin/docker-image-tool.sh -r <repo> -t my-tag push
 ```
 
+By default `bin/docker-image-tool.sh` builds docker image for running JVM jobs. You need to opt-in to build additional
+language binding docker images.
+
+Example usage is
+```bash
+# To build additional PySpark docker image
+$ ./bin/docker-image-tool.sh -r <repo> -t my-tag -p ./kubernetes/dockerfiles/spark/bindings/python/Dockerfile build
+
+# To build additional SparkR docker image
+$ ./bin/docker-image-tool.sh -r <repo> -t my-tag -R ./kubernetes/dockerfiles/spark/bindings/R/Dockerfile build
+```
+
 ## Cluster Mode
 
 To launch Spark Pi in cluster mode,

http://git-wip-us.apache.org/repos/asf/spark/blob/9b48107f/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh
----------------------------------------------------------------------
diff --git a/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh b/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh
index a4a9f5b..36e30d7 100755
--- a/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh
+++ b/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh
@@ -72,10 +72,16 @@ then
   IMAGE_TAG=$(uuidgen);
   cd $UNPACKED_SPARK_TGZ
 
+  # Build PySpark image
+  LANGUAGE_BINDING_BUILD_ARGS="-p $UNPACKED_SPARK_TGZ/kubernetes/dockerfiles/spark/bindings/python/Dockerfile"
+
+  # Build SparkR image
+  LANGUAGE_BINDING_BUILD_ARGS="$LANGUAGE_BINDING_BUILD_ARGS -R $UNPACKED_SPARK_TGZ/kubernetes/dockerfiles/spark/bindings/R/Dockerfile"
+
   case $DEPLOY_MODE in
     cloud)
       # Build images
-      $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG build
+      $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG $LANGUAGE_BINDING_BUILD_ARGS build
 
       # Push images appropriately
       if [[ $IMAGE_REPO == gcr.io* ]] ;
@@ -89,13 +95,13 @@ then
     docker-for-desktop)
        # Only need to build as this will place it in our local Docker repo which is all
        # we need for Docker for Desktop to work so no need to also push
-       $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG build
+       $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG $LANGUAGE_BINDING_BUILD_ARGS build
        ;;
 
     minikube)
        # Only need to build and if we do this with the -m option for minikube we will
        # build the images directly using the minikube Docker daemon so no need to push
-       $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -m -r $IMAGE_REPO -t $IMAGE_TAG build
+       $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -m -r $IMAGE_REPO -t $IMAGE_TAG $LANGUAGE_BINDING_BUILD_ARGS build
        ;;
     *)
        echo "Unrecognized deploy mode $DEPLOY_MODE" && exit 1


