Return-Path: X-Original-To: apmail-spark-commits-archive@minotaur.apache.org Delivered-To: apmail-spark-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id D023B178F2 for ; Sat, 23 May 2015 07:04:26 +0000 (UTC) Received: (qmail 6449 invoked by uid 500); 23 May 2015 07:04:26 -0000 Delivered-To: apmail-spark-commits-archive@spark.apache.org Received: (qmail 6417 invoked by uid 500); 23 May 2015 07:04:26 -0000 Mailing-List: contact commits-help@spark.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list commits@spark.apache.org Received: (qmail 6408 invoked by uid 99); 23 May 2015 07:04:26 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 23 May 2015 07:04:26 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 9BACFE0009; Sat, 23 May 2015 07:04:26 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: shivaram@apache.org To: commits@spark.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: spark git commit: [SPARK-6811] Copy SparkR lib in make-distribution.sh Date: Sat, 23 May 2015 07:04:26 +0000 (UTC) Repository: spark Updated Branches: refs/heads/master 7af3818c6 -> a40bca011 [SPARK-6811] Copy SparkR lib in make-distribution.sh This change also removes native libraries from SparkR to make sure our distribution works across platforms. Tested by building on Mac, running on Amazon Linux (CentOS), Windows VM and vice-versa (built on Linux run on Mac). I will also test this with YARN soon and update this PR.
Author: Shivaram Venkataraman Closes #6373 from shivaram/sparkr-binary and squashes the following commits: ae41b5c [Shivaram Venkataraman] Remove native libraries from SparkR Also include the built SparkR package in make-distribution.sh Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a40bca01 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a40bca01 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a40bca01 Branch: refs/heads/master Commit: a40bca0111de45763c3ef4270afb2185c16b8f95 Parents: 7af3818 Author: Shivaram Venkataraman Authored: Sat May 23 00:04:01 2015 -0700 Committer: Shivaram Venkataraman Committed: Sat May 23 00:04:01 2015 -0700 ---------------------------------------------------------------------- R/pkg/NAMESPACE | 5 +++- R/pkg/R/utils.R | 38 ++++++++++++++++++++++++- R/pkg/src-native/Makefile | 27 ++++++++++++++++++ R/pkg/src-native/Makefile.win | 27 ++++++++++++++++++ R/pkg/src-native/string_hash_code.c | 49 ++++++++++++++++++++++++++++++++ R/pkg/src/Makefile | 27 ------------------ R/pkg/src/Makefile.win | 27 ------------------ R/pkg/src/string_hash_code.c | 49 -------------------------------- make-distribution.sh | 2 ++ 9 files changed, 146 insertions(+), 105 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/NAMESPACE ---------------------------------------------------------------------- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 64ffdcf..411126a 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -1,6 +1,9 @@ # Imports from base R importFrom(methods, setGeneric, setMethod, setOldClass) -useDynLib(SparkR, stringHashCode) + +# Disable native libraries till we figure out how to package it +# See SPARKR-7839 +#useDynLib(SparkR, stringHashCode) # S3 methods exported export("sparkR.init") http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/R/utils.R 
---------------------------------------------------------------------- diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 0e7b7bd..69b2700 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -122,13 +122,49 @@ hashCode <- function(key) { intBits <- packBits(rawToBits(rawVec), "integer") as.integer(bitwXor(intBits[2], intBits[1])) } else if (class(key) == "character") { - .Call("stringHashCode", key) + # TODO: SPARK-7839 means we might not have the native library available + if (is.loaded("stringHashCode")) { + .Call("stringHashCode", key) + } else { + n <- nchar(key) + if (n == 0) { + 0L + } else { + asciiVals <- sapply(charToRaw(key), function(x) { strtoi(x, 16L) }) + hashC <- 0 + for (k in 1:length(asciiVals)) { + hashC <- mult31AndAdd(hashC, asciiVals[k]) + } + as.integer(hashC) + } + } } else { warning(paste("Could not hash object, returning 0", sep = "")) as.integer(0) } } +# Helper function used to wrap a 'numeric' value to integer bounds. +# Useful for implementing C-like integer arithmetic +wrapInt <- function(value) { + if (value > .Machine$integer.max) { + value <- value - 2 * .Machine$integer.max - 2 + } else if (value < -1 * .Machine$integer.max) { + value <- 2 * .Machine$integer.max + value + 2 + } + value +} + +# Multiply `val` by 31 and add `addVal` to the result. Ensures that +# integer-overflows are handled at every step. +mult31AndAdd <- function(val, addVal) { + vec <- c(bitwShiftL(val, c(4,3,2,1,0)), addVal) + Reduce(function(a, b) { + wrapInt(as.numeric(a) + as.numeric(b)) + }, + vec) +} + # Create a new RDD with serializedMode == "byte". # Return itself if already in "byte" format. 
serializeToBytes <- function(rdd) { http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/src-native/Makefile ---------------------------------------------------------------------- diff --git a/R/pkg/src-native/Makefile b/R/pkg/src-native/Makefile new file mode 100644 index 0000000..a55a56f --- /dev/null +++ b/R/pkg/src-native/Makefile @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +all: sharelib + +sharelib: string_hash_code.c + R CMD SHLIB -o SparkR.so string_hash_code.c + +clean: + rm -f *.o + rm -f *.so + +.PHONY: all clean http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/src-native/Makefile.win ---------------------------------------------------------------------- diff --git a/R/pkg/src-native/Makefile.win b/R/pkg/src-native/Makefile.win new file mode 100644 index 0000000..aa486d8 --- /dev/null +++ b/R/pkg/src-native/Makefile.win @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +all: sharelib + +sharelib: string_hash_code.c + R CMD SHLIB -o SparkR.dll string_hash_code.c + +clean: + rm -f *.o + rm -f *.dll + +.PHONY: all clean http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/src-native/string_hash_code.c ---------------------------------------------------------------------- diff --git a/R/pkg/src-native/string_hash_code.c b/R/pkg/src-native/string_hash_code.c new file mode 100644 index 0000000..e3274b9 --- /dev/null +++ b/R/pkg/src-native/string_hash_code.c @@ -0,0 +1,49 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* + * A C function for R extension which implements the Java String hash algorithm. 
+ * Refer to http://en.wikipedia.org/wiki/Java_hashCode%28%29#The_java.lang.String_hash_function + * + */ + +#include +#include + +/* for compatibility with R before 3.1 */ +#ifndef IS_SCALAR +#define IS_SCALAR(x, type) (TYPEOF(x) == (type) && XLENGTH(x) == 1) +#endif + +SEXP stringHashCode(SEXP string) { + const char* str; + R_xlen_t len, i; + int hashCode = 0; + + if (!IS_SCALAR(string, STRSXP)) { + error("invalid input"); + } + + str = CHAR(asChar(string)); + len = XLENGTH(asChar(string)); + + for (i = 0; i < len; i++) { + hashCode = (hashCode << 5) - hashCode + *str++; + } + + return ScalarInteger(hashCode); +} http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/src/Makefile ---------------------------------------------------------------------- diff --git a/R/pkg/src/Makefile b/R/pkg/src/Makefile deleted file mode 100644 index a55a56f..0000000 --- a/R/pkg/src/Makefile +++ /dev/null @@ -1,27 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -all: sharelib - -sharelib: string_hash_code.c - R CMD SHLIB -o SparkR.so string_hash_code.c - -clean: - rm -f *.o - rm -f *.so - -.PHONY: all clean http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/src/Makefile.win ---------------------------------------------------------------------- diff --git a/R/pkg/src/Makefile.win b/R/pkg/src/Makefile.win deleted file mode 100644 index aa486d8..0000000 --- a/R/pkg/src/Makefile.win +++ /dev/null @@ -1,27 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -all: sharelib - -sharelib: string_hash_code.c - R CMD SHLIB -o SparkR.dll string_hash_code.c - -clean: - rm -f *.o - rm -f *.dll - -.PHONY: all clean http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/R/pkg/src/string_hash_code.c ---------------------------------------------------------------------- diff --git a/R/pkg/src/string_hash_code.c b/R/pkg/src/string_hash_code.c deleted file mode 100644 index e3274b9..0000000 --- a/R/pkg/src/string_hash_code.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. 
- The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -/* - * A C function for R extension which implements the Java String hash algorithm. - * Refer to http://en.wikipedia.org/wiki/Java_hashCode%28%29#The_java.lang.String_hash_function - * - */ - -#include -#include - -/* for compatibility with R before 3.1 */ -#ifndef IS_SCALAR -#define IS_SCALAR(x, type) (TYPEOF(x) == (type) && XLENGTH(x) == 1) -#endif - -SEXP stringHashCode(SEXP string) { - const char* str; - R_xlen_t len, i; - int hashCode = 0; - - if (!IS_SCALAR(string, STRSXP)) { - error("invalid input"); - } - - str = CHAR(asChar(string)); - len = XLENGTH(asChar(string)); - - for (i = 0; i < len; i++) { - hashCode = (hashCode << 5) - hashCode + *str++; - } - - return ScalarInteger(hashCode); -} http://git-wip-us.apache.org/repos/asf/spark/blob/a40bca01/make-distribution.sh ---------------------------------------------------------------------- diff --git a/make-distribution.sh b/make-distribution.sh index 8d6e91d..7882734 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -229,6 +229,8 @@ cp "$SPARK_HOME"/conf/*.template "$DISTDIR"/conf cp "$SPARK_HOME/README.md" "$DISTDIR" cp -r "$SPARK_HOME/bin" "$DISTDIR" cp -r "$SPARK_HOME/python" "$DISTDIR" +mkdir -p "$DISTDIR"/R/lib +cp -r "$SPARK_HOME/R/lib/SparkR" "$DISTDIR"/R/lib cp -r "$SPARK_HOME/sbin" "$DISTDIR" cp -r "$SPARK_HOME/ec2" "$DISTDIR" --------------------------------------------------------------------- To unsubscribe, e-mail: 
commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org