spark-commits mailing list archives

From: dav...@apache.org
Subject: spark git commit: [SPARK-8727] [SQL] Missing python api; md5, log2
Date: Wed, 01 Jul 2015 00:01:09 GMT
Repository: spark
Updated Branches:
  refs/heads/master 8133125ca -> ccdb05222


[SPARK-8727] [SQL] Missing python api; md5, log2

Jira: https://issues.apache.org/jira/browse/SPARK-8727

Author: Tarek Auel <tarek.auel@gmail.com>
Author: Tarek Auel <tarek.auel@googlemail.com>

Closes #7114 from tarekauel/missing-python and squashes the following commits:

ef4c61b [Tarek Auel] [SPARK-8727] revert dataframe change
4029d4d [Tarek Auel] removed dataframe pi and e unit test
66f0d2b [Tarek Auel] removed pi and e from python api and dataframe api; added _to_java_column(col) for strlen
4d07318 [Tarek Auel] fixed python unit test
45f2bee [Tarek Auel] fixed result of pi and e
c39f47b [Tarek Auel] add python api
bd50a3a [Tarek Auel] add missing python functions


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ccdb0522
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ccdb0522
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ccdb0522

Branch: refs/heads/master
Commit: ccdb05222a223187199183fd48e3a3313d536965
Parents: 8133125
Author: Tarek Auel <tarek.auel@gmail.com>
Authored: Tue Jun 30 16:59:44 2015 -0700
Committer: Davies Liu <davies@databricks.com>
Committed: Tue Jun 30 17:00:51 2015 -0700

----------------------------------------------------------------------
 python/pyspark/sql/functions.py | 65 ++++++++++++++++++++++++++++--------
 1 file changed, 52 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/ccdb0522/python/pyspark/sql/functions.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 45ecd82..4e2be88 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -39,12 +39,15 @@ __all__ = [
     'coalesce',
     'countDistinct',
     'explode',
+    'log2',
+    'md5',
     'monotonicallyIncreasingId',
     'rand',
     'randn',
     'sha1',
     'sha2',
     'sparkPartitionId',
+    'strlen',
     'struct',
     'udf',
     'when']
@@ -320,6 +323,19 @@ def explode(col):
     return Column(jc)
 
 
+@ignore_unicode_prefix
+@since(1.5)
+def md5(col):
+    """Calculates the MD5 digest and returns the value as a 32 character hex string.
+
+    >>> sqlContext.createDataFrame([('ABC',)], ['a']).select(md5('a').alias('hash')).collect()
+    [Row(hash=u'902fbdd2b1df0c4f70b4a5d23525e932')]
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.md5(_to_java_column(col))
+    return Column(jc)
+
+
 @since(1.4)
 def monotonicallyIncreasingId():
     """A column that generates monotonically increasing 64-bit integers.
@@ -367,6 +383,19 @@ def randn(seed=None):
 
 @ignore_unicode_prefix
 @since(1.5)
+def sha1(col):
+    """Returns the hex string result of SHA-1.
+
+    >>> sqlContext.createDataFrame([('ABC',)], ['a']).select(sha1('a').alias('hash')).collect()
+    [Row(hash=u'3c01bdbb26f358bab27f267924aa2c9a03fcfdb8')]
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.sha1(_to_java_column(col))
+    return Column(jc)
+
+
+@ignore_unicode_prefix
+@since(1.5)
 def sha2(col, numBits):
     """Returns the hex string result of SHA-2 family of hash functions (SHA-224, SHA-256,
SHA-384,
     and SHA-512). The numBits indicates the desired bit length of the result, which must
have a
@@ -383,19 +412,6 @@ def sha2(col, numBits):
     return Column(jc)
 
 
-@ignore_unicode_prefix
-@since(1.5)
-def sha1(col):
-    """Returns the hex string result of SHA-1.
-
-    >>> sqlContext.createDataFrame([('ABC',)], ['a']).select(sha1('a').alias('hash')).collect()
-    [Row(hash=u'3c01bdbb26f358bab27f267924aa2c9a03fcfdb8')]
-    """
-    sc = SparkContext._active_spark_context
-    jc = sc._jvm.functions.sha1(_to_java_column(col))
-    return Column(jc)
-
-
 @since(1.4)
 def sparkPartitionId():
     """A column for partition ID of the Spark task.
@@ -410,6 +426,18 @@ def sparkPartitionId():
 
 
 @ignore_unicode_prefix
+@since(1.5)
+def strlen(col):
+    """Calculates the length of a string expression.
+
+    >>> sqlContext.createDataFrame([('ABC',)], ['a']).select(strlen('a').alias('length')).collect()
+    [Row(length=3)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.strlen(_to_java_column(col)))
+
+
+@ignore_unicode_prefix
 @since(1.4)
 def struct(*cols):
     """Creates a new struct column.
@@ -471,6 +499,17 @@ def log(arg1, arg2=None):
     return Column(jc)
 
 
+@since(1.5)
+def log2(col):
+    """Returns the base-2 logarithm of the argument.
+
+    >>> sqlContext.createDataFrame([(4,)], ['a']).select(log2('a').alias('log2')).collect()
+    [Row(log2=2.0)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.log2(_to_java_column(col)))
+
+
 @since(1.4)
 def lag(col, count=1, default=None):
     """


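For reference, a minimal usage sketch of the functions this patch exposes in
pyspark.sql.functions (md5, sha1, strlen, log2, alongside the existing sha2).
It assumes a PySpark 1.5-era shell where `sqlContext` is already defined, as
in the doctests above; the DataFrame and column names are illustrative, not
part of the patch.

    from pyspark.sql.functions import log2, md5, sha1, sha2, strlen

    # One string column and one numeric column to exercise each function.
    df = sqlContext.createDataFrame([('ABC', 4)], ['s', 'n'])

    df.select(
        md5('s').alias('md5'),           # 32-character hex digest
        sha1('s').alias('sha1'),         # 40-character hex digest
        sha2('s', 256).alias('sha256'),  # SHA-2 family; 256-bit variant here
        strlen('s').alias('len'),        # length of the string value
        log2('n').alias('log2'),         # base-2 logarithm: log2(4) == 2.0
    ).show()
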
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org

