spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gurwls...@apache.org
Subject spark git commit: [SPARK-23847][PYTHON][SQL] Add asc_nulls_first, asc_nulls_last to PySpark
Date Sun, 08 Apr 2018 04:09:16 GMT
Repository: spark
Updated Branches:
  refs/heads/master 6ab134ca7 -> 2c1fe6475


[SPARK-23847][PYTHON][SQL] Add asc_nulls_first, asc_nulls_last to PySpark

## What changes were proposed in this pull request?

Column.scala and Functions.scala have asc_nulls_first, asc_nulls_last,  desc_nulls_first and
desc_nulls_last. Add the corresponding python APIs in column.py and functions.py

## How was this patch tested?
Add doctest

Author: Huaxin Gao <huaxing@us.ibm.com>

Closes #20962 from huaxingao/spark-23847.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2c1fe647
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2c1fe647
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2c1fe647

Branch: refs/heads/master
Commit: 2c1fe647575e97e28b2232478ca86847d113e185
Parents: 6ab134c
Author: Huaxin Gao <huaxing@us.ibm.com>
Authored: Sun Apr 8 12:09:06 2018 +0800
Committer: hyukjinkwon <gurwls223@apache.org>
Committed: Sun Apr 8 12:09:06 2018 +0800

----------------------------------------------------------------------
 python/pyspark/sql/column.py                    | 56 ++++++++++++++++++--
 python/pyspark/sql/functions.py                 | 13 +++++
 python/pyspark/sql/tests.py                     | 14 +++++
 .../scala/org/apache/spark/sql/Column.scala     |  4 +-
 .../scala/org/apache/spark/sql/functions.scala  |  2 +-
 5 files changed, 82 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2c1fe647/python/pyspark/sql/column.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index 922c7cf..e7dec11 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -447,24 +447,72 @@ class Column(object):
 
     # order
     _asc_doc = """
-    Returns a sort expression based on the ascending order of the given column name
+    Returns a sort expression based on ascending order of the column.
 
     >>> from pyspark.sql import Row
-    >>> df = spark.createDataFrame([Row(name=u'Tom', height=80), Row(name=u'Alice',
height=None)])
+    >>> df = spark.createDataFrame([('Tom', 80), ('Alice', None)], ["name", "height"])
     >>> df.select(df.name).orderBy(df.name.asc()).collect()
     [Row(name=u'Alice'), Row(name=u'Tom')]
     """
+    _asc_nulls_first_doc = """
+    Returns a sort expression based on ascending order of the column, and null values
+    return before non-null values.
+
+    >>> from pyspark.sql import Row
+    >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name",
"height"])
+    >>> df.select(df.name).orderBy(df.name.asc_nulls_first()).collect()
+    [Row(name=None), Row(name=u'Alice'), Row(name=u'Tom')]
+
+    .. versionadded:: 2.4
+    """
+    _asc_nulls_last_doc = """
+    Returns a sort expression based on ascending order of the column, and null values
+    appear after non-null values.
+
+    >>> from pyspark.sql import Row
+    >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name",
"height"])
+    >>> df.select(df.name).orderBy(df.name.asc_nulls_last()).collect()
+    [Row(name=u'Alice'), Row(name=u'Tom'), Row(name=None)]
+
+    .. versionadded:: 2.4
+    """
     _desc_doc = """
-    Returns a sort expression based on the descending order of the given column name.
+    Returns a sort expression based on the descending order of the column.
 
     >>> from pyspark.sql import Row
-    >>> df = spark.createDataFrame([Row(name=u'Tom', height=80), Row(name=u'Alice',
height=None)])
+    >>> df = spark.createDataFrame([('Tom', 80), ('Alice', None)], ["name", "height"])
     >>> df.select(df.name).orderBy(df.name.desc()).collect()
     [Row(name=u'Tom'), Row(name=u'Alice')]
     """
+    _desc_nulls_first_doc = """
+    Returns a sort expression based on the descending order of the column, and null values
+    appear before non-null values.
+
+    >>> from pyspark.sql import Row
+    >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name",
"height"])
+    >>> df.select(df.name).orderBy(df.name.desc_nulls_first()).collect()
+    [Row(name=None), Row(name=u'Tom'), Row(name=u'Alice')]
+
+    .. versionadded:: 2.4
+    """
+    _desc_nulls_last_doc = """
+    Returns a sort expression based on the descending order of the column, and null values
+    appear after non-null values.
+
+    >>> from pyspark.sql import Row
+    >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name",
"height"])
+    >>> df.select(df.name).orderBy(df.name.desc_nulls_last()).collect()
+    [Row(name=u'Tom'), Row(name=u'Alice'), Row(name=None)]
+
+    .. versionadded:: 2.4
+    """
 
     asc = ignore_unicode_prefix(_unary_op("asc", _asc_doc))
+    asc_nulls_first = ignore_unicode_prefix(_unary_op("asc_nulls_first", _asc_nulls_first_doc))
+    asc_nulls_last = ignore_unicode_prefix(_unary_op("asc_nulls_last", _asc_nulls_last_doc))
     desc = ignore_unicode_prefix(_unary_op("desc", _desc_doc))
+    desc_nulls_first = ignore_unicode_prefix(_unary_op("desc_nulls_first", _desc_nulls_first_doc))
+    desc_nulls_last = ignore_unicode_prefix(_unary_op("desc_nulls_last", _desc_nulls_last_doc))
 
     _isNull_doc = """
     True if the current expression is null.

http://git-wip-us.apache.org/repos/asf/spark/blob/2c1fe647/python/pyspark/sql/functions.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index ad3e37c..1b19268 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -138,6 +138,17 @@ _functions_1_4 = {
     'bitwiseNOT': 'Computes bitwise not.',
 }
 
+_functions_2_4 = {
+    'asc_nulls_first': 'Returns a sort expression based on the ascending order of the given'
+
+                       ' column name, and null values return before non-null values.',
+    'asc_nulls_last': 'Returns a sort expression based on the ascending order of the given'
+
+                      ' column name, and null values appear after non-null values.',
+    'desc_nulls_first': 'Returns a sort expression based on the descending order of the given'
+
+                        ' column name, and null values appear before non-null values.',
+    'desc_nulls_last': 'Returns a sort expression based on the descending order of the given'
+
+                       ' column name, and null values appear after non-null values',
+}
+
 _collect_list_doc = """
     Aggregate function: returns a list of objects with duplicates.
 
@@ -250,6 +261,8 @@ for _name, _doc in _functions_2_1.items():
     globals()[_name] = since(2.1)(_create_function(_name, _doc))
 for _name, _message in _functions_deprecated.items():
     globals()[_name] = _wrap_deprecated_function(globals()[_name], _message)
+for _name, _doc in _functions_2_4.items():
+    globals()[_name] = since(2.4)(_create_function(_name, _doc))
 del _name, _doc
 
 

http://git-wip-us.apache.org/repos/asf/spark/blob/2c1fe647/python/pyspark/sql/tests.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index 5181053..dd04ffb 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -2991,6 +2991,20 @@ class SQLTests(ReusedSQLTestCase):
                 os.environ['TZ'] = orig_env_tz
             time.tzset()
 
+    def test_2_4_functions(self):
+        from pyspark.sql import functions
+
+        df = self.spark.createDataFrame(
+            [('Tom', 80), (None, 60), ('Alice', 50)], ["name", "height"])
+        df.select(df.name).orderBy(functions.asc_nulls_first('name')).collect()
+        [Row(name=None), Row(name=u'Alice'), Row(name=u'Tom')]
+        df.select(df.name).orderBy(functions.asc_nulls_last('name')).collect()
+        [Row(name=u'Alice'), Row(name=u'Tom'), Row(name=None)]
+        df.select(df.name).orderBy(functions.desc_nulls_first('name')).collect()
+        [Row(name=None), Row(name=u'Tom'), Row(name=u'Alice')]
+        df.select(df.name).orderBy(functions.desc_nulls_last('name')).collect()
+        [Row(name=u'Tom'), Row(name=u'Alice'), Row(name=None)]
+
 
 class HiveSparkSubmitTests(SparkSubmitTests):
 

http://git-wip-us.apache.org/repos/asf/spark/blob/2c1fe647/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index 9298868..ad0efba 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -1083,10 +1083,10 @@ class Column(val expr: Expression) extends Logging {
    * and null values return before non-null values.
    * {{{
    *   // Scala: sort a DataFrame by age column in ascending order and null values appearing
first.
-   *   df.sort(df("age").asc_nulls_last)
+   *   df.sort(df("age").asc_nulls_first)
    *
    *   // Java
-   *   df.sort(df.col("age").asc_nulls_last());
+   *   df.sort(df.col("age").asc_nulls_first());
    * }}}
    *
    * @group expr_ops

http://git-wip-us.apache.org/repos/asf/spark/blob/2c1fe647/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index c9ca9a8..c658f25 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -132,7 +132,7 @@ object functions {
    * Returns a sort expression based on ascending order of the column,
    * and null values return before non-null values.
    * {{{
-   *   df.sort(asc_nulls_last("dept"), desc("age"))
+   *   df.sort(asc_nulls_first("dept"), desc("age"))
    * }}}
    *
    * @group sort_funcs


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message