spark-commits mailing list archives

From jkbrad...@apache.org
Subject spark git commit: [SPARK-17587][PYTHON][MLLIB] SparseVector __getitem__ should follow __getitem__ contract
Date Tue, 04 Oct 2016 00:58:09 GMT
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 b57e2acb1 -> 613863b11


[SPARK-17587][PYTHON][MLLIB] SparseVector __getitem__ should follow __getitem__ contract

## What changes were proposed in this pull request?

Replaces `ValueError` with `IndexError` when the index passed to `ml` / `mllib` `SparseVector.__getitem__`
is out of range. This ensures correct iteration behavior.
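
For context, a minimal plain-Python sketch (not Spark code; the class name and values are made up) of the contract this change restores: when an object defines `__getitem__` but not `__iter__`, `iter()` probes indices 0, 1, 2, … and only an `IndexError` terminates the loop, so raising `ValueError` breaks `list()` and `for` over a `SparseVector`.

```python
# Illustrative only: a toy sequence using the fallback iteration protocol
# that SparseVector relies on.
class ToySeq(object):
    def __init__(self, values):
        self._values = values

    def __getitem__(self, index):
        if index >= len(self._values) or index < -len(self._values):
            # Raising IndexError lets iter()/list() stop cleanly;
            # raising ValueError would propagate out of the loop instead.
            raise IndexError("Index %d out of bounds." % index)
        return self._values[index]

print(list(ToySeq([1.0, 0.0, 2.0])))  # [1.0, 0.0, 2.0]
```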

Replaces `ValueError` with `IndexError` for `DenseMatrix` and `SparseMatrix` in `ml` / `mllib`.
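
As a usage sketch (the surrounding script is an assumption, though the `DenseMatrix` constructor arguments mirror the tests below), callers can now handle out-of-range matrix access with the standard `IndexError` idiom:

```python
from pyspark.ml.linalg import DenseMatrix

mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10])
try:
    value = mat[3, 0]  # row index 3 is out of range [0, 3)
except IndexError:
    value = None       # before this patch, a ValueError was raised instead
print(value)
```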

## How was this patch tested?

Existing PySpark `ml` / `mllib` unit tests, plus additional unit tests verifying that the problem has
been resolved.

Author: zero323 <zero323@users.noreply.github.com>

Closes #15144 from zero323/SPARK-17587.

(cherry picked from commit d8399b600cef706c22d381b01fab19c610db439a)
Signed-off-by: Joseph K. Bradley <joseph@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/613863b1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/613863b1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/613863b1

Branch: refs/heads/branch-2.0
Commit: 613863b116b6cbc9ac83845c68a2d11b3b02f7cb
Parents: b57e2ac
Author: zero323 <zero323@users.noreply.github.com>
Authored: Mon Oct 3 17:57:54 2016 -0700
Committer: Joseph K. Bradley <joseph@databricks.com>
Committed: Mon Oct 3 17:58:05 2016 -0700

----------------------------------------------------------------------
 python/pyspark/ml/linalg/__init__.py    | 10 +++++-----
 python/pyspark/ml/tests.py              | 16 +++++++++++++---
 python/pyspark/mllib/linalg/__init__.py | 10 +++++-----
 python/pyspark/mllib/tests.py           | 16 +++++++++++++---
 4 files changed, 36 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/613863b1/python/pyspark/ml/linalg/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/linalg/__init__.py b/python/pyspark/ml/linalg/__init__.py
index f42c589..bd0e186 100644
--- a/python/pyspark/ml/linalg/__init__.py
+++ b/python/pyspark/ml/linalg/__init__.py
@@ -698,7 +698,7 @@ class SparseVector(Vector):
                 "Indices must be of type integer, got type %s" % type(index))
 
         if index >= self.size or index < -self.size:
-            raise ValueError("Index %d out of bounds." % index)
+            raise IndexError("Index %d out of bounds." % index)
         if index < 0:
             index += self.size
 
@@ -945,10 +945,10 @@ class DenseMatrix(Matrix):
     def __getitem__(self, indices):
         i, j = indices
         if i < 0 or i >= self.numRows:
-            raise ValueError("Row index %d is out of range [0, %d)"
+            raise IndexError("Row index %d is out of range [0, %d)"
                              % (i, self.numRows))
         if j >= self.numCols or j < 0:
-            raise ValueError("Column index %d is out of range [0, %d)"
+            raise IndexError("Column index %d is out of range [0, %d)"
                              % (j, self.numCols))
 
         if self.isTransposed:
@@ -1075,10 +1075,10 @@ class SparseMatrix(Matrix):
     def __getitem__(self, indices):
         i, j = indices
         if i < 0 or i >= self.numRows:
-            raise ValueError("Row index %d is out of range [0, %d)"
+            raise IndexError("Row index %d is out of range [0, %d)"
                              % (i, self.numRows))
         if j < 0 or j >= self.numCols:
-            raise ValueError("Column index %d is out of range [0, %d)"
+            raise IndexError("Column index %d is out of range [0, %d)"
                              % (j, self.numCols))
 
         # If a CSR matrix is given, then the row index should be searched

http://git-wip-us.apache.org/repos/asf/spark/blob/613863b1/python/pyspark/ml/tests.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py
index 4bcb2c4..de95a47 100755
--- a/python/pyspark/ml/tests.py
+++ b/python/pyspark/ml/tests.py
@@ -1305,7 +1305,7 @@ class VectorTests(MLlibTestCase):
         self.assertEqual(sv[-3], 0.)
         self.assertEqual(sv[-5], 0.)
         for ind in [5, -6]:
-            self.assertRaises(ValueError, sv.__getitem__, ind)
+            self.assertRaises(IndexError, sv.__getitem__, ind)
         for ind in [7.8, '1']:
             self.assertRaises(TypeError, sv.__getitem__, ind)
 
@@ -1313,11 +1313,15 @@ class VectorTests(MLlibTestCase):
         self.assertEqual(zeros[0], 0.0)
         self.assertEqual(zeros[3], 0.0)
         for ind in [4, -5]:
-            self.assertRaises(ValueError, zeros.__getitem__, ind)
+            self.assertRaises(IndexError, zeros.__getitem__, ind)
 
         empty = SparseVector(0, {})
         for ind in [-1, 0, 1]:
-            self.assertRaises(ValueError, empty.__getitem__, ind)
+            self.assertRaises(IndexError, empty.__getitem__, ind)
+
+    def test_sparse_vector_iteration(self):
+        self.assertListEqual(list(SparseVector(3, [], [])), [0.0, 0.0, 0.0])
+        self.assertListEqual(list(SparseVector(5, [0, 3], [1.0, 2.0])), [1.0, 0.0, 0.0, 2.0, 0.0])
 
     def test_matrix_indexing(self):
         mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10])
@@ -1326,6 +1330,9 @@ class VectorTests(MLlibTestCase):
             for j in range(2):
                 self.assertEqual(mat[i, j], expected[i][j])
 
+        for i, j in [(-1, 0), (4, 1), (3, 4)]:
+            self.assertRaises(IndexError, mat.__getitem__, (i, j))
+
     def test_repr_dense_matrix(self):
         mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10])
         self.assertTrue(
@@ -1397,6 +1404,9 @@ class VectorTests(MLlibTestCase):
                 self.assertEqual(expected[i][j], sm1[i, j])
         self.assertTrue(array_equal(sm1.toArray(), expected))
 
+        for i, j in [(-1, 1), (4, 3), (3, 5)]:
+            self.assertRaises(IndexError, sm1.__getitem__, (i, j))
+
         # Test conversion to dense and sparse.
         smnew = sm1.toDense().toSparse()
         self.assertEqual(sm1.numRows, smnew.numRows)

http://git-wip-us.apache.org/repos/asf/spark/blob/613863b1/python/pyspark/mllib/linalg/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py
index 9672dbd..d37e715 100644
--- a/python/pyspark/mllib/linalg/__init__.py
+++ b/python/pyspark/mllib/linalg/__init__.py
@@ -802,7 +802,7 @@ class SparseVector(Vector):
                 "Indices must be of type integer, got type %s" % type(index))
 
         if index >= self.size or index < -self.size:
-            raise ValueError("Index %d out of bounds." % index)
+            raise IndexError("Index %d out of bounds." % index)
         if index < 0:
             index += self.size
 
@@ -1115,10 +1115,10 @@ class DenseMatrix(Matrix):
     def __getitem__(self, indices):
         i, j = indices
         if i < 0 or i >= self.numRows:
-            raise ValueError("Row index %d is out of range [0, %d)"
+            raise IndexError("Row index %d is out of range [0, %d)"
                              % (i, self.numRows))
         if j >= self.numCols or j < 0:
-            raise ValueError("Column index %d is out of range [0, %d)"
+            raise IndexError("Column index %d is out of range [0, %d)"
                              % (j, self.numCols))
 
         if self.isTransposed:
@@ -1245,10 +1245,10 @@ class SparseMatrix(Matrix):
     def __getitem__(self, indices):
         i, j = indices
         if i < 0 or i >= self.numRows:
-            raise ValueError("Row index %d is out of range [0, %d)"
+            raise IndexError("Row index %d is out of range [0, %d)"
                              % (i, self.numRows))
         if j < 0 or j >= self.numCols:
-            raise ValueError("Column index %d is out of range [0, %d)"
+            raise IndexError("Column index %d is out of range [0, %d)"
                              % (j, self.numCols))
 
         # If a CSR matrix is given, then the row index should be searched

http://git-wip-us.apache.org/repos/asf/spark/blob/613863b1/python/pyspark/mllib/tests.py
----------------------------------------------------------------------
diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py
index 3f3dfd1..c519883 100644
--- a/python/pyspark/mllib/tests.py
+++ b/python/pyspark/mllib/tests.py
@@ -260,7 +260,7 @@ class VectorTests(MLlibTestCase):
         self.assertEqual(sv[-3], 0.)
         self.assertEqual(sv[-5], 0.)
         for ind in [5, -6]:
-            self.assertRaises(ValueError, sv.__getitem__, ind)
+            self.assertRaises(IndexError, sv.__getitem__, ind)
         for ind in [7.8, '1']:
             self.assertRaises(TypeError, sv.__getitem__, ind)
 
@@ -268,11 +268,15 @@ class VectorTests(MLlibTestCase):
         self.assertEqual(zeros[0], 0.0)
         self.assertEqual(zeros[3], 0.0)
         for ind in [4, -5]:
-            self.assertRaises(ValueError, zeros.__getitem__, ind)
+            self.assertRaises(IndexError, zeros.__getitem__, ind)
 
         empty = SparseVector(0, {})
         for ind in [-1, 0, 1]:
-            self.assertRaises(ValueError, empty.__getitem__, ind)
+            self.assertRaises(IndexError, empty.__getitem__, ind)
+
+    def test_sparse_vector_iteration(self):
+        self.assertListEqual(list(SparseVector(3, [], [])), [0.0, 0.0, 0.0])
+        self.assertListEqual(list(SparseVector(5, [0, 3], [1.0, 2.0])), [1.0, 0.0, 0.0, 2.0, 0.0])
 
     def test_matrix_indexing(self):
         mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10])
@@ -281,6 +285,9 @@ class VectorTests(MLlibTestCase):
             for j in range(2):
                 self.assertEqual(mat[i, j], expected[i][j])
 
+        for i, j in [(-1, 0), (4, 1), (3, 4)]:
+            self.assertRaises(IndexError, mat.__getitem__, (i, j))
+
     def test_repr_dense_matrix(self):
         mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10])
         self.assertTrue(
@@ -352,6 +359,9 @@ class VectorTests(MLlibTestCase):
                 self.assertEqual(expected[i][j], sm1[i, j])
         self.assertTrue(array_equal(sm1.toArray(), expected))
 
+        for i, j in [(-1, 1), (4, 3), (3, 5)]:
+            self.assertRaises(IndexError, sm1.__getitem__, (i, j))
+
         # Test conversion to dense and sparse.
         smnew = sm1.toDense().toSparse()
         self.assertEqual(sm1.numRows, smnew.numRows)


