spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hvanhov...@apache.org
Subject spark git commit: [SPARK-23599][SQL] Add a UUID generator from Pseudo-Random Numbers
Date Mon, 19 Mar 2018 08:41:47 GMT
Repository: spark
Updated Branches:
  refs/heads/master 745c8c090 -> 4de638c19


[SPARK-23599][SQL] Add a UUID generator from Pseudo-Random Numbers

## What changes were proposed in this pull request?

This patch adds a UUID generator based on pseudo-random numbers. We can use it later to implement a deterministic `UUID()` expression.

## How was this patch tested?

Added unit tests.

Author: Liang-Chi Hsieh <viirya@gmail.com>

Closes #20817 from viirya/SPARK-23599.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4de638c1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4de638c1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4de638c1

Branch: refs/heads/master
Commit: 4de638c1976dea74761bbe5c30da808178ee885d
Parents: 745c8c0
Author: Liang-Chi Hsieh <viirya@gmail.com>
Authored: Mon Mar 19 09:41:43 2018 +0100
Committer: Herman van Hovell <hvanhovell@databricks.com>
Committed: Mon Mar 19 09:41:43 2018 +0100

----------------------------------------------------------------------
 .../sql/catalyst/util/RandomUUIDGenerator.scala | 43 +++++++++++++++
 .../util/RandomUUIDGeneratorSuite.scala         | 57 ++++++++++++++++++++
 2 files changed, 100 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4de638c1/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RandomUUIDGenerator.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RandomUUIDGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RandomUUIDGenerator.scala
new file mode 100644
index 0000000..4fe07a0
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RandomUUIDGenerator.scala
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.util
+
+import java.util.UUID
+
+import org.apache.commons.math3.random.MersenneTwister
+
+import org.apache.spark.unsafe.types.UTF8String
+
+/**
+ * Produces version 4, variant 2 UUIDs from a seeded Mersenne Twister; see RFC 4122: A
+ * Universally Unique IDentifier (UUID) URN Namespace, section 4.4 "Algorithms for Creating
+ * a UUID from Truly Random or Pseudo-Random Numbers".
+ */
+case class RandomUUIDGenerator(randomSeed: Long) {
+  private val random = new MersenneTwister(randomSeed)
+
+  /** Draws the high 64 bits, then the low 64 bits, and stamps the version/variant fields. */
+  def getNextUUID(): UUID = {
+    val high = (random.nextLong() & ~0xF000L) | 0x4000L // version nibble := 0100
+    val low = (random.nextLong() & ~(1L << 62)) | (1L << 63) // two highest bits := 10
+    new UUID(high, low)
+  }
+
+  /** Returns the next UUID's string form wrapped via `UTF8String.fromString`. */
+  def getNextUUIDUTF8String(): UTF8String = UTF8String.fromString(getNextUUID().toString)
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/4de638c1/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RandomUUIDGeneratorSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RandomUUIDGeneratorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RandomUUIDGeneratorSuite.scala
new file mode 100644
index 0000000..b75739e
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RandomUUIDGeneratorSuite.scala
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.util
+
+import scala.util.Random
+
+import org.apache.spark.SparkFunSuite
+
+class RandomUUIDGeneratorSuite extends SparkFunSuite {
+  test("RandomUUIDGenerator should generate version 4, variant 2 UUIDs") {
+    // The seed is arbitrary: the version/variant fields must be set for any seed.
+    val generator = RandomUUIDGenerator(new Random().nextLong())
+    for (_ <- 0 to 100) {
+      val uuid = generator.getNextUUID()
+      assert(uuid.version() == 4)
+      assert(uuid.variant() == 2)
+    }
+  }
+
+  test("UUID from RandomUUIDGenerator should be deterministic") {
+    // Equal seeds must replay the same sequence; a different seed must diverge from it.
+    val generator1 = RandomUUIDGenerator(new Random(100).nextLong())
+    val generator2 = RandomUUIDGenerator(new Random(100).nextLong())
+    val generator3 = RandomUUIDGenerator(new Random(101).nextLong())
+
+    for (_ <- 0 to 100) {
+      val uuid1 = generator1.getNextUUID()
+      val uuid2 = generator2.getNextUUID()
+      val uuid3 = generator3.getNextUUID()
+      assert(uuid1 == uuid2)
+      assert(uuid1 != uuid3)
+    }
+  }
+
+  test("Get UTF8String UUID") {
+    val generator = RandomUUIDGenerator(new Random().nextLong())
+    val utf8StringUUID = generator.getNextUUIDUTF8String()
+    // Parse the string back so the version/variant accessors and a round trip can be checked.
+    val uuid = java.util.UUID.fromString(utf8StringUUID.toString)
+    assert(uuid.version() == 4 && uuid.variant() == 2 && utf8StringUUID.toString == uuid.toString)
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message