impala-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dhe...@apache.org
Subject [1/2] incubator-impala git commit: IMPALA-5137: Support TIMESTAMPs in Kudu range predicate DDL
Date Fri, 19 May 2017 16:22:04 GMT
Repository: incubator-impala
Updated Branches:
  refs/heads/master 24c77f194 -> d6e612f5c


IMPALA-5137: Support TIMESTAMPs in Kudu range predicate DDL

Adds support in DDL for timestamps in Kudu range partition syntax.

For convenience, strings can be specified with or without
explicit casts to TIMESTAMP.

E.g.
create table ts_ranges (ts timestamp primary key, i int)
partition by range (
  partition '2009-01-02 00:00:00' <= VALUES < '2009-01-03 00:00:00'
) stored as kudu

Range bounds are converted to Kudu UNIXTIME_MICROS during
analysis.

Testing: Adds FE and EE tests.

Change-Id: Iae409b6106c073b038940f0413ed9d5859daaeff
Reviewed-on: http://gerrit.cloudera.org:8080/6849
Reviewed-by: Matthew Jacobs <mj@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/6226e597
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/6226e597
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/6226e597

Branch: refs/heads/master
Commit: 6226e597020b6950d7f2f2d43bf54d944e194bda
Parents: 24c77f1
Author: Matthew Jacobs <mj@cloudera.com>
Authored: Wed May 10 16:28:58 2017 -0700
Committer: Impala Public Jenkins <impala-public-jenkins@gerrit.cloudera.org>
Committed: Fri May 19 00:41:46 2017 +0000

----------------------------------------------------------------------
 .../apache/impala/analysis/RangePartition.java  | 48 +++++++++++--
 .../java/org/apache/impala/util/KuduUtil.java   |  8 ++-
 .../apache/impala/analysis/AnalyzeDDLTest.java  | 26 +++++++
 .../queries/QueryTest/kudu_alter.test           | 42 ++++++++++++
 .../queries/QueryTest/kudu_create.test          | 72 ++++++++++++++++++++
 5 files changed, 188 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6226e597/fe/src/main/java/org/apache/impala/analysis/RangePartition.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/RangePartition.java b/fe/src/main/java/org/apache/impala/analysis/RangePartition.java
index b441a78..5ff00c7 100644
--- a/fe/src/main/java/org/apache/impala/analysis/RangePartition.java
+++ b/fe/src/main/java/org/apache/impala/analysis/RangePartition.java
@@ -17,18 +17,20 @@
 
 package org.apache.impala.analysis;
 
+import java.math.BigInteger;
 import java.util.List;
+
+import org.apache.impala.catalog.Type;
+import org.apache.impala.common.AnalysisException;
+import org.apache.impala.common.InternalException;
 import org.apache.impala.common.Pair;
+import org.apache.impala.thrift.TRangePartition;
+import org.apache.impala.util.KuduUtil;
 
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
 
-import org.apache.impala.common.AnalysisException;
-import org.apache.impala.util.KuduUtil;
-import org.apache.impala.thrift.TRangePartition;
-
 /**
  * Represents a range partition of a Kudu table.
  *
@@ -50,12 +52,18 @@ import org.apache.impala.thrift.TRangePartition;
  * are inclusive (true) or exclusive (false).
  */
 public class RangePartition implements ParseNode {
+
+  // Upper and lower bound exprs contain literals of the target column type post-analysis.
+  // For TIMESTAMPs those are Kudu UNIXTIME_MICROS, i.e. int64s.
   private final List<Expr> lowerBound_;
-  private final boolean lowerBoundInclusive_;
   private final List<Expr> upperBound_;
+  private final boolean lowerBoundInclusive_;
   private final boolean upperBoundInclusive_;
   private final boolean isSingletonRange_;
 
+  // Set true when this partition has been analyzed.
+  private boolean isAnalyzed_ = false;
+
   private RangePartition(List<Expr> lowerBoundValues, boolean lowerBoundInclusive,
       List<Expr> upperBoundValues, boolean upperBoundInclusive) {
     Preconditions.checkNotNull(lowerBoundValues);
@@ -112,10 +120,14 @@ public class RangePartition implements ParseNode {
 
   public void analyze(Analyzer analyzer, List<ColumnDef> partColDefs)
       throws AnalysisException {
+    // Reanalyzing not supported because TIMESTAMPs are converted to BIGINT (unixtime
+    // micros) in place.
+    Preconditions.checkArgument(!isAnalyzed_);
     analyzeBoundaryValues(lowerBound_, partColDefs, analyzer);
     if (!isSingletonRange_) {
       analyzeBoundaryValues(upperBound_, partColDefs, analyzer);
     }
+    isAnalyzed_ = true;
   }
 
   private void analyzeBoundaryValues(List<Expr> boundaryValues,
@@ -158,6 +170,20 @@ public class RangePartition implements ParseNode {
     }
     org.apache.impala.catalog.Type colType = pkColumn.getType();
     Preconditions.checkState(KuduUtil.isSupportedKeyType(colType));
+
+    // Special case string literals in timestamp columns for convenience.
+    if (literal.getType().isStringType() && colType.isTimestamp()) {
+      // Add an explicit cast to TIMESTAMP
+      Expr e = new CastExpr(new TypeDef(Type.TIMESTAMP), literal);
+      e.analyze(analyzer);
+      literal = LiteralExpr.create(e, analyzer.getQueryCtx());
+      Preconditions.checkNotNull(literal);
+      if (literal.isNullLiteral()) {
+        throw new AnalysisException(String.format("Range partition value %s cannot be " +
+            "cast to target TIMESTAMP partitioning column.", value.toSql()));
+      }
+    }
+
     org.apache.impala.catalog.Type literalType = literal.getType();
     if (!org.apache.impala.catalog.Type.isImplicitlyCastable(literalType, colType,
         true)) {
@@ -171,6 +197,16 @@ public class RangePartition implements ParseNode {
       literal = LiteralExpr.create(castLiteral, analyzer.getQueryCtx());
     }
     Preconditions.checkNotNull(literal);
+
+    if (colType.isTimestamp()) {
+      try {
+        long unixTimeMicros = KuduUtil.timestampToUnixTimeMicros(analyzer, literal);
+        literal = new NumericLiteral(BigInteger.valueOf(unixTimeMicros), Type.BIGINT);
+      } catch (InternalException e) {
+        throw new AnalysisException(
+            "Error converting timestamp in range definition: " + toSql(), e);
+      }
+    }
     return literal;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6226e597/fe/src/main/java/org/apache/impala/util/KuduUtil.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/util/KuduUtil.java b/fe/src/main/java/org/apache/impala/util/KuduUtil.java
index 9fc13b7..8b61c88 100644
--- a/fe/src/main/java/org/apache/impala/util/KuduUtil.java
+++ b/fe/src/main/java/org/apache/impala/util/KuduUtil.java
@@ -154,6 +154,10 @@ public class KuduUtil {
         checkCorrectType(literal.isSetString_literal(), type, colName, literal);
         key.addString(pos, literal.getString_literal().getValue());
         break;
+      case UNIXTIME_MICROS:
+        checkCorrectType(literal.isSetInt_literal(), type, colName, literal);
+        key.addLong(pos, literal.getInt_literal().getValue());
+        break;
       default:
         throw new ImpalaRuntimeException("Key columns not supported for type: "
             + type.toString());
@@ -196,7 +200,7 @@ public class KuduUtil {
     }
   }
 
-  public static Long timestampToUnixTimeMicros(Analyzer analyzer, Expr timestampExpr)
+  public static long timestampToUnixTimeMicros(Analyzer analyzer, Expr timestampExpr)
       throws AnalysisException, InternalException {
     Preconditions.checkArgument(timestampExpr.isAnalyzed());
     Preconditions.checkArgument(timestampExpr.isConstant());
@@ -330,7 +334,7 @@ public class KuduUtil {
   }
 
   public static boolean isSupportedKeyType(org.apache.impala.catalog.Type type) {
-    return type.isIntegerType() || type.isStringType();
+    return type.isIntegerType() || type.isStringType() || type.isTimestamp();
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6226e597/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
index 48b15c7..0858774 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
@@ -2366,6 +2366,32 @@ public class AnalyzeDDLTest extends FrontendTestBase {
     AnalysisError("create table tab (i int, x int primary key) partition by hash(x) " +
         "partitions 8 sort by(i) stored as kudu", "SORT BY is not supported for Kudu " +
         "tables.");
+
+    // Range partitions with TIMESTAMP
+    AnalyzesOk("create table ts_ranges (ts timestamp primary key) " +
+        "partition by range (partition cast('2009-01-01 00:00:00' as timestamp) " +
+        "<= VALUES < '2009-01-02 00:00:00') stored as kudu");
+    AnalyzesOk("create table ts_ranges (ts timestamp primary key) " +
+        "partition by range (partition value = cast('2009-01-01 00:00:00' as timestamp" +
+        ")) stored as kudu");
+    AnalyzesOk("create table ts_ranges (ts timestamp primary key) " +
+        "partition by range (partition value = '2009-01-01 00:00:00') " +
+        "stored as kudu");
+    AnalyzesOk("create table ts_ranges (id int, ts timestamp, primary key(id, ts))" +
+        "partition by range (partition value = (9, cast('2009-01-01 00:00:00' as " +
+        "timestamp))) stored as kudu");
+    AnalyzesOk("create table ts_ranges (id int, ts timestamp, primary key(id, ts))" +
+        "partition by range (partition value = (9, '2009-01-01 00:00:00')) " +
+        "stored as kudu");
+    AnalysisError("create table ts_ranges (ts timestamp primary key, i int)" +
+        "partition by range (partition '2009-01-01 00:00:00' <= VALUES < " +
+        "'NOT A TIMESTAMP') stored as kudu",
+        "Range partition value 'NOT A TIMESTAMP' cannot be cast to target TIMESTAMP " +
+        "partitioning column.");
+    AnalysisError("create table ts_ranges (ts timestamp primary key, i int)" +
+        "partition by range (partition 100 <= VALUES < 200) stored as kudu",
+        "Range partition value 100 (type: TINYINT) is not type " +
+        "compatible with partitioning column 'ts' (type: TIMESTAMP).");
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6226e597/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test b/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test
index 369eecf..20c2c56 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test
@@ -413,3 +413,45 @@ select count(*) from external_tbl
 ---- TYPES
 BIGINT
 ====
+---- QUERY
+create table ts_ranges (ts timestamp primary key, i int)
+partition by range (
+  partition cast('2009-01-01 00:00:00' as timestamp) <= VALUES <
+            cast('2009-01-02 00:00:00' as timestamp)
+) stored as kudu
+---- RESULTS
+====
+---- QUERY
+show range partitions ts_ranges
+---- RESULTS
+'2009-01-01T00:00:00.000000Z <= VALUES < 2009-01-02T00:00:00.000000Z'
+---- TYPES
+STRING
+====
+---- QUERY
+alter table ts_ranges add range partition
+cast('2009-01-02 00:00:00' as timestamp) <= VALUES <
+cast('2009-01-03 00:00:00' as timestamp)
+---- RESULTS
+====
+---- QUERY
+show range partitions ts_ranges
+---- RESULTS
+'2009-01-01T00:00:00.000000Z <= VALUES < 2009-01-02T00:00:00.000000Z'
+'2009-01-02T00:00:00.000000Z <= VALUES < 2009-01-03T00:00:00.000000Z'
+---- TYPES
+STRING
+====
+---- QUERY
+alter table ts_ranges drop range partition
+cast('2009-01-02 00:00:00' as timestamp) <= VALUES <
+cast('2009-01-03 00:00:00' as timestamp)
+---- RESULTS
+====
+---- QUERY
+show range partitions ts_ranges
+---- RESULTS
+'2009-01-01T00:00:00.000000Z <= VALUES < 2009-01-02T00:00:00.000000Z'
+---- TYPES
+STRING
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6226e597/testdata/workloads/functional-query/queries/QueryTest/kudu_create.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu_create.test b/testdata/workloads/functional-query/queries/QueryTest/kudu_create.test
index b7db12f..704c868 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/kudu_create.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/kudu_create.test
@@ -144,3 +144,75 @@ X, I1, I2, I3, I4, VALS, VALF, VALD, VALB
 ---- TYPES
 INT,TINYINT,SMALLINT,INT,BIGINT,STRING,FLOAT,DOUBLE,BOOLEAN
 ====
+---- QUERY
+# Overlapping ranges are rejected by the Kudu client
+create table ts_ranges (ts timestamp primary key, i int)
+partition by range (
+  partition cast('2009-01-02 00:00:00' as timestamp) <= VALUES <
+            cast('2009-01-03 00:00:00' as timestamp),
+  partition cast('2009-01-02 10:00:00' as timestamp) <= VALUES
+) stored as kudu
+---- CATCH
+NonRecoverableException: overlapping range partitions: first range partition: 2009-01-02T00:00:00.000000Z <= VALUES < 2009-01-03T00:00:00.000000Z, second range partition: VALUES >= 2009-01-02T10:00:00.000000Z
+====
+---- QUERY
+# Creates a range partitioned Kudu table with a timestamp PK. Note that nanoseconds
+# are rounded (same behavior as when writing timestamp values to Kudu).
+create table ts_ranges (ts timestamp primary key, i int)
+partition by range (
+  partition nanoseconds_add(cast('2009-01-01 00:00:00' as timestamp), 999) <= VALUES <
+            nanoseconds_add(cast('2009-01-02 00:00:00' as timestamp), 1),
+  partition cast('2009-01-02 00:00:00' as timestamp) <= VALUES <
+            cast('2009-01-03 00:00:00' as timestamp),
+  partition '2009-01-03 00:00:00' <= VALUES
+) stored as kudu
+---- RESULTS
+====
+---- QUERY
+show range partitions ts_ranges
+---- RESULTS
+'2009-01-01T00:00:00.000001Z <= VALUES < 2009-01-02T00:00:00.000000Z'
+'2009-01-02T00:00:00.000000Z <= VALUES < 2009-01-03T00:00:00.000000Z'
+'VALUES >= 2009-01-03T00:00:00.000000Z'
+---- TYPES
+STRING
+====
+---- QUERY
+create table ts_ranges_ctas
+primary key (ts)
+partition by range (
+  partition VALUES < '2009-01-02 00:00:00',
+  partition '2009-01-02 00:00:00' <= VALUES <
+            '2009-01-03 00:00:00',
+  partition '2009-01-03 00:00:00' < VALUES,
+  partition VALUE = ('2009-01-03 00:00:00')
+)
+stored as kudu
+as select timestamp_col ts, id from functional.alltypestiny;
+---- RESULTS
+'Inserted 8 row(s)'
+====
+---- QUERY
+show range partitions ts_ranges_ctas
+---- RESULTS
+'VALUES < 2009-01-02T00:00:00.000000Z'
+'2009-01-02T00:00:00.000000Z <= VALUES < 2009-01-03T00:00:00.000000Z'
+'VALUE = 2009-01-03T00:00:00.000000Z'
+'VALUES >= 2009-01-03T00:00:00.000001Z'
+---- TYPES
+STRING
+====
+---- QUERY
+select * from ts_ranges_ctas order by id
+---- RESULTS
+2009-01-01 00:00:00,0
+2009-01-01 00:01:00,1
+2009-02-01 00:00:00,2
+2009-02-01 00:01:00,3
+2009-03-01 00:00:00,4
+2009-03-01 00:01:00,5
+2009-04-01 00:00:00,6
+2009-04-01 00:01:00,7
+---- TYPES
+TIMESTAMP,INT
+====


Mime
View raw message