hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jcama...@apache.org
Subject hive git commit: HIVE-15750: Fail schema discovery when two columns have similar names with different casing in Druid (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Date Tue, 31 Jan 2017 10:16:57 GMT
Repository: hive
Updated Branches:
  refs/heads/master eb29bd120 -> f7bc76486


HIVE-15750: Fail schema discovery when two columns have similar names with different casing
in Druid (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f7bc7648
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f7bc7648
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f7bc7648

Branch: refs/heads/master
Commit: f7bc76486b3b7063ccdf25062ba88feda492c6b8
Parents: eb29bd1
Author: Jesus Camacho Rodriguez <jcamacho@apache.org>
Authored: Mon Jan 30 13:36:36 2017 +0000
Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
Committed: Tue Jan 31 10:16:46 2017 +0000

----------------------------------------------------------------------
 .../hadoop/hive/druid/QTestDruidSerDe2.java     | 92 ++++++++++++++++++++
 .../hive/druid/QTestDruidStorageHandler2.java   | 34 ++++++++
 ql/src/test/queries/clientnegative/druid_case.q |  6 ++
 .../results/clientnegative/druid_case.q.out     |  7 ++
 4 files changed, 139 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/f7bc7648/druid-handler/src/test/org/apache/hadoop/hive/druid/QTestDruidSerDe2.java
----------------------------------------------------------------------
diff --git a/druid-handler/src/test/org/apache/hadoop/hive/druid/QTestDruidSerDe2.java b/druid-handler/src/test/org/apache/hadoop/hive/druid/QTestDruidSerDe2.java
new file mode 100644
index 0000000..06b2072
--- /dev/null
+++ b/druid-handler/src/test/org/apache/hadoop/hive/druid/QTestDruidSerDe2.java
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.druid;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.druid.serde.DruidSerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.util.StringUtils;
+
+import com.fasterxml.jackson.core.type.TypeReference;
+
+import io.druid.query.metadata.metadata.SegmentAnalysis;
+import io.druid.query.metadata.metadata.SegmentMetadataQuery;
+
+/**
+ * Druid SerDe to be used in tests; answers metadata requests from a canned response.
+ */
+public class QTestDruidSerDe2 extends DruidSerDe {
+
+  // Request :
+  //        "{\"queryType\":\"segmentMetadata\",\"dataSource\":{\"type\":\"table\",\"name\":\"wikipedia\"},"
+  //        + "\"intervals\":{\"type\":\"intervals\","
+  //        + "\"intervals\":[\"-146136543-09-08T00:30:34.096-07:52:58/146140482-04-24T08:36:27.903-07:00\"]},"
+  //        + "\"toInclude\":{\"type\":\"all\"},\"merge\":true,\"context\":null,\"analysisTypes\":[],"
+  //        + "\"usingDefaultInterval\":true,\"lenientAggregatorMerge\":false,\"descending\":false}";
+  private static final String RESPONSE =
+          "[ {\r\n "
+                  + " \"id\" : \"merged\",\r\n "
+                  + " \"intervals\" : [ \"2010-01-01T00:00:00.000Z/2015-12-31T00:00:00.000Z\"
],\r\n "
+                  + " \"columns\" : {\r\n  "
+                  + "  \"__time\" : { \"type\" : \"LONG\", \"hasMultipleValues\" : false,
\"size\" : 407240380, \"cardinality\" : null, \"errorMessage\" : null },\r\n  "
+                  + "  \"robot\" : { \"type\" : \"STRING\", \"hasMultipleValues\" : false,
\"size\" : 100000, \"cardinality\" : 1944, \"errorMessage\" : null },\r\n  "
+                  + "  \"namespace\" : { \"type\" : \"STRING\", \"hasMultipleValues\" : true,
\"size\" : 100000, \"cardinality\" : 1504, \"errorMessage\" : null },\r\n  "
+                  + "  \"anonymous\" : { \"type\" : \"STRING\", \"hasMultipleValues\" : false,
\"size\" : 100000, \"cardinality\" : 1944, \"errorMessage\" : null },\r\n  "
+                  // The next column has the same name as the previous one, differing only in casing.
+                  // This is allowed in Druid, but it should fail in Hive.
+                  + "  \"Anonymous\" : { \"type\" : \"STRING\", \"hasMultipleValues\" : false,
\"size\" : 100000, \"cardinality\" : 1944, \"errorMessage\" : null },\r\n  "
+                  + "  \"page\" : { \"type\" : \"STRING\", \"hasMultipleValues\" : false,
\"size\" : 100000, \"cardinality\" : 1944, \"errorMessage\" : null },\r\n  "
+                  + "  \"language\" : { \"type\" : \"STRING\", \"hasMultipleValues\" : false,
\"size\" : 100000, \"cardinality\" : 1944, \"errorMessage\" : null },\r\n  "
+                  + "  \"newpage\" : { \"type\" : \"STRING\", \"hasMultipleValues\" : false,
\"size\" : 100000, \"cardinality\" : 1944, \"errorMessage\" : null },\r\n  "
+                  + "  \"user\" : { \"type\" : \"STRING\", \"hasMultipleValues\" : false,
\"size\" : 100000, \"cardinality\" : 1944, \"errorMessage\" : null },\r\n  "
+                  + "  \"count\" : { \"type\" : \"FLOAT\", \"hasMultipleValues\" : false,
\"size\" : 100000, \"cardinality\" : null, \"errorMessage\" : null },\r\n  "
+                  + "  \"added\" : { \"type\" : \"FLOAT\", \"hasMultipleValues\" : false,
\"size\" : 100000, \"cardinality\" : null, \"errorMessage\" : null },\r\n  "
+                  + "  \"delta\" : { \"type\" : \"FLOAT\", \"hasMultipleValues\" : false,
\"size\" : 100000, \"cardinality\" : null, \"errorMessage\" : null },\r\n  "
+                  + "  \"variation\" : { \"type\" : \"FLOAT\", \"hasMultipleValues\" : false,
\"size\" : 100000, \"cardinality\" : null, \"errorMessage\" : null },\r\n  "
+                  + "  \"deleted\" : { \"type\" : \"FLOAT\", \"hasMultipleValues\" : false,
\"size\" : 100000, \"cardinality\" : null, \"errorMessage\" : null }\r\n "
+                  + " },\r\n "
+                  + " \"aggregators\" : {\r\n  "
+                  + "  \"count\" : { \"type\" : \"longSum\", \"name\" : \"count\", \"fieldName\"
: \"count\" },\r\n  "
+                  + "  \"added\" : { \"type\" : \"doubleSum\", \"name\" : \"added\", \"fieldName\"
: \"added\" },\r\n  "
+                  + "  \"delta\" : { \"type\" : \"doubleSum\", \"name\" : \"delta\", \"fieldName\"
: \"delta\" },\r\n  "
+                  + "  \"variation\" : { \"type\" : \"doubleSum\", \"name\" : \"variation\",
\"fieldName\" : \"variation\" },\r\n  "
+                  + "  \"deleted\" : { \"type\" : \"doubleSum\", \"name\" : \"deleted\",
\"fieldName\" : \"deleted\" }\r\n "
+                  + " },\r\n "
+                  + " \"queryGranularity\" : {\r\n    \"type\": \"none\"\r\n  },\r\n "
+                  + " \"size\" : 300000,\r\n "
+                  + " \"numRows\" : 5000000\r\n} ]";
+
+  /* Ignores address and query; returns the first analysis parsed from the canned RESPONSE */
+  @Override
+  protected SegmentAnalysis submitMetadataRequest(String address, SegmentMetadataQuery query)
+          throws SerDeException {
+    // Parse the canned response; any parsing failure is rethrown as a SerDeException
+    List<SegmentAnalysis> resultsList;
+    try {
+      resultsList = DruidStorageHandlerUtils.JSON_MAPPER.readValue(RESPONSE,
+              new TypeReference<List<SegmentAnalysis>>() {
+              }
+      );
+    } catch (Exception e) {
+      throw new SerDeException(StringUtils.stringifyException(e));
+    }
+    return resultsList.get(0);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/f7bc7648/druid-handler/src/test/org/apache/hadoop/hive/druid/QTestDruidStorageHandler2.java
----------------------------------------------------------------------
diff --git a/druid-handler/src/test/org/apache/hadoop/hive/druid/QTestDruidStorageHandler2.java b/druid-handler/src/test/org/apache/hadoop/hive/druid/QTestDruidStorageHandler2.java
new file mode 100644
index 0000000..6ac4df9
--- /dev/null
+++ b/druid-handler/src/test/org/apache/hadoop/hive/druid/QTestDruidStorageHandler2.java
@@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.druid;
+
+import org.apache.hadoop.hive.serde2.AbstractSerDe;
+
+/**
+ * Storage handler for Druid to be used in tests. It plugs in {@link QTestDruidSerDe2}
+ * as the SerDe class; it cannot connect to Druid, and thus it cannot execute queries.
+ */
+@SuppressWarnings("deprecation")
+public class QTestDruidStorageHandler2 extends DruidStorageHandler {
+
+  @Override
+  public Class<? extends AbstractSerDe> getSerDeClass() {
+    return QTestDruidSerDe2.class;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/f7bc7648/ql/src/test/queries/clientnegative/druid_case.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/druid_case.q b/ql/src/test/queries/clientnegative/druid_case.q
new file mode 100644
index 0000000..4828bf8
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/druid_case.q
@@ -0,0 +1,6 @@
+set hive.strict.checks.cartesian.product=false;
+set hive.druid.broker.address.default=localhost.test;
+
+CREATE EXTERNAL TABLE druid_table_1
+STORED BY 'org.apache.hadoop.hive.druid.QTestDruidStorageHandler2'
+TBLPROPERTIES ("druid.datasource" = "wikipedia");

http://git-wip-us.apache.org/repos/asf/hive/blob/f7bc7648/ql/src/test/results/clientnegative/druid_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/druid_case.q.out b/ql/src/test/results/clientnegative/druid_case.q.out
new file mode 100644
index 0000000..457028b
--- /dev/null
+++ b/ql/src/test/results/clientnegative/druid_case.q.out
@@ -0,0 +1,7 @@
+PREHOOK: query: CREATE EXTERNAL TABLE druid_table_1
+STORED BY 'org.apache.hadoop.hive.druid.QTestDruidStorageHandler2'
+TBLPROPERTIES ("druid.datasource" = "wikipedia")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@druid_table_1
+FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. org.apache.hadoop.hive.ql.metadata.HiveException: Duplicate column name anonymous in the table definition.


Mime
View raw message