drill-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ve...@apache.org
Subject [1/3] drill git commit: DRILL-3739: Fix issues in reading Hive tables with StorageHandler configuration (eg. Hive-HBase tables)
Date Tue, 01 Dec 2015 19:35:27 GMT
Repository: drill
Updated Branches:
  refs/heads/master 53e7a696f -> 9cb553dfe


DRILL-3739: Fix issues in reading Hive tables with StorageHandler configuration (eg. Hive-HBase
tables)


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/9cb553df
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/9cb553df
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/9cb553df

Branch: refs/heads/master
Commit: 9cb553dfe90f367d99eb064abeda85d6bcbea1fe
Parents: 7de3429
Author: vkorukanti <venki.korukanti@gmail.com>
Authored: Wed Oct 21 11:01:23 2015 -0700
Committer: vkorukanti <venki.korukanti@gmail.com>
Committed: Tue Dec 1 10:37:16 2015 -0800

----------------------------------------------------------------------
 .../drill/exec/store/hive/HiveRecordReader.java |  4 +--
 .../apache/drill/exec/store/hive/HiveScan.java  | 19 +++++---------
 .../drill/exec/store/hive/HiveUtilities.java    | 26 +++++++++++++++-----
 .../apache/drill/exec/hive/TestHiveStorage.java | 11 +++++++++
 .../exec/hive/TestInfoSchemaOnHiveStorage.java  |  1 +
 .../exec/store/hive/HiveTestDataGenerator.java  |  8 ++++++
 6 files changed, 48 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/9cb553df/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveRecordReader.java
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveRecordReader.java
b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveRecordReader.java
index f148479..f50f331 100644
--- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveRecordReader.java
+++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveRecordReader.java
@@ -138,14 +138,14 @@ public class HiveRecordReader extends AbstractRecordReader {
 
         finalOI = (StructObjectInspector)ObjectInspectorConverters.getConvertedOI(partitionOI,
tableOI);
         partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(partitionOI,
finalOI);
-        HiveUtilities.setInputFormatClass(job, partition.getSd());
+        HiveUtilities.setInputFormatClass(job, partition.getSd(), table);
       } else {
         // For non-partitioned tables, there is no need to create converter as there are
no schema changes expected.
         partitionSerDe = tableSerDe;
         partitionOI = tableOI;
         partTblObjectInspectorConverter = null;
         finalOI = tableOI;
-        HiveUtilities.setInputFormatClass(job, table.getSd());
+        HiveUtilities.setInputFormatClass(job, table.getSd(), table);
       }
 
       HiveUtilities.addConfToJob(job, properties, hiveConfigOverride);

http://git-wip-us.apache.org/repos/asf/drill/blob/9cb553df/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveScan.java
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveScan.java
b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveScan.java
index 85a8595..cd7d6e5 100644
--- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveScan.java
+++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveScan.java
@@ -178,34 +178,27 @@ public class HiveScan extends AbstractGroupScan {
         splitInput(properties, table.getSd(), null);
       } else {
         for (final Partition partition : partitions) {
-          final Properties properties = MetaStoreUtils.getPartitionMetadata(partition, table);
+          final Properties properties = HiveUtilities.getPartitionMetadata(partition, table);
           splitInput(properties, partition.getSd(), partition);
         }
       }
-    } catch (ReflectiveOperationException | IOException e) {
+    } catch (final Exception e) {
       throw new ExecutionSetupException(e);
     }
   }
 
   /* Split the input given in StorageDescriptor */
   private void splitInput(final Properties properties, final StorageDescriptor sd, final
Partition partition)
-      throws ReflectiveOperationException, IOException {
+      throws Exception {
     final JobConf job = new JobConf();
-    for (final Object obj : properties.keySet()) {
-      job.set((String) obj, (String) properties.get(obj));
-    }
-    for (final Map.Entry<String, String> entry : hiveReadEntry.hiveConfigOverride.entrySet())
{
-      job.set(entry.getKey(), entry.getValue());
-    }
-    InputFormat<?, ?> format = (InputFormat<?, ?>)
-        Class.forName(sd.getInputFormat()).getConstructor().newInstance();
-    job.setInputFormat(format.getClass());
+    HiveUtilities.addConfToJob(job, properties, hiveReadEntry.hiveConfigOverride);
+    HiveUtilities.setInputFormatClass(job, sd, hiveReadEntry.getTable());
     final Path path = new Path(sd.getLocation());
     final FileSystem fs = path.getFileSystem(job);
 
     if (fs.exists(path)) {
       FileInputFormat.addInputPath(job, path);
-      format = job.getInputFormat();
+      final InputFormat format = job.getInputFormat();
       for (final InputSplit split : format.getSplits(job, 1)) {
         inputSplits.add(split);
         partitionMap.put(split, partition);

http://git-wip-us.apache.org/repos/asf/drill/blob/9cb553df/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveUtilities.java
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveUtilities.java
b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveUtilities.java
index 8475c81..9bf4213 100644
--- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveUtilities.java
+++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveUtilities.java
@@ -21,6 +21,7 @@ import com.google.common.base.Preconditions;
 import com.google.common.base.Strings;
 import io.netty.buffer.DrillBuf;
 import org.apache.drill.common.exceptions.DrillRuntimeException;
+import org.apache.drill.common.exceptions.ExecutionSetupException;
 import org.apache.drill.common.exceptions.UserException;
 import org.apache.drill.common.types.TypeProtos;
 import org.apache.drill.common.types.TypeProtos.DataMode;
@@ -56,6 +57,8 @@ import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
+import org.apache.hadoop.hive.ql.metadata.HiveUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
@@ -73,7 +76,7 @@ import java.sql.Timestamp;
 import java.util.Map;
 import java.util.Properties;
 
-import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT;
+import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE;
 
 public class HiveUtilities {
   static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(HiveUtilities.class);
@@ -365,17 +368,28 @@ public class HiveUtilities {
   }
 
   /**
-   * Utility method which sets table or partition {@link InputFormat} class in given {@link
JobConf} object. It gets
-   * the class name from given StorageDescriptor object.
-   *
+   * Utility method which sets table or partition {@link InputFormat} class in given {@link
JobConf} object. First it
+   * tries to get the class name from the given StorageDescriptor object. If that doesn't contain
it, it tries to get it from the
+   * StorageHandler class set in table properties. If neither is found, it throws an exception.
    * @param job {@link JobConf} instance where InputFormat class is set.
    * @param sd {@link StorageDescriptor} instance of currently reading partition or table
(for non-partitioned tables).
+   * @param table Table object
    * @throws Exception
    */
-  public static void setInputFormatClass(final JobConf job, final StorageDescriptor sd)
+  public static void setInputFormatClass(final JobConf job, final StorageDescriptor sd, final
Table table)
       throws Exception {
     final String inputFormatName = sd.getInputFormat();
-    job.setInputFormat((Class<? extends InputFormat>) Class.forName(inputFormatName));
+    if (Strings.isNullOrEmpty(inputFormatName)) {
+      final String storageHandlerClass = table.getParameters().get(META_TABLE_STORAGE);
+      if (Strings.isNullOrEmpty(storageHandlerClass)) {
+        throw new ExecutionSetupException("Unable to get Hive table InputFormat class. There
is neither " +
+            "InputFormat class explicitly specified nor StorageHandler class");
+      }
+      final HiveStorageHandler storageHandler = HiveUtils.getStorageHandler(job, storageHandlerClass);
+      job.setInputFormat(storageHandler.getInputFormatClass());
+    } else {
+      job.setInputFormat((Class<? extends InputFormat>) Class.forName(inputFormatName));
+    }
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/drill/blob/9cb553df/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java
b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java
index 06c08ef..5c844e8 100644
--- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java
+++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java
@@ -368,6 +368,17 @@ public class TestHiveStorage extends HiveTestBase {
     }
   }
 
+  @Test // DRILL-3739
+  public void readingFromStorageHandleBasedTable() throws Exception {
+    testBuilder()
+        .sqlQuery("SELECT * FROM hive.kv_sh ORDER BY key LIMIT 2")
+        .ordered()
+        .baselineColumns("key", "value")
+        .baselineValues(1, " key_1")
+        .baselineValues(2, " key_2")
+        .go();
+  }
+
   @AfterClass
   public static void shutdownOptions() throws Exception {
     test(String.format("alter session set `%s` = false", PlannerSettings.ENABLE_DECIMAL_DATA_TYPE_KEY));

http://git-wip-us.apache.org/repos/asf/drill/blob/9cb553df/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestInfoSchemaOnHiveStorage.java
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestInfoSchemaOnHiveStorage.java
b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestInfoSchemaOnHiveStorage.java
index 3234e43..9352ce0 100644
--- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestInfoSchemaOnHiveStorage.java
+++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestInfoSchemaOnHiveStorage.java
@@ -40,6 +40,7 @@ public class TestInfoSchemaOnHiveStorage extends HiveTestBase {
         .baselineValues("hive.default", "hiveview")
         .baselineValues("hive.default", "kv")
         .baselineValues("hive.default", "kv_parquet")
+        .baselineValues("hive.default", "kv_sh")
         .go();
 
     testBuilder()

http://git-wip-us.apache.org/repos/asf/drill/blob/9cb553df/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
index e5d843d..06473cd 100644
--- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
+++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
@@ -435,6 +435,14 @@ public class HiveTestDataGenerator {
     executeQuery(hiveDriver, "INSERT INTO TABLE kv_parquet PARTITION(part1) SELECT key, value,
key FROM default.kv");
     executeQuery(hiveDriver, "ALTER TABLE kv_parquet ADD COLUMNS (newcol string)");
 
+    // Create a StorageHandler based table (DRILL-3739)
+    executeQuery(hiveDriver, "CREATE TABLE kv_sh(key INT, value STRING) STORED BY " +
+        "'org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler'");
+    // Insert fails if the table directory already exists for tables with DefaultStorageHandlers.
It's a known
+    // issue in Hive. So delete the table directory created as part of the CREATE TABLE
+    FileUtils.deleteQuietly(new File(whDir, "kv_sh"));
+    executeQuery(hiveDriver, "INSERT OVERWRITE TABLE kv_sh SELECT * FROM kv");
+
     ss.close();
   }
 


Mime
View raw message