hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From na...@apache.org
Subject svn commit: r1057502 - in /hive/trunk: CHANGES.txt hbase-handler/src/test/queries/hbase_joins.q hbase-handler/src/test/results/hbase_joins.q.out ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
Date Tue, 11 Jan 2011 07:00:20 GMT
Author: namit
Date: Tue Jan 11 07:00:20 2011
New Revision: 1057502

URL: http://svn.apache.org/viewvc?rev=1057502&view=rev
Log:
HIVE-1903 Can't join HBase tables if one's name is the beginning of
the other (John Sichi via namit)


Modified:
    hive/trunk/CHANGES.txt
    hive/trunk/hbase-handler/src/test/queries/hbase_joins.q
    hive/trunk/hbase-handler/src/test/results/hbase_joins.q.out
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java

Modified: hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hive/trunk/CHANGES.txt?rev=1057502&r1=1057501&r2=1057502&view=diff
==============================================================================
--- hive/trunk/CHANGES.txt (original)
+++ hive/trunk/CHANGES.txt Tue Jan 11 07:00:20 2011
@@ -680,6 +680,9 @@ Trunk -  Unreleased
     to ignore HDFS location stored in index files
     (Yongqiang He via namit)
 
+    HIVE-1903 Can't join HBase tables if one's name is the beginning of
+    the other (John Sichi via namit)
+
   TESTS
 
     HIVE-1464. improve  test query performance

Modified: hive/trunk/hbase-handler/src/test/queries/hbase_joins.q
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/test/queries/hbase_joins.q?rev=1057502&r1=1057501&r2=1057502&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/test/queries/hbase_joins.q (original)
+++ hive/trunk/hbase-handler/src/test/queries/hbase_joins.q Tue Jan 11 07:00:20 2011
@@ -1,6 +1,7 @@
 DROP TABLE users;
 DROP TABLE states;
 DROP TABLE countries;
+DROP TABLE users_level;
 
 -- From HIVE-1257
 
@@ -62,3 +63,20 @@ ON (u.state = s.key);
 DROP TABLE users;
 DROP TABLE states;
 DROP TABLE countries;
+
+CREATE TABLE users(key int, userid int, username string, created int) 
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,f:userid,f:nickname,f:created");
+
+CREATE TABLE users_level(key int, userid int, level int)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,f:userid,f:level");
+
+-- HIVE-1903:  the problem fixed here showed up even without any data,
+-- so no need to load any to test it
+SELECT year(from_unixtime(users.created)) AS year, level, count(users.userid) AS num 
+ FROM users JOIN users_level ON (users.userid = users_level.userid) 
+ GROUP BY year(from_unixtime(users.created)), level;
+
+DROP TABLE users;
+DROP TABLE users_level;

Modified: hive/trunk/hbase-handler/src/test/results/hbase_joins.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/test/results/hbase_joins.q.out?rev=1057502&r1=1057501&r2=1057502&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/test/results/hbase_joins.q.out (original)
+++ hive/trunk/hbase-handler/src/test/results/hbase_joins.q.out Tue Jan 11 07:00:20 2011
@@ -10,6 +10,10 @@ PREHOOK: query: DROP TABLE countries
 PREHOOK: type: DROPTABLE
 POSTHOOK: query: DROP TABLE countries
 POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE users_level
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE users_level
+POSTHOOK: type: DROPTABLE
 PREHOOK: query: -- From HIVE-1257
 
 CREATE TABLE users(key string, state string, country string, country_id int)
@@ -88,102 +92,102 @@ ON (u.country = c.key)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@countries
 PREHOOK: Input: default@users
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-58-44_025_3464030805185795112/-mr-10000
+PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-19-29_136_2919119166696342265/-mr-10000
 POSTHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c 
 ON (u.country = c.key)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@countries
 POSTHOOK: Input: default@users
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-58-44_025_3464030805185795112/-mr-10000
+POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-19-29_136_2919119166696342265/-mr-10000
 user1	USA	United States	USA
 PREHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c
 ON (u.country = c.country)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@countries
 PREHOOK: Input: default@users
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-58-49_449_2533239955498825412/-mr-10000
+PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-19-38_418_1418937364423533875/-mr-10000
 POSTHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c
 ON (u.country = c.country)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@countries
 POSTHOOK: Input: default@users
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-58-49_449_2533239955498825412/-mr-10000
+POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-19-38_418_1418937364423533875/-mr-10000
 user1	USA	United States	USA
 PREHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c 
 ON (u.country_id = c.country_id)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@countries
 PREHOOK: Input: default@users
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-58-54_460_9134325599532847572/-mr-10000
+PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-19-47_279_1891102438076444084/-mr-10000
 POSTHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c 
 ON (u.country_id = c.country_id)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@countries
 POSTHOOK: Input: default@users
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-58-54_460_9134325599532847572/-mr-10000
+POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-19-47_279_1891102438076444084/-mr-10000
 PREHOOK: query: SELECT u.key, u.state, s.name FROM users u JOIN states s 
 ON (u.state = s.key)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@states
 PREHOOK: Input: default@users
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-58-59_427_3646437485215925564/-mr-10000
+PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-19-54_306_2919915084551749896/-mr-10000
 POSTHOOK: query: SELECT u.key, u.state, s.name FROM users u JOIN states s 
 ON (u.state = s.key)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@states
 POSTHOOK: Input: default@users
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-58-59_427_3646437485215925564/-mr-10000
+POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-19-54_306_2919915084551749896/-mr-10000
 user1	IA	Iowa
 PREHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c 
 ON (u.country = c.key)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@countries
 PREHOOK: Input: default@users
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-59-03_357_736778343063311968/-mr-10000
+PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-03_810_1067780128697572780/-mr-10000
 POSTHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c 
 ON (u.country = c.key)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@countries
 POSTHOOK: Input: default@users
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-59-03_357_736778343063311968/-mr-10000
+POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-03_810_1067780128697572780/-mr-10000
 user1	USA	United States	USA
 PREHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c
 ON (u.country = c.country)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@countries
 PREHOOK: Input: default@users
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-59-08_313_7684989920596569472/-mr-10000
+PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-14_736_8923692779050900406/-mr-10000
 POSTHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c
 ON (u.country = c.country)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@countries
 POSTHOOK: Input: default@users
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-59-08_313_7684989920596569472/-mr-10000
+POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-14_736_8923692779050900406/-mr-10000
 user1	USA	United States	USA
 PREHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c 
 ON (u.country_id = c.country_id)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@countries
 PREHOOK: Input: default@users
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-59-13_256_4291980393265625395/-mr-10000
+PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-22_235_696090037944243521/-mr-10000
 POSTHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c 
 ON (u.country_id = c.country_id)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@countries
 POSTHOOK: Input: default@users
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-59-13_256_4291980393265625395/-mr-10000
+POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-22_235_696090037944243521/-mr-10000
 PREHOOK: query: SELECT u.key, u.state, s.name FROM users u JOIN states s 
 ON (u.state = s.key)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@states
 PREHOOK: Input: default@users
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-59-18_206_1231084557369200625/-mr-10000
+PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-28_951_5386570432365997648/-mr-10000
 POSTHOOK: query: SELECT u.key, u.state, s.name FROM users u JOIN states s 
 ON (u.state = s.key)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@states
 POSTHOOK: Input: default@users
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_17-59-18_206_1231084557369200625/-mr-10000
+POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-28_951_5386570432365997648/-mr-10000
 user1	IA	Iowa
 PREHOOK: query: DROP TABLE users
 PREHOOK: type: DROPTABLE
@@ -209,3 +213,55 @@ POSTHOOK: query: DROP TABLE countries
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@countries
 POSTHOOK: Output: default@countries
+PREHOOK: query: CREATE TABLE users(key int, userid int, username string, created int) 
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,f:userid,f:nickname,f:created")
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE users(key int, userid int, username string, created int) 
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,f:userid,f:nickname,f:created")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@users
+PREHOOK: query: CREATE TABLE users_level(key int, userid int, level int)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,f:userid,f:level")
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE users_level(key int, userid int, level int)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,f:userid,f:level")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@users_level
+PREHOOK: query: -- HIVE-1903:  the problem fixed here showed up even without any data,
+-- so no need to load any to test it
+SELECT year(from_unixtime(users.created)) AS year, level, count(users.userid) AS num 
+ FROM users JOIN users_level ON (users.userid = users_level.userid) 
+ GROUP BY year(from_unixtime(users.created)), level
+PREHOOK: type: QUERY
+PREHOOK: Input: default@users
+PREHOOK: Input: default@users_level
+PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-47_957_2665969936395506719/-mr-10000
+POSTHOOK: query: -- HIVE-1903:  the problem fixed here showed up even without any data,
+-- so no need to load any to test it
+SELECT year(from_unixtime(users.created)) AS year, level, count(users.userid) AS num 
+ FROM users JOIN users_level ON (users.userid = users_level.userid) 
+ GROUP BY year(from_unixtime(users.created)), level
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@users
+POSTHOOK: Input: default@users_level
+POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-01-10_14-20-47_957_2665969936395506719/-mr-10000
+PREHOOK: query: DROP TABLE users
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@users
+PREHOOK: Output: default@users
+POSTHOOK: query: DROP TABLE users
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@users
+POSTHOOK: Output: default@users
+PREHOOK: query: DROP TABLE users_level
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@users_level
+PREHOOK: Output: default@users_level
+POSTHOOK: query: DROP TABLE users_level
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@users_level
+POSTHOOK: Output: default@users_level

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java?rev=1057502&r1=1057501&r2=1057502&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java Tue Jan 11 07:00:20 2011
@@ -217,17 +217,23 @@ public class HiveInputFormat<K extends W
 
     // clone a jobConf for setting needed columns for reading
     JobConf cloneJobConf = new JobConf(job);
-    pushProjectionsAndFilters(cloneJobConf, inputFormatClass, hsplit.getPath()
-        .toString(), hsplit.getPath().toUri().getPath());
 
-    InputFormat inputFormat = getInputFormatFromCache(inputFormatClass,
-        cloneJobConf);
+    if (this.mrwork == null) {
+      init(job);
+    }
 
+    boolean nonNative = false;
     PartitionDesc part = pathToPartitionInfo.get(hsplit.getPath().toString());
     if ((part != null) && (part.getTableDesc() != null)) {
       Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), cloneJobConf);
+      nonNative = part.getTableDesc().isNonNative();
     }
 
+    pushProjectionsAndFilters(cloneJobConf, inputFormatClass, hsplit.getPath()
+      .toString(), hsplit.getPath().toUri().getPath(), nonNative);
+
+    InputFormat inputFormat = getInputFormatFromCache(inputFormatClass,
+        cloneJobConf);
     RecordReader innerReader = inputFormat.getRecordReader(inputSplit,
         cloneJobConf, reporter);
 
@@ -356,6 +362,12 @@ public class HiveInputFormat<K extends W
 
   protected void pushProjectionsAndFilters(JobConf jobConf, Class inputFormatClass,
       String splitPath, String splitPathWithNoSchema) {
+    pushProjectionsAndFilters(jobConf, inputFormatClass, splitPath,
+      splitPathWithNoSchema, false);
+  }
+  
+  protected void pushProjectionsAndFilters(JobConf jobConf, Class inputFormatClass,
+      String splitPath, String splitPathWithNoSchema, boolean nonNative) {
     if (this.mrwork == null) {
       init(job);
     }
@@ -367,7 +379,22 @@ public class HiveInputFormat<K extends W
     while (iterator.hasNext()) {
       Entry<String, ArrayList<String>> entry = iterator.next();
       String key = entry.getKey();
-      if (splitPath.startsWith(key) || splitPathWithNoSchema.startsWith(key)) {
+      boolean match;
+      if (nonNative) {
+        // For non-native tables, we need to do an exact match to avoid
+        // HIVE-1903.  (The table location contains no files, and the string
+        // representation of its path does not have a trailing slash.)
+        match =
+          splitPath.equals(key) || splitPathWithNoSchema.equals(key);
+      } else {
+        // But for native tables, we need to do a prefix match for
+        // subdirectories.  (Unlike non-native tables, prefix mixups don't seem
+        // to be a potential problem here since we are always dealing with the
+        // path to something deeper than the table location.)
+        match =
+          splitPath.startsWith(key) || splitPathWithNoSchema.startsWith(key);
+      }
+      if (match) {
         ArrayList<String> list = entry.getValue();
         for (String val : list) {
           aliases.add(val);



Mime
View raw message