hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nzh...@apache.org
Subject svn commit: r922999 - in /hadoop/hive/trunk: CHANGES.txt ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java ql/src/test/queries/clientpositive/combine1.q ql/src/test/results/clientpositive/combine1.q.out
Date Sun, 14 Mar 2010 23:02:40 GMT
Author: nzhang
Date: Sun Mar 14 23:02:39 2010
New Revision: 922999

URL: http://svn.apache.org/viewvc?rev=922999&view=rev
Log:
HIVE-1242. CombineHiveInputFormat does not work for compressed text files. (Namit Jain via
Ning Zhang).

Added:
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/combine1.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/combine1.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=922999&r1=922998&r2=922999&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Sun Mar 14 23:02:39 2010
@@ -261,6 +261,9 @@ Trunk -  Unreleased
     HIVE-1241. Drop the table at the beginning of tests
     (He Yongqiang via namit)
 
+    HIVE-1242. CombineHiveInputFormat does not work for compressed text files.
+    (Namit Jain via Ning Zhang)
+
 Release 0.5.0 -  Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java?rev=922999&r1=922998&r2=922999&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java Sun Mar 14 23:02:39 2010
@@ -42,6 +42,12 @@ import org.apache.hadoop.mapred.InputSpl
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+
 
 /**
  * CombineHiveInputFormat is a parameterized InputFormat which looks at the path
@@ -217,6 +223,9 @@ public class CombineHiveInputFormat<K ex
     CombineFileInputFormatShim combine = ShimLoader.getHadoopShims()
         .getCombineFileInputFormat();
 
+    if (combine == null)
+      return super.getSplits(job, numSplits);
+
     if (combine.getInputPathsShim(job).length == 0) {
       throw new IOException("No input paths specified in job");
     }
@@ -227,6 +236,35 @@ public class CombineHiveInputFormat<K ex
     Path[] paths = combine.getInputPathsShim(job);
     for (Path path : paths) {
       LOG.info("CombineHiveInputSplit creating pool for " + path);
+
+      // The following code should be removed, once
+      // https://issues.apache.org/jira/browse/MAPREDUCE-1597 is fixed.
+      // Hadoop does not handle non-splittable files correctly for CombineFileInputFormat,
+      // so don't use CombineFileInputFormat for non-splittable files
+      FileSystem inpFs = path.getFileSystem(job);
+
+      FileStatus fStats = inpFs.getFileStatus(path);
+      Path tstPath = path;
+
+      // If path is a directory
+      if (fStats.isDir()) {
+        FileStatus[] fStatus = inpFs.listStatus(path);
+        if (fStatus.length > 0) {
+          tstPath = fStatus[0].getPath();
+        }
+      }
+
+      PartitionDesc part = getPartitionDescFromPath(pathToPartitionInfo, path);
+
+      // Use HiveInputFormat if any of the paths is not splittable
+      Class inputFormatClass = part.getInputFileFormatClass();
+      InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
+
+      if ((inputFormat instanceof TextInputFormat) &&
+          ((new CompressionCodecFactory(job)).getCodec(tstPath) != null)) {
+        return super.getSplits(job, numSplits);
+      }
+
       combine.createPool(job, new CombineFilter(path));
     }
 
@@ -248,6 +286,10 @@ public class CombineHiveInputFormat<K ex
   @Override
   public RecordReader getRecordReader(InputSplit split, JobConf job,
       Reporter reporter) throws IOException {
+    if (!(split instanceof CombineHiveInputSplit)) {
+      return super.getRecordReader(split, job, reporter);
+    }
+
     CombineHiveInputSplit hsplit = (CombineHiveInputSplit) split;
 
     String inputFormatClassName = null;
@@ -270,25 +312,27 @@ public class CombineHiveInputFormat<K ex
 
   protected static PartitionDesc getPartitionDescFromPath(
       Map<String, PartitionDesc> pathToPartitionInfo, Path dir) throws IOException
{
-    // The format of the keys in pathToPartitionInfo sometimes contains a port
-    // and sometimes doesn't, so we just compare paths.
-    URI dirUri = dir.toUri();
-    for (Map.Entry<String, PartitionDesc> entry : pathToPartitionInfo
-        .entrySet()) {
-      try {
-        // Take only the path part of the URI.
+    try {
+      // Take only the path part of the URI.
+
+      // The format of the keys in pathToPartitionInfo sometimes contains a port
+      // and sometimes doesn't, so we just compare paths.
+      URI dirUri = new URI(dir.toUri().getPath());
+      for (Map.Entry<String, PartitionDesc> entry : pathToPartitionInfo
+             .entrySet()) {
         URI pathOfPartition = new URI(entry.getKey());
         pathOfPartition = new URI(pathOfPartition.getPath());
 
         if (!pathOfPartition.relativize(dirUri).equals(dirUri)) {
           return entry.getValue();
         }
-      } catch (URISyntaxException e2) {
-        LOG.info("getPartitionDescFromPath ", e2);
       }
+    } catch (URISyntaxException e2) {
+      LOG.info("getPartitionDescFromPath ", e2);
     }
+
     throw new IOException("cannot find dir = " + dir.toString()
-        + " in partToPartitionInfo: " + pathToPartitionInfo.keySet());
+                          + " in partToPartitionInfo: " + pathToPartitionInfo.keySet());
   }
 
   static class CombineFilter implements PathFilter {

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/combine1.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/combine1.q?rev=922999&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/combine1.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/combine1.q Sun Mar 14 23:02:39 2010
@@ -0,0 +1,22 @@
+set hive.exec.compress.output = true;
+set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+set mapred.min.split.size=256;
+set mapred.min.split.size.per.node=256;
+set mapred.min.split.size.per.rack=256;
+set mapred.max.split.size=256;
+
+set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
+
+drop table combine1_1;
+
+
+create table combine1_1(key string, value string) stored as textfile;
+
+insert overwrite table combine1_1
+select * from src;
+
+
+select key, value from combine1_1;
+
+drop table combine1_1;
+

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/combine1.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/combine1.q.out?rev=922999&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/combine1.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/combine1.q.out Sun Mar 14 23:02:39 2010
@@ -0,0 +1,532 @@
+PREHOOK: query: drop table combine1_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table combine1_1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table combine1_1(key string, value string) stored as textfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table combine1_1(key string, value string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@combine1_1
+PREHOOK: query: insert overwrite table combine1_1
+select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@combine1_1
+POSTHOOK: query: insert overwrite table combine1_1
+select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@combine1_1
+PREHOOK: query: select key, value from combine1_1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@combine1_1
+PREHOOK: Output: file:/data/users/njain/hive3/hive3/build/ql/scratchdir/hive_2010-03-12_14-00-56_188_1241439225254397610/10000
+POSTHOOK: query: select key, value from combine1_1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@combine1_1
+POSTHOOK: Output: file:/data/users/njain/hive3/hive3/build/ql/scratchdir/hive_2010-03-12_14-00-56_188_1241439225254397610/10000
+238	val_238
+86	val_86
+311	val_311
+27	val_27
+165	val_165
+409	val_409
+255	val_255
+278	val_278
+98	val_98
+484	val_484
+265	val_265
+193	val_193
+401	val_401
+150	val_150
+273	val_273
+224	val_224
+369	val_369
+66	val_66
+128	val_128
+213	val_213
+146	val_146
+406	val_406
+429	val_429
+374	val_374
+152	val_152
+469	val_469
+145	val_145
+495	val_495
+37	val_37
+327	val_327
+281	val_281
+277	val_277
+209	val_209
+15	val_15
+82	val_82
+403	val_403
+166	val_166
+417	val_417
+430	val_430
+252	val_252
+292	val_292
+219	val_219
+287	val_287
+153	val_153
+193	val_193
+338	val_338
+446	val_446
+459	val_459
+394	val_394
+237	val_237
+482	val_482
+174	val_174
+413	val_413
+494	val_494
+207	val_207
+199	val_199
+466	val_466
+208	val_208
+174	val_174
+399	val_399
+396	val_396
+247	val_247
+417	val_417
+489	val_489
+162	val_162
+377	val_377
+397	val_397
+309	val_309
+365	val_365
+266	val_266
+439	val_439
+342	val_342
+367	val_367
+325	val_325
+167	val_167
+195	val_195
+475	val_475
+17	val_17
+113	val_113
+155	val_155
+203	val_203
+339	val_339
+0	val_0
+455	val_455
+128	val_128
+311	val_311
+316	val_316
+57	val_57
+302	val_302
+205	val_205
+149	val_149
+438	val_438
+345	val_345
+129	val_129
+170	val_170
+20	val_20
+489	val_489
+157	val_157
+378	val_378
+221	val_221
+92	val_92
+111	val_111
+47	val_47
+72	val_72
+4	val_4
+280	val_280
+35	val_35
+427	val_427
+277	val_277
+208	val_208
+356	val_356
+399	val_399
+169	val_169
+382	val_382
+498	val_498
+125	val_125
+386	val_386
+437	val_437
+469	val_469
+192	val_192
+286	val_286
+187	val_187
+176	val_176
+54	val_54
+459	val_459
+51	val_51
+138	val_138
+103	val_103
+239	val_239
+213	val_213
+216	val_216
+430	val_430
+278	val_278
+176	val_176
+289	val_289
+221	val_221
+65	val_65
+318	val_318
+332	val_332
+311	val_311
+275	val_275
+137	val_137
+241	val_241
+83	val_83
+333	val_333
+180	val_180
+284	val_284
+12	val_12
+230	val_230
+181	val_181
+67	val_67
+260	val_260
+404	val_404
+384	val_384
+489	val_489
+353	val_353
+373	val_373
+272	val_272
+138	val_138
+217	val_217
+84	val_84
+348	val_348
+466	val_466
+58	val_58
+8	val_8
+411	val_411
+230	val_230
+208	val_208
+348	val_348
+24	val_24
+463	val_463
+431	val_431
+179	val_179
+172	val_172
+42	val_42
+129	val_129
+158	val_158
+119	val_119
+496	val_496
+0	val_0
+322	val_322
+197	val_197
+468	val_468
+393	val_393
+454	val_454
+100	val_100
+298	val_298
+199	val_199
+191	val_191
+418	val_418
+96	val_96
+26	val_26
+165	val_165
+327	val_327
+230	val_230
+205	val_205
+120	val_120
+131	val_131
+51	val_51
+404	val_404
+43	val_43
+436	val_436
+156	val_156
+469	val_469
+468	val_468
+308	val_308
+95	val_95
+196	val_196
+288	val_288
+481	val_481
+457	val_457
+98	val_98
+282	val_282
+197	val_197
+187	val_187
+318	val_318
+318	val_318
+409	val_409
+470	val_470
+137	val_137
+369	val_369
+316	val_316
+169	val_169
+413	val_413
+85	val_85
+77	val_77
+0	val_0
+490	val_490
+87	val_87
+364	val_364
+179	val_179
+118	val_118
+134	val_134
+395	val_395
+282	val_282
+138	val_138
+238	val_238
+419	val_419
+15	val_15
+118	val_118
+72	val_72
+90	val_90
+307	val_307
+19	val_19
+435	val_435
+10	val_10
+277	val_277
+273	val_273
+306	val_306
+224	val_224
+309	val_309
+389	val_389
+327	val_327
+242	val_242
+369	val_369
+392	val_392
+272	val_272
+331	val_331
+401	val_401
+242	val_242
+452	val_452
+177	val_177
+226	val_226
+5	val_5
+497	val_497
+402	val_402
+396	val_396
+317	val_317
+395	val_395
+58	val_58
+35	val_35
+336	val_336
+95	val_95
+11	val_11
+168	val_168
+34	val_34
+229	val_229
+233	val_233
+143	val_143
+472	val_472
+322	val_322
+498	val_498
+160	val_160
+195	val_195
+42	val_42
+321	val_321
+430	val_430
+119	val_119
+489	val_489
+458	val_458
+78	val_78
+76	val_76
+41	val_41
+223	val_223
+492	val_492
+149	val_149
+449	val_449
+218	val_218
+228	val_228
+138	val_138
+453	val_453
+30	val_30
+209	val_209
+64	val_64
+468	val_468
+76	val_76
+74	val_74
+342	val_342
+69	val_69
+230	val_230
+33	val_33
+368	val_368
+103	val_103
+296	val_296
+113	val_113
+216	val_216
+367	val_367
+344	val_344
+167	val_167
+274	val_274
+219	val_219
+239	val_239
+485	val_485
+116	val_116
+223	val_223
+256	val_256
+263	val_263
+70	val_70
+487	val_487
+480	val_480
+401	val_401
+288	val_288
+191	val_191
+5	val_5
+244	val_244
+438	val_438
+128	val_128
+467	val_467
+432	val_432
+202	val_202
+316	val_316
+229	val_229
+469	val_469
+463	val_463
+280	val_280
+2	val_2
+35	val_35
+283	val_283
+331	val_331
+235	val_235
+80	val_80
+44	val_44
+193	val_193
+321	val_321
+335	val_335
+104	val_104
+466	val_466
+366	val_366
+175	val_175
+403	val_403
+483	val_483
+53	val_53
+105	val_105
+257	val_257
+406	val_406
+409	val_409
+190	val_190
+406	val_406
+401	val_401
+114	val_114
+258	val_258
+90	val_90
+203	val_203
+262	val_262
+348	val_348
+424	val_424
+12	val_12
+396	val_396
+201	val_201
+217	val_217
+164	val_164
+431	val_431
+454	val_454
+478	val_478
+298	val_298
+125	val_125
+431	val_431
+164	val_164
+424	val_424
+187	val_187
+382	val_382
+5	val_5
+70	val_70
+397	val_397
+480	val_480
+291	val_291
+24	val_24
+351	val_351
+255	val_255
+104	val_104
+70	val_70
+163	val_163
+438	val_438
+119	val_119
+414	val_414
+200	val_200
+491	val_491
+237	val_237
+439	val_439
+360	val_360
+248	val_248
+479	val_479
+305	val_305
+417	val_417
+199	val_199
+444	val_444
+120	val_120
+429	val_429
+169	val_169
+443	val_443
+323	val_323
+325	val_325
+277	val_277
+230	val_230
+478	val_478
+178	val_178
+468	val_468
+310	val_310
+317	val_317
+333	val_333
+493	val_493
+460	val_460
+207	val_207
+249	val_249
+265	val_265
+480	val_480
+83	val_83
+136	val_136
+353	val_353
+172	val_172
+214	val_214
+462	val_462
+233	val_233
+406	val_406
+133	val_133
+175	val_175
+189	val_189
+454	val_454
+375	val_375
+401	val_401
+421	val_421
+407	val_407
+384	val_384
+256	val_256
+26	val_26
+134	val_134
+67	val_67
+384	val_384
+379	val_379
+18	val_18
+462	val_462
+492	val_492
+100	val_100
+298	val_298
+9	val_9
+341	val_341
+498	val_498
+146	val_146
+458	val_458
+362	val_362
+186	val_186
+285	val_285
+348	val_348
+167	val_167
+18	val_18
+273	val_273
+183	val_183
+281	val_281
+344	val_344
+97	val_97
+469	val_469
+315	val_315
+84	val_84
+28	val_28
+37	val_37
+448	val_448
+152	val_152
+348	val_348
+307	val_307
+194	val_194
+414	val_414
+477	val_477
+222	val_222
+126	val_126
+90	val_90
+169	val_169
+403	val_403
+400	val_400
+200	val_200
+97	val_97
+PREHOOK: query: drop table combine1_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table combine1_1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: default@combine1_1



Mime
View raw message