Subject: svn commit: r1561248 - in /hive/trunk: common/src/java/org/apache/hadoop/hive/conf/ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ ql/src/test/org/apache/hadoop/hive/ql/io/orc/ serde/src/java/org/apache/hadoop/hive/serde2/ shims/0.20/src/main/java/org/a...
Date: Sat, 25 Jan 2014 02:45:37 -0000
To: commits@hive.apache.org
From: thejas@apache.org
Reply-To: hive-dev@hive.apache.org
Message-Id: <20140125024538.34FC22388868@eris.apache.org>

Author: thejas
Date: Sat Jan 25 02:45:37 2014
New Revision: 1561248

URL: http://svn.apache.org/r1561248
Log:
Reverting HIVE-5728 patch

Removed:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewInputFormat.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewOutputFormat.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcNewSplit.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestNewInputOutputFormat.java
Modified:
    hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
    hive/trunk/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java
    hive/trunk/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
    hive/trunk/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
    hive/trunk/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java

Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1561248&r1=1561247&r2=1561248&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Sat Jan 25 02:45:37 2014
@@ -517,19 +517,8 @@ public class HiveConf extends Configurat
     // Define the default ORC stripe size
     HIVE_ORC_DEFAULT_STRIPE_SIZE("hive.exec.orc.default.stripe.size",
         256L * 1024 * 1024),
-    // Define the default ORC index stripe
-    HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE("hive.exec.orc.default.row.index.stride"
-        , null),
-    // Define the default ORC buffer size
-    HIVE_ORC_DEFAULT_BUFFER_SIZE("hive.exec.orc.default.buffer.size", null),
-    // Define the default block padding
-    HIVE_ORC_DEFAULT_BLOCK_PADDING("hive.exec.orc.default.block.padding",
-        null),
-    // Define the default orc compress
-    HIVE_ORC_DEFAULT_COMPRESS("hive.exec.orc.default.compress", null),
-    HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD(
-        "hive.exec.orc.dictionary.key.size.threshold", 0.8f),
+    HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD("hive.exec.orc.dictionary.key.size.threshold", 0.8f),

     HIVE_ORC_INCLUDE_FILE_FOOTER_IN_SPLITS("hive.orc.splits.include.file.footer", false),
     HIVE_ORC_CACHE_STRIPE_DETAILS_SIZE("hive.orc.cache.stripe.details.size", 10000),
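[Editor's note] After this revert, only the stripe size and the dictionary key-size threshold remain configurable through HiveConf; the per-table defaults for row index stride, buffer size, block padding, and compression go back to being compile-time constants in OrcFile. A minimal sketch of reading the two surviving keys off a plain Hadoop Configuration, with key names and defaults taken from the hunk above:

    import org.apache.hadoop.conf.Configuration;

    public class OrcConfSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // 256MB default, per HIVE_ORC_DEFAULT_STRIPE_SIZE above
        long stripeSize = conf.getLong("hive.exec.orc.default.stripe.size",
            256L * 1024 * 1024);
        // 0.8 default, per HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD above
        float dictThreshold = conf.getFloat(
            "hive.exec.orc.dictionary.key.size.threshold", 0.8f);
        System.out.println("stripe=" + stripeSize
            + " dictionaryThreshold=" + dictThreshold);
      }
    }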
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java?rev=1561248&r1=1561247&r2=1561248&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java Sat Jan 25 02:45:37 2014
@@ -153,19 +153,6 @@ public final class OrcFile {
       stripeSizeValue =
           conf.getLong(HiveConf.ConfVars.HIVE_ORC_DEFAULT_STRIPE_SIZE.varname,
               DEFAULT_STRIPE_SIZE);
-      rowIndexStrideValue =
-          conf.getInt(HiveConf.ConfVars.HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE
-              .varname, DEFAULT_ROW_INDEX_STRIDE);
-      bufferSizeValue =
-          conf.getInt(HiveConf.ConfVars.HIVE_ORC_DEFAULT_BUFFER_SIZE.varname,
-              DEFAULT_ROW_INDEX_STRIDE);
-      blockPaddingValue =
-          conf.getBoolean(HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_PADDING
-              .varname, DEFAULT_BLOCK_PADDING);
-      compressValue =
-          CompressionKind.valueOf(conf.get(HiveConf.ConfVars
-              .HIVE_ORC_DEFAULT_COMPRESS.varname,
-              DEFAULT_COMPRESSION_KIND.toString()));
       String versionName =
           conf.get(HiveConf.ConfVars.HIVE_ORC_WRITE_FORMAT.varname);
       if (versionName == null) {
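[Editor's note] One detail worth flagging for anyone who re-applies HIVE-5728 later: in the block removed above, bufferSizeValue falls back to DEFAULT_ROW_INDEX_STRIDE rather than a buffer-size constant, which looks like a copy-paste slip. A standalone sketch of the presumed intent — the constant values below are illustrative assumptions, not Hive's actual defaults:

    import org.apache.hadoop.conf.Configuration;

    public class OrcWriterDefaultsSketch {
      // Assumed values, for illustration only.
      private static final int DEFAULT_ROW_INDEX_STRIDE = 10000;
      private static final int DEFAULT_BUFFER_SIZE = 256 * 1024;

      public static void main(String[] args) {
        Configuration conf = new Configuration();
        int rowIndexStride = conf.getInt(
            "hive.exec.orc.default.row.index.stride", DEFAULT_ROW_INDEX_STRIDE);
        // The reverted code passed DEFAULT_ROW_INDEX_STRIDE as this fallback.
        int bufferSize = conf.getInt(
            "hive.exec.orc.default.buffer.size", DEFAULT_BUFFER_SIZE);
        System.out.println(rowIndexStride + " / " + bufferSize);
      }
    }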
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1561248&r1=1561247&r2=1561248&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Sat Jan 25 02:45:37 2014
@@ -44,8 +44,9 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.io.InputFormatChecker;
 import org.apache.hadoop.hive.ql.io.orc.Metadata;
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.FileGenerator;
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.SplitGenerator;
 import org.apache.hadoop.hive.ql.io.orc.Reader.FileMetaInfo;
-import org.apache.hadoop.hive.ql.io.orc.RecordReader;
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.log.PerfLogger;
@@ -62,6 +63,7 @@ import org.apache.hadoop.mapred.InputFor
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.InvalidInputException;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.util.StringUtils;

@@ -97,8 +99,8 @@ public class OrcInputFormat implements
   private static final double MIN_INCLUDED_LOCATION = 0.80;

   private static class OrcRecordReader
-      implements org.apache.hadoop.mapred.RecordReader {
-    private final RecordReader reader;
+      implements RecordReader {
+    private final org.apache.hadoop.hive.ql.io.orc.RecordReader reader;
     private final long offset;
     private final long length;
     private final int numColumns;
@@ -109,7 +111,10 @@ public class OrcInputFormat implements
                     long offset, long length) throws IOException {
       List types = file.getTypes();
       numColumns = (types.size() == 0) ? 0 : types.get(0).getSubtypesCount();
-      this.reader = createReaderFromFile(file, conf, offset, length);
+      boolean[] includedColumns = findIncludedColumns(types, conf);
+      String[] columnNames = getIncludedColumnNames(types, includedColumns, conf);
+      SearchArgument sarg = createSarg(types, conf);
+      this.reader = file.rows(offset, length, includedColumns, sarg, columnNames);
       this.offset = offset;
       this.length = length;
     }
@@ -150,19 +155,6 @@ public class OrcInputFormat implements
       return progress;
     }
   }
-
-  static RecordReader createReaderFromFile(
-      Reader file, Configuration conf, long offset, long length)
-      throws IOException {
-    List types = file.getTypes();
-    boolean[] includedColumns = findIncludedColumns(types, conf);
-    String[] columnNames = getIncludedColumnNames(types, includedColumns,
-        conf);
-    SearchArgument sarg = createSarg(types, conf);
-    RecordReader reader =
-        file.rows(offset, length, includedColumns, sarg, columnNames);
-    return reader;
-  }

   private static final PathFilter hiddenFileFilter = new PathFilter(){
     public boolean accept(Path p){
@@ -252,15 +244,14 @@ public class OrcInputFormat implements
     }
   }

-  @SuppressWarnings("unchecked")
   @Override
-  public org.apache.hadoop.mapred.RecordReader
+  public RecordReader
       getRecordReader(InputSplit inputSplit, JobConf conf,
                       Reporter reporter) throws IOException {
     if (isVectorMode(conf)) {
-      org.apache.hadoop.mapred.RecordReader vorr = voif.getRecordReader(inputSplit, conf,
+      RecordReader vorr = voif.getRecordReader(inputSplit, conf,
           reporter);
-      return (org.apache.hadoop.mapred.RecordReader) vorr;
+      return (RecordReader) vorr;
     }
     FileSplit fSplit = (FileSplit)inputSplit;
     reporter.setStatus(fSplit.toString());
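[Editor's note] The churn in the two hunks above is an import flip, not a behavior change: with org.apache.hadoop.mapred.RecordReader imported, the short name RecordReader now means the mapred interface, and the ORC reader has to be written out in full. The same pattern, shown with two unrelated JDK types that share a simple name:

    import java.util.Date;

    public class NameCollisionSketch {
      public static void main(String[] args) {
        Date now = new Date();                                    // java.util.Date, imported
        java.sql.Date sqlDate = new java.sql.Date(now.getTime()); // must stay fully qualified
        System.out.println(now + " / " + sqlDate);
      }
    }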
@@ -317,7 +308,7 @@ public class OrcInputFormat implements
    * @param conf The configuration of the job
    * @return the list of input {@link Path}s for the map-reduce job.
    */
-  static Path[] getInputPaths(Configuration conf) throws IOException {
+  static Path[] getInputPaths(JobConf conf) throws IOException {
     String dirs = conf.get("mapred.input.dir");
     if (dirs == null) {
       throw new IOException("Configuration mapred.input.dir is not defined.");
@@ -335,41 +326,10 @@
    * the different worker threads.
    */
   static class Context {
-    static class FileSplitInfo {
-      FileSplitInfo(Path file, long start, long length, String[] hosts,
-          FileMetaInfo fileMetaInfo) {
-        this.file = file;
-        this.start = start;
-        this.length = length;
-        this.hosts = hosts;
-        this.fileMetaInfo = fileMetaInfo;
-      }
-      Path getPath() {
-        return file;
-      }
-      long getStart() {
-        return start;
-      }
-      long getLength() {
-        return length;
-      }
-      String[] getLocations() {
-        return hosts;
-      }
-      FileMetaInfo getFileMetaInfo() {
-        return fileMetaInfo;
-      }
-      private Path file;
-      private long start;
-      private long length;
-      private String[] hosts;
-      FileMetaInfo fileMetaInfo;
-    }
     private final Configuration conf;
     private static Cache footerCache;
     private final ExecutorService threadPool;
-    private final List splits =
-        new ArrayList(10000);
+    private final List splits = new ArrayList(10000);
     private final List errors = new ArrayList();
     private final HadoopShims shims = ShimLoader.getHadoopShims();
     private final long maxSize;
@@ -418,7 +378,7 @@ public class OrcInputFormat implements
      * the back.
      * @result the Nth file split
      */
-    FileSplitInfo getResult(int index) {
+    OrcSplit getResult(int index) {
       if (index >= 0) {
         return splits.get(index);
       } else {
@@ -596,8 +556,8 @@ public class OrcInputFormat implements
       if(locations.length == 1 && file.getLen() < context.maxSize) {
         String[] hosts = locations[0].getHosts();
         synchronized (context.splits) {
-          context.splits.add(new Context.FileSplitInfo(file.getPath(), 0,
-              file.getLen(), hosts, fileMetaInfo));
+          context.splits.add(new OrcSplit(file.getPath(), 0, file.getLen(),
+              hosts, fileMetaInfo));
         }
       } else {
         // if it requires a compute task
@@ -683,8 +643,8 @@ public class OrcInputFormat implements
           hostList.toArray(hosts);
         }
         synchronized (context.splits) {
-          context.splits.add(new Context.FileSplitInfo(file.getPath(), offset,
-              length, hosts, fileMetaInfo));
+          context.splits.add(new OrcSplit(file.getPath(), offset, length,
+              hosts, fileMetaInfo));
         }
       }

@@ -891,45 +851,35 @@ public class OrcInputFormat implements
     }
   }

-  static List generateSplitsInfo(Configuration conf)
-      throws IOException {
-    // use threads to resolve directories into splits
-    Context context = new Context(conf);
-    for(Path dir: getInputPaths(conf)) {
-      FileSystem fs = dir.getFileSystem(conf);
-      context.schedule(new FileGenerator(context, fs, dir));
-    }
-    context.waitForTasks();
-    // deal with exceptions
-    if (!context.errors.isEmpty()) {
-      List errors =
-          new ArrayList(context.errors.size());
-      for(Throwable th: context.errors) {
-        if (th instanceof IOException) {
-          errors.add((IOException) th);
-        } else {
-          throw new RuntimeException("serious problem", th);
-        }
-      }
-      throw new InvalidInputException(errors);
-    }
-    if (context.cacheStripeDetails) {
-      LOG.info("FooterCacheHitRatio: " + context.cacheHitCounter.get() + "/"
-          + context.numFilesCounter.get());
-    }
-    return context.splits;
-  }

   @Override
   public InputSplit[] getSplits(JobConf job,
                                 int numSplits) throws IOException {
+    // use threads to resolve directories into splits
     perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ORC_GET_SPLITS);
-    List splits =
-        OrcInputFormat.generateSplitsInfo(job);
-    InputSplit[] result = new InputSplit[splits.size()];
-    for (int i=0;i<splits.size(); i++) {
-      Context.FileSplitInfo split = splits.get(i);
-      result[i] = new OrcSplit(split.getPath(), split.getStart(),
-          split.getLength(), split.getLocations(), split.getFileMetaInfo());
-    }
+    Context context = new Context(job);
+    for(Path dir: getInputPaths(job)) {
+      FileSystem fs = dir.getFileSystem(job);
+      context.schedule(new FileGenerator(context, fs, dir));
+    }
+    context.waitForTasks();
+    // deal with exceptions
+    if (!context.errors.isEmpty()) {
+      List errors =
+          new ArrayList(context.errors.size());
+      for(Throwable th: context.errors) {
+        if (th instanceof IOException) {
+          errors.add((IOException) th);
+        } else {
+          throw new RuntimeException("serious problem", th);
+        }
+      }
+      throw new InvalidInputException(errors);
+    }
+    InputSplit[] result = new InputSplit[context.splits.size()];
+    context.splits.toArray(result);
+    if (context.cacheStripeDetails) {
+      LOG.info("FooterCacheHitRatio: " + context.cacheHitCounter.get() + "/"
+          + context.numFilesCounter.get());
+    }
     perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ORC_GET_SPLITS);
     return result;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java?rev=1561248&r1=1561247&r2=1561248&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java Sat Jan 25 02:45:37 2014
@@ -43,7 +43,7 @@ import org.apache.hadoop.hive.serde2.typ
 import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
 import org.apache.hadoop.io.Writable;

-final public class OrcStruct implements Writable {
+final class OrcStruct implements Writable {

   private Object[] fields;

@@ -461,7 +461,7 @@ final public class OrcStruct implements
     }
   }

-  static public ObjectInspector createObjectInspector(TypeInfo info) {
+  static ObjectInspector createObjectInspector(TypeInfo info) {
     switch (info.getCategory()) {
       case PRIMITIVE:
         switch (((PrimitiveTypeInfo) info).getPrimitiveCategory()) {

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java?rev=1561248&r1=1561247&r2=1561248&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java Sat Jan 25 02:45:37 2014
@@ -428,7 +428,7 @@ public class TestInputOutputFormat {
         new OrcInputFormat.SplitGenerator(context, fs,
             fs.getFileStatus(new Path("/a/file")), null);
     splitter.createSplit(0, 200, null);
-    OrcInputFormat.Context.FileSplitInfo result = context.getResult(-1);
+    FileSplit result = context.getResult(-1);
     assertEquals(0, result.getStart());
     assertEquals(200, result.getLength());
     assertEquals("/a/file", result.getPath().toString());
@@ -477,7 +477,7 @@ public class TestInputOutputFormat {
       }
       throw new IOException("Errors during splitting");
     }
-    OrcInputFormat.Context.FileSplitInfo result = context.getResult(0);
+    FileSplit result = context.getResult(0);
     assertEquals(3, result.getStart());
     assertEquals(497, result.getLength());
     result = context.getResult(1);
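[Editor's note] With the revert in place, getSplits() once again does the directory scan inline and hands back OrcSplit instances directly, and the tests above downcast getResult() to FileSplit for the path/start/length assertions. A sketch of the mapred-era calling convention involved, using TextInputFormat as a stand-in for OrcInputFormat so it runs without a Hive build; the input path is a placeholder:

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.FileInputFormat;
    import org.apache.hadoop.mapred.FileSplit;
    import org.apache.hadoop.mapred.InputSplit;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.TextInputFormat;

    public class GetSplitsSketch {
      public static void main(String[] args) throws Exception {
        JobConf job = new JobConf();
        FileInputFormat.setInputPaths(job, new Path("/tmp/some-input")); // placeholder path
        TextInputFormat format = new TextInputFormat();
        format.configure(job);
        for (InputSplit split : format.getSplits(job, 1)) {
          FileSplit fileSplit = (FileSplit) split;  // same downcast the tests rely on
          System.out.println(fileSplit.getPath() + " @ " + fileSplit.getStart()
              + " + " + fileSplit.getLength());
        }
      }
    }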
"hive.io.file.readcolumn.names"; private static final String READ_COLUMN_IDS_CONF_STR_DEFAULT = ""; + private static final String READ_ALL_COLUMNS = "hive.io.file.read.all.columns"; private static final boolean READ_ALL_COLUMNS_DEFAULT = true; /** Modified: hive/trunk/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java URL: http://svn.apache.org/viewvc/hive/trunk/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java?rev=1561248&r1=1561247&r2=1561248&view=diff ============================================================================== --- hive/trunk/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java (original) +++ hive/trunk/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java Sat Jan 25 02:45:37 2014 @@ -773,8 +773,4 @@ public class Hadoop20Shims implements Ha ret.put("MAPREDTASKCLEANUPNEEDED", "mapreduce.job.committer.task.cleanup.needed"); return ret; } - @Override - public Configuration getConfiguration(org.apache.hadoop.mapreduce.JobContext context) { - return context.getConfiguration(); - } } Modified: hive/trunk/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java URL: http://svn.apache.org/viewvc/hive/trunk/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java?rev=1561248&r1=1561247&r2=1561248&view=diff ============================================================================== --- hive/trunk/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java (original) +++ hive/trunk/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java Sat Jan 25 02:45:37 2014 @@ -410,9 +410,4 @@ public class Hadoop20SShims extends Hado ret.put("MAPREDTASKCLEANUPNEEDED", "mapreduce.job.committer.task.cleanup.needed"); return ret; } - - @Override - public Configuration getConfiguration(org.apache.hadoop.mapreduce.JobContext context) { - return context.getConfiguration(); - } } Modified: hive/trunk/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java URL: http://svn.apache.org/viewvc/hive/trunk/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java?rev=1561248&r1=1561247&r2=1561248&view=diff ============================================================================== --- hive/trunk/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java (original) +++ hive/trunk/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java Sat Jan 25 02:45:37 2014 @@ -557,9 +557,4 @@ public class Hadoop23Shims extends Hadoo ret.put("MAPREDTASKCLEANUPNEEDED", "mapreduce.job.committer.task.cleanup.needed"); return ret; } - - @Override - public Configuration getConfiguration(org.apache.hadoop.mapreduce.JobContext context) { - return context.getConfiguration(); - } } Modified: hive/trunk/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java URL: http://svn.apache.org/viewvc/hive/trunk/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java?rev=1561248&r1=1561247&r2=1561248&view=diff ============================================================================== --- hive/trunk/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java (original) +++ hive/trunk/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java Sat Jan 25 02:45:37 2014 @@ -520,10 +520,4 @@ public interface HadoopShims { public FileSystem createProxyFileSystem(FileSystem fs, URI uri); public Map getHadoopConfNames(); - - - /** - * Get configuration from JobContext 
Modified: hive/trunk/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
URL: http://svn.apache.org/viewvc/hive/trunk/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java?rev=1561248&r1=1561247&r2=1561248&view=diff
==============================================================================
--- hive/trunk/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java (original)
+++ hive/trunk/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java Sat Jan 25 02:45:37 2014
@@ -520,10 +520,4 @@ public interface HadoopShims {
   public FileSystem createProxyFileSystem(FileSystem fs, URI uri);

   public Map getHadoopConfNames();
-
-
-  /**
-   * Get configuration from JobContext
-   */
-  public Configuration getConfiguration(JobContext context);
 }
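[Editor's note] Each shim override removed above simply delegated to JobContext.getConfiguration(), so callers needing the mapreduce-side Configuration are left with the direct call; the indirection presumably existed because JobContext changed from a class to an interface between Hadoop versions, which breaks binary compatibility for code compiled against the other version. A minimal sketch of the direct call:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.JobContext;

    public class ConfFromContextSketch {
      // What every removed shim override did internally.
      static Configuration confOf(JobContext context) {
        return context.getConfiguration();
      }
    }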