hive-commits mailing list archives

From ser...@apache.org
Subject hive git commit: HIVE-17327 : LLAP IO: restrict native file ID usage to default FS to avoid hypothetical collisions when HDFS federation is used (Sergey Shelukhin, reviewed by Gopal Vijayaraghavan)
Date Mon, 21 Aug 2017 19:45:24 GMT
Repository: hive
Updated Branches:
  refs/heads/branch-2 564d52949 -> 069b20afb


HIVE-17327 : LLAP IO: restrict native file ID usage to default FS to avoid hypothetical collisions
when HDFS federation is used (Sergey Shelukhin, reviewed by Gopal Vijayaraghavan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/069b20af
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/069b20af
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/069b20af

Branch: refs/heads/branch-2
Commit: 069b20afbbcf4e2c010814be5381d6c445e76be7
Parents: 564d529
Author: sergey <sershe@apache.org>
Authored: Mon Aug 21 12:05:57 2017 -0700
Committer: sergey <sershe@apache.org>
Committed: Mon Aug 21 12:45:28 2017 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |  4 ++
 .../llap/io/encoded/OrcEncodedDataReader.java   |  7 ++-
 .../llap/io/encoded/SerDeEncodedDataReader.java |  7 ++-
 .../hive/metastore/FileMetadataManager.java     |  7 ++-
 .../hive/ql/exec/tez/TezSessionPoolManager.java | 11 +++-
 .../org/apache/hadoop/hive/ql/io/HdfsUtils.java | 39 ++++++++++--
 .../hadoop/hive/ql/io/orc/ExternalCache.java    |  2 +-
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java   | 62 ++++++++++++--------
 .../hive/ql/io/orc/TestInputOutputFormat.java   | 12 ++--
 .../apache/hadoop/hive/shims/Hadoop23Shims.java |  4 +-
 .../apache/hadoop/hive/shims/HadoopShims.java   |  4 +-
 11 files changed, 113 insertions(+), 46 deletions(-)
----------------------------------------------------------------------
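In short, native HDFS inode IDs are now used as LLAP cache keys only when the file lives on the default DFS; otherwise the reader falls back to a synthetic key when that is allowed. A rough sketch of the resulting decision, condensing the HdfsUtils change below (the helper name chooseFileKey is invented, not verbatim from the patch):

    // Sketch only: how the cache file key is now chosen for a path on file system fs.
    static Object chooseFileKey(FileSystem fs, Path path, FileStatus status,
        boolean checkDefaultFs, boolean allowSynthetic) throws IOException {
      if (fs instanceof DistributedFileSystem
          && (!checkDefaultFs || HdfsUtils.isDefaultFs((DistributedFileSystem) fs))) {
        return SHIMS.getFileId((DistributedFileSystem) fs, path.toUri().getPath()); // native inode ID
      }
      if (!allowSynthetic) return null; // no safe unique ID available
      return new SyntheticFileId(path, status.getLen(), status.getModificationTime());
    }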


http://git-wip-us.apache.org/repos/asf/hive/blob/069b20af/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 30506b0..b8052c6 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2950,6 +2950,10 @@ public class HiveConf extends Configuration {
         "modification time, which is almost certain to identify file uniquely. However, if
you\n" +
         "use a FS without file IDs and rewrite files a lot (or are paranoid), you might want\n"
+
         "to avoid this setting."),
+    LLAP_CACHE_DEFAULT_FS_FILE_ID("hive.llap.cache.defaultfs.only.native.fileid", true,
+        "Whether LLAP cache should use native file IDs from the default FS only. This is
to\n" +
+        "avoid file ID collisions when several different DFS instances are in use at the
same\n" +
+        "time. Disable this check to allow native file IDs from non-default DFS."),
     LLAP_CACHE_ENABLE_ORC_GAP_CACHE("hive.llap.orc.gap.cache", true,
         "Whether LLAP cache for ORC should remember gaps in ORC compression buffer read\n"
+
         "estimates, to avoid re-reading the data that was read once and discarded because
it\n" +

http://git-wip-us.apache.org/repos/asf/hive/blob/069b20af/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
index bc81953..0fd8139 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
@@ -202,7 +202,8 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void>
     // Don't cache the filesystem object for now; Tez closes it and FS cache will fix all that
     fs = split.getPath().getFileSystem(jobConf);
     fileKey = determineFileId(fs, split,
-        HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_CACHE_ALLOW_SYNTHETIC_FILEID));
+        HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_CACHE_ALLOW_SYNTHETIC_FILEID),
+        HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID));
     fileMetadata = getOrReadFileMetadata();
     if (readerSchema == null) {
       readerSchema = fileMetadata.getSchema();
@@ -517,7 +518,7 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void>
   }
 
   private static Object determineFileId(FileSystem fs, FileSplit split,
-      boolean allowSynthetic) throws IOException {
+      boolean allowSynthetic, boolean checkDefaultFs) throws IOException {
     if (split instanceof OrcSplit) {
       Object fileKey = ((OrcSplit)split).getFileKey();
       if (fileKey != null) {
@@ -525,7 +526,7 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void>
       }
     }
     LOG.warn("Split for " + split.getPath() + " (" + split.getClass() + ") does not have
file ID");
-    return HdfsUtils.getFileId(fs, split.getPath(), allowSynthetic);
+    return HdfsUtils.getFileId(fs, split.getPath(), allowSynthetic, checkDefaultFs);
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hive/blob/069b20af/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
index 907200a..a088e27 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
@@ -213,7 +213,8 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
 
     fs = split.getPath().getFileSystem(daemonConf);
     fileKey = determineFileId(fs, split,
-        HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_CACHE_ALLOW_SYNTHETIC_FILEID));
+        HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_CACHE_ALLOW_SYNTHETIC_FILEID),
+        HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID));
     this.sourceInputFormat = sourceInputFormat;
     this.sourceSerDe = sourceSerDe;
     this.reporter = reporter;
@@ -1646,12 +1647,12 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
   }
 
   private static Object determineFileId(FileSystem fs, FileSplit split,
-      boolean allowSynthetic) throws IOException {
+      boolean allowSynthetic, boolean checkDefaultFs) throws IOException {
     /* TODO: support this optionally? this is not OrcSplit, but we could add a custom split.
       Object fileKey = ((OrcSplit)split).getFileKey();
       if (fileKey != null) return fileKey; */
     LlapIoImpl.LOG.warn("Split for " + split.getPath() + " (" + split.getClass() + ") does
not have file ID");
-    return HdfsUtils.getFileId(fs, split.getPath(), allowSynthetic);
+    return HdfsUtils.getFileId(fs, split.getPath(), allowSynthetic, checkDefaultFs);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/hive/blob/069b20af/metastore/src/java/org/apache/hadoop/hive/metastore/FileMetadataManager.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/FileMetadataManager.java b/metastore/src/java/org/apache/hadoop/hive/metastore/FileMetadataManager.java
index 9b43328..67aa144 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/FileMetadataManager.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/FileMetadataManager.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.metastore;
 
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+
 import org.apache.hadoop.fs.LocatedFileStatus;
 
 import org.apache.hadoop.fs.RemoteIterator;
@@ -114,8 +116,11 @@ public class FileMetadataManager {
     }
     for (Path file : files) {
       long fileId;
+      // TODO: use the other HdfsUtils here
+      if (!(fs instanceof DistributedFileSystem)) return;
       try {
-        fileId = SHIMS.getFileId(fs, Path.getPathWithoutSchemeAndAuthority(file).toString());
+        fileId = SHIMS.getFileId((DistributedFileSystem)fs,
+            Path.getPathWithoutSchemeAndAuthority(file).toString());
       } catch (UnsupportedOperationException ex) {
         LOG.error("Cannot cache file metadata for " + location + "; "
             + fs.getClass().getCanonicalName() + " does not support fileId");
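Because the shim method now takes a DistributedFileSystem (see the shims hunks at the end of this patch), callers outside HdfsUtils do the instanceof check themselves; condensed from the hunk above:

    // Condensed from the change above: skip metadata caching for non-HDFS file systems.
    if (!(fs instanceof DistributedFileSystem)) {
      return; // no native file ID to cache against
    }
    long fileId = SHIMS.getFileId((DistributedFileSystem) fs,
        Path.getPathWithoutSchemeAndAuthority(file).toString());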

http://git-wip-us.apache.org/repos/asf/hive/blob/069b20af/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java
index 8f45947..c96a079 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java
@@ -232,7 +232,6 @@ public class TezSessionPoolManager {
         }
       }
     }
-    
 
     sessionLifetimeMs = conf.getTimeVar(
         ConfVars.HIVE_SERVER2_TEZ_SESSION_LIFETIME, TimeUnit.MILLISECONDS);
@@ -319,6 +318,8 @@ public class TezSessionPoolManager {
       default: // All good.
       }
     }
+
+    // Check the restricted configs that the users cannot set.
     for (ConfVars var : restrictedHiveConf) {
       String userValue = HiveConf.getVarWithoutType(conf, var),
           serverValue = HiveConf.getVarWithoutType(initConf, var);
@@ -331,6 +332,14 @@ public class TezSessionPoolManager {
       validateRestrictedConfigValues(var, userValue, serverValue);
     }
 
+    // Propagate this value from HS2; don't allow users to set it.
+    // In HS2, initConf will be set; it won't be set otherwise as noone calls setupPool.
+    // TODO: add a section like the restricted configs for overrides when there's more than one.
+    if (initConf != null) {
+      conf.set(ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID.varname,
+          HiveConf.getVarWithoutType(initConf, ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID));
+    }
+
     // TODO Session re-use completely disabled for doAs=true. Always launches a new session.
     boolean nonDefaultUser = conf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS);
 

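Presumably the point of the propagation above is that split generation (which reads this flag in OrcInputFormat) and the LLAP daemons must agree on it; an illustrative, hypothetical sequence:

    // Hypothetical: a user override is discarded when the pooled session is handed out.
    conf.setBoolVar(ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID, false);  // user attempt in the session conf
    // ... session setup runs the block above with the HS2 initConf ...
    // conf again carries the server-side value, so the planner and the daemons use the same file-ID policy.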
http://git-wip-us.apache.org/repos/asf/hive/blob/069b20af/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java
index 9b8b761..fa7f59d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java
@@ -18,6 +18,12 @@
 
 package org.apache.hadoop.hive.ql.io;
 
+import com.google.common.annotations.VisibleForTesting;
+
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+
+import java.net.URI;
+
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
@@ -38,10 +44,13 @@ public class HdfsUtils {
   private static final HadoopShims SHIMS = ShimLoader.getHadoopShims();
   private static final Logger LOG = LoggerFactory.getLogger(HdfsUtils.class);
 
-  public static Object getFileId(
-      FileSystem fileSystem, Path path, boolean allowSynthetic) throws IOException {
+  public static Object getFileId(FileSystem fileSystem, Path path,
+      boolean allowSynthetic, boolean checkDefaultFs) throws IOException {
     if (fileSystem instanceof DistributedFileSystem) {
-      return SHIMS.getFileId(fileSystem, path.toUri().getPath());
+      DistributedFileSystem dfs = (DistributedFileSystem) fileSystem;
+      if ((!checkDefaultFs) || isDefaultFs(dfs)) {
+        return SHIMS.getFileId(dfs, path.toUri().getPath());
+      }
     }
     if (!allowSynthetic) {
       LOG.warn("Cannot get unique file ID from "
@@ -52,7 +61,10 @@ public class HdfsUtils {
     return new SyntheticFileId(path, fs.getLen(), fs.getModificationTime());
   }
 
-  public static long createFileId(String pathStr, FileStatus fs, boolean doLog, String fsName) {
+  // This is not actually used for production.
+  @VisibleForTesting
+  public static long createTestFileId(
+      String pathStr, FileStatus fs, boolean doLog, String fsName) {
     int nameHash = pathStr.hashCode();
     long fileSize = fs.getLen(), modTime = fs.getModificationTime();
     int fileSizeHash = (int)(fileSize ^ (fileSize >>> 32)),
@@ -91,4 +103,23 @@ public class HdfsUtils {
     return ((fileSystem instanceof DistributedFileSystem))
         ? new Path(HDFS_ID_PATH_PREFIX + fileId) : path;
   }
+
+  public static boolean isDefaultFs(DistributedFileSystem fs) {
+    URI uri = fs.getUri();
+
+    String scheme = uri.getScheme();
+    if (scheme == null) return true; // Assume that relative URI resolves to default FS.
+    URI defaultUri = FileSystem.getDefaultUri(fs.getConf());
+    if (!defaultUri.getScheme().equalsIgnoreCase(scheme)) return false; // Mismatch.
+ 
+    String defaultAuthority = defaultUri.getAuthority(), authority = uri.getAuthority();
+    if (authority == null) return true; // Schemes match, no authority - assume default.
+    if (defaultAuthority == null) return false; // TODO: What does this even mean?
+    if (!defaultUri.getHost().equalsIgnoreCase(uri.getHost())) return false; // Mismatch.
+
+    int defaultPort = defaultUri.getPort(), port = uri.getPort();
+    if (port == -1) return true; // No port, assume default.
+    // Note - this makes assumptions that are DFS-specific; DFS::getDefaultPort is not visible.
+    return (defaultPort == -1) ? (port == NameNode.DEFAULT_PORT) : (port == defaultPort);
+  }
 }
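To make the authority comparison concrete, a few hypothetical outcomes of isDefaultFs, assuming fs.defaultFS is hdfs://nn1.example.com:8020 (hosts and ports invented):

    // Hypothetical examples for isDefaultFs():
    //   hdfs://nn1.example.com:8020  -> true   (scheme, host and port all match the default FS)
    //   hdfs://nn1.example.com       -> true   (no port on the FS URI, assumed default)
    //   hdfs://nn2.example.com:8020  -> false  (different namenode host, e.g. another federation namespace)
    //   hdfs://nn1.example.com:9000  -> false  (same host, different port)
    DistributedFileSystem dfs =
        (DistributedFileSystem) FileSystem.get(URI.create("hdfs://nn1.example.com:8020"), conf);
    boolean nativeIdOk = HdfsUtils.isDefaultFs(dfs);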

http://git-wip-us.apache.org/repos/asf/hive/blob/069b20af/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ExternalCache.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ExternalCache.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ExternalCache.java
index 9299306..5ec08f8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ExternalCache.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ExternalCache.java
@@ -209,7 +209,7 @@ public class ExternalCache implements FooterCache {
   }
 
   private Long generateTestFileId(final FileStatus fs, List<HdfsFileStatusWithId> files, int i) {
-    final Long fileId = HdfsUtils.createFileId(fs.getPath().toUri().getPath(), fs, false, null);
+    final Long fileId = HdfsUtils.createTestFileId(fs.getPath().toUri().getPath(), fs, false, null);
     files.set(i, new HdfsFileStatusWithId() {
       @Override
       public FileStatus getFileStatus() {

http://git-wip-us.apache.org/repos/asf/hive/blob/069b20af/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 076bac1..647e7c8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.io.orc;
 
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.security.PrivilegedExceptionAction;
@@ -766,10 +768,12 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
     private List<OrcSplit> splitsRef = null;
     private final UserGroupInformation ugi;
     private final boolean allowSyntheticFileIds;
+    private final boolean isDefaultFs;
 
     public ETLSplitStrategy(Context context, FileSystem fs, Path dir,
         List<HdfsFileStatusWithId> children, List<OrcProto.Type> readerTypes, boolean isOriginal,
-        List<DeltaMetaData> deltas, boolean[] covered, UserGroupInformation ugi, boolean allowSyntheticFileIds) {
+        List<DeltaMetaData> deltas, boolean[] covered, UserGroupInformation ugi,
+        boolean allowSyntheticFileIds, boolean isDefaultFs) {
       assert !children.isEmpty();
       this.context = context;
       this.dirs = Lists.newArrayList(new ETLDir(dir, fs, children.size()));
@@ -780,6 +784,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
       this.covered = covered;
       this.ugi = ugi;
       this.allowSyntheticFileIds = allowSyntheticFileIds;
+      this.isDefaultFs = isDefaultFs;
     }
 
     @Override
@@ -917,7 +922,8 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
       List<Future<List<OrcSplit>>> localListF = null;
       List<OrcSplit> localListS = null;
       for (SplitInfo splitInfo : splitInfos) {
-        SplitGenerator sg = new SplitGenerator(splitInfo, tpUgi, allowSyntheticFileIds);
+        SplitGenerator sg = new SplitGenerator(
+            splitInfo, tpUgi, allowSyntheticFileIds, isDefaultFs);
         if (!sg.isBlocking()) {
           if (localListS == null) {
             localListS = new ArrayList<>(splits.size());
@@ -955,10 +961,11 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
     private final FileSystem fs;
     private final Path dir;
     private final boolean allowSyntheticFileIds;
+    private final boolean isDefaultFs;
 
-    public BISplitStrategy(Context context, FileSystem fs,
-        Path dir, List<HdfsFileStatusWithId> fileStatuses, boolean isOriginal,
-        List<DeltaMetaData> deltas, boolean[] covered, boolean allowSyntheticFileIds) {
+    public BISplitStrategy(Context context, FileSystem fs, Path dir,
+        List<HdfsFileStatusWithId> fileStatuses, boolean isOriginal, List<DeltaMetaData> deltas,
+        boolean[] covered, boolean allowSyntheticFileIds, boolean isDefaultFs) {
       super(dir, context.numBuckets, deltas, covered, context.acidOperationalProperties);
       this.fileStatuses = fileStatuses;
       this.isOriginal = isOriginal;
@@ -966,6 +973,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
       this.fs = fs;
       this.dir = dir;
       this.allowSyntheticFileIds = allowSyntheticFileIds;
+      this.isDefaultFs = isDefaultFs;
     }
 
     @Override
@@ -974,7 +982,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
       for (HdfsFileStatusWithId file : fileStatuses) {
         FileStatus fileStatus = file.getFileStatus();
         if (fileStatus.getLen() != 0) {
-          Object fileKey = file.getFileId();
+          Object fileKey = isDefaultFs ? file.getFileId() : null;
           if (fileKey == null && allowSyntheticFileIds) {
             fileKey = new SyntheticFileId(fileStatus);
           }
@@ -1225,12 +1233,12 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
     private SchemaEvolution evolution;
 
     public SplitGenerator(SplitInfo splitInfo, UserGroupInformation ugi,
-        boolean allowSyntheticFileIds) throws IOException {
+        boolean allowSyntheticFileIds, boolean isDefaultFs) throws IOException {
       this.ugi = ugi;
       this.context = splitInfo.context;
       this.fs = splitInfo.fs;
       this.file = splitInfo.fileWithId.getFileStatus();
-      this.fsFileId = splitInfo.fileWithId.getFileId();
+      this.fsFileId = isDefaultFs ? splitInfo.fileWithId.getFileId() : null;
       this.blockSize = this.file.getBlockSize();
       this.orcTail = splitInfo.orcTail;
       this.readerTypes = splitInfo.readerTypes;
@@ -1759,15 +1767,16 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
   private static SplitStrategy<?> combineOrCreateETLStrategy(CombinedCtx combinedCtx,
       Context context, FileSystem fs, Path dir, List<HdfsFileStatusWithId> files,
       List<DeltaMetaData> deltas, boolean[] covered, List<OrcProto.Type> readerTypes,
-      boolean isOriginal, UserGroupInformation ugi, boolean allowSyntheticFileIds) {
+      boolean isOriginal, UserGroupInformation ugi, boolean allowSyntheticFileIds,
+      boolean isDefaultFs) {
     if (!deltas.isEmpty() || combinedCtx == null) {
       return new ETLSplitStrategy(
           context, fs, dir, files, readerTypes, isOriginal, deltas, covered, ugi,
-          allowSyntheticFileIds);
+          allowSyntheticFileIds, isDefaultFs);
     } else if (combinedCtx.combined == null) {
       combinedCtx.combined = new ETLSplitStrategy(
           context, fs, dir, files, readerTypes, isOriginal, deltas, covered, ugi,
-          allowSyntheticFileIds);
+          allowSyntheticFileIds, isDefaultFs);
       combinedCtx.combineStartUs = System.nanoTime();
       return null;
     } else {
@@ -1778,12 +1787,12 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
       case NO_AND_CONTINUE:
         return new ETLSplitStrategy(
             context, fs, dir, files, readerTypes, isOriginal, deltas, covered, ugi,
-            allowSyntheticFileIds);
+            allowSyntheticFileIds, isDefaultFs);
       case NO_AND_SWAP: {
         ETLSplitStrategy oldBase = combinedCtx.combined;
         combinedCtx.combined = new ETLSplitStrategy(
             context, fs, dir, files, readerTypes, isOriginal, deltas, covered, ugi,
-            allowSyntheticFileIds);
+            allowSyntheticFileIds, isDefaultFs);
         combinedCtx.combineStartUs = System.nanoTime();
         return oldBase;
       }
@@ -2151,11 +2160,16 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
     List<SplitStrategy<?>> splitStrategies = new ArrayList<SplitStrategy<?>>();
     SplitStrategy<?> splitStrategy;
 
+    boolean checkDefaultFs = HiveConf.getBoolVar(
+        context.conf, ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID);
+    boolean isDefaultFs = (!checkDefaultFs) || ((fs instanceof DistributedFileSystem)
+            && HdfsUtils.isDefaultFs((DistributedFileSystem) fs));
+
     // When no baseFiles, we will just generate a single split strategy and return.
     List<HdfsFileStatusWithId> acidSchemaFiles = new ArrayList<HdfsFileStatusWithId>();
     if (baseFiles.isEmpty()) {
-      splitStrategy = determineSplitStrategy(combinedCtx, context, fs, dir,
-          acidSchemaFiles, false, parsedDeltas, readerTypes, ugi, allowSyntheticFileIds);
+      splitStrategy = determineSplitStrategy(combinedCtx, context, fs, dir, acidSchemaFiles,
+          false, parsedDeltas, readerTypes, ugi, allowSyntheticFileIds, isDefaultFs);
       if (splitStrategy != null) {
         splitStrategies.add(splitStrategy);
       }
@@ -2174,8 +2188,8 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
 
     // Generate split strategy for non-acid schema original files, if any.
     if (!originalSchemaFiles.isEmpty()) {
-      splitStrategy = determineSplitStrategy(combinedCtx, context, fs, dir,
-          originalSchemaFiles, true, parsedDeltas, readerTypes, ugi, allowSyntheticFileIds);
+      splitStrategy = determineSplitStrategy(combinedCtx, context, fs, dir, originalSchemaFiles,
+          true, parsedDeltas, readerTypes, ugi, allowSyntheticFileIds, isDefaultFs);
       if (splitStrategy != null) {
         splitStrategies.add(splitStrategy);
       }
@@ -2183,8 +2197,8 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
 
     // Generate split strategy for acid schema files, if any.
     if (!acidSchemaFiles.isEmpty()) {
-      splitStrategy = determineSplitStrategy(combinedCtx, context, fs, dir,
-          acidSchemaFiles, false, parsedDeltas, readerTypes, ugi, allowSyntheticFileIds);
+      splitStrategy = determineSplitStrategy(combinedCtx, context, fs, dir, acidSchemaFiles,
+          false, parsedDeltas, readerTypes, ugi, allowSyntheticFileIds, isDefaultFs);
       if (splitStrategy != null) {
         splitStrategies.add(splitStrategy);
       }
@@ -2200,7 +2214,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
       boolean isOriginal,
       List<ParsedDelta> parsedDeltas,
       List<OrcProto.Type> readerTypes,
-      UserGroupInformation ugi, boolean allowSyntheticFileIds) {
+      UserGroupInformation ugi, boolean allowSyntheticFileIds, boolean isDefaultFs) {
     List<DeltaMetaData> deltas = AcidUtils.serializeDeltas(parsedDeltas);
     boolean[] covered = new boolean[context.numBuckets];
 
@@ -2227,19 +2241,19 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
         case BI:
           // BI strategy requested through config
           return new BISplitStrategy(context, fs, dir, baseFiles,
-              isOriginal, deltas, covered, allowSyntheticFileIds);
+              isOriginal, deltas, covered, allowSyntheticFileIds, isDefaultFs);
         case ETL:
           // ETL strategy requested through config
           return combineOrCreateETLStrategy(combinedCtx, context, fs, dir, baseFiles,
-              deltas, covered, readerTypes, isOriginal, ugi, allowSyntheticFileIds);
+              deltas, covered, readerTypes, isOriginal, ugi, allowSyntheticFileIds, isDefaultFs);
         default:
           // HYBRID strategy
           if (avgFileSize > context.maxSize || totalFiles <= context.etlFileThreshold) {
             return combineOrCreateETLStrategy(combinedCtx, context, fs, dir, baseFiles,
-                deltas, covered, readerTypes, isOriginal, ugi, allowSyntheticFileIds);
+                deltas, covered, readerTypes, isOriginal, ugi, allowSyntheticFileIds, isDefaultFs);
           } else {
             return new BISplitStrategy(context, fs, dir, baseFiles,
-                isOriginal, deltas, covered, allowSyntheticFileIds);
+                isOriginal, deltas, covered, allowSyntheticFileIds, isDefaultFs);
           }
       }
     } else {
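The net effect inside the split strategies is that a non-default FS behaves as if the listing carried no native file IDs; condensed from the BISplitStrategy/SplitGenerator hunks above:

    // Condensed illustration of the per-file behavior above.
    Object fileKey = isDefaultFs ? file.getFileId() : null;  // ignore native IDs off the default FS
    if (fileKey == null && allowSyntheticFileIds) {
      fileKey = new SyntheticFileId(fileStatus);             // fall back to a path/length/mtime key
    }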

http://git-wip-us.apache.org/repos/asf/hive/blob/069b20af/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index 44a76f6..a14ff5d 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -1535,7 +1535,7 @@ public class TestInputOutputFormat {
     OrcInputFormat.SplitGenerator splitter =
         new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs,
             fs.getFileStatus(new Path("/a/file")), null, null, true,
-            new ArrayList<AcidInputFormat.DeltaMetaData>(), true, null, null), null, true);
+            new ArrayList<AcidInputFormat.DeltaMetaData>(), true, null, null), null, true, true);
     OrcSplit result = splitter.createSplit(0, 200, null);
     assertEquals(0, result.getStart());
     assertEquals(200, result.getLength());
@@ -1576,7 +1576,7 @@ public class TestInputOutputFormat {
     OrcInputFormat.SplitGenerator splitter =
         new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs,
             fs.getFileStatus(new Path("/a/file")), null, null, true,
-            new ArrayList<AcidInputFormat.DeltaMetaData>(), true, null, null), null, true);
+            new ArrayList<AcidInputFormat.DeltaMetaData>(), true, null, null), null, true, true);
     List<OrcSplit> results = splitter.call();
     OrcSplit result = results.get(0);
     assertEquals(3, result.getStart());
@@ -1599,7 +1599,7 @@ public class TestInputOutputFormat {
     context = new OrcInputFormat.Context(conf);
     splitter = new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs,
       fs.getFileStatus(new Path("/a/file")), null, null, true,
-        new ArrayList<AcidInputFormat.DeltaMetaData>(), true, null, null), null, true);
+        new ArrayList<AcidInputFormat.DeltaMetaData>(), true, null, null), null, true, true);
     results = splitter.call();
     for(int i=0; i < stripeSizes.length; ++i) {
       assertEquals("checking stripe " + i + " size",
@@ -1627,7 +1627,7 @@ public class TestInputOutputFormat {
     OrcInputFormat.SplitGenerator splitter =
         new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs,
             fs.getFileStatus(new Path("/a/file")), null, null, true,
-            new ArrayList<AcidInputFormat.DeltaMetaData>(), true, null, null), null, true);
+            new ArrayList<AcidInputFormat.DeltaMetaData>(), true, null, null), null, true, true);
     List<OrcSplit> results = splitter.call();
     OrcSplit result = results.get(0);
     assertEquals(3, results.size());
@@ -1650,7 +1650,7 @@ public class TestInputOutputFormat {
     splitter = new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs,
         fs.getFileStatus(new Path("/a/file")), null, null, true,
         new ArrayList<AcidInputFormat.DeltaMetaData>(),
-        true, null, null), null, true);
+        true, null, null), null, true, true);
     results = splitter.call();
     assertEquals(5, results.size());
     for (int i = 0; i < stripeSizes.length; ++i) {
@@ -1670,7 +1670,7 @@ public class TestInputOutputFormat {
     splitter = new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs,
         fs.getFileStatus(new Path("/a/file")), null, null, true,
         new ArrayList<AcidInputFormat.DeltaMetaData>(),
-        true, null, null), null, true);
+        true, null, null), null, true, true);
     results = splitter.call();
     assertEquals(1, results.size());
     result = results.get(0);

http://git-wip-us.apache.org/repos/asf/hive/blob/069b20af/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
----------------------------------------------------------------------
diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
index 985a5bd..20664bb 100644
--- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
+++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
@@ -1335,8 +1335,8 @@ public class Hadoop23Shims extends HadoopShimsSecure {
   }
 
   @Override
-  public long getFileId(FileSystem fs, String path) throws IOException {
-    return ensureDfs(fs).getClient().getFileInfo(path).getFileId();
+  public long getFileId(DistributedFileSystem fs, String path) throws IOException {
+    return fs.getClient().getFileInfo(path).getFileId();
   }
 
 

http://git-wip-us.apache.org/repos/asf/hive/blob/069b20af/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
----------------------------------------------------------------------
diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
index 9c6901d..9b49dd1 100644
--- a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
+++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hive.shims;
 
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.InetSocketAddress;
@@ -646,7 +648,7 @@ public interface HadoopShims {
    * Gets file ID. Only supported on hadoop-2.
    * @return inode ID of the file.
    */
-  long getFileId(FileSystem fs, String path) throws IOException;
+  long getFileId(DistributedFileSystem fs, String path) throws IOException;
 
   /** Clones the UGI and the Subject. */
   UserGroupInformation cloneUgi(UserGroupInformation baseUgi) throws IOException;
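With the narrowed signature, passing a plain FileSystem no longer compiles, which pushes the default-FS/instanceof decision to the callers shown earlier; a hypothetical caller (variable and path invented):

    // Hypothetical caller: the compiler now forces an explicit HDFS check before asking for an inode ID.
    if (someFs instanceof DistributedFileSystem) {
      long fileId = SHIMS.getFileId((DistributedFileSystem) someFs, "/warehouse/t/part-00000");
    }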

