hive-gitbox mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] [hive] ayushtkn commented on a change in pull request #2043: HIVE-24852. Add support for Snapshots during external table replication.
Date Tue, 04 May 2021 18:54:18 GMT

ayushtkn commented on a change in pull request #2043:
URL: https://github.com/apache/hive/pull/2043#discussion_r626026659



##########
File path: ql/src/java/org/apache/hadoop/hive/ql/exec/repl/DirCopyTask.java
##########
@@ -218,4 +219,62 @@ public String getName() {
   public boolean canExecuteInParallel() {
     return true;
   }
+
+  boolean copyUsingDistCpSnapshots(Path sourcePath, Path targetPath, UserGroupInformation proxyUser) throws IOException {
+
+    DistributedFileSystem targetFs = SnapshotUtils.getDFS(targetPath, conf);
+    boolean result = false;
+    if (getWork().getCopyMode().equals(SnapshotUtils.SnapshotCopyMode.DIFF_COPY)) {
+      LOG.info("Using snapshot diff copy for source: {} and target: {}", sourcePath, targetPath);
+       result = FileUtils
+          .distCpWithSnapshot(firstSnapshot(work.getSnapshotPrefix()), secondSnapshot(work.getSnapshotPrefix()),
+              Collections.singletonList(sourcePath), targetPath, proxyUser,
+              conf, ShimLoader.getHadoopShims());
+       if(result) {
+         // Delete the older snapshot from last iteration.
+         targetFs.deleteSnapshot(targetPath, firstSnapshot(work.getSnapshotPrefix()));
+       } else {
+         throw new IOException(
+             "Can not successfully copy external table data using snapshot diff. source:" + sourcePath + " and target: "
+                 + targetPath);
+       }
+    } else if (getWork().getCopyMode().equals(SnapshotUtils.SnapshotCopyMode.INITIAL_COPY)) {
+      LOG.info("Using snapshot initial copy for source: {} and target: {}", sourcePath, targetPath);
+      // Get the path relative to the initial snapshot for copy.
+      Path snapRelPath =
+          new Path(sourcePath, HdfsConstants.DOT_SNAPSHOT_DIR + "/" + secondSnapshot(work.getSnapshotPrefix()));
+
+      // This is the first time we are copying, check if the target is snapshottable or not, if not attempt to allow
+      // snapshots.
+      SnapshotUtils.allowSnapshot(targetFs, targetPath, conf);
+      // Attempt to delete the snapshot, in case this is a bootstrap post a failed incremental, Since in case of
+      // bootstrap we go from start, so delete any pre-existing snapshot.
+      SnapshotUtils.deleteSnapshotSafe(targetFs, targetPath, firstSnapshot(work.getSnapshotPrefix()));
+
+      // Copy from the initial snapshot path.
+      result = runFallbackDistCp(snapRelPath, targetPath, proxyUser);
+    }
+
+    // Create a new snapshot at target Filesystem. For the next iteration.
+    if (result) {
+      SnapshotUtils.createSnapshot(targetFs, targetPath, firstSnapshot(work.getSnapshotPrefix()), conf);
+    }
+    return result;
+  }
+
+  private boolean runFallbackDistCp(Path sourcePath, Path targetPath, UserGroupInformation proxyUser)
+      throws IOException {
+     // do we create a new conf and only here provide this additional option so that we get away from

Review comment:
       This was already there; it is only showing up again because of the refactor.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: gitbox-unsubscribe@hive.apache.org
For additional commands, e-mail: gitbox-help@hive.apache.org


Mime
View raw message