hive-commits mailing list archives

From: prasan...@apache.org
Subject: hive git commit: HIVE-17562: ACID 1.0 + ETL strategy should treat empty compacted files as uncovered deltas (Prasanth Jayachandran reviewed by Eugene Koifman)
Date: Wed, 27 Sep 2017 19:57:42 GMT
Repository: hive
Updated Branches:
  refs/heads/branch-2 53dc1ff76 -> 61bbad6ea


HIVE-17562: ACID 1.0 + ETL strategy should treat empty compacted files as uncovered deltas
(Prasanth Jayachandran reviewed by Eugene Koifman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/61bbad6e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/61bbad6e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/61bbad6e

Branch: refs/heads/branch-2
Commit: 61bbad6ea1f40a7e135d305a6e151d982801e3c3
Parents: 53dc1ff
Author: Prasanth Jayachandran <prasanthj@apache.org>
Authored: Wed Sep 27 12:57:33 2017 -0700
Committer: Prasanth Jayachandran <prasanthj@apache.org>
Committed: Wed Sep 27 12:57:33 2017 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java   | 58 ++++++++++------
 .../apache/hadoop/hive/ql/TestTxnCommands2.java | 73 ++++++++++++++++++++
 2 files changed, 110 insertions(+), 21 deletions(-)
----------------------------------------------------------------------
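
For readers skimming the patch: the heart of the change in generateSplitsFromStripes() is that a compacted base file with no stripes no longer counts as covering its bucket; the bucket's covered flag is cleared and the delta files for that bucket are emitted as splits. Below is only a rough, self-contained sketch of that idea, not the Hive code itself; StripeInfo, DeltaFile, Split and splitsForBucket() are hypothetical stand-ins invented for the sketch.

import java.util.ArrayList;
import java.util.List;

// Hypothetical stand-ins for ORC stripe metadata, ACID delta files and input splits;
// they are not Hive classes and exist only to keep the sketch self-contained.
class StripeInfo {
  final long offset, length;
  StripeInfo(long offset, long length) { this.offset = offset; this.length = length; }
}

class DeltaFile {
  final int bucket;
  final String path;
  DeltaFile(int bucket, String path) { this.bucket = bucket; this.path = path; }
}

class Split {
  final String description;
  Split(String description) { this.description = description; }
  @Override public String toString() { return description; }
}

public class EmptyCompactedBaseSketch {

  /**
   * If the base file for a bucket has no stripes (for example after a major compaction
   * that removed every row), emit no base splits and clear the bucket's covered flag,
   * so the bucket's delta files are added as splits and newly inserted rows are still read.
   */
  static List<Split> splitsForBucket(int bucket, List<StripeInfo> baseStripes,
                                     boolean[] coveredByBase, List<DeltaFile> deltas) {
    List<Split> splits = new ArrayList<>();
    if (baseStripes == null || baseStripes.isEmpty()) {
      // empty compacted base: behave as if there were no base at all
      coveredByBase[bucket] = false;
    } else {
      // normal path (heavily simplified): one split per stripe
      for (StripeInfo stripe : baseStripes) {
        splits.add(new Split("base bucket " + bucket + " @" + stripe.offset + "+" + stripe.length));
      }
    }
    // deltas of an uncovered bucket must be turned into splits and read directly
    for (DeltaFile delta : deltas) {
      if (delta.bucket == bucket && !coveredByBase[bucket]) {
        splits.add(new Split("delta " + delta.path));
      }
    }
    return splits;
  }

  public static void main(String[] args) {
    boolean[] covered = { true };   // bucket 0 initially assumed covered by its base
    List<DeltaFile> deltas = new ArrayList<>();
    deltas.add(new DeltaFile(0, "delta_0000005_0000005/bucket_00000"));
    // An empty base (no stripes) left by major compaction: the delta is emitted instead of a base split.
    System.out.println(splitsForBucket(0, new ArrayList<StripeInfo>(), covered, deltas));
  }
}

In the patch itself the bucket index comes from AcidUtils.parseBaseOrDeltaBucketFilename() and the per-bucket coverage flags live in SplitInfo.covered, as shown in the diff below.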


http://git-wip-us.apache.org/repos/asf/hive/blob/61bbad6e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 647e7c8..0813033 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -1226,7 +1226,8 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
     private final boolean hasBase;
     private OrcFile.WriterVersion writerVersion;
     private long projColsUncompressedSize;
-    private final List<OrcSplit> deltaSplits;
+    private List<OrcSplit> deltaSplits;
+    private final SplitInfo splitInfo;
     private final ByteBuffer ppdResult;
     private final UserGroupInformation ugi;
     private final boolean allowSyntheticFileIds;
@@ -1249,6 +1250,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
       this.hasBase = splitInfo.hasBase;
       this.projColsUncompressedSize = -1;
       this.deltaSplits = splitInfo.getSplits();
+      this.splitInfo = splitInfo;
       this.allowSyntheticFileIds = allowSyntheticFileIds;
       this.ppdResult = splitInfo.ppdResult;
     }
@@ -1423,6 +1425,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
                 stripeStats, stripes.size(), file.getPath(), evolution);
           }
         }
+
         return generateSplitsFromStripes(includeStripe);
       }
     }
@@ -1455,31 +1458,44 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
 
     private List<OrcSplit> generateSplitsFromStripes(boolean[] includeStripe) throws IOException {
       List<OrcSplit> splits = new ArrayList<>(stripes.size());
-      // if we didn't have predicate pushdown, read everything
-      if (includeStripe == null) {
-        includeStripe = new boolean[stripes.size()];
-        Arrays.fill(includeStripe, true);
-      }
 
-      OffsetAndLength current = new OffsetAndLength();
-      int idx = -1;
-      for (StripeInformation stripe : stripes) {
-        idx++;
-
-        if (!includeStripe[idx]) {
-          // create split for the previous unfinished stripe
-          if (current.offset != -1) {
-            splits.add(createSplit(current.offset, current.length, orcTail));
-            current.offset = -1;
-          }
-          continue;
+      // after major compaction, base files may become empty. The following sequence is an example:
+      // 1) insert some rows
+      // 2) delete all rows
+      // 3) major compaction
+      // 4) insert some rows
+      // In such cases, consider base files without any stripes as uncovered delta
+      if (stripes == null || stripes.isEmpty()) {
+        AcidOutputFormat.Options options = AcidUtils.parseBaseOrDeltaBucketFilename(file.getPath(), context.conf);
+        int bucket = options.getBucket();
+        splitInfo.covered[bucket] = false;
+        deltaSplits = splitInfo.getSplits();
+      } else {
+        // if we didn't have predicate pushdown, read everything
+        if (includeStripe == null) {
+          includeStripe = new boolean[stripes.size()];
+          Arrays.fill(includeStripe, true);
         }
 
-        current = generateOrUpdateSplit(
+        OffsetAndLength current = new OffsetAndLength();
+        int idx = -1;
+        for (StripeInformation stripe : stripes) {
+          idx++;
+
+          if (!includeStripe[idx]) {
+            // create split for the previous unfinished stripe
+            if (current.offset != -1) {
+              splits.add(createSplit(current.offset, current.length, orcTail));
+              current.offset = -1;
+            }
+            continue;
+          }
+
+          current = generateOrUpdateSplit(
             splits, current, stripe.getOffset(), stripe.getLength(), orcTail);
+        }
+        generateLastSplit(splits, current, orcTail);
       }
-      generateLastSplit(splits, current, orcTail);
-
       // Add uncovered ACID delta splits.
       splits.addAll(deltaSplits);
       return splits;
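
The patch decides which covered slot to clear by parsing the base file's name with AcidUtils.parseBaseOrDeltaBucketFilename() and reading options.getBucket(). A very rough sketch of that name-to-index mapping is below; it assumes only the conventional bucket_NNNNN file naming and is not the real AcidUtils logic.

import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class BucketNameSketch {
  // ACID base/delta directories conventionally contain files named bucket_00000, bucket_00001, ...
  private static final Pattern BUCKET_FILE = Pattern.compile("bucket_(\\d+)");

  // Returns the bucket index encoded in the file name, or -1 if the name does not match.
  static int bucketFromFileName(String fileName) {
    Matcher m = BUCKET_FILE.matcher(fileName);
    return m.matches() ? Integer.parseInt(m.group(1)) : -1;
  }

  public static void main(String[] args) {
    boolean[] coveredByBase = new boolean[4];
    Arrays.fill(coveredByBase, true);
    int bucket = bucketFromFileName("bucket_00002");   // e.g. an empty base file left by compaction
    if (bucket >= 0) {
      coveredByBase[bucket] = false;                   // its deltas are now treated as uncovered
    }
    System.out.println(Arrays.toString(coveredByBase)); // prints [true, true, false, true]
  }
}

The new tests below exercise exactly the insert / delete-all / major-compaction / insert sequence described in the comment above, once with the BI split strategy and once with ETL.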

http://git-wip-us.apache.org/repos/asf/hive/blob/61bbad6e/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
index 6726273..267de21 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
@@ -319,6 +319,79 @@ public class TestTxnCommands2 {
     resultData = new int[][] {{3,8}, {5,6}, {9,20}};
     Assert.assertEquals(stringifyValues(resultData), rs);
   }
+
+  @Test
+  public void testBICompactedNoStripes() throws Exception {
+    hiveConf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "BI");
+    runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) values(1,2)");
+    List<String> rs = runStatementOnDriver("select a,b from " + Table.ACIDTBL + " order by a,b");
+    int[][] resultData = new int[][] {{1,2}};
+    Assert.assertEquals(stringifyValues(resultData), rs);
+    runStatementOnDriver("delete from " + Table.ACIDTBL);
+    rs = runStatementOnDriver("select a,b from " + Table.ACIDTBL + " order by a,b");
+    Assert.assertEquals(0, rs.size());
+    runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) values(3,4)");
+    rs = runStatementOnDriver("select a,b from " + Table.ACIDTBL + " order by a,b");
+    resultData = new int[][] {{3,4}};
+    Assert.assertEquals(stringifyValues(resultData), rs);
+    runStatementOnDriver("delete from " + Table.ACIDTBL);
+    rs = runStatementOnDriver("select a,b from " + Table.ACIDTBL + " order by a,b");
+    Assert.assertEquals(0, rs.size());
+
+    runStatementOnDriver("alter table "+ Table.ACIDTBL + " compact 'MAJOR'");
+    runWorker(hiveConf);
+    TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
+    ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
+    Assert.assertEquals("Unexpected number of compactions in history", 1, resp.getCompactsSize());
+    Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState());
+    Assert.assertTrue(resp.getCompacts().get(0).getHadoopJobId().startsWith("job_local"));
+
+    runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) values(3,4)");
+    rs = runStatementOnDriver("select a,b from " + Table.ACIDTBL + " order by a,b");
+    resultData = new int[][] {{3,4}};
+    Assert.assertEquals(stringifyValues(resultData), rs);
+    runStatementOnDriver("delete from " + Table.ACIDTBL);
+    rs = runStatementOnDriver("select a,b from " + Table.ACIDTBL + " order by a,b");
+    Assert.assertEquals(0, rs.size());
+    hiveConf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "HYBRID");
+  }
+
+  @Test
+  public void testETLCompactedNoStripes() throws Exception {
+    hiveConf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "ETL");
+    runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) values(1,2)");
+    List<String> rs = runStatementOnDriver("select a,b from " + Table.ACIDTBL + " order by a,b");
+    int[][] resultData = new int[][] {{1,2}};
+    Assert.assertEquals(stringifyValues(resultData), rs);
+    runStatementOnDriver("delete from " + Table.ACIDTBL);
+    rs = runStatementOnDriver("select a,b from " + Table.ACIDTBL + " order by a,b");
+    Assert.assertEquals(0, rs.size());
+    runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) values(3,4)");
+    rs = runStatementOnDriver("select a,b from " + Table.ACIDTBL + " order by a,b");
+    resultData = new int[][] {{3,4}};
+    Assert.assertEquals(stringifyValues(resultData), rs);
+    runStatementOnDriver("delete from " + Table.ACIDTBL);
+    rs = runStatementOnDriver("select a,b from " + Table.ACIDTBL + " order by a,b");
+    Assert.assertEquals(0, rs.size());
+
+    runStatementOnDriver("alter table "+ Table.ACIDTBL + " compact 'MAJOR'");
+    runWorker(hiveConf);
+    TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
+    ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
+    Assert.assertEquals("Unexpected number of compactions in history", 1, resp.getCompactsSize());
+    Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState());
+    Assert.assertTrue(resp.getCompacts().get(0).getHadoopJobId().startsWith("job_local"));
+
+    runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) values(3,4)");
+    rs = runStatementOnDriver("select a,b from " + Table.ACIDTBL + " order by a,b");
+    resultData = new int[][] {{3,4}};
+    Assert.assertEquals(stringifyValues(resultData), rs);
+    runStatementOnDriver("delete from " + Table.ACIDTBL);
+    rs = runStatementOnDriver("select a,b from " + Table.ACIDTBL + " order by a,b");
+    Assert.assertEquals(0, rs.size());
+    hiveConf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "HYBRID");
+  }
+
   /**
    * see HIVE-16177
    * See also {@link TestTxnCommands#testNonAcidToAcidConversion01()}

