hive-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ASF GitHub Bot (JIRA)" <j...@apache.org>
Subject [jira] [Work logged] (HIVE-21762) REPL DUMP to support new format for replication policy input to take included tables list.
Date Mon, 10 Jun 2019 04:46:00 GMT

     [ https://issues.apache.org/jira/browse/HIVE-21762?focusedWorklogId=256625&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-256625
]

ASF GitHub Bot logged work on HIVE-21762:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 10/Jun/19 04:45
            Start Date: 10/Jun/19 04:45
    Worklog Time Spent: 10m 
      Work Description: sankarh commented on pull request #664: HIVE-21762: REPL DUMP to support
new format for replication policy input to take included tables list.
URL: https://github.com/apache/hive/pull/664#discussion_r291878111
 
 

 ##########
 File path: ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java
 ##########
 @@ -134,50 +130,99 @@ private void setTxnConfigs() {
     }
   }
 
+  private boolean isValidTablesList(Tree tablesListTree) {
+    if (tablesListTree.getChildCount() <= 1) {
+      // If one or zero child, then it is valid.
+      // For single table replication, the valid format is <db_name>.<table_name>.
+      // To include multiple tables (t1 and t2), then valid format is <db_name>.[t1,
t2].
+      // So, single child is always valid either it is table_name or tables_list.
+      return true;
+    }
+    assert(tablesListTree.getChildCount() == 2);
+
+    // We don't allow input of format <db_name>.<table_name>.<table_name>
or <db_name>.<table_name>.[<tables_list>].
+    // To include t1* and exclude t100, then valid format is <db_name>.[t1*].[t100].
+    // So, if 2 children are there, then both should be table_lists.
+    return ((tablesListTree.getChild(0).getType() == TOK_REPL_TABLES_LIST)
+            && (tablesListTree.getChild(1).getType() == TOK_REPL_TABLES_LIST));
+  }
+
   private void initReplDump(ASTNode ast) throws HiveException {
     int numChildren = ast.getChildCount();
     boolean isMetaDataOnly = false;
-    dbNameOrPattern = PlanUtils.stripQuotes(ast.getChild(0).getText());
-
-    // skip the first node, which is always required
-    int currNode = 1;
-    while (currNode < numChildren) {
-      if (ast.getChild(currNode).getType() == TOK_REPL_CONFIG) {
-        Map<String, String> replConfigs
-            = DDLSemanticAnalyzer.getProps((ASTNode) ast.getChild(currNode).getChild(0));
-        if (null != replConfigs) {
-          for (Map.Entry<String, String> config : replConfigs.entrySet()) {
-            conf.set(config.getKey(), config.getValue());
+    String dbNameOrPattern = PlanUtils.stripQuotes(ast.getChild(0).getText());
+    replScope.setDbName(dbNameOrPattern);
+
+    // Skip the first node, which is always required
+    int childIdx = 1;
+    while (childIdx < numChildren) {
+      Tree currNode = ast.getChild(childIdx);
+      switch (currNode.getType()) {
+        case TOK_REPL_CONFIG: {
+          Map<String, String> replConfigs
+              = DDLSemanticAnalyzer.getProps((ASTNode) currNode.getChild(0));
+          if (null != replConfigs) {
+            for (Map.Entry<String, String> config : replConfigs.entrySet()) {
+              conf.set(config.getKey(), config.getValue());
+            }
+            isMetaDataOnly = HiveConf.getBoolVar(conf, REPL_DUMP_METADATA_ONLY);
           }
-          isMetaDataOnly = HiveConf.getBoolVar(conf, REPL_DUMP_METADATA_ONLY);
+          break;
         }
-      } else if (ast.getChild(currNode).getType() == TOK_TABNAME) {
-        // optional tblName was specified.
-        tblNameOrPattern = PlanUtils.stripQuotes(ast.getChild(currNode).getChild(0).getText());
-      } else {
-        // TOK_FROM subtree
-        Tree fromNode = ast.getChild(currNode);
-        eventFrom = Long.parseLong(PlanUtils.stripQuotes(fromNode.getChild(0).getText()));
-        // skip the first, which is always required
-        int numChild = 1;
-        while (numChild < fromNode.getChildCount()) {
-          if (fromNode.getChild(numChild).getType() == TOK_TO) {
-            eventTo =
-                Long.parseLong(PlanUtils.stripQuotes(fromNode.getChild(numChild + 1).getText()));
-            // skip the next child, since we already took care of it
-            numChild++;
-          } else if (fromNode.getChild(numChild).getType() == TOK_LIMIT) {
-            maxEventLimit =
-                Integer.parseInt(PlanUtils.stripQuotes(fromNode.getChild(numChild + 1).getText()));
-            // skip the next child, since we already took care of it
+        case TOK_REPL_TABLES: {
+          assert(currNode.getChildCount() <= 2);
+          if (!isValidTablesList(currNode)) {
+            LOG.error(ErrorMsg.REPL_INCORRECT_SYNTAX_FOR_REPL_POLICY.getMsg());
+            throw new SemanticException(ErrorMsg.REPL_INCORRECT_SYNTAX_FOR_REPL_POLICY.getMsg());
+          }
+
+          // Traverse the children which can be single table_name node or just include tables
list
+          // or both include and exclude tables list.
+          for (int listIdx = 0; listIdx < currNode.getChildCount(); listIdx++) {
+            Tree tablesNode = currNode.getChild(listIdx);
+            if (tablesNode.getType() == TOK_TABNAME) {
+              replScope.setTableName(tablesNode.getChild(0).getText());
+            } else {
+              List<String> tablesList = new ArrayList<>();
+              for (int child = 0; child < tablesNode.getChildCount(); child++) {
+                tablesList.add(unescapeSQLString(tablesNode.getChild(child).getText()));
+              }
+              if (!tablesList.isEmpty()) {
+                if (listIdx == 0) {
+                  replScope.setIncludedTablePatterns(tablesList);
+                } else {
+                  replScope.setExcludedTablePatterns(tablesList);
+                }
+              }
+            }
+          }
+          break;
+        }
+        default: {
 
 Review comment:
   If I remove the default clause, checkstyle/findbugs throws an error. If I keep it, it becomes
dead code. I will fix this by letting the default case remain as dead code. There is no harm in
that, and it will at least catch corruptions.
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 256625)
    Time Spent: 2h 40m  (was: 2.5h)

> REPL DUMP to support new format for replication policy input to take included tables
list.
> ------------------------------------------------------------------------------------------
>
>                 Key: HIVE-21762
>                 URL: https://issues.apache.org/jira/browse/HIVE-21762
>             Project: Hive
>          Issue Type: Sub-task
>          Components: repl
>            Reporter: Sankar Hariappan
>            Assignee: Sankar Hariappan
>            Priority: Major
>              Labels: DR, Replication, pull-request-available
>         Attachments: HIVE-21762.01.patch, HIVE-21762.02.patch, HIVE-21762.03.patch
>
>          Time Spent: 2h 40m
>  Remaining Estimate: 0h
>
> - REPL DUMP syntax:
> {code}
> REPL DUMP <repl_policy> [FROM <last_repl_id>] [WITH <key_values_list>];
> {code}
> - The new format for the replication policy has 3 parts, all separated by a dot (.).
> 1. First part is DB name.
> 2. Second part is the included list: comma-separated table names/regexes within square brackets [].
 If the square brackets are absent, it is treated as single-table replication, which skips
DB-level events.
> 3. Third part is the excluded list: comma-separated table names/regexes within square brackets [].
> {code}
> <db_name> -- Full DB replication which is currently supported
> <db_name>.[]  - Full DB replication
> <db_name>.['.*?']  - Full DB replication
> <db_name>.t1 -- Single table replication (DB events excluded) which is currently
supported
> <db_name>.['t1', 't2']  -- DB replication with static list of tables t1 and t2
included.
> <db_name>.['t1*', 't2', '*t3'].['t100', '5t3', 't4'] -- DB replication including all
tables with prefix t1 or suffix t3, plus table t2, and excluding t100 (which has the
prefix t1), 5t3 (which has the suffix t3) and t4.
> {code}
> - Need to support regular expression of any format. 
> - A table is included in dump only if it matches the regular expressions in included
list and doesn't match the excluded list.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Mime
View raw message