From issues-return-3141-archive-asf-public=cust-asf.ponee.io@phoenix.apache.org Mon Dec 3 23:12:57 2018 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id BB5FE180645 for ; Mon, 3 Dec 2018 23:12:56 +0100 (CET) Received: (qmail 37471 invoked by uid 500); 3 Dec 2018 22:12:55 -0000 Mailing-List: contact issues-help@phoenix.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@phoenix.apache.org Delivered-To: mailing list issues@phoenix.apache.org Received: (qmail 37462 invoked by uid 99); 3 Dec 2018 22:12:55 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 03 Dec 2018 22:12:55 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id C940BE118C; Mon, 3 Dec 2018 22:12:55 +0000 (UTC) From: kadirozde To: issues@phoenix.apache.org Reply-To: issues@phoenix.apache.org References: In-Reply-To: Subject: [GitHub] phoenix pull request #404: PHOENIX-5025 Tool to clean up orphan views Content-Type: text/plain Message-Id: <20181203221255.C940BE118C@git1-us-west.apache.org> Date: Mon, 3 Dec 2018 22:12:55 +0000 (UTC) Github user kadirozde commented on a diff in the pull request: https://github.com/apache/phoenix/pull/404#discussion_r238458517 --- Diff: phoenix-core/src/main/java/org/apache/phoenix/mapreduce/OrphanViewTool.java --- @@ -0,0 +1,812 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.phoenix.mapreduce; + +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.COLUMN_FAMILY; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.COLUMN_NAME; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.LINK_TYPE; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.SYSTEM_CATALOG_NAME; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.SYSTEM_CHILD_LINK_NAME; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.TABLE_NAME; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.TABLE_SCHEM; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.TABLE_TYPE; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.TENANT_ID; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.VIEW_TYPE; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Properties; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.cli.PosixParser; +import org.apache.commons.lang.exception.ExceptionUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.apache.phoenix.jdbc.PhoenixConnection; +import org.apache.phoenix.mapreduce.util.ConnectionUtil; +import org.apache.phoenix.parse.DropTableStatement; +import org.apache.phoenix.query.QueryServices; +import org.apache.phoenix.query.QueryServicesOptions; +import org.apache.phoenix.schema.MetaDataClient; +import org.apache.phoenix.schema.PTable; +import org.apache.phoenix.schema.PTableType; +import org.apache.phoenix.schema.TableNotFoundException; +import org.apache.phoenix.util.PhoenixRuntime; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A tool to identify orphan views and links, and drop them + * + */ +public class OrphanViewTool extends Configured implements Tool { + private static final Logger LOG = LoggerFactory.getLogger(OrphanViewTool.class); + // Query all the views that are not "MAPPED" views + private static final String viewQuery = "SELECT " + + TENANT_ID + ", " + + TABLE_SCHEM + "," + + TABLE_NAME + + " FROM " + SYSTEM_CATALOG_NAME + + " WHERE "+ TABLE_TYPE + " = '" + PTableType.VIEW.getSerializedValue() +"' AND NOT " + + VIEW_TYPE + " = " + PTable.ViewType.MAPPED.getSerializedValue(); + // Query all physical links + private static final String physicalLinkQuery = "SELECT " + + TENANT_ID + ", " + + TABLE_SCHEM + ", " + + TABLE_NAME + ", " + + COLUMN_NAME + " AS PHYSICAL_TABLE_TENANT_ID, " + + COLUMN_FAMILY + " AS PHYSICAL_TABLE_FULL_NAME " + + " FROM " + SYSTEM_CATALOG_NAME + + " WHERE "+ LINK_TYPE + " = " + + PTable.LinkType.PHYSICAL_TABLE.getSerializedValue(); + // Query all child-parent links + private static final String childParentLinkQuery = "SELECT " + + TENANT_ID + ", " + + TABLE_SCHEM + ", " + + TABLE_NAME + ", " + + COLUMN_NAME + " AS PARENT_VIEW_TENANT_ID, " + + COLUMN_FAMILY + " AS PARENT_VIEW_FULL_NAME " + + " FROM " + SYSTEM_CATALOG_NAME + + " WHERE "+ LINK_TYPE + " = " + + PTable.LinkType.PARENT_TABLE.getSerializedValue(); + // Query all parent-child links + private static final String parentChildLinkQuery = "SELECT " + + TENANT_ID + ", " + + TABLE_SCHEM + ", " + + TABLE_NAME + ", " + + COLUMN_NAME + " AS CHILD_VIEW_TENANT_ID, " + + COLUMN_FAMILY + " AS CHILD_VIEW_FULL_NAME " + + " FROM " + SYSTEM_CHILD_LINK_NAME + + " WHERE "+ LINK_TYPE + " = " + + PTable.LinkType.CHILD_TABLE.getSerializedValue(); + + // Query all the tables that can be a base table + private static final String candidateBaseTableQuery = "SELECT " + + TENANT_ID + ", " + + TABLE_SCHEM + ", " + + TABLE_NAME + + " FROM " + SYSTEM_CATALOG_NAME + + " WHERE "+ " NOT " + TABLE_TYPE + " = '" + PTableType.VIEW.getSerializedValue() + "'"; + + private String outputPath; + private String inputPath; + private boolean clean = false; + private int maxViewLevel = 0; + private static final long defaultAge = 24*60*60*1000; // 1 day + private long age = 0; + + private static final byte VIEW = 0; + private static final byte PHYSICAL_TABLE_LINK = 1; + private static final byte PARENT_TABLE_LINK = 2; + private static final byte CHILD_TABLE_LINK = 3; + private static final byte ORPHAN_TYPE_COUNT = 4; + + BufferedWriter writer[] = new BufferedWriter[ORPHAN_TYPE_COUNT]; + BufferedReader reader[] = new BufferedReader[ORPHAN_TYPE_COUNT]; + HashMap orphanViewSet = new HashMap<>(); + List> viewSetArray = new ArrayList>(); + HashMap baseSet = new HashMap<>(); + HashSet orphanLinkSet = new HashSet<>(); + + private static final String fileName[] = {"OrphanView.txt", "OrphanPhysicalTableLink.txt", "OrphanParentTableLink", "OrphanChildTableLink"}; + private static final Option OUTPUT_PATH_OPTION = new Option("op", "output-path", true, + "Output path where the files listing orphan views and links are written"); + private static final Option INPUT_PATH_OPTION = new Option("ip", "input-path", true, + "Input path where the files listing orphan views and links are read"); + private static final Option CLEAN_ORPHAN_VIEWS_OPTION = new Option("c", "clean", false, + "If specified, cleans orphan views and links"); + private static final Option IDENTIFY_ORPHAN_VIEWS_OPTION = new Option("i", "identify", false, + "If specified, identifies orphan views and links"); + private static final Option AGE_OPTION = new Option("a", "age", true, + "The minimum age (in milliseconds) for the views (default value is " + Long.toString(defaultAge) + ", i.e. 1 day)"); + private static final Option HELP_OPTION = new Option("h", "help", false, "Help"); + + private Options getOptions() { + final Options options = new Options(); + options.addOption(OUTPUT_PATH_OPTION); + options.addOption(INPUT_PATH_OPTION); + options.addOption(CLEAN_ORPHAN_VIEWS_OPTION); + options.addOption(IDENTIFY_ORPHAN_VIEWS_OPTION); + options.addOption(AGE_OPTION); + options.addOption(HELP_OPTION); + return options; + } + + /** + * Parses the commandline arguments, throws IllegalStateException if mandatory arguments are + * missing. + * @param args supplied command line arguments + */ + private void parseOptions(String[] args) throws Exception { + + final Options options = getOptions(); + + CommandLineParser parser = new PosixParser(); + CommandLine cmdLine = null; + try { + cmdLine = parser.parse(options, args); + } catch (ParseException e) { + printHelpAndExit("Error parsing command line options: " + e.getMessage(), options); + } + if (cmdLine.hasOption(HELP_OPTION.getOpt())) { + printHelpAndExit(options, 0); + } + if (cmdLine.hasOption(OUTPUT_PATH_OPTION.getOpt()) && cmdLine.hasOption(INPUT_PATH_OPTION.getOpt())) { + throw new IllegalStateException("Specify either " + OUTPUT_PATH_OPTION.getLongOpt() + " or " + + INPUT_PATH_OPTION.getOpt()); + } + if (cmdLine.hasOption(INPUT_PATH_OPTION.getOpt()) && !cmdLine.hasOption(CLEAN_ORPHAN_VIEWS_OPTION.getOpt())) { + throw new IllegalStateException(INPUT_PATH_OPTION.getLongOpt() + " is only used with " + + IDENTIFY_ORPHAN_VIEWS_OPTION.getOpt()); + } + if (cmdLine.hasOption(IDENTIFY_ORPHAN_VIEWS_OPTION.getOpt()) && cmdLine.hasOption(CLEAN_ORPHAN_VIEWS_OPTION.getOpt())) { + throw new IllegalStateException("Specify either " + IDENTIFY_ORPHAN_VIEWS_OPTION.getLongOpt() + " or " + + IDENTIFY_ORPHAN_VIEWS_OPTION.getOpt()); + } + if (cmdLine.hasOption(OUTPUT_PATH_OPTION.getOpt()) && (!cmdLine.hasOption(IDENTIFY_ORPHAN_VIEWS_OPTION.getOpt()) && + !cmdLine.hasOption(CLEAN_ORPHAN_VIEWS_OPTION.getOpt()))) { + throw new IllegalStateException(OUTPUT_PATH_OPTION.getLongOpt() + " requires either " + + IDENTIFY_ORPHAN_VIEWS_OPTION.getOpt() + " or " + CLEAN_ORPHAN_VIEWS_OPTION.getOpt()); + } + if (cmdLine.hasOption(CLEAN_ORPHAN_VIEWS_OPTION.getOpt())) { + clean = true; + } + else if (!cmdLine.hasOption(IDENTIFY_ORPHAN_VIEWS_OPTION.getOpt())) { + throw new IllegalStateException("Specify either " + + IDENTIFY_ORPHAN_VIEWS_OPTION.getOpt() + " or " + CLEAN_ORPHAN_VIEWS_OPTION.getOpt()); + } + if (cmdLine.hasOption(AGE_OPTION.getOpt())) { + age = Long.valueOf(cmdLine.getOptionValue(AGE_OPTION.getOpt())); + } + + outputPath = cmdLine.getOptionValue(OUTPUT_PATH_OPTION.getOpt()); + inputPath = cmdLine.getOptionValue(INPUT_PATH_OPTION.getOpt()); + } + + private void printHelpAndExit(String errorMessage, Options options) { + System.err.println(errorMessage); + printHelpAndExit(options, 1); + } + + private void printHelpAndExit(Options options, int exitCode) { + HelpFormatter formatter = new HelpFormatter(); + formatter.printHelp("help", options); + System.exit(exitCode); + } + + private class Key { + private String tenantId; + private String schemaName; + private String tableName; + + public Key (String tenantId, String schemaName, String tableName) { + this.tenantId = tenantId; + this.schemaName = schemaName; + this.tableName = tableName; + } + + public Key (String tenantId, String fullTableName) { + this.tenantId = tenantId; + String[] columns = fullTableName.split("\\."); + if (columns.length == 1) { + this.schemaName = null; + this.tableName = fullTableName; + } else { + this.schemaName = columns[0]; + this.tableName = columns[1]; + } + } + + public Key (String serializedKey) { + String[] columns = serializedKey.split(","); + tenantId = columns[0].compareTo("") == 0 ? null : columns[0]; + schemaName = columns[1].compareTo("") == 0 ? null : columns[1]; + tableName = columns[2]; + } + + public String serialize () { + return (tenantId != null ? tenantId + "," : ",") + + (schemaName != null ? schemaName + "," : ",") + + tableName; + } + @Override + public int hashCode() { + return Objects.hash(serialize()); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (getClass() != obj.getClass()) + return false; + Key other = (Key) obj; + if (this.serialize().compareTo(other.serialize()) != 0) + return false; + return true; + } + } + + private abstract class Table { + protected Key key; + protected List childViews; + + public void addChild(Key childView) { + if (childViews == null) { + childViews = new LinkedList<>(); + } + childViews.add(childView); + } + + public boolean isParent() { + if (childViews == null || childViews.isEmpty()) { + return false; + } + return true; + } + } + + private class Base extends Table { + public Base (Key key) { + this.key = key; + } + } + + private class View extends Table { + Key parent; + Key base; + + public View (Key key) { + this.key = key; + } + + public void setParent(Key parent) { + this.parent = parent; + } + + public void setBase(Key base) { + this.base = base; + } + } + + private class Link { + Key src; + Key dst; + PTable.LinkType type; + + public Link(Key src, Key dst, PTable.LinkType type) { + this.src = src; + this.dst = dst; + this.type = type; + } + + public String serialize () { + return src.serialize() + "," + dst.serialize() + "," + type.toString(); + } + + @Override + public int hashCode() { + return Objects.hash(serialize()); + } + } + + private void gracefullyDropView(PhoenixConnection phoenixConnection, Configuration configuration, + Key key) throws Exception { + PhoenixConnection tenantConnection; + if (key.tenantId != null) { + Properties tenantProps = new Properties(); + tenantProps.setProperty(PhoenixRuntime.TENANT_ID_ATTRIB, key.tenantId); + tenantConnection = ConnectionUtil.getInputConnection(configuration, tenantProps). + unwrap(PhoenixConnection.class); + } else { + tenantConnection = phoenixConnection; + } + + MetaDataClient client = new MetaDataClient(tenantConnection); + org.apache.phoenix.parse.TableName pTableName = org.apache.phoenix.parse.TableName + .create(key.schemaName, key.tableName); + try { + client.dropTable( + new DropTableStatement(pTableName, PTableType.VIEW, false, true, true)); + } + catch (TableNotFoundException e) { + LOG.info("Ignoring view " + pTableName + " as it has already been dropped"); + } + } + + private void removeLink(PhoenixConnection phoenixConnection, Key src, Key dst, PTable.LinkType linkType) throws Exception { + String deleteQuery = "DELETE FROM " + + ((linkType == PTable.LinkType.PHYSICAL_TABLE || linkType == PTable.LinkType.PARENT_TABLE) ? SYSTEM_CATALOG_NAME : SYSTEM_CHILD_LINK_NAME) + + " WHERE " + TENANT_ID + (src.tenantId == null ? " IS NULL" : " = '" + src.tenantId + "'") + " AND " + + TABLE_SCHEM + (src.schemaName == null ? " IS NULL " : " = '" + src.schemaName + "'") + " AND " + + TABLE_NAME + " = '" + src.tableName + "' AND " + + COLUMN_NAME + (dst.tenantId == null ? " IS NULL" : " = '" + dst.tenantId + "'") + " AND " + + COLUMN_FAMILY + " = '" + (dst.schemaName == null ? dst.tableName : dst.schemaName + "." + + dst.tableName) + "'"; + phoenixConnection.createStatement().execute(deleteQuery); + phoenixConnection.commit(); + } + + private byte getLinkType(PTable.LinkType linkType) { + byte type; + if (linkType == PTable.LinkType.PHYSICAL_TABLE) { + type = PHYSICAL_TABLE_LINK; + } + else if (linkType == PTable.LinkType.PARENT_TABLE) { + type = PARENT_TABLE_LINK; + } else if (linkType == PTable.LinkType.CHILD_TABLE) { + type = CHILD_TABLE_LINK; + } + else { + throw new AssertionError("Unknown Link Type"); + } + return type; + } + + private PTable.LinkType getLinkType(byte linkType) { + PTable.LinkType type; + if (linkType == PHYSICAL_TABLE_LINK) { + type = PTable.LinkType.PHYSICAL_TABLE; + } + else if (linkType == PARENT_TABLE_LINK) { + type = PTable.LinkType.PARENT_TABLE; + } else if (linkType == CHILD_TABLE_LINK) { + type = PTable.LinkType.CHILD_TABLE; + } + else { + throw new AssertionError("Unknown Link Type"); + } + return type; + } + --- End diff -- It sounded a bit overkill to me. I will think about it. ---