phoenix-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ASF GitHub Bot (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (PHOENIX-2743) HivePhoenixHandler for big-big join with predicate push down
Date Mon, 11 Apr 2016 19:56:25 GMT

    [ https://issues.apache.org/jira/browse/PHOENIX-2743?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15235851#comment-15235851
] 

ASF GitHub Bot commented on PHOENIX-2743:
-----------------------------------------

Github user joshelser commented on a diff in the pull request:

    https://github.com/apache/phoenix/pull/155#discussion_r59269847
  
    --- Diff: phoenix-hive/src/main/java/org/apache/phoenix/hive/PhoenixMetaHook.java ---
    @@ -0,0 +1,245 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.phoenix.hive;
    +
    +import java.sql.Connection;
    +import java.sql.SQLException;
    +import java.util.Collections;
    +import java.util.List;
    +import java.util.Map;
    +
    +import org.apache.commons.logging.Log;
    +import org.apache.commons.logging.LogFactory;
    +import org.apache.hadoop.hive.metastore.HiveMetaHook;
    +import org.apache.hadoop.hive.metastore.TableType;
    +import org.apache.hadoop.hive.metastore.api.FieldSchema;
    +import org.apache.hadoop.hive.metastore.api.MetaException;
    +import org.apache.hadoop.hive.metastore.api.Table;
    +import org.apache.phoenix.hive.constants.PhoenixStorageHandlerConstants;
    +import org.apache.phoenix.hive.util.PhoenixConnectionUtil;
    +import org.apache.phoenix.hive.util.PhoenixStorageHandlerUtil;
    +import org.apache.phoenix.hive.util.PhoenixUtil;
    +
    +import com.google.common.base.CharMatcher;
    +import com.google.common.base.Splitter;
    +import com.google.common.collect.Lists;
    +
    +public class PhoenixMetaHook implements HiveMetaHook {
    +
    +    private static final Log LOG = LogFactory.getLog(PhoenixMetaHook.class);
    +
    +    @Override
    +    public void preCreateTable(Table table) throws MetaException {
    +        if (LOG.isDebugEnabled()) {
    +            LOG.debug("Precreate  table : " + table.getTableName());
    +        }
    +
    +        try (Connection conn = PhoenixConnectionUtil.getConnection(table)) {
    +            String tableType = table.getTableType();
    +            String tableName = PhoenixStorageHandlerUtil.getTargetTableName(table);
    +
    +            if (TableType.EXTERNAL_TABLE.name().equals(tableType)) {
    +                // Check whether phoenix table exists.
    +                if (!PhoenixUtil.existTable(conn, tableName)) {
     +                    // Error if the Phoenix table does not exist.
    +                    throw new MetaException("Phoenix table " + tableName + " doesn't
exist");
    +                }
    +            } else if (TableType.MANAGED_TABLE.name().equals(tableType)) {
    +                // Check whether phoenix table exists.
    +                if (PhoenixUtil.existTable(conn, tableName)) {
     +                    // Error if the Phoenix table already exists.
    +                    throw new MetaException("Phoenix table " + tableName + " already
exist.");
    +                }
    +
    +                PhoenixUtil.createTable(conn, createTableStatement(table));
    +            } else {
    +                throw new MetaException("Unsupported table Type: " + table.getTableType());
    +            }
    +
    +            if (LOG.isDebugEnabled()) {
    +                LOG.debug("Phoenix table " + tableName + " was created");
    +            }
    +        } catch (SQLException e) {
    +            throw new MetaException(e.getMessage());
    +        }
    +    }
    +
    +    private String createTableStatement(Table table) throws MetaException {
    +        Map<String, String> tableParameterMap = table.getParameters();
    +
    +        String tableName = PhoenixStorageHandlerUtil.getTargetTableName(table);
    +        StringBuilder ddl = new StringBuilder("create table ").append(tableName).append("
(\n");
    +
    +        String phoenixRowKeys = tableParameterMap.get(PhoenixStorageHandlerConstants
    +                .PHOENIX_ROWKEYS);
    +        StringBuilder realRowKeys = new StringBuilder();
    +        List<String> phoenixRowKeyList = Lists.newArrayList(Splitter.on
    +                (PhoenixStorageHandlerConstants.COMMA).trimResults().split(phoenixRowKeys));
    +        Map<String, String> columnMappingMap = getColumnMappingMap(tableParameterMap.get
    +                (PhoenixStorageHandlerConstants.PHOENIX_COLUMN_MAPPING));
    +
    +        List<FieldSchema> fieldSchemaList = table.getSd().getCols();
    +        for (int i = 0, limit = fieldSchemaList.size(); i < limit; i++) {
    +            FieldSchema fieldSchema = fieldSchemaList.get(i);
    +            String fieldName = fieldSchema.getName();
    +            String fieldType = fieldSchema.getType();
    +            String columnType = PhoenixUtil.getPhoenixType(fieldType);
    +
    +            String rowKeyName = getRowKeyMapping(fieldName, phoenixRowKeyList);
    +            if (rowKeyName != null) {
    +                // In case of RowKey
    +                if ("binary".equals(columnType)) {
    +                    // Phoenix must define max length of binary when type definition.
Obtaining
    +                    // information from the column mapping. ex) phoenix.rowkeys = "r1,
r2(100), ..."
    +                    List<String> tokenList = Lists.newArrayList(Splitter.on(CharMatcher.is('(')
    +                            .or(CharMatcher.is(')'))).trimResults().split(rowKeyName));
    +                    columnType = columnType + "(" + tokenList.get(1) + ")";
    +                    rowKeyName = tokenList.get(0);
    +                }
    +
    +                ddl.append("  ").append(rowKeyName).append(" ").append(columnType).append("
not " +
    +                        "null,\n");
    +                realRowKeys.append(rowKeyName).append(",");
    +            } else {
    +                // In case of Column
    +                String columnName = columnMappingMap.get(fieldName);
    +
    +                if (columnName == null) {
    +                    // Use field definition.
    +                    columnName = fieldName;
    +//					throw new MetaException("<<<<<<<<<< " + fieldName
+ " column mapping not exist
    --- End diff --
    
    Remove this.


> HivePhoenixHandler for big-big join with predicate push down
> ------------------------------------------------------------
>
>                 Key: PHOENIX-2743
>                 URL: https://issues.apache.org/jira/browse/PHOENIX-2743
>             Project: Phoenix
>          Issue Type: New Feature
>    Affects Versions: 4.5.0, 4.6.0
>         Environment: hive-1.2.1
>            Reporter: JeongMin Ju
>              Labels: features, performance
>         Attachments: PHOENIX-2743-1.patch
>
>   Original Estimate: 168h
>  Remaining Estimate: 168h
>
> Phoenix supports hash join & sort-merge join, but in the case of a big-big join it does not
perform well.
> Therefore another method, like Hive's, is needed.
> I implemented a hive-phoenix-handler that can access Apache Phoenix tables on HBase using
HiveQL.
> hive-phoenix-handler is much faster than hive-hbase-handler because it applies predicate
push down.
> I am publishing the source code to GitHub for contribution; it should be completed by
next week.
> https://github.com/mini666/hive-phoenix-handler
> please, review my proposal.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message