bahir-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rmetzger <...@git.apache.org>
Subject [GitHub] bahir-flink pull request #17: [BAHIR-99] Kudu connector to read/write from/t...
Date Sun, 23 Jul 2017 15:10:22 GMT
Github user rmetzger commented on a diff in the pull request:

    https://github.com/apache/bahir-flink/pull/17#discussion_r128921813
  
    --- Diff: flink-connector-kudu/src/main/java/es/accenture/flink/Sources/KuduInputFormat.java
---
    @@ -0,0 +1,340 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package es.accenture.flink.Sources;
    +
    +import es.accenture.flink.Utils.RowSerializable;
    +import org.apache.flink.api.common.io.InputFormat;
    +import org.apache.flink.api.common.io.LocatableInputSplitAssigner;
    +import org.apache.flink.api.common.io.statistics.BaseStatistics;
    +import org.apache.flink.configuration.Configuration;
    +import org.apache.flink.core.io.InputSplitAssigner;
    +import org.apache.kudu.client.*;
    +import org.apache.log4j.Logger;
    +
    +import java.io.IOException;
    +import java.util.ArrayList;
    +import java.util.List;
    +
    +/**
    + * {@link InputFormat} subclass that wraps the access for KuduTables.
    + */
    +public class KuduInputFormat implements InputFormat<RowSerializable, KuduInputSplit>
{
    +
    +    private String KUDU_MASTER;
    +    private String TABLE_NAME;
    +
    +    private transient KuduTable table = null;
    +    private transient KuduScanner scanner = null;
    +    private transient KuduClient client = null;
    +
    +    private transient RowResultIterator results = null;
    +    private List<RowSerializable> rows = null;
    +    private List<KuduScanToken> tokens = null;
    +    private boolean endReached = false;
    +    private int scannedRows = 0;
    +
    +    private static final Logger LOG = Logger.getLogger(KuduInputFormat.class);
    +
    +    private List<String> projectColumns;
    +
    +    /**
    +     * Constructor of class KuduInputFormat
    +     * @param tableName Name of the Kudu table from which we are going to read
    +     * @param IP IP address of the Kudu master server
    +     */
    +    public KuduInputFormat(String tableName, String IP){
    +        LOG.info("1. CONSTRUCTOR");
    +        KUDU_MASTER = IP;
    +        TABLE_NAME = tableName;
    +
    +    }
    +
    +    /**
    +     * Returns an instance of Scan that retrieves the required subset of records from the Kudu table.
    +     * @return The appropriate instance of Scan for this usecase.
    +     */
    +    private KuduScanner getScanner(){
    +        return this.scanner;
    +    }
    +
    +    /**
    +     * What table is to be read.
    +     * Per instance of a TableInputFormat derivative only a single tablename is possible.
    +     * @return The name of the table
    +     */
    +    public String getTableName(){
    +        return TABLE_NAME;
    +    }
    +
    +    /**
    +     * @return A list of rows ({@link RowSerializable}) from the Kudu table
    +     */
    +    public List<RowSerializable> getRows(){
    +        return this.rows;
    +    }
    +
    +    /**
    +     * The output from Kudu is always an instance of {@link RowResult}.
    +     * This method copies the data from the RowResult instance into the required {@link RowSerializable}.
    +     * @param rowResult The Result instance from Kudu that needs to be converted
    +     * @return The appropriate instance of {@link RowSerializable} that contains the needed information.
    +     */
    +    private RowSerializable RowResultToRowSerializable(RowResult rowResult) throws IllegalAccessException
{
    +        RowSerializable row = new RowSerializable(rowResult.getColumnProjection().getColumnCount());
    +        for (int i=0; i<rowResult.getColumnProjection().getColumnCount(); i++){
    +            switch(rowResult.getColumnType(i).getDataType()){
    +                case INT8:
    +                    row.setField(i, rowResult.getByte(i));
    +                    break;
    +                case INT16:
    +                    row.setField(i, rowResult.getShort(i));
    +                    break;
    +                case INT32:
    +                    row.setField(i, rowResult.getInt(i));
    +                    break;
    +                case INT64:
    +                    row.setField(i, rowResult.getLong(i));
    +                    break;
    +                case FLOAT:
    +                    row.setField(i, rowResult.getFloat(i));
    +                    break;
    +                case DOUBLE:
    +                    row.setField(i, rowResult.getDouble(i));
    +                    break;
    +                case STRING:
    +                    row.setField(i, rowResult.getString(i));
    +                    break;
    +                case BOOL:
    +                    row.setField(i, rowResult.getBoolean(i));
    +                    break;
    +                case BINARY:
    +                    row.setField(i, rowResult.getBinary(i));
    +                    break;
    +            }
    +        }
    +        return row;
    +    }
    +
    +    /**
    +     * Creates an object and opens the {@link KuduTable} connection.
    +     * These are opened here because they are needed in the createInputSplits
    +     * which is called before the openInputFormat method.
    +     *
    +     * @param parameters The configuration that is to be used
    +     * @see Configuration
    +     */
    +
    --- End diff --
    
    Nit: please remove the empty line that follows this Javadoc comment.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it enabled, or if the feature is enabled but not
working, please contact infrastructure at infrastructure@apache.org or file
a JIRA ticket with INFRA.
---

Mime
View raw message