flink-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ASF GitHub Bot (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (FLINK-3967) Provide RethinkDB Sink for Flink
Date Wed, 25 May 2016 07:39:13 GMT

    [ https://issues.apache.org/jira/browse/FLINK-3967?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15299618#comment-15299618
] 

ASF GitHub Bot commented on FLINK-3967:
---------------------------------------

Github user zentol commented on a diff in the pull request:

    https://github.com/apache/flink/pull/2031#discussion_r64527750
  
    --- Diff: flink-streaming-connectors/flink-connector-rethinkdb/src/main/java/org/apache/flink/streaming/connectors/rethinkdb/FlinkRethinkDbSink.java
---
    @@ -0,0 +1,289 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.flink.streaming.connectors.rethinkdb;
    +
    +import java.io.Serializable;
    +import java.util.HashMap;
    +import java.util.Objects;
    +
    +import org.apache.commons.lang3.StringUtils;
    +import org.apache.flink.configuration.Configuration;
    +import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
    +import org.slf4j.Logger;
    +import org.slf4j.LoggerFactory;
    +
    +import com.rethinkdb.RethinkDB;
    +import com.rethinkdb.gen.ast.Insert;
    +import com.rethinkdb.gen.ast.Table;
    +import com.rethinkdb.net.Connection;
    +
    +/**
    + * This class is the Flink sink for RethinkDB which is a tcp/JSON protocol based document
    + * oriented NoSQL database.
    + * 
    + * <p/>
    + * This sink provides two constructors:
    + * <p/>
    + * {@link #FlinkRethinkDbSink(String hostname, int hostport, String database, String
table, JSONSerializationSchema schema)}, and
    + * <p/>
    + * {@link #FlinkRethinkDbSink(String hostname, int hostport, String database, String
table, JSONSerializationSchema schema, ConflictStrategy conflictStrategy)}
    + * <p/>
    + * 
    + * The parameters for the constructors are as follows:
    + * <p/>
    + * <ul>
    + * <li>hostname - the rethinkdb hostname</li>
    + * <li>hostport - the rethinkdb port for the driver to connect</li>
    + * <li>database - the rethinkdb database name to which the table belongs</li>
    + * <li>table - the rethinkdb table name where documents are inserted</li>
    + * <li>schema - the schema transformer that converts input to JSONObject, or JSONArray</li>
    + * <li>conflictStrategy - the conflict resolution strategy in case inserted document
has id which exists in the db</li>
    + * </ul>
    + * <p/>
    + *
    + * The user can also set:
    + * <p/>
    + * <ul>
    + * <li>username - default is admin</li>
    + * <li>password - default is blank</li>
    + * </ul>
    + * <p/> with the {@link #setUsernameAndPassword(String, String)} method.
    + * <p/>
    + * <b>NOTE: If multiple documents are getting inserted (eg: using JSONArray), the
sink 
    + * checks if there is an error entry in the result HashMap and throws a runtime exception
if the error
    + * count is not zero.  The exception message contains the result HashMap. 
    + * In case of multiple errors only the first error is noted in the result HashMap.
    + * </b>
    + * 
    + * @see {@link ConflictStrategy} for conflict resolution strategies
    + * 
    + * @param <OUT> a value that can be transformed into a {@link org.json.simple.JSONArray}
or {@link org.json.simple.JSONObject}
    + */
    +public class FlinkRethinkDbSink<OUT> extends RichSinkFunction<OUT> implements
Serializable{
    +
    +	/**
    +	 * Serial version for the class
    +	 */
    +	private static final long serialVersionUID = -2135499016796158755L;
    +
    +	/**
    +	 * Logger for the class
    +	 */
    +	private static final Logger LOG = LoggerFactory.getLogger(FlinkRethinkDbSink.class);
    +
    +	/**
    +	 * Conflict resolution option key in case document ids are same 
    +	 */
    +	public static final String CONFLICT_OPT = "conflict";
    +
    +	/**
    +	 * Result key indicating number of errors
    +	 */
    +	public static final String RESULT_ERROR_KEY = "errors";
    +
    +	/**
    +	 * Serialization schema for the sink
    +	 */
    +	private JSONSerializationSchema<OUT> serializationSchema;
    +
    +	/**
    +	 * RethinkDB connection object
    +	 */
    +	private transient Connection rethinkDbConnection;
    +
    +	/**
    +	 * RethinkDB hostname
    +	 */
    +	private String hostname;
    +
    +	/**
    +	 * RethinkDB port
    +	 */
    +	private int hostport;
    +
    +	/**
    +	 * RethinkDB tablename where documents are inserted
    +	 */
    +	private String tableName;
    +
    +	/**
    +	 * RethinkDB database where documents are inserted
    +	 */
    +	private String databaseName;
    +
    +	/**
    +	 * Conflict resolution strategy
    +	 */
    +	private ConflictStrategy conflict;
    +	
    +	/**
    +	 * Default user name
    +	 */
    +	public static final String DEFAULT_USER_NAME = "admin";
    +	
    +	/**
    +	 * User name
    +	 */
    +	private String username = DEFAULT_USER_NAME;
    +	
    +	/**
    +	 * Default password
    +	 */
    +	public static final String DEFAULT_PASSWORD = "";
    +	
    +	/**
    +	 * password
    +	 */
    +	private String password = DEFAULT_PASSWORD;
    +
    +	
    +	/**
    +	 * Constructor for RethinkDB sink
    +	 * @param hostname
    +	 * @param hostport
    +	 * @param database
    +	 * @param table
    +	 * @param schema
    +	 */
    +	public FlinkRethinkDbSink(String hostname, int hostport, String database, String table,

    +			JSONSerializationSchema<OUT> schema) {
    +		this(hostname, hostport, database, table, schema, ConflictStrategy.update);
    +	}
    +
    +	/**
    +	 * Constructor for sink
    +	 * @param hostname
    +	 * @param hostport
    +	 * @param database name
    +	 * @param table name
    +	 * @param schema serialization converter
    +	 * @param conflict resolution strategy for document id conflict
    +	 */
    +	public FlinkRethinkDbSink(String hostname, int hostport, String database, String table,

    +			JSONSerializationSchema<OUT> schema, 
    +			ConflictStrategy conflict) {
    +		this.hostname = Objects.requireNonNull(hostname);
    +		this.hostport = hostport;
    +		this.databaseName = Objects.requireNonNull(database);
    +		this.tableName = Objects.requireNonNull(table);
    +		this.serializationSchema = Objects.requireNonNull(schema);
    +		this.conflict = conflict;
    +	}
    +
    +	/**
    +	 * Open the sink
    +	 */
    +	@Override
    +	public void open(Configuration parameters) throws Exception {
    +		LOG.info("Received parameters : {}", parameters);
    +		
    +		super.open(parameters);
    +
    +		rethinkDbConnection = getRethinkDB().connection().hostname(hostname)
    +				.port(hostport).user(username, password).connect();
    +
    +		LOG.info("RethinkDb connection created for host {} port {} and db {}", 
    +				hostname, hostport,databaseName);
    +	}
    +
    +	/**
    +	 * Helper method to help testability
    +	 * @return RethinkDB instance
    +	 */ 
    +	protected RethinkDB getRethinkDB() {
    +		return RethinkDB.r;
    +	}
    +	
    +	/**
    +	 * Set username and password. If username and password are not provided,
    +	 * then default username (admin) and blank password are used.
    +	 * 
    +	 * @param username
    +	 * @param password
    +	 * 
    +	 * @throws IllegalArgumentException if either argument is null or blank
    +	 */
    +	public void setUsernameAndPassword(String username, String password) {
    +		
    +		if ( StringUtils.isBlank(username) )  {
    +			throw new IllegalArgumentException("username " + username + " cannot be null or empty"
); 
    +		} else {
    +			this.username = username;
    +		}
    +		
    +		if ( StringUtils.isBlank(password) ) {
    +			throw new IllegalArgumentException("password " + password + " cannot be null or empty"
); 
    +		} else {
    +			this.password = password;
    +		}
    +	}
    +	
    +	/**
    +	 * Invoke the sink with the input
    +	 * 
    +	 * @param value the value to be inserted
    +	 * 
    +	 * @throws RuntimeException if there are errors while inserting row into rethinkdb
    +	 */
    +	@Override
    +	public void invoke(OUT value) throws Exception {
    +		LOG.debug("Received value {}", value);
    +		
    +		Object json = serializationSchema.toJSON(value);
    +		LOG.debug("Object/Json: {}/{}", value, json);
    +		Insert insert = getRdbTable().insert(json).optArg(CONFLICT_OPT, conflict.toString());
    +		HashMap<String,Object> result = runInsert(insert);
    +		
    +		LOG.debug("Object/Json/Result: {}/{}/{}", value, json, result);
    +		
    +		if ( (Long)result.get(RESULT_ERROR_KEY) != 0 ) {
    --- End diff --
    
    this is a synchronous operation, correct? If so I'd be curious about a benchmark for this
sink.


> Provide RethinkDB Sink for Flink
> --------------------------------
>
>                 Key: FLINK-3967
>                 URL: https://issues.apache.org/jira/browse/FLINK-3967
>             Project: Flink
>          Issue Type: New Feature
>          Components: Streaming, Streaming Connectors
>    Affects Versions: 1.0.3
>         Environment: All
>            Reporter: Mans Singh
>            Assignee: Mans Singh
>            Priority: Minor
>              Labels: features
>             Fix For: 1.1.0
>
>   Original Estimate: 48h
>  Remaining Estimate: 48h
>
> Provide Sink to stream data from flink to rethink db.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message