spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pwendell <...@git.apache.org>
Subject [GitHub] spark pull request: [SPARK-4964][Streaming][Kafka] More updates to...
Date Tue, 10 Feb 2015 03:10:03 GMT
Github user pwendell commented on a diff in the pull request:

    https://github.com/apache/spark/pull/4384#discussion_r24386265
  
    --- Diff: external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala ---
    @@ -179,121 +182,194 @@ object KafkaUtils {
           errs => throw new SparkException(errs.mkString("\n")),
           ok => ok
         )
    -    new KafkaRDD[K, V, U, T, (K, V)](sc, kafkaParams, offsetRanges, leaders, messageHandler)
    +    new KafkaRDD[K, V, KD, VD, (K, V)](sc, kafkaParams, offsetRanges, leaders, messageHandler)
       }
     
    -  /** A batch-oriented interface for consuming from Kafka.
    -   * Starting and ending offsets are specified in advance,
    -   * so that you can control exactly-once semantics.
    +  /**
    +   * :: Experimental ::
    +   * Create a RDD from Kafka using offset ranges for each topic and partition. This allows
you
    +   * specify the Kafka leader to connect to (to optimize fetching) and access the message
as well
    +   * as the metadata.
    +   *
        * @param sc SparkContext object
        * @param kafkaParams Kafka <a href="http://kafka.apache.org/documentation.html#configuration">
    -   * configuration parameters</a>.
    -   *   Requires "metadata.broker.list" or "bootstrap.servers" to be set with Kafka broker(s),
    -   *   NOT zookeeper servers, specified in host1:port1,host2:port2 form.
    +   *    configuration parameters</a>. Requires "metadata.broker.list" or "bootstrap.servers"
    +   *    to be set with Kafka broker(s) (NOT zookeeper servers) specified in
    +   *    host1:port1,host2:port2 form.
        * @param offsetRanges Each OffsetRange in the batch corresponds to a
        *   range of offsets for a given Kafka topic/partition
        * @param leaders Kafka leaders for each offset range in batch
    -   * @param messageHandler function for translating each message into the desired type
    +   * @param messageHandler Function for translating each message and metadata into the
desired type
        */
       @Experimental
       def createRDD[
         K: ClassTag,
         V: ClassTag,
    -    U <: Decoder[_]: ClassTag,
    -    T <: Decoder[_]: ClassTag,
    -    R: ClassTag] (
    +    KD <: Decoder[K]: ClassTag,
    +    VD <: Decoder[V]: ClassTag,
    +    R: ClassTag](
           sc: SparkContext,
           kafkaParams: Map[String, String],
           offsetRanges: Array[OffsetRange],
           leaders: Array[Leader],
           messageHandler: MessageAndMetadata[K, V] => R
    -  ): RDD[R] = {
    -
    +    ): RDD[R] = {
         val leaderMap = leaders
           .map(l => TopicAndPartition(l.topic, l.partition) -> (l.host, l.port))
           .toMap
    -    new KafkaRDD[K, V, U, T, R](sc, kafkaParams, offsetRanges, leaderMap, messageHandler)
    +    new KafkaRDD[K, V, KD, VD, R](sc, kafkaParams, offsetRanges, leaderMap, messageHandler)
       }
     
    +
       /**
    -   * This stream can guarantee that each message from Kafka is included in transformations
    -   * (as opposed to output actions) exactly once, even in most failure situations.
    +   * Create a RDD from Kafka using offset ranges for each topic and partition.
        *
    -   * Points to note:
    -   *
    -   * Failure Recovery - You must checkpoint this stream, or save offsets yourself and
provide them
    -   * as the fromOffsets parameter on restart.
    -   * Kafka must have sufficient log retention to obtain messages after failure.
    -   *
    -   * Getting offsets from the stream - see programming guide
    +   * @param jsc JavaSparkContext object
    +   * @param kafkaParams Kafka <a href="http://kafka.apache.org/documentation.html#configuration">
    +   *    configuration parameters</a>. Requires "metadata.broker.list" or "bootstrap.servers"
    +   *    to be set with Kafka broker(s) (NOT zookeeper servers) specified in
    +   *    host1:port1,host2:port2 form.
    +   * @param offsetRanges Each OffsetRange in the batch corresponds to a
    +   *   range of offsets for a given Kafka topic/partition
    +   */
    +  @Experimental
    +  def createRDD[K, V, KD <: Decoder[K], VD <: Decoder[V]](
    +      jsc: JavaSparkContext,
    +      keyClass: Class[K],
    +      valueClass: Class[V],
    +      keyDecoderClass: Class[KD],
    +      valueDecoderClass: Class[VD],
    +      kafkaParams: JMap[String, String],
    +      offsetRanges: Array[OffsetRange]
    +    ): JavaPairRDD[K, V] = {
    +    implicit val keyCmt: ClassTag[K] = ClassTag(keyClass)
    +    implicit val valueCmt: ClassTag[V] = ClassTag(valueClass)
    +    implicit val keyDecoderCmt: ClassTag[KD] = ClassTag(keyDecoderClass)
    +    implicit val valueDecoderCmt: ClassTag[VD] = ClassTag(valueDecoderClass)
    +    new JavaPairRDD(createRDD[K, V, KD, VD](
    +      jsc.sc, Map(kafkaParams.toSeq: _*), offsetRanges))
    +  }
    +
    +  /**
    +   * :: Experimental ::
    +   * Create a RDD from Kafka using offset ranges for each topic and partition. This allows
you
    +   * specify the Kafka leader to connect to (to optimize fetching) and access the message
as well
    +   * as the metadata.
        *
    -.  * Zookeeper - This does not use Zookeeper to store offsets.  For interop with Kafka
monitors
    -   * that depend on Zookeeper, you must store offsets in ZK yourself.
    +   * @param jsc JavaSparkContext object
    +   * @param kafkaParams Kafka <a href="http://kafka.apache.org/documentation.html#configuration">
    +   *    configuration parameters</a>. Requires "metadata.broker.list" or "bootstrap.servers"
    +   *    to be set with Kafka broker(s) (NOT zookeeper servers) specified in
    +   *    host1:port1,host2:port2 form.
    +   * @param offsetRanges Each OffsetRange in the batch corresponds to a
    +   *   range of offsets for a given Kafka topic/partition
    +   * @param leaders Kafka leaders for each offset range in batch
    +   * @param messageHandler Function for translating each message and metadata into the
desired type
    +   */
    +  @Experimental
    +  def createRDD[K, V, KD <: Decoder[K], VD <: Decoder[V], R](
    +      jsc: JavaSparkContext,
    +      keyClass: Class[K],
    +      valueClass: Class[V],
    +      keyDecoderClass: Class[KD],
    +      valueDecoderClass: Class[VD],
    +      recordClass: Class[R],
    +      kafkaParams: JMap[String, String],
    +      offsetRanges: Array[OffsetRange],
    +      leaders: Array[Leader],
    +      messageHandler: JFunction[MessageAndMetadata[K, V], R]
    +    ): JavaRDD[R] = {
    +    implicit val keyCmt: ClassTag[K] = ClassTag(keyClass)
    +    implicit val valueCmt: ClassTag[V] = ClassTag(valueClass)
    +    implicit val keyDecoderCmt: ClassTag[KD] = ClassTag(keyDecoderClass)
    +    implicit val valueDecoderCmt: ClassTag[VD] = ClassTag(valueDecoderClass)
    +    implicit val recordCmt: ClassTag[R] = ClassTag(recordClass)
    +    createRDD[K, V, KD, VD, R](
    +      jsc.sc, Map(kafkaParams.toSeq: _*), offsetRanges, leaders, messageHandler.call
_)
    +  }
    +
    +  /**
    +   * :: Experimental ::
    +   * Create an input stream that pulls messages from a Kafka Broker. This stream can
guarantee
    --- End diff --
    
    What about saying "without using receivers" at the end?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working,
please contact the infrastructure team at infrastructure@apache.org or file
a JIRA ticket with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message