spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] [spark] gaborgsomogyi commented on a change in pull request #29729: [SPARK-32032][SS] Avoid infinite wait in driver because of KafkaConsumer.poll(long) API
Date Mon, 14 Sep 2020 06:50:20 GMT

gaborgsomogyi commented on a change in pull request #29729:
URL: https://github.com/apache/spark/pull/29729#discussion_r487686563



##########
File path: external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala
##########
@@ -46,41 +49,35 @@ import org.apache.spark.util.{UninterruptibleThread, UninterruptibleThreadRunner
 private[kafka010] class KafkaOffsetReader(
     consumerStrategy: ConsumerStrategy,
     val driverKafkaParams: ju.Map[String, Object],
-    readerOptions: CaseInsensitiveMap[String],
-    driverGroupIdPrefix: String) extends Logging {
+    readerOptions: CaseInsensitiveMap[String]) extends Logging {
 
   /**
-   * [[UninterruptibleThreadRunner]] ensures that all [[KafkaConsumer]] communication called in an
+   * [[UninterruptibleThreadRunner]] ensures that all Kafka communication called in an
    * [[UninterruptibleThread]]. In the case of streaming queries, we are already running in an
    * [[UninterruptibleThread]], however for batch mode this is not the case.
    */
   val uninterruptibleThreadRunner = new UninterruptibleThreadRunner("Kafka Offset Reader")
 
-  /**
-   * Place [[groupId]] and [[nextId]] here so that they are initialized before any consumer is
-   * created -- see SPARK-19564.
-   */
-  private var groupId: String = null
-  private var nextId = 0
-
-  /**
-   * A KafkaConsumer used in the driver to query the latest Kafka offsets. This only queries the
-   * offsets and never commits them.
-   */
-  @volatile protected var _consumer: Consumer[Array[Byte], Array[Byte]] = null
+  @volatile protected var _admin: Admin = null
 
-  protected def consumer: Consumer[Array[Byte], Array[Byte]] = synchronized {
+  protected def admin: Admin = synchronized {
     assert(Thread.currentThread().isInstanceOf[UninterruptibleThread])
-    if (_consumer == null) {
-      val newKafkaParams = new ju.HashMap[String, Object](driverKafkaParams)
-      if (driverKafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG) == null) {
-        newKafkaParams.put(ConsumerConfig.GROUP_ID_CONFIG, nextGroupId())
-      }
-      _consumer = consumerStrategy.createConsumer(newKafkaParams)
+    if (_admin == null) {
+      _admin = consumerStrategy.createAdmin(driverKafkaParams)
     }
-    _consumer
+    _admin
   }
 
+  def isolationLevel(): IsolationLevel = {
+    driverKafkaParams.get(ConsumerConfig.ISOLATION_LEVEL_CONFIG) match {
+      case s: String => IsolationLevel.valueOf(s.toUpperCase(Locale.ROOT))
+      case null => IsolationLevel.valueOf(
+        ConsumerConfig.DEFAULT_ISOLATION_LEVEL.toUpperCase(Locale.ROOT))
+    }
+  }
+
+  private def listOffsetsOptions() = new ListOffsetsOptions(isolationLevel())

Review comment:
       Same fix here.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message