carbondata-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jackylk <...@git.apache.org>
Subject [GitHub] carbondata pull request #1470: [CARBONDATA-1572] Support streaming ingest an...
Date Mon, 06 Nov 2017 12:32:05 GMT
Github user jackylk commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/1470#discussion_r149066956
  
    --- Diff: integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala
---
    @@ -82,8 +84,43 @@ class CarbonScanRDD(
     
         // get splits
         val splits = format.getSplits(job)
    -    val result = distributeSplits(splits)
    -    result
    +
    +    // separate split
    +    // 1. for batch splits, invoke distributeSplits method to create partitions
    +    // 2. for stream splits, create partition for each split by default
    +    val columnarSplits = new ArrayList[InputSplit]()
    +    val streamSplits = new ArrayBuffer[InputSplit]()
    +    for(i <- 0 until splits.size()) {
    +      val carbonInputSplit = splits.get(i).asInstanceOf[CarbonInputSplit]
    +      if ("row-format".equals(carbonInputSplit.getFormat)) {
    +        streamSplits += splits.get(i)
    +      } else {
    +        columnarSplits.add(splits.get(i))
    +      }
    +    }
    +    val batchPartitions = distributeSplits(columnarSplits)
    --- End diff --
    
    suggest rename to `distributeBatchSplits`


---

Mime
View raw message