spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cloud-fan <...@git.apache.org>
Subject [GitHub] spark pull request #21018: [SPARK-23880][SQL] Do not trigger any jobs for ca...
Date Thu, 19 Apr 2018 07:54:55 GMT
Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21018#discussion_r182660703
  
    --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala ---
    @@ -119,26 +119,60 @@ class CacheManager extends Logging {
         while (it.hasNext) {
           val cd = it.next()
           if (cd.plan.find(_.sameResult(plan)).isDefined) {
    -        cd.cachedRepresentation.cachedColumnBuffers.unpersist(blocking)
    +        cd.cachedRepresentation.clearCache(blocking)
             it.remove()
           }
         }
       }
     
    +  /**
    +   * Materialize the cache that refers to the given physical plan.
    --- End diff --
    
    ```
    class CachedRDDBuilder(private var _cachedColumnBuffers: RDD[CachedBatch] = null) {
      def cachedColumnBuffers = {
        if (_cachedColumnBuffers == null) {
          synchronized {
            if (_cachedColumnBuffers == null) {
              _cachedColumnBuffers = buildBuffer()
            }
          }
        }
        _cachedColumnBuffers
      } 
    }
    
    class InMemoryRelation(cacheBuilder: CachedRDDBuilder = new CachedRDDBuilder()) {
      // newInstance should keep the existing CachedRDDBuilder
      def newInstance()...
    }
    ```
    
    Then, in the physical plan and the cache manager, just call `relation.cacheBuilder.cachedColumnBuffers`.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message