spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From l...@apache.org
Subject spark git commit: [SQL][MINOR] use stricter type parameter to make it clear that parquet reader returns UnsafeRow
Date Wed, 03 Aug 2016 00:23:36 GMT
Repository: spark
Updated Branches:
  refs/heads/master 386127377 -> ae226283e


[SQL][MINOR] use stricter type parameter to make it clear that parquet reader returns UnsafeRow

## What changes were proposed in this pull request?

A small code style change: it's better to make the type parameter more accurate.

## How was this patch tested?

N/A

Author: Wenchen Fan <wenchen@databricks.com>

Closes #14458 from cloud-fan/parquet.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ae226283
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ae226283
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ae226283

Branch: refs/heads/master
Commit: ae226283e19ce396216c73b0ae2470efa122b65b
Parents: 3861273
Author: Wenchen Fan <wenchen@databricks.com>
Authored: Wed Aug 3 08:23:26 2016 +0800
Committer: Cheng Lian <lian@databricks.com>
Committed: Wed Aug 3 08:23:26 2016 +0800

----------------------------------------------------------------------
 .../execution/datasources/parquet/ParquetFileFormat.scala |  4 ++--
 .../datasources/parquet/ParquetReadSupport.scala          | 10 +++++-----
 .../datasources/parquet/ParquetRecordMaterializer.scala   |  6 +++---
 3 files changed, 10 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/ae226283/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
index 772e031..c3e75f1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
@@ -370,11 +370,11 @@ private[sql] class ParquetFileFormat
         logDebug(s"Falling back to parquet-mr")
         val reader = pushed match {
           case Some(filter) =>
-            new ParquetRecordReader[InternalRow](
+            new ParquetRecordReader[UnsafeRow](
               new ParquetReadSupport,
               FilterCompat.get(filter, null))
           case _ =>
-            new ParquetRecordReader[InternalRow](new ParquetReadSupport)
+            new ParquetRecordReader[UnsafeRow](new ParquetReadSupport)
         }
         reader.initialize(split, hadoopAttemptContext)
         reader

http://git-wip-us.apache.org/repos/asf/spark/blob/ae226283/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala
index 8a2e0d7..f1a35dd 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala
@@ -29,12 +29,12 @@ import org.apache.parquet.schema._
 import org.apache.parquet.schema.Type.Repetition
 
 import org.apache.spark.internal.Logging
-import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.UnsafeRow
 import org.apache.spark.sql.types._
 
 /**
  * A Parquet [[ReadSupport]] implementation for reading Parquet records as Catalyst
- * [[InternalRow]]s.
+ * [[UnsafeRow]]s.
  *
  * The API interface of [[ReadSupport]] is a little bit over complicated because of historical
 * reasons.  In older versions of parquet-mr (say 1.6.0rc3 and prior), [[ReadSupport]] need to be
@@ -48,7 +48,7 @@ import org.apache.spark.sql.types._
 * Due to this reason, we no longer rely on [[ReadContext]] to pass requested schema from [[init()]]
  * to [[prepareForRead()]], but use a private `var` for simplicity.
  */
-private[parquet] class ParquetReadSupport extends ReadSupport[InternalRow] with Logging {
+private[parquet] class ParquetReadSupport extends ReadSupport[UnsafeRow] with Logging {
   private var catalystRequestedSchema: StructType = _
 
   /**
@@ -72,13 +72,13 @@ private[parquet] class ParquetReadSupport extends ReadSupport[InternalRow] with
   /**
   * Called on executor side after [[init()]], before instantiating actual Parquet record readers.
    * Responsible for instantiating [[RecordMaterializer]], which is used for converting Parquet
-   * records to Catalyst [[InternalRow]]s.
+   * records to Catalyst [[UnsafeRow]]s.
    */
   override def prepareForRead(
       conf: Configuration,
       keyValueMetaData: JMap[String, String],
       fileSchema: MessageType,
-      readContext: ReadContext): RecordMaterializer[InternalRow] = {
+      readContext: ReadContext): RecordMaterializer[UnsafeRow] = {
     log.debug(s"Preparing for read Parquet file with message type: $fileSchema")
     val parquetRequestedSchema = readContext.getRequestedSchema
 

http://git-wip-us.apache.org/repos/asf/spark/blob/ae226283/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala
index d12e780..4e49a0d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.datasources.parquet
 import org.apache.parquet.io.api.{GroupConverter, RecordMaterializer}
 import org.apache.parquet.schema.MessageType
 
-import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.UnsafeRow
 import org.apache.spark.sql.types.StructType
 
 /**
@@ -32,12 +32,12 @@ import org.apache.spark.sql.types.StructType
  */
 private[parquet] class ParquetRecordMaterializer(
     parquetSchema: MessageType, catalystSchema: StructType, schemaConverter: ParquetSchemaConverter)
-  extends RecordMaterializer[InternalRow] {
+  extends RecordMaterializer[UnsafeRow] {
 
   private val rootConverter =
     new ParquetRowConverter(schemaConverter, parquetSchema, catalystSchema, NoopUpdater)
 
-  override def getCurrentRecord: InternalRow = rootConverter.currentRecord
+  override def getCurrentRecord: UnsafeRow = rootConverter.currentRecord
 
   override def getRootConverter: GroupConverter = rootConverter
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message