spark-commits mailing list archives

From wenc...@apache.org
Subject spark git commit: [SPARK-16374][SQL] Remove Alias from MetastoreRelation and SimpleCatalogRelation
Date Thu, 07 Jul 2016 04:07:35 GMT
Repository: spark
Updated Branches:
  refs/heads/master 34283de16 -> 42279bff6


[SPARK-16374][SQL] Remove Alias from MetastoreRelation and SimpleCatalogRelation

#### What changes were proposed in this pull request?
Unlike the other leaf nodes, `MetastoreRelation` and `SimpleCatalogRelation` carry a
pre-defined `alias`, which is used to change the qualifier of the node. However, under
the existing alias handling, the alias should instead be captured by a `SubqueryAlias` node.

This PR separates alias handling from `MetastoreRelation` and `SimpleCatalogRelation`,
making them consistent with the other leaf nodes. It also simplifies their constructor
signatures and the conversion to a `BaseRelation`.
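
As a rough sketch of the new pattern (it mirrors the `HiveMetastoreCatalog` change in the diff below; `applyAlias` is a hypothetical helper, not code from this PR), the caller wraps the plain relation in a `SubqueryAlias` only when an alias is present:

```scala
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}

// Hypothetical helper illustrating the pattern: the relation itself no longer
// carries an alias; the caller adds a SubqueryAlias on top when one is given.
def applyAlias(relation: LogicalPlan, alias: Option[String]): LogicalPlan =
  alias.map(a => SubqueryAlias(a, relation)).getOrElse(relation)
```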

For example, below is a query over a `MetastoreRelation` that is converted to a `LogicalRelation`:
```SQL
SELECT tmp.a + 1 FROM test_parquet_ctas tmp WHERE tmp.a > 2
```

Before changes, the analyzed plan is
```
== Analyzed Logical Plan ==
(a + 1): int
Project [(a#951 + 1) AS (a + 1)#952]
+- Filter (a#951 > 2)
   +- SubqueryAlias tmp
      +- Relation[a#951] parquet
```
After changes, the analyzed plan becomes
```
== Analyzed Logical Plan ==
(a + 1): int
Project [(a#951 + 1) AS (a + 1)#952]
+- Filter (a#951 > 2)
   +- SubqueryAlias tmp
      +- SubqueryAlias test_parquet_ctas
         +- Relation[a#951] parquet
```

**Note: the optimized plans are the same.**
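
For reference, both the analyzed and the optimized plan of such a query can be printed with `Dataset.explain(extended = true)`. A minimal sketch, assuming a Hive-enabled `SparkSession` and that a table named `test_parquet_ctas` already exists:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("alias-plan-check")
  .enableHiveSupport()  // resolve the table through the Hive metastore
  .getOrCreate()

// Prints the parsed, analyzed, optimized, and physical plans.
spark.sql("SELECT tmp.a + 1 FROM test_parquet_ctas tmp WHERE tmp.a > 2")
  .explain(extended = true)
```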

For `SimpleCatalogRelation`, the existing code already generates two `SubqueryAlias` nodes,
so no change is needed.

#### How was this patch tested?
Added test cases.

Author: gatorsmile <gatorsmile@gmail.com>

Closes #14053 from gatorsmile/removeAliasFromMetastoreRelation.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/42279bff
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/42279bff
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/42279bff

Branch: refs/heads/master
Commit: 42279bff686f9808ec7a9e8f4da95c717edc6026
Parents: 34283de
Author: gatorsmile <gatorsmile@gmail.com>
Authored: Thu Jul 7 12:07:19 2016 +0800
Committer: Wenchen Fan <wenchen@databricks.com>
Committed: Thu Jul 7 12:07:19 2016 +0800

----------------------------------------------------------------------
 .../spark/sql/catalyst/catalog/SessionCatalog.scala       |  2 +-
 .../org/apache/spark/sql/catalyst/catalog/interface.scala |  5 ++---
 .../spark/sql/catalyst/catalog/SessionCatalogSuite.scala  |  2 +-
 .../org/apache/spark/sql/hive/HiveMetastoreCatalog.scala  | 10 ++++++----
 .../org/apache/spark/sql/hive/MetastoreRelation.scala     | 10 ++++------
 5 files changed, 14 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/42279bff/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index e1d4991..ffaefeb 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -403,7 +403,7 @@ class SessionCatalog(
       val relation =
         if (name.database.isDefined || !tempTables.contains(table)) {
           val metadata = externalCatalog.getTable(db, table)
-          SimpleCatalogRelation(db, metadata, alias)
+          SimpleCatalogRelation(db, metadata)
         } else {
           tempTables(table)
         }

http://git-wip-us.apache.org/repos/asf/spark/blob/42279bff/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index 6197aca..b12606e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -244,8 +244,7 @@ trait CatalogRelation {
  */
 case class SimpleCatalogRelation(
     databaseName: String,
-    metadata: CatalogTable,
-    alias: Option[String] = None)
+    metadata: CatalogTable)
   extends LeafNode with CatalogRelation {
 
   override def catalogTable: CatalogTable = metadata
@@ -261,7 +260,7 @@ case class SimpleCatalogRelation(
         CatalystSqlParser.parseDataType(f.dataType),
         // Since data can be dumped in randomly with no validation, everything is nullable.
         nullable = true
-      )(qualifier = Some(alias.getOrElse(metadata.identifier.table)))
+      )(qualifier = Some(metadata.identifier.table))
     }
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/42279bff/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
index c8e7c51..05eb302 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
@@ -407,7 +407,7 @@ class SessionCatalogSuite extends SparkFunSuite {
     val relationWithAlias =
       SubqueryAlias(alias,
         SubqueryAlias("tbl1",
-          SimpleCatalogRelation("db2", tableMetadata, Some(alias))))
+          SimpleCatalogRelation("db2", tableMetadata)))
     assert(catalog.lookupRelation(
       TableIdentifier("tbl1", Some("db2")), alias = None) == relation)
     assert(catalog.lookupRelation(

http://git-wip-us.apache.org/repos/asf/spark/blob/42279bff/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 20e64a4..2be51ed 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -180,8 +180,10 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
           SubqueryAlias(aliasText, sessionState.sqlParser.parsePlan(viewText))
       }
     } else {
-      MetastoreRelation(
-        qualifiedTableName.database, qualifiedTableName.name, alias)(table, client, sparkSession)
+      val qualifiedTable =
+        MetastoreRelation(
+          qualifiedTableName.database, qualifiedTableName.name)(table, client, sparkSession)
+      alias.map(a => SubqueryAlias(a, qualifiedTable)).getOrElse(qualifiedTable)
     }
   }
 
@@ -385,7 +387,7 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
         // Read path
         case relation: MetastoreRelation if shouldConvertMetastoreParquet(relation) =>
           val parquetRelation = convertToParquetRelation(relation)
-          SubqueryAlias(relation.alias.getOrElse(relation.tableName), parquetRelation)
+          SubqueryAlias(relation.tableName, parquetRelation)
       }
     }
   }
@@ -423,7 +425,7 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
         // Read path
         case relation: MetastoreRelation if shouldConvertMetastoreOrc(relation) =>
           val orcRelation = convertToOrcRelation(relation)
-          SubqueryAlias(relation.alias.getOrElse(relation.tableName), orcRelation)
+          SubqueryAlias(relation.tableName, orcRelation)
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/42279bff/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala
index 58bca20..3ab1bda 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala
@@ -41,8 +41,7 @@ import org.apache.spark.sql.hive.client.HiveClient
 
 private[hive] case class MetastoreRelation(
     databaseName: String,
-    tableName: String,
-    alias: Option[String])
+    tableName: String)
     (val catalogTable: CatalogTable,
      @transient private val client: HiveClient,
      @transient private val sparkSession: SparkSession)
@@ -52,13 +51,12 @@ private[hive] case class MetastoreRelation(
     case relation: MetastoreRelation =>
       databaseName == relation.databaseName &&
         tableName == relation.tableName &&
-        alias == relation.alias &&
         output == relation.output
     case _ => false
   }
 
   override def hashCode(): Int = {
-    Objects.hashCode(databaseName, tableName, alias, output)
+    Objects.hashCode(databaseName, tableName, output)
   }
 
   override protected def otherCopyArgs: Seq[AnyRef] = catalogTable :: sparkSession :: Nil
@@ -208,7 +206,7 @@ private[hive] case class MetastoreRelation(
       CatalystSqlParser.parseDataType(f.dataType),
       // Since data can be dumped in randomly with no validation, everything is nullable.
       nullable = true
-    )(qualifier = Some(alias.getOrElse(tableName)))
+    )(qualifier = Some(tableName))
   }
 
   /** PartitionKey attributes */
@@ -243,6 +241,6 @@ private[hive] case class MetastoreRelation(
   }
 
   override def newInstance(): MetastoreRelation = {
-    MetastoreRelation(databaseName, tableName, alias)(catalogTable, client, sparkSession)
+    MetastoreRelation(databaseName, tableName)(catalogTable, client, sparkSession)
   }
 }



