carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chenliang...@apache.org
Subject [1/2] incubator-carbondata git commit: remove unnecessary projection
Date Sat, 26 Nov 2016 02:29:41 GMT
Repository: incubator-carbondata
Updated Branches:
  refs/heads/master e05c0d5da -> 739afbd71


remove unnecessary projection

fix


Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/51b23544
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/51b23544
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/51b23544

Branch: refs/heads/master
Commit: 51b235447e5d40202a0588d1da1b171d72d7bd33
Parents: e05c0d5
Author: jackylk <jacky.likun@huawei.com>
Authored: Sat Nov 26 09:48:14 2016 +0800
Committer: chenliang613 <chenliang613@apache.org>
Committed: Sat Nov 26 10:28:15 2016 +0800

----------------------------------------------------------------------
 .../spark/sql/hive/CarbonStrategies.scala       | 48 +++++++++++++-------
 .../AllDataTypesTestCaseAggregate.scala         |  4 +-
 .../AllDataTypesTestCaseAggregate.scala         |  5 ++
 .../filterexpr/FilterProcessorTestCase.scala    |  3 +-
 4 files changed, 38 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/51b23544/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonStrategies.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonStrategies.scala
b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonStrategies.scala
index a755d5d..1e944f7 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonStrategies.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonStrategies.scala
@@ -22,9 +22,10 @@ import java.util
 import scala.collection.JavaConverters._
 
 import org.apache.spark.sql._
-import org.apache.spark.sql.catalyst.{expressions, TableIdentifier}
 import org.apache.spark.sql.catalyst.CarbonTableIdentifierImplicit._
+import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
+import org.apache.spark.sql.catalyst.expressions
 import org.apache.spark.sql.catalyst.expressions.{AttributeSet, _}
 import org.apache.spark.sql.catalyst.planning.{PhysicalOperation, QueryPlanner}
 import org.apache.spark.sql.catalyst.plans.logical.{Filter => LogicalFilter, LogicalPlan}
@@ -39,7 +40,6 @@ import org.apache.spark.sql.types.IntegerType
 import org.apache.carbondata.common.logging.LogServiceFactory
 import org.apache.carbondata.spark.exception.MalformedCarbonCommandException
 
-
 class CarbonStrategies(sqlContext: SQLContext) extends QueryPlanner[SparkPlan] {
 
   override def strategies: Seq[Strategy] = getStrategies
@@ -88,35 +88,49 @@ class CarbonStrategies(sqlContext: SQLContext) extends QueryPlanner[SparkPlan]
{
         relation.carbonRelation.metaData.carbonTable.getFactTableName.toLowerCase
       // Check out any expressions are there in project list. if they are present then we
need to
       // decode them as well.
+
       val projectSet = AttributeSet(projectList.flatMap(_.references))
-      val scan = CarbonScan(projectSet.toSeq,
-        relation.carbonRelation,
-        predicates)(sqlContext)
+      val filterSet = AttributeSet(predicates.flatMap(_.references))
+
+      val scan = CarbonScan(projectSet.toSeq, relation.carbonRelation, predicates)(sqlContext)
       projectList.map {
         case attr: AttributeReference =>
         case Alias(attr: AttributeReference, _) =>
         case others =>
-          others.references.map{f =>
+          others.references.map { f =>
             val dictionary = relation.carbonRelation.metaData.dictionaryMap.get(f.name)
             if (dictionary.isDefined && dictionary.get) {
               scan.attributesNeedToDecode.add(f.asInstanceOf[AttributeReference])
             }
           }
       }
-      if (scan.attributesNeedToDecode.size() > 0) {
-        val decoder = getCarbonDecoder(logicalRelation,
-          sc,
-          tableName,
-          scan.attributesNeedToDecode.asScala.toSeq,
-          scan)
-        if (scan.unprocessedExprs.nonEmpty) {
-          val filterCondToAdd = scan.unprocessedExprs.reduceLeftOption(expressions.And)
-          Project(projectList, filterCondToAdd.map(Filter(_, decoder)).getOrElse(decoder))
+      val scanWithDecoder =
+        if (scan.attributesNeedToDecode.size() > 0) {
+          val decoder = getCarbonDecoder(logicalRelation,
+            sc,
+            tableName,
+            scan.attributesNeedToDecode.asScala.toSeq,
+            scan)
+          if (scan.unprocessedExprs.nonEmpty) {
+            val filterCondToAdd = scan.unprocessedExprs.reduceLeftOption(expressions.And)
+            filterCondToAdd.map(Filter(_, decoder)).getOrElse(decoder)
+          } else {
+            decoder
+          }
         } else {
-          Project(projectList, decoder)
+          scan
         }
+
+      if (projectList.map(_.toAttribute) == scan.attributesRaw &&
+          projectSet.size == projectList.size &&
+          filterSet.subsetOf(projectSet)) {
+        // copied from spark pruneFilterProjectRaw
+        // When it is possible to just use column pruning to get the right projection and
+        // when the columns of this projection are enough to evaluate all filter conditions,
+        // just do a scan with no extra project.
+        scanWithDecoder
       } else {
-        Project(projectList, scan)
+        Project(projectList, scanWithDecoder)
       }
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/51b23544/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/aggquery/AllDataTypesTestCaseAggregate.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/aggquery/AllDataTypesTestCaseAggregate.scala
b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/aggquery/AllDataTypesTestCaseAggregate.scala
index 94aa63c..f8b126e 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/aggquery/AllDataTypesTestCaseAggregate.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/aggquery/AllDataTypesTestCaseAggregate.scala
@@ -19,7 +19,6 @@
 
 package org.apache.carbondata.spark.testsuite.aggquery
 
-import org.apache.spark.sql.Row
 import org.apache.spark.sql.common.util.CarbonHiveContext._
 import org.apache.spark.sql.common.util.QueryTest
 import org.scalatest.BeforeAndAfterAll
@@ -37,6 +36,7 @@ class AllDataTypesTestCaseAggregate extends QueryTest with BeforeAndAfterAll
{
     CarbonProperties.getInstance()
       .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy")
     sql("DROP TABLE IF EXISTS alldatatypestableAGG")
+    sql("DROP TABLE IF EXISTS alldatatypescubeAGG_hive")
     sql(
       "CREATE TABLE alldatatypestableAGG (empno int, empname String, designation String,
doj " +
       "Timestamp, workgroupcategory int, workgroupcategoryname String, deptno int, deptname
" +
@@ -53,8 +53,6 @@ class AllDataTypesTestCaseAggregate extends QueryTest with BeforeAndAfterAll
{
       "int,utilization int,salary int)row format delimited fields terminated by ','")
     sql(
       "LOAD DATA LOCAL INPATH './src/test/resources/datawithoutheader.csv' INTO TABLE alldatatypescubeAGG_hive")
-      
-       
   }
 
   test(

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/51b23544/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/AllDataTypesTestCaseAggregate.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/AllDataTypesTestCaseAggregate.scala
b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/AllDataTypesTestCaseAggregate.scala
index f02d4e7..c6a1405 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/AllDataTypesTestCaseAggregate.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/AllDataTypesTestCaseAggregate.scala
@@ -38,6 +38,11 @@ class AllDataTypesTestCaseAggregate extends QueryTest with BeforeAndAfterAll
{
     clean
     val currentDirectory = new File(this.getClass.getResource("/").getPath + "/../../")
       .getCanonicalPath
+
+    sql("drop table if exists Carbon_automation_test")
+    sql("drop table if exists Carbon_automation_hive")
+    sql("drop table if exists Carbon_automation_test_hive")
+
     sql("create table if not exists Carbon_automation_test (imei string,deviceInformationId
int,MAC string,deviceColor string,device_backColor string,modelId string,marketName string,AMSize
string,ROMSize string,CUPAudit string,CPIClocked string,series string,productionDate timestamp,bomCode
string,internalModels string, deliveryTime string, channelsId string, channelsName string
, deliveryAreaId string, deliveryCountry string, deliveryProvince string, deliveryCity string,deliveryDistrict
string, deliveryStreet string, oxSingleNumber string, ActiveCheckTime string, ActiveAreaId
string, ActiveCountry string, ActiveProvince string, Activecity string, ActiveDistrict string,
ActiveStreet string, ActiveOperatorId string, Active_releaseId string, Active_EMUIVersion
string, Active_operaSysVersion string, Active_BacVerNumber string, Active_BacFlashVer string,
Active_webUIVersion string, Active_webUITypeCarrVer string,Active_webTypeDataVerNumber string,
Active_operatorsVersion string, Active
 _phonePADPartitionedVersions string, Latest_YEAR int, Latest_MONTH int, Latest_DAY int, Latest_HOUR
string, Latest_areaId string, Latest_country string, Latest_province string, Latest_city string,
Latest_district string, Latest_street string, Latest_releaseId string, Latest_EMUIVersion
string, Latest_operaSysVersion string, Latest_BacVerNumber string, Latest_BacFlashVer string,
Latest_webUIVersion string, Latest_webUITypeCarrVer string, Latest_webTypeDataVerNumber string,
Latest_operatorsVersion string, Latest_phonePADPartitionedVersions string, Latest_operatorId
string, gamePointDescription string, gamePointId int,contractNumber int) STORED BY 'org.apache.carbondata.format'
TBLPROPERTIES('DICTIONARY_INCLUDE'='Latest_MONTH,Latest_DAY,deviceInformationId')");
     CarbonProperties.getInstance()
       .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/51b23544/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/FilterProcessorTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/FilterProcessorTestCase.scala
b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/FilterProcessorTestCase.scala
index 7e29cd1..9b3ac45 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/FilterProcessorTestCase.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/FilterProcessorTestCase.scala
@@ -51,7 +51,7 @@ class FilterProcessorTestCase extends QueryTest with BeforeAndAfterAll {
     )
      CarbonProperties.getInstance()
         .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "MM-dd-yyyy HH:mm:ss")
-        
+
      sql("CREATE TABLE filterTimestampDataType (ID int, date Timestamp, country String, "
+
       "name String, phonetype String, serialname String, salary int) " +
         "STORED BY 'org.apache.carbondata.format'"
@@ -129,7 +129,6 @@ class FilterProcessorTestCase extends QueryTest with BeforeAndAfterAll
{
     sql("load data local inpath './src/test/resources/big_int_Decimal.csv' into table big_int_basicc_Hive_1")
   }
 
-
   test("Is not null filter") {
     checkAnswer(
       sql("select id from filtertestTablesWithNull " + "where id is not null"),


Mime
View raw message