spark-commits mailing list archives

From pwend...@apache.org
Subject [1/2] Fixed coding style issues in Spark SQL
Date Sun, 23 Mar 2014 22:21:50 GMT
Repository: spark
Updated Branches:
  refs/heads/master 57a4379c0 -> 8265dc773


http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala
new file mode 100644
index 0000000..1ddc41a
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.trees
+
+import scala.collection.mutable.ArrayBuffer
+
+import org.scalatest.FunSuite
+
+import org.apache.spark.sql.catalyst.expressions._
+
+class TreeNodeSuite extends FunSuite {
+  test("top node changed") {
+    val after = Literal(1) transform { case Literal(1, _) => Literal(2) }
+    assert(after === Literal(2))
+  }
+
+  test("one child changed") {
+    val before = Add(Literal(1), Literal(2))
+    val after = before transform { case Literal(2, _) => Literal(1) }
+
+    assert(after === Add(Literal(1), Literal(1)))
+  }
+
+  test("no change") {
+    val before = Add(Literal(1), Add(Literal(2), Add(Literal(3), Literal(4))))
+    val after = before transform { case Literal(5, _) => Literal(1)}
+
+    assert(before === after)
+    assert(before.map(_.id) === after.map(_.id))
+  }
+
+  test("collect") {
+    val tree = Add(Literal(1), Add(Literal(2), Add(Literal(3), Literal(4))))
+    val literals = tree collect {case l: Literal => l}
+
+    assert(literals.size === 4)
+    (1 to 4).foreach(i => assert(literals contains Literal(i)))
+  }
+
+  test("pre-order transform") {
+    val actual = new ArrayBuffer[String]()
+    val expected = Seq("+", "1", "*", "2", "-", "3", "4")
+    val expression = Add(Literal(1), Multiply(Literal(2), Subtract(Literal(3), Literal(4))))
+    expression transformDown {
+      case b: BinaryExpression => actual.append(b.symbol); b
+      case l: Literal => actual.append(l.toString); l
+    }
+
+    assert(expected === actual)
+  }
+
+  test("post-order transform") {
+    val actual = new ArrayBuffer[String]()
+    val expected = Seq("1", "2", "3", "4", "-", "*", "+")
+    val expression = Add(Literal(1), Multiply(Literal(2), Subtract(Literal(3), Literal(4))))
+    expression transformUp {
+      case b: BinaryExpression => actual.append(b.symbol); b
+      case l: Literal => actual.append(l.toString); l
+    }
+
+    assert(expected === actual)
+  }
+}
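
The new suite above exercises Catalyst's TreeNode rewrite API: transformDown walks the tree pre-order and transformUp walks it post-order, applying a partial-function rule at each node. As a rough standalone sketch of that pattern (a toy Expr type, not the actual Catalyst classes):

object TreeTransformSketch extends App {
  sealed trait Expr {
    def children: Seq[Expr]
    def withChildren(newChildren: Seq[Expr]): Expr

    // Pre-order: apply the rule at this node first, then descend into the (possibly new) children.
    def transformDown(rule: PartialFunction[Expr, Expr]): Expr = {
      val here = if (rule.isDefinedAt(this)) rule(this) else this
      here.withChildren(here.children.map(_.transformDown(rule)))
    }

    // Post-order: rewrite the children first, then apply the rule at this node.
    def transformUp(rule: PartialFunction[Expr, Expr]): Expr = {
      val below = withChildren(children.map(_.transformUp(rule)))
      if (rule.isDefinedAt(below)) rule(below) else below
    }
  }

  case class Lit(value: Int) extends Expr {
    def children = Nil
    def withChildren(newChildren: Seq[Expr]) = this
  }

  case class Plus(left: Expr, right: Expr) extends Expr {
    def children = Seq(left, right)
    def withChildren(newChildren: Seq[Expr]) = Plus(newChildren(0), newChildren(1))
  }

  // Mirrors the "one child changed" test: Plus(Lit(1), Lit(2)) becomes Plus(Lit(1), Lit(1)).
  println(Plus(Lit(1), Lit(2)).transformUp { case Lit(2) => Lit(1) })
}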

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/core/src/main/scala/org/apache/spark/rdd/PartitionLocalRDDFunctions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/rdd/PartitionLocalRDDFunctions.scala b/sql/core/src/main/scala/org/apache/spark/rdd/PartitionLocalRDDFunctions.scala
index b8b9e58..f1230e7 100644
--- a/sql/core/src/main/scala/org/apache/spark/rdd/PartitionLocalRDDFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/rdd/PartitionLocalRDDFunctions.scala
@@ -22,11 +22,12 @@ import scala.language.implicitConversions
 import scala.reflect._
 import scala.collection.mutable.ArrayBuffer
 
-import org.apache.spark._
-import org.apache.spark.Aggregator
-import org.apache.spark.SparkContext._
+import org.apache.spark.{Aggregator, InterruptibleIterator, Logging}
 import org.apache.spark.util.collection.AppendOnlyMap
 
+/* Implicit conversions */
+import org.apache.spark.SparkContext._
+
 /**
  * Extra functions on RDDs that perform only local operations.  These can be used when data has
  * already been partitioned correctly.
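
The same import reshuffling recurs throughout this patch: fully qualified org.apache.spark imports instead of relative catalyst.* ones, groups ordered roughly as java/scala, then other third-party libraries, then Spark itself, alphabetized within each group, with imports pulled in only for their implicit conversions set apart under a /* Implicit conversions */ marker. A hypothetical file header following that layout (the particular imports are placeholders taken from elsewhere in the patch) would look like:

import java.util.Date

import scala.collection.mutable.ArrayBuffer

import org.apache.hadoop.conf.Configuration

import org.apache.spark.Logging
import org.apache.spark.sql.catalyst.expressions._

/* Implicit conversions */
import org.apache.spark.SparkContext._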

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 587cc74..3e98bd3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -20,14 +20,13 @@ package org.apache.spark.sql
 import scala.language.implicitConversions
 import scala.reflect.runtime.universe.TypeTag
 
-import org.apache.spark.{SparkContext, SparkConf}
+import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.dsl
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.optimizer.Optimizer
-import org.apache.spark.sql.catalyst.planning.QueryPlanner
-import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, NativeCommand, WriteToFile}
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
 import org.apache.spark.sql.execution._
 

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
index 91c3aaa..770cabc 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
@@ -17,13 +17,13 @@
 
 package org.apache.spark.sql
 
-import org.apache.spark.{OneToOneDependency, Dependency, Partition, TaskContext}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.plans.{Inner, JoinType}
 import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.catalyst.plans.{Inner, JoinType}
 import org.apache.spark.sql.catalyst.types.BooleanType
+import org.apache.spark.{Dependency, OneToOneDependency, Partition, TaskContext}
 
 /**
  * <span class="badge" style="float: right; background-color: darkblue;">ALPHA COMPONENT</span>

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala
index e934c4c..65d77e3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala
@@ -18,14 +18,13 @@
 package org.apache.spark.sql
 package execution
 
-import org.apache.spark.{SparkConf, RangePartitioner, HashPartitioner}
 import org.apache.spark.rdd.ShuffledRDD
+import org.apache.spark.sql.catalyst.errors.attachTree
+import org.apache.spark.sql.catalyst.expressions.{MutableProjection, RowOrdering}
+import org.apache.spark.sql.catalyst.plans.physical._
+import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.util.MutablePair
-
-import catalyst.rules.Rule
-import catalyst.errors._
-import catalyst.expressions._
-import catalyst.plans.physical._
+import org.apache.spark.{HashPartitioner, RangePartitioner, SparkConf}
 
 case class Exchange(newPartitioning: Partitioning, child: SparkPlan) extends UnaryNode {
 
@@ -35,7 +34,7 @@ case class Exchange(newPartitioning: Partitioning, child: SparkPlan) extends Una
 
   def execute() = attachTree(this , "execute") {
     newPartitioning match {
-      case HashPartitioning(expressions, numPartitions) => {
+      case HashPartitioning(expressions, numPartitions) =>
         // TODO: Eliminate redundant expressions in grouping key and value.
         val rdd = child.execute().mapPartitions { iter =>
           val hashExpressions = new MutableProjection(expressions)
@@ -46,8 +45,8 @@ case class Exchange(newPartitioning: Partitioning, child: SparkPlan) extends Una
         val shuffled = new ShuffledRDD[Row, Row, MutablePair[Row, Row]](rdd, part)
         shuffled.setSerializer(new SparkSqlSerializer(new SparkConf(false)))
         shuffled.map(_._2)
-      }
-      case RangePartitioning(sortingExpressions, numPartitions) => {
+
+      case RangePartitioning(sortingExpressions, numPartitions) =>
         // TODO: RangePartitioner should take an Ordering.
         implicit val ordering = new RowOrdering(sortingExpressions)
 
@@ -60,9 +59,9 @@ case class Exchange(newPartitioning: Partitioning, child: SparkPlan) extends Una
         shuffled.setSerializer(new SparkSqlSerializer(new SparkConf(false)))
 
         shuffled.map(_._1)
-      }
+
       case SinglePartition =>
-        child.execute().coalesce(1, true)
+        child.execute().coalesce(1, shuffle = true)
 
       case _ => sys.error(s"Exchange not implemented for $newPartitioning")
       // TODO: Handle BroadcastPartitioning.
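
Two smaller style points in this hunk show up repeatedly in the rest of the diff: multi-statement case bodies lose their surrounding braces, and bare boolean literals at call sites gain a named argument (coalesce(1, shuffle = true)). A minimal sketch of both, using toy names rather than the Spark API:

object CaseStyleSketch {
  // Multi-statement case body without braces: the indented block under the arrow is enough.
  def describe(n: Int): String = n match {
    case 0 =>
      val label = "zero"
      label.toUpperCase
    case _ =>
      "non-zero"
  }

  def copyData(data: Seq[Int], shuffle: Boolean): Seq[Int] =
    if (shuffle) scala.util.Random.shuffle(data) else data

  // Naming the argument documents what the literal means at the call site.
  val result = copyData(Seq(1, 2, 3), shuffle = true)
}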

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala
index c1da365..7e50fda 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala
@@ -18,8 +18,7 @@
 package org.apache.spark.sql
 package execution
 
-import catalyst.expressions._
-import catalyst.types._
+import org.apache.spark.sql.catalyst.expressions._
 
 /**
  * Applies a [[catalyst.expressions.Generator Generator]] to a stream of input rows, combining the

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
index 85035b8..9eb1032 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
@@ -20,13 +20,13 @@ package execution
 
 import org.apache.spark.SparkContext
 
-import catalyst.expressions._
-import catalyst.planning._
-import catalyst.plans._
-import catalyst.plans.logical.LogicalPlan
-import catalyst.plans.physical._
-import parquet.ParquetRelation
-import parquet.InsertIntoParquetTable
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.planning._
+import org.apache.spark.sql.catalyst.plans._
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.plans.physical._
+import org.apache.spark.sql.parquet.ParquetRelation
+import org.apache.spark.sql.parquet.InsertIntoParquetTable
 
 abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
 
@@ -172,7 +172,7 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
 
   // Can we automate these 'pass through' operations?
   object BasicOperators extends Strategy {
-    // TOOD: Set
+    // TODO: Set
     val numPartitions = 200
     def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
       case logical.Distinct(child) =>

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregates.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregates.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregates.scala
index 51889c1..14e5ab6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregates.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregates.scala
@@ -19,12 +19,11 @@ package org.apache.spark.sql
 package execution
 
 import org.apache.spark.SparkContext
+import org.apache.spark.sql.catalyst.errors._
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.plans.physical._
 
-import catalyst.errors._
-import catalyst.expressions._
-import catalyst.plans.physical.{UnspecifiedDistribution, ClusteredDistribution, AllTuples}
-import catalyst.types._
-
+/* Implicit conversions */
 import org.apache.spark.rdd.PartitionLocalRDDFunctions._
 
 /**

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala
index c6d31d9..e4f918b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala
@@ -23,18 +23,17 @@ import scala.reflect.runtime.universe.TypeTag
 import org.apache.spark.rdd.RDD
 import org.apache.spark.SparkContext
 
-import catalyst.errors._
-import catalyst.expressions._
-import catalyst.plans.physical.{UnspecifiedDistribution, OrderedDistribution}
-import catalyst.plans.logical.LogicalPlan
-import catalyst.ScalaReflection
+import org.apache.spark.sql.catalyst.errors._
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.plans.physical.{OrderedDistribution, UnspecifiedDistribution}
+import org.apache.spark.sql.catalyst.ScalaReflection
 
 case class Project(projectList: Seq[NamedExpression], child: SparkPlan) extends UnaryNode {
   def output = projectList.map(_.toAttribute)
 
   def execute() = child.execute().mapPartitions { iter =>
-    @transient val resuableProjection = new MutableProjection(projectList)
-    iter.map(resuableProjection)
+    @transient val reusableProjection = new MutableProjection(projectList)
+    iter.map(reusableProjection)
   }
 }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/core/src/main/scala/org/apache/spark/sql/execution/joins.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins.scala
index 5934fd1..a6e3892 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins.scala
@@ -23,10 +23,10 @@ import scala.collection.mutable
 import org.apache.spark.rdd.RDD
 import org.apache.spark.SparkContext
 
-import catalyst.errors._
-import catalyst.expressions._
-import catalyst.plans._
-import catalyst.plans.physical.{ClusteredDistribution, Partitioning}
+import org.apache.spark.sql.catalyst.errors._
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.plans._
+import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, Partitioning}
 
 import org.apache.spark.rdd.PartitionLocalRDDFunctions._
 

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/core/src/main/scala/org/apache/spark/sql/execution/package.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/package.scala
index 67f6f43..e4a2dec 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/package.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/package.scala
@@ -25,5 +25,4 @@ package org.apache.spark.sql
  * documented here in order to make it easier for others to understand the performance
  * characteristics of query plans that are generated by Spark SQL.
  */
-package object execution {
-}
+package object execution

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetRelation.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetRelation.scala
index e87561f..011aaf7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetRelation.scala
@@ -19,28 +19,27 @@ package org.apache.spark.sql.parquet
 
 import java.io.{IOException, FileNotFoundException}
 
-import org.apache.hadoop.fs.{Path, FileSystem}
+import scala.collection.JavaConversions._
+
 import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.mapreduce.Job
 import org.apache.hadoop.fs.permission.FsAction
+import org.apache.hadoop.fs.{FileSystem, Path}
+import org.apache.hadoop.mapreduce.Job
 
-import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, BaseRelation}
-import org.apache.spark.sql.catalyst.types._
-import org.apache.spark.sql.catalyst.types.ArrayType
-import org.apache.spark.sql.catalyst.expressions.{Row, AttributeReference, Attribute}
-import org.apache.spark.sql.catalyst.analysis.UnresolvedException
-
-import parquet.schema.{MessageTypeParser, MessageType}
+import parquet.hadoop.metadata.{FileMetaData, ParquetMetadata}
+import parquet.hadoop.util.ContextUtil
+import parquet.hadoop.{Footer, ParquetFileReader, ParquetFileWriter}
+import parquet.io.api.{Binary, RecordConsumer}
 import parquet.schema.PrimitiveType.{PrimitiveTypeName => ParquetPrimitiveTypeName}
+import parquet.schema.Type.Repetition
+import parquet.schema.{MessageType, MessageTypeParser}
 import parquet.schema.{PrimitiveType => ParquetPrimitiveType}
 import parquet.schema.{Type => ParquetType}
-import parquet.schema.Type.Repetition
-import parquet.io.api.{Binary, RecordConsumer}
-import parquet.hadoop.{Footer, ParquetFileWriter, ParquetFileReader}
-import parquet.hadoop.metadata.{FileMetaData, ParquetMetadata}
-import parquet.hadoop.util.ContextUtil
 
-import scala.collection.JavaConversions._
+import org.apache.spark.sql.catalyst.analysis.UnresolvedException
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Row}
+import org.apache.spark.sql.catalyst.plans.logical.{BaseRelation, LogicalPlan}
+import org.apache.spark.sql.catalyst.types._
 
 /**
  * Relation that consists of data stored in a Parquet columnar format.
@@ -55,7 +54,7 @@ import scala.collection.JavaConversions._
  * @param tableName The name of the relation that can be used in queries.
  * @param path The path to the Parquet file.
  */
-case class ParquetRelation(val tableName: String, val path: String) extends BaseRelation {
+case class ParquetRelation(tableName: String, path: String) extends BaseRelation {
 
   /** Schema derived from ParquetFile **/
   def parquetSchema: MessageType =
@@ -145,11 +144,10 @@ object ParquetTypesConverter {
     case ParquetPrimitiveTypeName.FLOAT => FloatType
     case ParquetPrimitiveTypeName.INT32 => IntegerType
     case ParquetPrimitiveTypeName.INT64 => LongType
-    case ParquetPrimitiveTypeName.INT96 => {
+    case ParquetPrimitiveTypeName.INT96 =>
       // TODO: add BigInteger type? TODO(andre) use DecimalType instead????
       sys.error("Warning: potential loss of precision: converting INT96 to long")
       LongType
-    }
     case _ => sys.error(
       s"Unsupported parquet datatype $parquetType")
   }
@@ -186,11 +184,10 @@ object ParquetTypesConverter {
 
   def convertToAttributes(parquetSchema: MessageType) : Seq[Attribute] = {
     parquetSchema.getColumns.map {
-      case (desc) => {
+      case (desc) =>
         val ctype = toDataType(desc.getType)
         val name: String = desc.getPath.mkString(".")
         new AttributeReference(name, ctype, false)()
-      }
     }
   }
 
@@ -245,7 +242,7 @@ object ParquetTypesConverter {
    * Try to read Parquet metadata at the given Path. We first see if there is a summary file
    * in the parent directory. If so, this is used. Else we read the actual footer at the given
    * location.
-   * @param path The path at which we expect one (or more) Parquet files.
+   * @param origPath The path at which we expect one (or more) Parquet files.
    * @return The `ParquetMetadata` containing among other things the schema.
    */
   def readMetaData(origPath: Path): ParquetMetadata = {
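
The constructor change above drops the explicit val modifiers because case class parameters are already public vals; repeating the keyword adds nothing. A minimal illustration with a hypothetical class:

// `name` and `path` are accessible fields without being declared as val.
case class Table(name: String, path: String)

object CaseClassValSketch extends App {
  val t = Table("testData", "/tmp/testData.parquet")
  println(t.name + " -> " + t.path)
}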

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala
index 6112110..7285f5b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala
@@ -17,24 +17,24 @@
 
 package org.apache.spark.sql.parquet
 
-import parquet.io.InvalidRecordException
-import parquet.schema.MessageType
-import parquet.hadoop.{ParquetOutputFormat, ParquetInputFormat}
-import parquet.hadoop.util.ContextUtil
-
-import org.apache.spark.rdd.RDD
-import org.apache.spark.{TaskContext, SerializableWritable, SparkContext}
-import org.apache.spark.sql.catalyst.expressions.{Row, Attribute, Expression}
-import org.apache.spark.sql.execution.{SparkPlan, UnaryNode, LeafNode}
+import java.io.IOException
+import java.text.SimpleDateFormat
+import java.util.Date
 
-import org.apache.hadoop.mapreduce.lib.output.{FileOutputFormat => NewFileOutputFormat}
-import org.apache.hadoop.mapreduce._
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.Path
+import org.apache.hadoop.mapreduce._
+import org.apache.hadoop.mapreduce.lib.output.{FileOutputFormat => NewFileOutputFormat}
 
-import java.io.IOException
-import java.text.SimpleDateFormat
-import java.util.Date
+import parquet.hadoop.util.ContextUtil
+import parquet.hadoop.{ParquetInputFormat, ParquetOutputFormat}
+import parquet.io.InvalidRecordException
+import parquet.schema.MessageType
+
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, Row}
+import org.apache.spark.sql.execution.{LeafNode, SparkPlan, UnaryNode}
+import org.apache.spark.{SerializableWritable, SparkContext, TaskContext}
 
 /**
  * Parquet table scan operator. Imports the file that backs the given

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala
index c2ae18b..91b4848 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala
@@ -19,16 +19,15 @@ package org.apache.spark.sql.parquet
 
 import org.apache.hadoop.conf.Configuration
 
-import org.apache.spark.Logging
-
-import parquet.io.api._
-import parquet.schema.{MessageTypeParser, MessageType}
-import parquet.hadoop.api.{WriteSupport, ReadSupport}
-import parquet.hadoop.api.ReadSupport.ReadContext
-import parquet.hadoop.ParquetOutputFormat
 import parquet.column.ParquetProperties
+import parquet.hadoop.ParquetOutputFormat
+import parquet.hadoop.api.ReadSupport.ReadContext
+import parquet.hadoop.api.{ReadSupport, WriteSupport}
+import parquet.io.api._
+import parquet.schema.{MessageType, MessageTypeParser}
 
-import org.apache.spark.sql.catalyst.expressions.{Row, Attribute}
+import org.apache.spark.Logging
+import org.apache.spark.sql.catalyst.expressions.{Attribute, Row}
 import org.apache.spark.sql.catalyst.types._
 
 /**
@@ -95,8 +94,7 @@ class RowWriteSupport extends WriteSupport[Row] with Logging {
   }
 
   def getSchema(configuration: Configuration): MessageType = {
-    return MessageTypeParser.parseMessageType(
-      configuration.get(RowWriteSupport.PARQUET_ROW_SCHEMA))
+    MessageTypeParser.parseMessageType(configuration.get(RowWriteSupport.PARQUET_ROW_SCHEMA))
   }
 
   private var schema: MessageType = null
@@ -108,7 +106,7 @@ class RowWriteSupport extends WriteSupport[Row] with Logging {
     attributes = ParquetTypesConverter.convertToAttributes(schema)
     new WriteSupport.WriteContext(
       schema,
-      new java.util.HashMap[java.lang.String, java.lang.String]());
+      new java.util.HashMap[java.lang.String, java.lang.String]())
   }
 
   override def prepareForWrite(recordConsumer: RecordConsumer): Unit = {
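
The getSchema change above reflects the usual Scala convention that the last expression of a method is its result, so an explicit return (and a trailing semicolon) are redundant. A toy example of the two forms:

object ReturnStyleSketch {
  // Idiomatic: the final expression is the method's value.
  def fieldNames(raw: String): Seq[String] =
    raw.split(",").map(_.trim).toSeq

  // Equivalent but discouraged form that this patch removes:
  //   def fieldNames(raw: String): Seq[String] = {
  //     return raw.split(",").map(_.trim).toSeq;
  //   }
}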

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTestData.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTestData.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTestData.scala
index bbe409f..3340c3f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTestData.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTestData.scala
@@ -17,17 +17,16 @@
 
 package org.apache.spark.sql.parquet
 
-import org.apache.hadoop.fs.Path
 import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.Path
 import org.apache.hadoop.mapreduce.Job
 
-import parquet.schema.{MessageTypeParser, MessageType}
-import parquet.hadoop.util.ContextUtil
 import parquet.hadoop.ParquetWriter
+import parquet.hadoop.util.ContextUtil
+import parquet.schema.{MessageType, MessageTypeParser}
 
-import org.apache.spark.sql.catalyst.util.getTempFilePath
 import org.apache.spark.sql.catalyst.expressions.GenericRow
-import java.nio.charset.Charset
+import org.apache.spark.sql.catalyst.util.getTempFilePath
 
 object ParquetTestData {
 
@@ -69,7 +68,7 @@ object ParquetTestData {
 
   lazy val testData = new ParquetRelation("testData", testFile.toURI.toString)
 
-  def writeFile = {
+  def writeFile() = {
     testFile.delete
     val path: Path = new Path(testFile.toURI)
     val job = new Job()
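
Giving writeFile an empty parameter list follows the convention that side-effecting methods are declared and invoked with parentheses, while pure accessors omit them; the call site in ParquetQuerySuite is updated to ParquetTestData.writeFile() further down. A small sketch of the distinction, with made-up method names:

object ParenConventionSketch {
  private var counter = 0

  // Side-effecting: declared (and called) with ().
  def increment(): Unit = { counter += 1 }

  // Pure accessor: no parentheses.
  def current: Int = counter
}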

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala
index 37c90a1..2524a37 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala
@@ -17,14 +17,9 @@
 
 package org.apache.spark.sql
 
-import org.scalatest.{BeforeAndAfterAll, FunSuite}
-
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans._
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.types._
-import org.apache.spark.sql.catalyst.util._
 import org.apache.spark.sql.test._
 
 /* Implicits */
@@ -198,4 +193,4 @@ class DslQuerySuite extends QueryTest {
       (null, null, 5, "E") ::
       (null, null, 6, "F") :: Nil)
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/core/src/test/scala/org/apache/spark/sql/PlannerSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/PlannerSuite.scala
deleted file mode 100644
index 83908ed..0000000
--- a/sql/core/src/test/scala/org/apache/spark/sql/PlannerSuite.scala
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql
-package execution
-
-import org.scalatest.FunSuite
-
-import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.plans.logical
-import org.apache.spark.sql.TestData._
-import org.apache.spark.sql.test.TestSQLContext._
-import org.apache.spark.sql.test.TestSQLContext.planner._
-
-class PlannerSuite extends FunSuite {
-
-
-  test("unions are collapsed") {
-    val query = testData.unionAll(testData).unionAll(testData).logicalPlan
-    val planned = BasicOperators(query).head
-    val logicalUnions = query collect { case u: logical.Union => u}
-    val physicalUnions = planned collect { case u: execution.Union => u}
-
-    assert(logicalUnions.size === 2)
-    assert(physicalUnions.size === 1)
-  }
-
-  test("count is partially aggregated") {
-    val query = testData.groupBy('value)(Count('key)).analyze.logicalPlan
-    val planned = PartialAggregation(query).head
-    val aggregations = planned.collect { case a: Aggregate => a }
-
-    assert(aggregations.size === 2)
-  }
-
-  test("count distinct is not partially aggregated") {
-    val query = testData.groupBy('value)(CountDistinct('key :: Nil)).analyze.logicalPlan
-    val planned = PartialAggregation(query.logicalPlan)
-    assert(planned.isEmpty)
-  }
-
-  test("mixed aggregates are not partially aggregated") {
-    val query =
-      testData.groupBy('value)(Count('value), CountDistinct('key :: Nil)).analyze.logicalPlan
-    val planned = PartialAggregation(query)
-    assert(planned.isEmpty)
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala
index aa84211..5c8cb08 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala
@@ -17,18 +17,12 @@
 
 package org.apache.spark.sql
 
-import org.scalatest.{BeforeAndAfterAll, FunSuite}
+import org.scalatest.FunSuite
 
-import org.apache.spark.sql.catalyst.analysis._
-import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans._
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.types._
 import org.apache.spark.sql.catalyst.util._
-import org.apache.spark.sql.test._
 
 /* Implicits */
-import TestSQLContext._
 
 class QueryTest extends FunSuite {
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 6371fa2..fa4a1d5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -17,13 +17,7 @@
 
 package org.apache.spark.sql
 
-import org.scalatest.{BeforeAndAfterAll, FunSuite}
-
-import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.plans._
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.types._
 import org.apache.spark.sql.test._
 
 /* Implicits */
@@ -37,8 +31,7 @@ class SQLQuerySuite extends QueryTest {
   test("agg") {
     checkAnswer(
       sql("SELECT a, SUM(b) FROM testData2 GROUP BY a"),
-      Seq((1,3),(2,3),(3,3))
-    )
+      Seq((1,3),(2,3),(3,3)))
   }
 
   test("select *") {
@@ -88,13 +81,11 @@ class SQLQuerySuite extends QueryTest {
   ignore("null count") {
     checkAnswer(
       sql("SELECT a, COUNT(b) FROM testData3"),
-      Seq((1,0), (2, 1))
-    )
+      Seq((1,0), (2, 1)))
 
     checkAnswer(
       testData3.groupBy()(Count('a), Count('b), Count(1), CountDistinct('a :: Nil), CountDistinct('b :: Nil)),
-      (2, 1, 2, 2, 1) :: Nil
-    )
+      (2, 1, 2, 2, 1) :: Nil)
   }
 
   test("inner join where, one match per row") {
@@ -104,8 +95,7 @@ class SQLQuerySuite extends QueryTest {
         (1, "A", 1, "a"),
         (2, "B", 2, "b"),
         (3, "C", 3, "c"),
-        (4, "D", 4, "d")
-      ))
+        (4, "D", 4, "d")))
   }
 
   test("inner join ON, one match per row") {
@@ -115,8 +105,7 @@ class SQLQuerySuite extends QueryTest {
         (1, "A", 1, "a"),
         (2, "B", 2, "b"),
         (3, "C", 3, "c"),
-        (4, "D", 4, "d")
-      ))
+        (4, "D", 4, "d")))
   }
 
   test("inner join, where, multiple matches") {
@@ -129,8 +118,7 @@ class SQLQuerySuite extends QueryTest {
       (1,1,1,1) ::
       (1,1,1,2) ::
       (1,2,1,1) ::
-      (1,2,1,2) :: Nil
-    )
+      (1,2,1,2) :: Nil)
   }
 
   test("inner join, no matches") {
@@ -164,7 +152,7 @@ class SQLQuerySuite extends QueryTest {
         row => Seq.fill(16)((row ++ row).toSeq)).collect().toSeq)
   }
 
-  ignore("cartisian product join") {
+  ignore("cartesian product join") {
     checkAnswer(
       testData3.join(testData3),
       (1, null, 1, null) ::

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala
index 6402925..0bb13cf 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala
@@ -37,16 +37,14 @@ object TestData {
       TestData2(2, 1) ::
       TestData2(2, 2) ::
       TestData2(3, 1) ::
-      TestData2(3, 2) :: Nil
-    )
+      TestData2(3, 2) :: Nil)
   testData2.registerAsTable("testData2")
 
   // TODO: There is no way to express null primitives as case classes currently...
   val testData3 =
     logical.LocalRelation('a.int, 'b.int).loadData(
       (1, null) ::
-      (2, 2) :: Nil
-    )
+      (2, 2) :: Nil)
 
   case class UpperCaseData(N: Int, L: String)
   val upperCaseData =
@@ -56,8 +54,7 @@ object TestData {
       UpperCaseData(3, "C") ::
       UpperCaseData(4, "D") ::
       UpperCaseData(5, "E") ::
-      UpperCaseData(6, "F") :: Nil
-    )
+      UpperCaseData(6, "F") :: Nil)
   upperCaseData.registerAsTable("upperCaseData")
 
   case class LowerCaseData(n: Int, l: String)
@@ -66,7 +63,6 @@ object TestData {
       LowerCaseData(1, "a") ::
       LowerCaseData(2, "b") ::
       LowerCaseData(3, "c") ::
-      LowerCaseData(4, "d") :: Nil
-    )
+      LowerCaseData(4, "d") :: Nil)
   lowerCaseData.registerAsTable("lowerCaseData")
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/core/src/test/scala/org/apache/spark/sql/TgfSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TgfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TgfSuite.scala
deleted file mode 100644
index 08265b7..0000000
--- a/sql/core/src/test/scala/org/apache/spark/sql/TgfSuite.scala
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql
-package execution
-
-import org.scalatest.{BeforeAndAfterAll, FunSuite}
-
-import org.apache.spark.sql.catalyst.analysis._
-import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.plans._
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.types._
-import org.apache.spark.sql.test._
-
-
-import TestSQLContext._
-
-/**
- * This is an example TGF that uses UnresolvedAttributes 'name and 'age to access specific columns
- * from the input data.  These will be replaced during analysis with specific AttributeReferences
- * and then bound to specific ordinals during query planning. While TGFs could also access specific
- * columns using hand-coded ordinals, doing so violates data independence.
- *
- * Note: this is only a rough example of how TGFs can be expressed, the final version will likely
- * involve a lot more sugar for cleaner use in Scala/Java/etc.
- */
-case class ExampleTGF(input: Seq[Attribute] = Seq('name, 'age)) extends Generator {
-  def children = input
-  protected def makeOutput() = 'nameAndAge.string :: Nil
-
-  val Seq(nameAttr, ageAttr) = input
-
-  override def apply(input: Row): TraversableOnce[Row] = {
-    val name = nameAttr.apply(input)
-    val age = ageAttr.apply(input).asInstanceOf[Int]
-
-    Iterator(
-      new GenericRow(Array[Any](s"$name is $age years old")),
-      new GenericRow(Array[Any](s"Next year, $name will be ${age + 1} years old")))
-  }
-}
-
-class TgfSuite extends QueryTest {
-  val inputData =
-    logical.LocalRelation('name.string, 'age.int).loadData(
-      ("michael", 29) :: Nil
-    )
-
-  test("simple tgf example") {
-    checkAnswer(
-      inputData.generate(ExampleTGF()),
-      Seq(
-        "michael is 29 years old" :: Nil,
-        "Next year, michael will be 30 years old" :: Nil))
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala
new file mode 100644
index 0000000..658ff09
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution
+
+import org.scalatest.FunSuite
+
+import org.apache.spark.sql.TestData._
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.plans.logical
+import org.apache.spark.sql.execution
+import org.apache.spark.sql.test.TestSQLContext._
+import org.apache.spark.sql.test.TestSQLContext.planner._
+
+class PlannerSuite extends FunSuite {
+  test("unions are collapsed") {
+    val query = testData.unionAll(testData).unionAll(testData).logicalPlan
+    val planned = BasicOperators(query).head
+    val logicalUnions = query collect { case u: logical.Union => u}
+    val physicalUnions = planned collect { case u: execution.Union => u}
+
+    assert(logicalUnions.size === 2)
+    assert(physicalUnions.size === 1)
+  }
+
+  test("count is partially aggregated") {
+    val query = testData.groupBy('value)(Count('key)).analyze.logicalPlan
+    val planned = PartialAggregation(query).head
+    val aggregations = planned.collect { case a: Aggregate => a }
+
+    assert(aggregations.size === 2)
+  }
+
+  test("count distinct is not partially aggregated") {
+    val query = testData.groupBy('value)(CountDistinct('key :: Nil)).analyze.logicalPlan
+    val planned = PartialAggregation(query.logicalPlan)
+    assert(planned.isEmpty)
+  }
+
+  test("mixed aggregates are not partially aggregated") {
+    val query =
+      testData.groupBy('value)(Count('value), CountDistinct('key :: Nil)).analyze.logicalPlan
+    val planned = PartialAggregation(query)
+    assert(planned.isEmpty)
+  }
+}
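
The partial-aggregation tests above check that a plain Count is planned as a map-side partial aggregate plus a final merge, while CountDistinct (and any mix involving it) is not split that way. A rough standalone sketch of why, using plain collections rather than the Catalyst planner:

object PartialAggSketch extends App {
  // Rows spread across two "partitions".
  val partitions = Seq(Seq("a", "a", "b"), Seq("b", "c"))

  // COUNT can be partially aggregated: count per partition, then sum the partial counts.
  val partialCounts = partitions.map(_.size.toLong)
  val totalCount = partialCounts.sum                    // 5

  // COUNT(DISTINCT ...) cannot: per-partition distinct counts (2 and 2) do not merge into
  // the true answer (3); the distinct values themselves have to be combined first.
  val distinctCount = partitions.flatten.distinct.size  // 3

  println((totalCount, distinctCount))
}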

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/core/src/test/scala/org/apache/spark/sql/execution/TgfSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/TgfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/TgfSuite.scala
new file mode 100644
index 0000000..93b2a30
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/TgfSuite.scala
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+package execution
+
+import org.apache.spark.sql.QueryTest
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.plans._
+
+/* Implicit conversions */
+import org.apache.spark.sql.test.TestSQLContext._
+
+/**
+ * This is an example TGF that uses UnresolvedAttributes 'name and 'age to access specific columns
+ * from the input data.  These will be replaced during analysis with specific AttributeReferences
+ * and then bound to specific ordinals during query planning. While TGFs could also access specific
+ * columns using hand-coded ordinals, doing so violates data independence.
+ *
+ * Note: this is only a rough example of how TGFs can be expressed, the final version will likely
+ * involve a lot more sugar for cleaner use in Scala/Java/etc.
+ */
+case class ExampleTGF(input: Seq[Attribute] = Seq('name, 'age)) extends Generator {
+  def children = input
+  protected def makeOutput() = 'nameAndAge.string :: Nil
+
+  val Seq(nameAttr, ageAttr) = input
+
+  override def apply(input: Row): TraversableOnce[Row] = {
+    val name = nameAttr.apply(input)
+    val age = ageAttr.apply(input).asInstanceOf[Int]
+
+    Iterator(
+      new GenericRow(Array[Any](s"$name is $age years old")),
+      new GenericRow(Array[Any](s"Next year, $name will be ${age + 1} years old")))
+  }
+}
+
+class TgfSuite extends QueryTest {
+  val inputData =
+    logical.LocalRelation('name.string, 'age.int).loadData(
+      ("michael", 29) :: Nil
+    )
+
+  test("simple tgf example") {
+    checkAnswer(
+      inputData.generate(ExampleTGF()),
+      Seq(
+        "michael is 29 years old" :: Nil,
+        "Next year, michael will be 30 years old" :: Nil))
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
index 8b2ccb5..71caa70 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
@@ -19,21 +19,20 @@ package org.apache.spark.sql.parquet
 
 import org.scalatest.{BeforeAndAfterAll, FunSuite}
 
+import org.apache.hadoop.fs.{FileSystem, Path}
+import org.apache.hadoop.mapreduce.Job
+import parquet.hadoop.ParquetFileWriter
+import parquet.hadoop.util.ContextUtil
+import parquet.schema.MessageTypeParser
+
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.expressions.Row
 import org.apache.spark.sql.catalyst.util.getTempFilePath
 import org.apache.spark.sql.test.TestSQLContext
 
-import org.apache.hadoop.mapreduce.Job
-import org.apache.hadoop.fs.{Path, FileSystem}
-
-import parquet.schema.MessageTypeParser
-import parquet.hadoop.ParquetFileWriter
-import parquet.hadoop.util.ContextUtil
-
 class ParquetQuerySuite extends FunSuite with BeforeAndAfterAll {
   override def beforeAll() {
-    ParquetTestData.writeFile
+    ParquetTestData.writeFile()
   }
 
   override def afterAll() {

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/hive/src/main/scala/org/apache/hadoop/mapred/SparkHadoopWriter.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/hadoop/mapred/SparkHadoopWriter.scala b/sql/hive/src/main/scala/org/apache/hadoop/mapred/SparkHadoopWriter.scala
index 08d390e..0b38731 100644
--- a/sql/hive/src/main/scala/org/apache/hadoop/mapred/SparkHadoopWriter.scala
+++ b/sql/hive/src/main/scala/org/apache/hadoop/mapred/SparkHadoopWriter.scala
@@ -22,15 +22,14 @@ import java.text.NumberFormat
 import java.util.Date
 
 import org.apache.hadoop.fs.Path
+import org.apache.hadoop.hive.ql.exec.{FileSinkOperator, Utilities}
+import org.apache.hadoop.hive.ql.io.{HiveFileFormatUtils, HiveOutputFormat}
+import org.apache.hadoop.hive.ql.plan.FileSinkDesc
 import org.apache.hadoop.io.Writable
 
 import org.apache.spark.Logging
 import org.apache.spark.SerializableWritable
 
-import org.apache.hadoop.hive.ql.exec.{Utilities, FileSinkOperator}
-import org.apache.hadoop.hive.ql.io.{HiveFileFormatUtils, HiveOutputFormat}
-import org.apache.hadoop.hive.ql.plan.FileSinkDesc
-
 /**
  * Internal helper class that saves an RDD using a Hive OutputFormat.
  * It is based on [[SparkHadoopWriter]].

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index 4aad876..491b3a6 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -18,25 +18,26 @@
 package org.apache.spark.sql
 package hive
 
-import java.io.{PrintStream, InputStreamReader, BufferedReader, File}
-import java.util.{ArrayList => JArrayList}
 import scala.language.implicitConversions
 
-import org.apache.spark.SparkContext
+import java.io.{BufferedReader, File, InputStreamReader, PrintStream}
+import java.util.{ArrayList => JArrayList}
+
 import org.apache.hadoop.hive.conf.HiveConf
-import org.apache.hadoop.hive.ql.session.SessionState
-import org.apache.hadoop.hive.ql.processors.{CommandProcessorResponse, CommandProcessorFactory}
-import org.apache.hadoop.hive.ql.processors.CommandProcessor
 import org.apache.hadoop.hive.ql.Driver
-import org.apache.spark.rdd.RDD
-
-import catalyst.analysis.{Analyzer, OverrideCatalog}
-import catalyst.expressions.GenericRow
-import catalyst.plans.logical.{BaseRelation, LogicalPlan, NativeCommand, ExplainCommand}
-import catalyst.types._
+import org.apache.hadoop.hive.ql.processors._
+import org.apache.hadoop.hive.ql.session.SessionState
 
+import org.apache.spark.SparkContext
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.catalyst.analysis.{Analyzer, OverrideCatalog}
+import org.apache.spark.sql.catalyst.expressions.GenericRow
+import org.apache.spark.sql.catalyst.plans.logical.{BaseRelation, LogicalPlan}
+import org.apache.spark.sql.catalyst.plans.logical.{NativeCommand, ExplainCommand}
+import org.apache.spark.sql.catalyst.types._
 import org.apache.spark.sql.execution._
 
+/* Implicit conversions */
 import scala.collection.JavaConversions._
 
 /**

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index e4d5072..a5db283 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -27,12 +27,12 @@ import org.apache.hadoop.hive.ql.plan.TableDesc
 import org.apache.hadoop.hive.ql.session.SessionState
 import org.apache.hadoop.hive.serde2.Deserializer
 
-import catalyst.analysis.Catalog
-import catalyst.expressions._
-import catalyst.plans.logical
-import catalyst.plans.logical._
-import catalyst.rules._
-import catalyst.types._
+import org.apache.spark.sql.catalyst.analysis.Catalog
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.plans.logical
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.catalyst.rules._
+import org.apache.spark.sql.catalyst.types._
 
 import scala.collection.JavaConversions._
 
@@ -45,7 +45,7 @@ class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with Logging {
       db: Option[String],
       tableName: String,
       alias: Option[String]): LogicalPlan = {
-    val databaseName = db.getOrElse(hive.sessionState.getCurrentDatabase())
+    val databaseName = db.getOrElse(hive.sessionState.getCurrentDatabase)
     val table = client.getTable(databaseName, tableName)
     val partitions: Seq[Partition] =
       if (table.isPartitioned) {
@@ -91,7 +91,7 @@ class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with Logging {
   object CreateTables extends Rule[LogicalPlan] {
     def apply(plan: LogicalPlan): LogicalPlan = plan transform {
       case InsertIntoCreatedTable(db, tableName, child) =>
-        val databaseName = db.getOrElse(SessionState.get.getCurrentDatabase())
+        val databaseName = db.getOrElse(SessionState.get.getCurrentDatabase)
 
         createTable(databaseName, tableName, child.output)
 
@@ -123,8 +123,8 @@ class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with Logging {
         } else {
           // Only do the casting when child output data types differ from table output data types.
           val castedChildOutput = child.output.zip(table.output).map {
-            case (input, table) if input.dataType != table.dataType =>
-              Alias(Cast(input, table.dataType), input.name)()
+            case (input, output) if input.dataType != output.dataType =>
+              Alias(Cast(input, output.dataType), input.name)()
             case (input, _) => input
           }
 
@@ -135,7 +135,7 @@ class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with Logging {
 
   /**
    * UNIMPLEMENTED: It needs to be decided how we will persist in-memory tables to the metastore.
-   * For now, if this functionallity is desired mix in the in-memory [[OverrideCatalog]].
+   * For now, if this functionality is desired mix in the in-memory [[OverrideCatalog]].
    */
   override def registerTable(
       databaseName: Option[String], tableName: String, plan: LogicalPlan): Unit = ???
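
Renaming the second pattern binder from table to output in the hunk above avoids shadowing the enclosing table value whose output the child is being compared against; the shadowed form compiled fine, so the rename is purely for readability. A toy illustration with made-up types:

object ShadowingSketch extends App {
  case class Col(name: String, dataType: String)

  val table = Seq(Col("key", "int"), Col("value", "string"))
  val child = Seq(Col("key", "int"), Col("value", "int"))

  // Binding the table-side element as `output` keeps the outer `table` unambiguous in the body.
  val casted = child.zip(table).map {
    case (input, output) if input.dataType != output.dataType =>
      input.copy(dataType = output.dataType)  // stand-in for Alias(Cast(input, output.dataType), ...)
    case (input, _) => input
  }
  println(casted)
}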

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index 4f33a29..8e76a73 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -18,18 +18,19 @@
 package org.apache.spark.sql
 package hive
 
-import scala.collection.JavaConversions._
-
 import org.apache.hadoop.hive.ql.lib.Node
 import org.apache.hadoop.hive.ql.parse._
 import org.apache.hadoop.hive.ql.plan.PlanUtils
 
-import catalyst.analysis._
-import catalyst.expressions._
-import catalyst.plans._
-import catalyst.plans.logical
-import catalyst.plans.logical._
-import catalyst.types._
+import org.apache.spark.sql.catalyst.analysis._
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.plans._
+import org.apache.spark.sql.catalyst.plans.logical
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.catalyst.types._
+
+/* Implicit conversions */
+import scala.collection.JavaConversions._
 
 /**
  * Used when we need to start parsing the AST before deciding that we are going to pass the command
@@ -48,7 +49,7 @@ case class AddJar(jarPath: String) extends Command
 
 case class AddFile(filePath: String) extends Command
 
-/** Provides a mapping from HiveQL statments to catalyst logical plans and expression trees. */
+/** Provides a mapping from HiveQL statements to catalyst logical plans and expression trees. */
 object HiveQl {
   protected val nativeCommands = Seq(
     "TOK_DESCFUNCTION",
@@ -150,13 +151,13 @@ object HiveQl {
     }
 
     /**
-     * Returns a scala.Seq equivilent to [s] or Nil if [s] is null.
+     * Returns a scala.Seq equivalent to [s] or Nil if [s] is null.
      */
     private def nilIfEmpty[A](s: java.util.List[A]): Seq[A] =
       Option(s).map(_.toSeq).getOrElse(Nil)
 
     /**
-     * Returns this ASTNode with the text changed to `newText``.
+     * Returns this ASTNode with the text changed to `newText`.
      */
     def withText(newText: String): ASTNode = {
       n.token.asInstanceOf[org.antlr.runtime.CommonToken].setText(newText)
@@ -667,7 +668,7 @@ object HiveQl {
     case Token(allJoinTokens(joinToken),
            relation1 ::
            relation2 :: other) =>
-      assert(other.size <= 1, s"Unhandled join child ${other}")
+      assert(other.size <= 1, s"Unhandled join child $other")
       val joinType = joinToken match {
         case "TOK_JOIN" => Inner
         case "TOK_RIGHTOUTERJOIN" => RightOuter

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
index 92d8420..c71141c 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
@@ -18,13 +18,12 @@
 package org.apache.spark.sql
 package hive
 
-import catalyst.expressions._
-import catalyst.planning._
-import catalyst.plans._
-import catalyst.plans.logical.{BaseRelation, LogicalPlan}
-
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.planning._
+import org.apache.spark.sql.catalyst.plans._
+import org.apache.spark.sql.catalyst.plans.logical.{BaseRelation, LogicalPlan}
 import org.apache.spark.sql.execution._
-import org.apache.spark.sql.parquet.{ParquetRelation, InsertIntoParquetTable, ParquetTableScan}
+import org.apache.spark.sql.parquet.{InsertIntoParquetTable, ParquetRelation, ParquetTableScan}
 
 trait HiveStrategies {
   // Possibly being too clever with types here... or not clever enough.

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala
index f20e9d4..dc4181e 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/ScriptTransformation.scala
@@ -18,11 +18,12 @@
 package org.apache.spark.sql
 package hive
 
-import java.io.{InputStreamReader, BufferedReader}
+import java.io.{BufferedReader, InputStreamReader}
 
-import catalyst.expressions._
+import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.execution._
 
+/* Implicit conversions */
 import scala.collection.JavaConversions._
 
 /**

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
index 71d751c..99dc85e 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
@@ -19,19 +19,18 @@ package org.apache.spark.sql
 package hive
 
 import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.{Path, PathFilter}
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants._
+import org.apache.hadoop.hive.ql.exec.Utilities
 import org.apache.hadoop.hive.ql.metadata.{Partition => HivePartition, Table => HiveTable}
 import org.apache.hadoop.hive.ql.plan.TableDesc
 import org.apache.hadoop.hive.serde2.Deserializer
-import org.apache.hadoop.hive.ql.exec.Utilities
 import org.apache.hadoop.io.Writable
-import org.apache.hadoop.fs.{Path, PathFilter}
-import org.apache.hadoop.mapred.{FileInputFormat, JobConf, InputFormat}
+import org.apache.hadoop.mapred.{FileInputFormat, InputFormat, JobConf}
 
 import org.apache.spark.SerializableWritable
 import org.apache.spark.broadcast.Broadcast
-import org.apache.spark.rdd.{HadoopRDD, UnionRDD, EmptyRDD, RDD}
-
+import org.apache.spark.rdd.{EmptyRDD, HadoopRDD, RDD, UnionRDD}
 
 /**
  * A trait for subclasses that handle table scans.
@@ -40,7 +39,6 @@ private[hive] sealed trait TableReader {
   def makeRDDForTable(hiveTable: HiveTable): RDD[_]
 
   def makeRDDForPartitionedTable(partitions: Seq[HivePartition]): RDD[_]
-
 }
 
 
@@ -57,7 +55,6 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient sc: HiveCon
   private val _minSplitsPerRDD = math.max(
     sc.hiveconf.getInt("mapred.map.tasks", 1), sc.sparkContext.defaultMinSplits)
 
-
   // TODO: set aws s3 credentials.
 
   private val _broadcastedHiveConf =
@@ -85,8 +82,8 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient sc: HiveCon
   def makeRDDForTable(
       hiveTable: HiveTable,
       deserializerClass: Class[_ <: Deserializer],
-      filterOpt: Option[PathFilter]): RDD[_] =
-  {
+      filterOpt: Option[PathFilter]): RDD[_] = {
+
     assert(!hiveTable.isPartitioned, """makeRDDForTable() cannot be called on a partitioned table,
       since input formats may differ across partitions. Use makeRDDForTablePartitions() instead.""")
 
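The brace move in makeRDDForTable (and in makeRDDForPartitionedTable and createHadoopRdd below) applies a declaration style used throughout the patch: multi-line parameter lists indented, the return type and `= {` kept on the last line of the signature, and a blank line before the body. A schematic declaration in that shape (names and types are placeholders):

    // Placeholder method showing the signature layout only.
    def makeSomethingForTable(
        tableName: String,
        filterOpt: Option[String] = None): Seq[String] = {

      val base = Seq(tableName)
      filterOpt.fold(base)(f => base :+ f)
    }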
@@ -115,6 +112,7 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient sc: HiveCon
           sys.error(s"Unable to deserialize non-Writable: $value of ${value.getClass.getName}")
       }
     }
+
     deserializedHadoopRDD
   }
 
@@ -136,8 +134,8 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient sc: HiveCon
    */
   def makeRDDForPartitionedTable(
       partitionToDeserializer: Map[HivePartition, Class[_ <: Deserializer]],
-      filterOpt: Option[PathFilter]): RDD[_] =
-  {
+      filterOpt: Option[PathFilter]): RDD[_] = {
+
     val hivePartitionRDDs = partitionToDeserializer.map { case (partition, partDeserializer) =>
       val partDesc = Utilities.getPartitionDesc(partition)
       val partPath = partition.getPartitionPath
@@ -178,6 +176,7 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient sc: HiveCon
         }
       }
     }.toSeq
+
     // Even if we don't use any partitions, we still need an empty RDD
     if (hivePartitionRDDs.size == 0) {
       new EmptyRDD[Object](sc.sparkContext)
@@ -207,8 +206,8 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient sc: HiveCon
   private def createHadoopRdd(
     tableDesc: TableDesc,
     path: String,
-    inputFormatClass: Class[InputFormat[Writable, Writable]])
-  : RDD[Writable] = {
+    inputFormatClass: Class[InputFormat[Writable, Writable]]): RDD[Writable] = {
+
     val initializeJobConfFunc = HadoopTableReader.initializeLocalJobConfFunc(path, tableDesc) _
 
     val rdd = new HadoopRDD(
@@ -227,7 +226,6 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient sc: HiveCon
 }
 
 private[hive] object HadoopTableReader {
-
   /**
    * Curried. After given an argument for 'path', the resulting JobConf => Unit closure is used to
    * instantiate a HadoopRDD.
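The "Curried" comment above describes a pattern worth spelling out: binding the path first yields a closure that can be shipped around and applied lazily when each task materializes its job configuration. The shape of that pattern with a stand-in config type (Conf and the path literal are illustrative, not the Hadoop API):

    // Stand-in for JobConf; only the deferred mutation matters here.
    final class Conf { var inputDir: String = "" }

    // Curried: the path is bound now, the Conf is supplied later.
    def initializeLocalConfFunc(path: String)(conf: Conf): Unit = {
      conf.inputDir = path
    }

    // Partially applied into a closure; nothing touches a Conf until it is invoked.
    val initFunc: Conf => Unit = initializeLocalConfFunc("/tmp/warehouse/src") _

    val conf = new Conf
    initFunc(conf)   // conf.inputDir is now "/tmp/warehouse/src"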

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
index 17ae4ef..a26b0ff 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
@@ -22,23 +22,22 @@ import java.io.File
 import java.util.{Set => JavaSet}
 
 import scala.collection.mutable
-import scala.collection.JavaConversions._
 import scala.language.implicitConversions
 
-import org.apache.hadoop.hive.metastore.api.{SerDeInfo, StorageDescriptor}
-import org.apache.hadoop.hive.metastore.MetaStoreUtils
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry
-import org.apache.hadoop.hive.ql.io.avro.{AvroContainerOutputFormat, AvroContainerInputFormat}
+import org.apache.hadoop.hive.ql.io.avro.{AvroContainerInputFormat, AvroContainerOutputFormat}
 import org.apache.hadoop.hive.ql.metadata.Table
-import org.apache.hadoop.hive.serde2.avro.AvroSerDe
-import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
 import org.apache.hadoop.hive.serde2.RegexSerDe
+import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
+import org.apache.hadoop.hive.serde2.avro.AvroSerDe
 
-import org.apache.spark.{SparkContext, SparkConf}
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.sql.catalyst.analysis._
+import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, NativeCommand}
+import org.apache.spark.sql.catalyst.util._
 
-import catalyst.analysis._
-import catalyst.plans.logical.{LogicalPlan, NativeCommand}
-import catalyst.util._
+/* Implicit conversions */
+import scala.collection.JavaConversions._
 
 object TestHive
   extends TestHiveContext(new SparkContext("local", "TestSQLContext", new SparkConf()))
@@ -52,7 +51,7 @@ object TestHive
  *
  * TestHive is singleton object version of this class because instantiating multiple copies of the
  * hive metastore seems to lead to weird non-deterministic failures.  Therefore, the execution of
- * testcases that rely on TestHive must be serialized.
+ * test cases that rely on TestHive must be serialized.
  */
 class TestHiveContext(sc: SparkContext) extends LocalHiveContext(sc) {
   self =>

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveOperators.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveOperators.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveOperators.scala
index d20fd87..9aa9e17 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveOperators.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveOperators.scala
@@ -24,24 +24,18 @@ import org.apache.hadoop.hive.ql.Context
 import org.apache.hadoop.hive.ql.metadata.{Partition => HivePartition, Hive}
 import org.apache.hadoop.hive.ql.plan.{TableDesc, FileSinkDesc}
 import org.apache.hadoop.hive.serde2.Serializer
-import org.apache.hadoop.hive.serde2.objectinspector._
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption
+import org.apache.hadoop.hive.serde2.objectinspector._
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveDecimalObjectInspector
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveVarcharObjectInspector
-
-import org.apache.hadoop.fs.Path
 import org.apache.hadoop.io.Writable
 import org.apache.hadoop.mapred._
 
-import catalyst.expressions._
-import catalyst.types.{BooleanType, DataType}
-import org.apache.spark.{TaskContext, SparkException}
-import catalyst.expressions.Cast
 import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.types.{BooleanType, DataType}
 import org.apache.spark.sql.execution._
-
-import scala.Some
-import scala.collection.immutable.ListMap
+import org.apache.spark.{TaskContext, SparkException}
 
 /* Implicits */
 import scala.collection.JavaConversions._
@@ -194,20 +188,26 @@ case class InsertIntoHiveTable(
    * TODO: Consolidate all hive OI/data interface code.
    */
   protected def wrap(a: (Any, ObjectInspector)): Any = a match {
-    case (s: String, oi: JavaHiveVarcharObjectInspector) => new HiveVarchar(s, s.size)
+    case (s: String, oi: JavaHiveVarcharObjectInspector) =>
+      new HiveVarchar(s, s.size)
+
     case (bd: BigDecimal, oi: JavaHiveDecimalObjectInspector) =>
       new HiveDecimal(bd.underlying())
+
     case (row: Row, oi: StandardStructObjectInspector) =>
       val struct = oi.create()
-      row.zip(oi.getAllStructFieldRefs).foreach {
+      row.zip(oi.getAllStructFieldRefs: Seq[StructField]).foreach {
         case (data, field) =>
           oi.setStructFieldData(struct, field, wrap(data, field.getFieldObjectInspector))
       }
       struct
+
     case (s: Seq[_], oi: ListObjectInspector) =>
       val wrappedSeq = s.map(wrap(_, oi.getListElementObjectInspector))
       seqAsJavaList(wrappedSeq)
-    case (obj, _) => obj
+
+    case (obj, _) =>
+      obj
   }
 
   def saveAsHiveFile(
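The reformatted wrap above is a type-directed, recursive conversion: each case pairs a value with the ObjectInspector that describes it, container cases recurse on their elements, and anything unrecognized falls through unchanged. The control flow, sketched without the Hive serde classes (the Inspector hierarchy below is a made-up stand-in):

    // Made-up stand-ins for Hive ObjectInspectors; only the dispatch shape is real.
    sealed trait Inspector
    case object StringInspector extends Inspector
    case class ListInspector(element: Inspector) extends Inspector

    def wrapSketch(a: (Any, Inspector)): Any = a match {
      case (s: String, StringInspector) =>
        s.toUpperCase                         // a leaf conversion, e.g. String -> HiveVarchar

      case (xs: Seq[_], ListInspector(elem)) =>
        xs.map(x => wrapSketch((x, elem)))    // recurse into the container

      case (obj, _) =>
        obj                                   // pass-through, as in the final case above
    }

    // wrapSketch((Seq("a", "b"), ListInspector(StringInspector)))  ==  Seq("A", "B")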
@@ -324,7 +324,7 @@ case class InsertIntoHiveTable(
         case (key, Some(value)) => key -> value
         case (key, None) => key -> "" // Should not reach here right now.
       }
-      val partVals = MetaStoreUtils.getPvals(table.hiveQlTable.getPartCols(), partitionSpec)
+      val partVals = MetaStoreUtils.getPvals(table.hiveQlTable.getPartCols, partitionSpec)
       db.validatePartitionNameCharacters(partVals)
       // inheritTableSpecs is set to true. It should be set to false for a IMPORT query
       // which is currently considered as a Hive native command.

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
index 5e775d6..72ccd4f 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
@@ -18,22 +18,24 @@
 package org.apache.spark.sql
 package hive
 
-import scala.collection.JavaConversions._
 import scala.collection.mutable.ArrayBuffer
 
 import org.apache.hadoop.hive.common.`type`.HiveDecimal
-import org.apache.hadoop.hive.serde2.{io => hiveIo}
-import org.apache.hadoop.hive.serde2.objectinspector.primitive._
-import org.apache.hadoop.hive.serde2.objectinspector._
+import org.apache.hadoop.hive.ql.exec.UDF
 import org.apache.hadoop.hive.ql.exec.{FunctionInfo, FunctionRegistry}
 import org.apache.hadoop.hive.ql.udf.generic._
-import org.apache.hadoop.hive.ql.exec.UDF
+import org.apache.hadoop.hive.serde2.objectinspector._
+import org.apache.hadoop.hive.serde2.objectinspector.primitive._
+import org.apache.hadoop.hive.serde2.{io => hiveIo}
 import org.apache.hadoop.{io => hadoopIo}
 
-import catalyst.analysis
-import catalyst.expressions._
-import catalyst.types
-import catalyst.types._
+import org.apache.spark.sql.catalyst.analysis
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.types
+import org.apache.spark.sql.catalyst.types._
+
+/* Implicit conversions */
+import scala.collection.JavaConversions._
 
 object HiveFunctionRegistry
   extends analysis.FunctionRegistry with HiveFunctionFactory with HiveInspectors {
@@ -148,7 +150,7 @@ abstract class HiveUdf
 }
 
 case class HiveSimpleUdf(name: String, children: Seq[Expression]) extends HiveUdf {
-  import HiveFunctionRegistry._
+  import org.apache.spark.sql.hive.HiveFunctionRegistry._
   type UDFType = UDF
 
   @transient

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala
index a12ab23..02ee2a0 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala
@@ -20,7 +20,6 @@ package sql
 package hive
 package execution
 
-
 import org.scalatest.{FunSuite, BeforeAndAfterAll}
 
 class ConcurrentHiveSuite extends FunSuite with BeforeAndAfterAll {
@@ -35,4 +34,4 @@ class ConcurrentHiveSuite extends FunSuite with BeforeAndAfterAll {
       }
     }
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
index 8a5b97b..e8fcc27 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
@@ -20,12 +20,11 @@ package hive
 package execution
 
 import java.io._
-import org.scalatest.{BeforeAndAfterAll, FunSuite, GivenWhenThen}
 
-import catalyst.plans.logical.{ExplainCommand, NativeCommand}
-import catalyst.plans._
-import catalyst.util._
+import org.scalatest.{BeforeAndAfterAll, FunSuite, GivenWhenThen}
 
+import org.apache.spark.sql.catalyst.plans.logical.{ExplainCommand, NativeCommand}
+import org.apache.spark.sql.catalyst.util._
 import org.apache.spark.sql.execution.Sort
 
 /**
@@ -38,7 +37,8 @@ import org.apache.spark.sql.execution.Sort
  * See the documentation of public vals in this class for information on how test execution can be
  * configured using system properties.
  */
-abstract class HiveComparisonTest extends FunSuite with BeforeAndAfterAll with GivenWhenThen with Logging {
+abstract class HiveComparisonTest
+  extends FunSuite with BeforeAndAfterAll with GivenWhenThen with Logging {
 
   /**
    * When set, any cache files that result in test failures will be deleted.  Used when the test
@@ -376,4 +376,4 @@ abstract class HiveComparisonTest extends FunSuite with BeforeAndAfterAll with G
       }
     }
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index d010023..16bcded 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -19,11 +19,6 @@ package org.apache.spark.sql
 package hive
 package execution
 
-
-import java.io._
-
-import util._
-
 /**
  * Runs the test cases that are included in the hive distribution.
  */

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala
index f0a4ec3..2d2f133 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala
@@ -19,9 +19,9 @@ package org.apache.spark.sql
 package hive
 package execution
 
-import java.io._
+import java.io.File
 
-import catalyst.util._
+import org.apache.spark.sql.catalyst.util._
 
 /**
  * A framework for running the query tests that are listed as a set of text files.
@@ -67,4 +67,4 @@ abstract class HiveQueryFileTest extends HiveComparisonTest {
           ignore(testCaseName) {}
       }
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 28a5d26..b804634 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -19,7 +19,6 @@ package org.apache.spark.sql
 package hive
 package execution
 
-
 /**
  * A set of test cases expressed in Hive QL that are not covered by the tests included in the hive distribution.
  */
@@ -141,4 +140,4 @@ class HiveQuerySuite extends HiveComparisonTest {
     sql("SELECT * FROM src TABLESAMPLE(0.1 PERCENT) s")
   }
 
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
index 0dd79fa..996bd4e 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala
@@ -23,8 +23,6 @@ package execution
  * A set of test cases expressed in Hive QL that are not covered by the tests included in the hive distribution.
  */
 class HiveResolutionSuite extends HiveComparisonTest {
-  import TestHive._
-
   createQueryTest("table.attr",
     "SELECT src.key FROM src ORDER BY key LIMIT 1")
 
@@ -62,4 +60,4 @@ class HiveResolutionSuite extends HiveComparisonTest {
   createQueryTest("tableName.attr from aliased subquery",
     "SELECT src.key FROM (SELECT * FROM src ORDER BY key LIMIT 1) a") */
 
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala
index 8542f42..bb65c91 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala
@@ -19,10 +19,11 @@ package org.apache.spark.sql
 package hive
 package execution
 
-import scala.collection.JavaConversions._
-
 import org.apache.spark.sql.hive.TestHive
 
+/* Implicit conversions */
+import scala.collection.JavaConversions._
+
 /**
  * A set of test cases that validate partition and column pruning.
  */

http://git-wip-us.apache.org/repos/asf/spark/blob/8265dc77/sql/hive/src/test/scala/org/apache/spark/sql/parquet/HiveParquetSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/parquet/HiveParquetSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/parquet/HiveParquetSuite.scala
index ee90061..05ad85b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/parquet/HiveParquetSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/parquet/HiveParquetSuite.scala
@@ -19,21 +19,23 @@ package org.apache.spark.sql.parquet
 
 import java.io.File
 
-import org.scalatest.{BeforeAndAfterEach, BeforeAndAfterAll, FunSuite}
+import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}
 
+import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
 import org.apache.spark.sql.catalyst.expressions.Row
 import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
 import org.apache.spark.sql.catalyst.util.getTempFilePath
 import org.apache.spark.sql.hive.TestHive
 
-
 class HiveParquetSuite extends FunSuite with BeforeAndAfterAll with BeforeAndAfterEach {
-
   val filename = getTempFilePath("parquettest").getCanonicalFile.toURI.toString
 
   // runs a SQL and optionally resolves one Parquet table
-  def runQuery(querystr: String, tableName: Option[String] = None, filename: Option[String] = None): Array[Row] = {
+  def runQuery(
+      querystr: String,
+      tableName: Option[String] = None,
+      filename: Option[String] = None): Array[Row] = {
+
     // call to resolve references in order to get CREATE TABLE AS to work
     val query = TestHive
       .parseSql(querystr)
@@ -90,7 +92,7 @@ class HiveParquetSuite extends FunSuite with BeforeAndAfterAll with BeforeAndAft
 
   override def beforeAll() {
     // write test data
-    ParquetTestData.writeFile
+    ParquetTestData.writeFile()
     // Override initial Parquet test table
     TestHive.catalog.registerTable(Some[String]("parquet"), "testsource", ParquetTestData.testData)
   }
@@ -151,7 +153,7 @@ class HiveParquetSuite extends FunSuite with BeforeAndAfterAll with BeforeAndAft
     (rddOne, rddTwo).zipped.foreach {
       (a,b) => (a,b).zipped.toArray.zipWithIndex.foreach {
         case ((value_1:Array[Byte], value_2:Array[Byte]), index) =>
-          assert(new String(value_1) === new String(value_2), s"table $tableName row ${counter} field ${fieldNames(index)} don't match")
+          assert(new String(value_1) === new String(value_2), s"table $tableName row $counter field ${fieldNames(index)} don't match")
         case ((value_1, value_2), index) =>
           assert(value_1 === value_2, s"table $tableName row $counter field ${fieldNames(index)} don't match")
       }
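The `${counter}` to `$counter` change here (and `${other}` to `$other` in HiveQl.scala) follows the usual interpolation rule: braces are only needed when the interpolated expression is more than a bare identifier. A compilable illustration:

    val counter = 3
    val fieldNames = Seq("key", "value")
    val index = 1
    // Bare identifiers drop the braces; expressions such as fieldNames(index) keep them.
    val msg = s"row $counter field ${fieldNames(index)} don't match"
    // msg == "row 3 field value don't match"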

