Return-Path: X-Original-To: apmail-spark-commits-archive@minotaur.apache.org Delivered-To: apmail-spark-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 9AC2F18C51 for ; Tue, 4 Aug 2015 21:40:49 +0000 (UTC) Received: (qmail 86927 invoked by uid 500); 4 Aug 2015 21:40:49 -0000 Delivered-To: apmail-spark-commits-archive@spark.apache.org Received: (qmail 86896 invoked by uid 500); 4 Aug 2015 21:40:49 -0000 Mailing-List: contact commits-help@spark.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list commits@spark.apache.org Received: (qmail 86887 invoked by uid 99); 4 Aug 2015 21:40:49 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 04 Aug 2015 21:40:49 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 65BA4E03E8; Tue, 4 Aug 2015 21:40:49 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: rxin@apache.org To: commits@spark.apache.org Message-Id: <8ec665889e664f05bad360375dbb51c1@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: spark git commit: [SPARK-9553][SQL] remove the no-longer-necessary createCode and createStructCode, and replace the usage Date: Tue, 4 Aug 2015 21:40:49 +0000 (UTC) Repository: spark Updated Branches: refs/heads/master a0cc01759 -> f4b1ac08a [SPARK-9553][SQL] remove the no-longer-necessary createCode and createStructCode, and replace the usage Author: Wenchen Fan Closes #7890 from cloud-fan/minor and squashes the following commits: c3b1be3 [Wenchen Fan] fix style b0cbe2e [Wenchen Fan] remove the createCode and createStructCode, and replace the usage of them by createStructCode Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f4b1ac08 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f4b1ac08 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f4b1ac08 Branch: refs/heads/master Commit: f4b1ac08a1327e6d0ddc317cdf3997a0f68dec72 Parents: a0cc017 Author: Wenchen Fan Authored: Tue Aug 4 14:40:46 2015 -0700 Committer: Reynold Xin Committed: Tue Aug 4 14:40:46 2015 -0700 ---------------------------------------------------------------------- .../codegen/GenerateUnsafeProjection.scala | 161 ++----------------- .../expressions/complexTypeCreator.scala | 10 +- 2 files changed, 17 insertions(+), 154 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/f4b1ac08/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala index fc3ecf5..71f8ea0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala @@ -117,161 +117,13 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro } /** - * Generates the code to create an [[UnsafeRow]] object based on the input expressions. - * @param ctx context for code generation - * @param ev specifies the name of the variable for the output [[UnsafeRow]] object - * @param expressions input expressions - * @return generated code to put the expression output into an [[UnsafeRow]] - */ - def createCode(ctx: CodeGenContext, ev: GeneratedExpressionCode, expressions: Seq[Expression]) - : String = { - - val ret = ev.primitive - ctx.addMutableState("UnsafeRow", ret, s"$ret = new UnsafeRow();") - val buffer = ctx.freshName("buffer") - ctx.addMutableState("byte[]", buffer, s"$buffer = new byte[64];") - val cursor = ctx.freshName("cursor") - val numBytes = ctx.freshName("numBytes") - - val exprs = expressions.map { e => e.dataType match { - case st: StructType => createCodeForStruct(ctx, e.gen(ctx), st) - case _ => e.gen(ctx) - }} - val allExprs = exprs.map(_.code).mkString("\n") - - val fixedSize = 8 * exprs.length + UnsafeRow.calculateBitSetWidthInBytes(exprs.length) - val additionalSize = expressions.zipWithIndex.map { - case (e, i) => genAdditionalSize(e.dataType, exprs(i)) - }.mkString("") - - val writers = expressions.zipWithIndex.map { case (e, i) => - val update = genFieldWriter(ctx, e.dataType, exprs(i), ret, i, cursor) - s"""if (${exprs(i).isNull}) { - $ret.setNullAt($i); - } else { - $update; - }""" - }.mkString("\n ") - - s""" - $allExprs - int $numBytes = $fixedSize $additionalSize; - if ($numBytes > $buffer.length) { - $buffer = new byte[$numBytes]; - } - - $ret.pointTo( - $buffer, - $PlatformDependent.BYTE_ARRAY_OFFSET, - ${expressions.size}, - $numBytes); - int $cursor = $fixedSize; - - $writers - boolean ${ev.isNull} = false; - """ - } - - /** - * Generates the Java code to convert a struct (backed by InternalRow) to UnsafeRow. - * - * This function also handles nested structs by recursively generating the code to do conversion. - * - * @param ctx code generation context - * @param input the input struct, identified by a [[GeneratedExpressionCode]] - * @param schema schema of the struct field - */ - // TODO: refactor createCode and this function to reduce code duplication. - private def createCodeForStruct( - ctx: CodeGenContext, - input: GeneratedExpressionCode, - schema: StructType): GeneratedExpressionCode = { - - val isNull = input.isNull - val primitive = ctx.freshName("structConvert") - ctx.addMutableState("UnsafeRow", primitive, s"$primitive = new UnsafeRow();") - val buffer = ctx.freshName("buffer") - ctx.addMutableState("byte[]", buffer, s"$buffer = new byte[64];") - val cursor = ctx.freshName("cursor") - - val exprs: Seq[GeneratedExpressionCode] = schema.map(_.dataType).zipWithIndex.map { - case (dt, i) => dt match { - case st: StructType => - val nestedStructEv = GeneratedExpressionCode( - code = "", - isNull = s"${input.primitive}.isNullAt($i)", - primitive = s"${ctx.getValue(input.primitive, dt, i.toString)}" - ) - createCodeForStruct(ctx, nestedStructEv, st) - case _ => - GeneratedExpressionCode( - code = "", - isNull = s"${input.primitive}.isNullAt($i)", - primitive = s"${ctx.getValue(input.primitive, dt, i.toString)}" - ) - } - } - val allExprs = exprs.map(_.code).mkString("\n") - - val fixedSize = 8 * exprs.length + UnsafeRow.calculateBitSetWidthInBytes(exprs.length) - val additionalSize = schema.toSeq.map(_.dataType).zip(exprs).map { case (dt, ev) => - genAdditionalSize(dt, ev) - }.mkString("") - - val writers = schema.toSeq.map(_.dataType).zip(exprs).zipWithIndex.map { case ((dt, ev), i) => - val update = genFieldWriter(ctx, dt, ev, primitive, i, cursor) - s""" - if (${exprs(i).isNull}) { - $primitive.setNullAt($i); - } else { - $update; - } - """ - }.mkString("\n ") - - // Note that we add a shortcut here for performance: if the input is already an UnsafeRow, - // just copy the bytes directly into our buffer space without running any conversion. - // We also had to use a hack to introduce a "tmp" variable, to avoid the Java compiler from - // complaining that a GenericMutableRow (generated by expressions) cannot be cast to UnsafeRow. - val tmp = ctx.freshName("tmp") - val numBytes = ctx.freshName("numBytes") - val code = s""" - |${input.code} - |if (!${input.isNull}) { - | Object $tmp = (Object) ${input.primitive}; - | if ($tmp instanceof UnsafeRow) { - | $primitive = (UnsafeRow) $tmp; - | } else { - | $allExprs - | - | int $numBytes = $fixedSize $additionalSize; - | if ($numBytes > $buffer.length) { - | $buffer = new byte[$numBytes]; - | } - | - | $primitive.pointTo( - | $buffer, - | $PlatformDependent.BYTE_ARRAY_OFFSET, - | ${exprs.size}, - | $numBytes); - | int $cursor = $fixedSize; - | - | $writers - | } - |} - """.stripMargin - - GeneratedExpressionCode(code, isNull, primitive) - } - - /** * Generates the Java code to convert a struct (backed by InternalRow) to UnsafeRow. * * @param ctx code generation context * @param inputs could be the codes for expressions or input struct fields. * @param inputTypes types of the inputs */ - private def createCodeForStruct2( + private def createCodeForStruct( ctx: CodeGenContext, inputs: Seq[GeneratedExpressionCode], inputTypes: Seq[DataType]): GeneratedExpressionCode = { @@ -537,7 +389,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro val fieldIsNull = s"$tmp.isNullAt($i)" GeneratedExpressionCode("", fieldIsNull, getFieldCode) } - val converter = createCodeForStruct2(ctx, fieldEvals, fieldTypes) + val converter = createCodeForStruct(ctx, fieldEvals, fieldTypes) val code = s""" ${input.code} UnsafeRow $output = null; @@ -561,6 +413,12 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro case _ => input } + def createCode(ctx: CodeGenContext, expressions: Seq[Expression]): GeneratedExpressionCode = { + val exprEvals = expressions.map(e => e.gen(ctx)) + val exprTypes = expressions.map(_.dataType) + createCodeForStruct(ctx, exprEvals, exprTypes) + } + protected def canonicalize(in: Seq[Expression]): Seq[Expression] = in.map(ExpressionCanonicalizer.execute) @@ -570,8 +428,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro protected def create(expressions: Seq[Expression]): UnsafeProjection = { val ctx = newCodeGenContext() - val exprEvals = expressions.map(e => e.gen(ctx)) - val eval = createCodeForStruct2(ctx, exprEvals, expressions.map(_.dataType)) + val eval = createCode(ctx, expressions) val code = s""" public Object generate($exprType[] exprs) { http://git-wip-us.apache.org/repos/asf/spark/blob/f4b1ac08/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index a145dfb..4a071e6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -211,7 +211,10 @@ case class CreateStructUnsafe(children: Seq[Expression]) extends Expression { override def eval(input: InternalRow): Any = throw new UnsupportedOperationException override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { - GenerateUnsafeProjection.createCode(ctx, ev, children) + val eval = GenerateUnsafeProjection.createCode(ctx, children) + ev.isNull = eval.isNull + ev.primitive = eval.primitive + eval.code } override def prettyName: String = "struct_unsafe" @@ -246,7 +249,10 @@ case class CreateNamedStructUnsafe(children: Seq[Expression]) extends Expression override def eval(input: InternalRow): Any = throw new UnsupportedOperationException override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { - GenerateUnsafeProjection.createCode(ctx, ev, valExprs) + val eval = GenerateUnsafeProjection.createCode(ctx, valExprs) + ev.isNull = eval.isNull + ev.primitive = eval.primitive + eval.code } override def prettyName: String = "named_struct_unsafe" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org