spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From eyalfa <...@git.apache.org>
Subject [GitHub] spark pull request #16043: [SPARK-18601][SQL] Simplify Create/Get complex ex...
Date Fri, 27 Jan 2017 14:53:39 GMT
Github user eyalfa commented on a diff in the pull request:

    https://github.com/apache/spark/pull/16043#discussion_r98216632
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala
---
    @@ -0,0 +1,128 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.spark.sql.catalyst.optimizer
    +
    +import org.apache.spark.sql.catalyst.expressions._
    +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
    +import org.apache.spark.sql.catalyst.rules.Rule
    +
    +/**
    +* Optimizer rule that pushes field extraction down into [[CreateNamedStructLike]].
    +*
    +* Using `plan.transformExpressionsUp`, every `GetStructField` whose child is a
    +* [[CreateNamedStructLike]] is replaced by the value expression that the struct
    +* constructor holds at the requested ordinal (`valExprs(ordinal)`).
    +* Expressions that do not match this pattern are left as they are.
    +*/
    +object SimplifyCreateStructOps extends Rule[LogicalPlan] {
    +  override def apply(plan: LogicalPlan): LogicalPlan = {
    +    plan.transformExpressionsUp {
    +      // push down field extraction: take the value expression at `ordinal` directly
    +      case GetStructField(createNamedStructLike: CreateNamedStructLike, ordinal, _) =>
    +        createNamedStructLike.valExprs(ordinal)
    +    }
    +  }
    +}
    +
    +/**
    +* Optimizer rule that pushes operations down into [[CreateArray]].
    +*
    +* Two patterns are rewritten via `plan.transformExpressionsUp`:
    +*  - `GetArrayStructFields` over a `CreateArray` becomes a `CreateArray` whose elements
    +*    are `GetStructField(elem, ordinal, Some(field.name))`, one per original element.
    +*  - `GetArrayItem` over a `CreateArray` with an integer-literal index becomes the
    +*    element at that index, or a null literal cast to the original expression's data
    +*    type when the index is negative or not smaller than the number of elements.
    +*/
    +object SimplifyCreateArrayOps extends Rule[LogicalPlan] {
    +  override def apply(plan: LogicalPlan): LogicalPlan = {
    +    plan.transformExpressionsUp {
    +      // push down field selection (array of structs): extract the field per element
    +      case GetArrayStructFields(CreateArray(elems), field, ordinal, numFields, containsNull)
=>
    +        CreateArray(elems.map(GetStructField(_, ordinal, Some(field.name))))
    +      // push down item selection; only matches when the index is an integer literal.
    +      case ga @ GetArrayItem(CreateArray(elems), IntegerLiteral(idx)) =>
    +        if (idx >= 0 && idx < elems.size) {
    +          elems(idx)
    +        } else { // index outside [0, elems.size): substitute a null of the same data type
    +          Cast(Literal(null), ga.dataType)
    +        }
    +    }
    +  }
    +}
    +
    +/**
    +* push down operations into [[CreateMap]].
    +*/
    +object SimplifyCreateMapOps extends Rule[LogicalPlan] {
    +  object ComparisonResult extends Enumeration { // possible outcomes of compareKeys
    +    val PositiveMatch = Value // the two keys are semantically equal
    +    val NegativeMatch = Value // both keys are literals and are not semantically equal
    +    val UnDetermined = Value // any other pair of keys
    +  }
    +
    +  def compareKeys(k1 : Expression, k2 : Expression) : ComparisonResult.Value = {
    +    (k1, k2) match {
    +      case (x, y) if x.semanticEquals(y) => ComparisonResult.PositiveMatch
    +      // TODO: make sure this is null safe, especially when the data types differ
    +      // (is that even possible?); two literals that are not semantically equal never match.
    +      case (_ : Literal, _ : Literal) => ComparisonResult.NegativeMatch
    +      case _ => ComparisonResult.UnDetermined // at least one key is not a literal
    +    }
    +  }
    +
    +  case class ClassifiedEntries(
    +    undetermined : Seq[Expression], // flattened key, value, key, value, ... of undetermined entries
    +    nullable : Boolean, // true once any undetermined entry's value is nullable
    +    firstPositive : Option[Expression]) { // value of the first positively matched key, if any
    +    def normalize(k : Expression) : ClassifiedEntries = this match {
    +      /**
    +      * When we have undetermined matches that might produce a null value,
    +      * we can't separate the positive match and use [[Coalesce]] to choose the final result,
    +      * so we 'hide' the positive match as an undetermined match: the key `k` and the
    +      * positive value are appended to `undetermined` and `firstPositive` is cleared.
    +      * In every other case this instance is returned unchanged.
    +      */
    +      case ClassifiedEntries(u, true, Some(p)) if u.nonEmpty =>
    +        ClassifiedEntries(u ++ Seq(k, p), true, None)
    +      case _ => this
    +    }
    +  }
    +
    +  def classifyEntries(mapEntries : Seq[(Expression, Expression)],
    +                      requestedKey : Expression) : ClassifiedEntries = { // classify each entry's key against requestedKey, in order
    +    val res1 = mapEntries.foldLeft(ClassifiedEntries(Seq.empty, nullable = false, None))
{
    +      case (prev @ ClassifiedEntries(_, _, Some(_)), _) => prev // positive match already found: ignore remaining entries
    +      case (ClassifiedEntries(prev, nullable, None), (k, v)) =>
    +        compareKeys(k, requestedKey) match {
    +          case ComparisonResult.UnDetermined => // keep the pair and track whether its value is nullable
    +            val vIsNullable = v.nullable
    +            val nextNullbale = nullable || vIsNullable
    +            ClassifiedEntries(prev ++ Seq(k, v), nullable = nextNullbale, None)
    +          case ComparisonResult.NegativeMatch => ClassifiedEntries(prev, nullable,
None) // negative match: the entry is dropped
    +          case ComparisonResult.PositiveMatch => ClassifiedEntries(prev, nullable,
Some(v)) // positive match: remember its value
    +        }
    +    }
    +    res1.normalize(requestedKey) // may fold the positive match back into the undetermined list
    +  }
    +
    +  override def apply(plan: LogicalPlan): LogicalPlan = {
    +    plan.transformExpressionsUp {
    +      // attempt to unfold 'constant' key extraction,
    +      // this enables other optimizations to take place.
    +      case gmv @ GetMapValue(cm @ CreateMap(elems), key) =>
    --- End diff --
    
    @hvanhovell It's a bit more complicated than that:
    the optimization also trims away all undetermined entries that follow a positive match.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message