From: GitBox
To: reviews@spark.apache.org
Subject: [GitHub] [spark] yaooqinn commented on a change in pull request #26942: [SPARK-30301][SQL] Fix wrong results when datetimes as fields of complex types
Date: Fri, 20 Dec 2019 09:40:30 -0000

yaooqinn commented on a change in pull request #26942: [SPARK-30301][SQL] Fix wrong results when datetimes as fields of complex types
URL: https://github.com/apache/spark/pull/26942#discussion_r360295033

##########
File path: sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala
##########

@@ -56,78 +56,41 @@ object HiveResult {
       // We need the types so we can output struct field names
       val types = executedPlan.output.map(_.dataType)
       // Reformat to match hive tab delimited output.
-      result.map(_.zip(types).map(toHiveString)).map(_.mkString("\t"))
+      result.map(_.zip(types).map(e => toHiveString(e)))
+        .map(_.mkString("\t"))
   }

-  private val primitiveTypes = Seq(
-    StringType,
-    IntegerType,
-    LongType,
-    DoubleType,
-    FloatType,
-    BooleanType,
-    ByteType,
-    ShortType,
-    DateType,
-    TimestampType,
-    BinaryType)
-
   private lazy val zoneId = DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone)
   private lazy val dateFormatter = DateFormatter(zoneId)
   private lazy val timestampFormatter = TimestampFormatter.getFractionFormatter(zoneId)

-  /** Hive outputs fields of structs slightly differently than top level attributes.
-   */
-  private def toHiveStructString(a: (Any, DataType)): String = a match {
-    case (struct: Row, StructType(fields)) =>
-      struct.toSeq.zip(fields).map {
-        case (v, t) => s""""${t.name}":${toHiveStructString((v, t.dataType))}"""
-      }.mkString("{", ",", "}")
-    case (seq: Seq[_], ArrayType(typ, _)) =>
-      seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]")
-    case (map: Map[_, _], MapType(kType, vType, _)) =>
-      map.map {
-        case (key, value) =>
-          toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType))
-      }.toSeq.sorted.mkString("{", ",", "}")
-    case (null, _) => "null"
-    case (s: String, StringType) => "\"" + s + "\""
-    case (decimal, DecimalType()) => decimal.toString
-    case (interval: CalendarInterval, CalendarIntervalType) =>
-      SQLConf.get.intervalOutputStyle match {
-        case SQL_STANDARD => toSqlStandardString(interval)
-        case ISO_8601 => toIso8601String(interval)
-        case MULTI_UNITS => toMultiUnitsString(interval)
-      }
-    case (other, tpe) if primitiveTypes contains tpe => other.toString
-  }
-
   /** Formats a datum (based on the given data type) and returns the string representation. */
-  def toHiveString(a: (Any, DataType)): String = a match {
-    case (struct: Row, StructType(fields)) =>
-      struct.toSeq.zip(fields).map {
-        case (v, t) => s""""${t.name}":${toHiveStructString((v, t.dataType))}"""
-      }.mkString("{", ",", "}")
-    case (seq: Seq[_], ArrayType(typ, _)) =>
-      seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]")
-    case (map: Map[_, _], MapType(kType, vType, _)) =>
-      map.map {
-        case (key, value) =>
-          toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType))
-      }.toSeq.sorted.mkString("{", ",", "}")
-    case (null, _) => "NULL"
+  def toHiveString(a: (Any, DataType), nested: Boolean = false): String = a match {
+    case (null, _) => if (nested) "null" else "NULL"

Review comment:
   Yes, there are many Hive compatibility unit tests there.
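For readers following along in the archive, here is a minimal, self-contained sketch of the behaviour the new nested flag is meant to produce: a null at the top level of a result row prints as Hive's NULL, while a null inside a complex value prints as lowercase null. This is an illustration only, not Spark's actual HiveResult code; the simplified DataType stand-ins (StringType, ArrayType) and the NestedNullFormatting object are invented for the example.

   // Standalone Scala illustration only -- simplified stand-ins for Spark's
   // DataType hierarchy, not the real HiveResult implementation.
   object NestedNullFormatting {
     sealed trait DataType
     case object StringType extends DataType
     case class ArrayType(elementType: DataType) extends DataType

     def toHiveString(a: (Any, DataType), nested: Boolean = false): String = a match {
       // Top-level nulls render as NULL, nested ones as lowercase null.
       case (null, _) => if (nested) "null" else "NULL"
       case (seq: Seq[_], ArrayType(et)) =>
         // Elements of a complex value are formatted as nested, so their nulls stay lowercase.
         seq.map(v => toHiveString((v, et), nested = true)).mkString("[", ",", "]")
       // Nested strings are quoted, top-level strings are not, mirroring the old split
       // between toHiveStructString and toHiveString shown in the diff above.
       case (s: String, StringType) => if (nested) "\"" + s + "\"" else s
     }

     def main(args: Array[String]): Unit = {
       println(toHiveString((null, StringType)))                      // prints: NULL
       println(toHiveString((Seq("a", null), ArrayType(StringType)))) // prints: ["a",null]
     }
   }

As far as the diff above shows, routing nested values back through the same toHiveString (with nested = true) is also what lets date and timestamp fields inside structs, arrays and maps go through the session time zone formatters, which is the wrong-results case SPARK-30301 targets.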