spark-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Xiangrui Meng (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (SPARK-10953) Benchmark codegen vs. hand-written code for univariate statistics
Date Fri, 16 Oct 2015 19:02:05 GMT

    [ https://issues.apache.org/jira/browse/SPARK-10953?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14961209#comment-14961209 ]

Xiangrui Meng commented on SPARK-10953:
---------------------------------------

That sounds good. I'm closing this for now since the conclusion is clear.

> Benchmark codegen vs. hand-written code for univariate statistics
> -----------------------------------------------------------------
>
>                 Key: SPARK-10953
>                 URL: https://issues.apache.org/jira/browse/SPARK-10953
>             Project: Spark
>          Issue Type: Sub-task
>          Components: SQL
>            Reporter: Xiangrui Meng
>            Assignee: Jihong MA
>             Fix For: 1.6.0
>
>
> I checked the generated code for a simple stddev_pop call:
> {code}
> val df = sqlContext.range(100)
> df.select(stddev_pop(col("id"))).show()
> {code}
> This is the generated code for the merge part, which is very long and complex. I'm not
> sure whether we can benefit from code generation for univariate statistics. We should
> benchmark it against a Scala implementation.
> {code}
> 15/10/06 10:10:57 DEBUG GenerateMutableProjection: code for if (isnull(input[1, DoubleType]))
cast(0 as double) else input[1, DoubleType],if (isnull(input[1, DoubleType])) input[6, DoubleType]
else if (isnull(input[6, DoubleType])) input[1, DoubleType] else (input[1, DoubleType] + input[6,
DoubleType]),if (isnull(input[3, DoubleType])) cast(0 as double) else input[3, DoubleType],if
(isnull(input[3, DoubleType])) input[8, DoubleType] else if (isnull(input[8, DoubleType]))
input[3, DoubleType] else (((input[3, DoubleType] * input[0, DoubleType]) + (input[8, DoubleType]
* input[6, DoubleType])) / (input[0, DoubleType] + input[6, DoubleType])),if (isnull(input[4,
DoubleType])) input[9, DoubleType] else if (isnull(input[9, DoubleType])) input[4, DoubleType]
else ((input[4, DoubleType] + input[9, DoubleType]) + ((((input[8, DoubleType] - input[2,
DoubleType]) * (input[8, DoubleType] - input[2, DoubleType])) * (input[0, DoubleType] * input[6,
DoubleType])) / (input[0, DoubleType] + input[6, DoubleType]))):
> public Object generate(org.apache.spark.sql.catalyst.expressions.Expression[] expr) {
>   return new SpecificMutableProjection(expr);
> }
> class SpecificMutableProjection extends org.apache.spark.sql.catalyst.expressions.codegen.BaseMutableProjection
{
>   private org.apache.spark.sql.catalyst.expressions.Expression[] expressions;
>   private org.apache.spark.sql.catalyst.expressions.MutableRow mutableRow;
>   public SpecificMutableProjection(org.apache.spark.sql.catalyst.expressions.Expression[]
expr) {
>     expressions = expr;
>     mutableRow = new org.apache.spark.sql.catalyst.expressions.GenericMutableRow(5);
>   }
>   public org.apache.spark.sql.catalyst.expressions.codegen.BaseMutableProjection target(org.apache.spark.sql.catalyst.expressions.MutableRow
row) {
>     mutableRow = row;
>     return this;
>   }
>   /* Provide immutable access to the last projected row. */
>   public InternalRow currentValue() {
>     return (InternalRow) mutableRow;
>   }
>   public Object apply(Object _i) {
>     InternalRow i = (InternalRow) _i;
>     /* if (isnull(input[1, DoubleType])) cast(0 as double) else input[1, DoubleType]
*/
>     /* isnull(input[1, DoubleType]) */
>     /* input[1, DoubleType] */
>     boolean isNull4 = i.isNullAt(1);
>     double primitive5 = isNull4 ? -1.0 : (i.getDouble(1));
>     boolean isNull0 = false;
>     double primitive1 = -1.0;
>     if (!false && isNull4) {
>       /* cast(0 as double) */
>       /* 0 */
>       boolean isNull6 = false;
>       double primitive7 = -1.0;
>       if (!false) {
>         primitive7 = (double) 0;
>       }
>       isNull0 = isNull6;
>       primitive1 = primitive7;
>     } else {
>       /* input[1, DoubleType] */
>       boolean isNull10 = i.isNullAt(1);
>       double primitive11 = isNull10 ? -1.0 : (i.getDouble(1));
>       isNull0 = isNull10;
>       primitive1 = primitive11;
>     }
>     if (isNull0) {
>       mutableRow.setNullAt(0);
>     } else {
>       mutableRow.setDouble(0, primitive1);
>     }
>     /* if (isnull(input[1, DoubleType])) input[6, DoubleType] else if (isnull(input[6,
DoubleType])) input[1, DoubleType] else (input[1, DoubleType] + input[6, DoubleType]) */
>     /* isnull(input[1, DoubleType]) */
>     /* input[1, DoubleType] */
>     boolean isNull16 = i.isNullAt(1);
>     double primitive17 = isNull16 ? -1.0 : (i.getDouble(1));
>     boolean isNull12 = false;
>     double primitive13 = -1.0;
>     if (!false && isNull16) {
>       /* input[6, DoubleType] */
>       boolean isNull18 = i.isNullAt(6);
>       double primitive19 = isNull18 ? -1.0 : (i.getDouble(6));
>       isNull12 = isNull18;
>       primitive13 = primitive19;
>     } else {
>       /* if (isnull(input[6, DoubleType])) input[1, DoubleType] else (input[1, DoubleType]
+ input[6, DoubleType]) */
>       /* isnull(input[6, DoubleType]) */
>       /* input[6, DoubleType] */
>       boolean isNull24 = i.isNullAt(6);
>       double primitive25 = isNull24 ? -1.0 : (i.getDouble(6));
>       boolean isNull20 = false;
>       double primitive21 = -1.0;
>       if (!false && isNull24) {
>         /* input[1, DoubleType] */
>         boolean isNull26 = i.isNullAt(1);
>         double primitive27 = isNull26 ? -1.0 : (i.getDouble(1));
>         isNull20 = isNull26;
>         primitive21 = primitive27;
>       } else {
>         /* (input[1, DoubleType] + input[6, DoubleType]) */
>         /* input[1, DoubleType] */
>         boolean isNull30 = i.isNullAt(1);
>         double primitive31 = isNull30 ? -1.0 : (i.getDouble(1));
>         boolean isNull28 = isNull30;
>         double primitive29 = -1.0;
>         if (!isNull28) {
>           /* input[6, DoubleType] */
>           boolean isNull32 = i.isNullAt(6);
>           double primitive33 = isNull32 ? -1.0 : (i.getDouble(6));
>           if (!isNull32) {
>             primitive29 = primitive31 + primitive33;
>           } else {
>             isNull28 = true;
>           }
>         }
>         isNull20 = isNull28;
>         primitive21 = primitive29;
>       }
>       isNull12 = isNull20;
>       primitive13 = primitive21;
>     }
>     if (isNull12) {
>       mutableRow.setNullAt(1);
>     } else {
>       mutableRow.setDouble(1, primitive13);
>     }
>     /* if (isnull(input[3, DoubleType])) cast(0 as double) else input[3, DoubleType]
*/
>     /* isnull(input[3, DoubleType]) */
>     /* input[3, DoubleType] */
>     boolean isNull38 = i.isNullAt(3);
>     double primitive39 = isNull38 ? -1.0 : (i.getDouble(3));
>     boolean isNull34 = false;
>     double primitive35 = -1.0;
>     if (!false && isNull38) {
>       /* cast(0 as double) */
>       /* 0 */
>       boolean isNull40 = false;
>       double primitive41 = -1.0;
>       if (!false) {
>         primitive41 = (double) 0;
>       }
>       isNull34 = isNull40;
>       primitive35 = primitive41;
>     } else {
>       /* input[3, DoubleType] */
>       boolean isNull44 = i.isNullAt(3);
>       double primitive45 = isNull44 ? -1.0 : (i.getDouble(3));
>       isNull34 = isNull44;
>       primitive35 = primitive45;
>     }
>     if (isNull34) {
>       mutableRow.setNullAt(2);
>     } else {
>       mutableRow.setDouble(2, primitive35);
>     }
>     /* if (isnull(input[3, DoubleType])) input[8, DoubleType] else if (isnull(input[8,
DoubleType])) input[3, DoubleType] else (((input[3, DoubleType] * input[0, DoubleType]) +
(input[8, DoubleType] * input[6, DoubleType])) / (input[0, DoubleType] + input[6, DoubleType]))
*/
>     /* isnull(input[3, DoubleType]) */
>     /* input[3, DoubleType] */
>     boolean isNull50 = i.isNullAt(3);
>     double primitive51 = isNull50 ? -1.0 : (i.getDouble(3));
>     boolean isNull46 = false;
>     double primitive47 = -1.0;
>     if (!false && isNull50) {
>       /* input[8, DoubleType] */
>       boolean isNull52 = i.isNullAt(8);
>       double primitive53 = isNull52 ? -1.0 : (i.getDouble(8));
>       isNull46 = isNull52;
>       primitive47 = primitive53;
>     } else {
>       /* if (isnull(input[8, DoubleType])) input[3, DoubleType] else (((input[3, DoubleType]
* input[0, DoubleType]) + (input[8, DoubleType] * input[6, DoubleType])) / (input[0, DoubleType]
+ input[6, DoubleType])) */
>       /* isnull(input[8, DoubleType]) */
>       /* input[8, DoubleType] */
>       boolean isNull58 = i.isNullAt(8);
>       double primitive59 = isNull58 ? -1.0 : (i.getDouble(8));
>       boolean isNull54 = false;
>       double primitive55 = -1.0;
>       if (!false && isNull58) {
>         /* input[3, DoubleType] */
>         boolean isNull60 = i.isNullAt(3);
>         double primitive61 = isNull60 ? -1.0 : (i.getDouble(3));
>         isNull54 = isNull60;
>         primitive55 = primitive61;
>       } else {
>         /* (((input[3, DoubleType] * input[0, DoubleType]) + (input[8, DoubleType] *
input[6, DoubleType])) / (input[0, DoubleType] + input[6, DoubleType])) */
>         /* (input[0, DoubleType] + input[6, DoubleType]) */
>         /* input[0, DoubleType] */
>         boolean isNull80 = i.isNullAt(0);
>         double primitive81 = isNull80 ? -1.0 : (i.getDouble(0));
>         boolean isNull78 = isNull80;
>         double primitive79 = -1.0;
>         if (!isNull78) {
>           /* input[6, DoubleType] */
>           boolean isNull82 = i.isNullAt(6);
>           double primitive83 = isNull82 ? -1.0 : (i.getDouble(6));
>           if (!isNull82) {
>             primitive79 = primitive81 + primitive83;
>           } else {
>             isNull78 = true;
>           }
>         }
>         boolean isNull62 = false;
>         double primitive63 = -1.0;
>         if (isNull78 || primitive79 == 0) {
>           isNull62 = true;
>         } else {
>           /* ((input[3, DoubleType] * input[0, DoubleType]) + (input[8, DoubleType] *
input[6, DoubleType])) */
>           /* (input[3, DoubleType] * input[0, DoubleType]) */
>           /* input[3, DoubleType] */
>           boolean isNull68 = i.isNullAt(3);
>           double primitive69 = isNull68 ? -1.0 : (i.getDouble(3));
>           boolean isNull66 = isNull68;
>           double primitive67 = -1.0;
>           if (!isNull66) {
>             /* input[0, DoubleType] */
>             boolean isNull70 = i.isNullAt(0);
>             double primitive71 = isNull70 ? -1.0 : (i.getDouble(0));
>             if (!isNull70) {
>               primitive67 = primitive69 * primitive71;
>             } else {
>               isNull66 = true;
>             }
>           }
>           boolean isNull64 = isNull66;
>           double primitive65 = -1.0;
>           if (!isNull64) {
>             /* (input[8, DoubleType] * input[6, DoubleType]) */
>             /* input[8, DoubleType] */
>             boolean isNull74 = i.isNullAt(8);
>             double primitive75 = isNull74 ? -1.0 : (i.getDouble(8));
>             boolean isNull72 = isNull74;
>             double primitive73 = -1.0;
>             if (!isNull72) {
>               /* input[6, DoubleType] */
>               boolean isNull76 = i.isNullAt(6);
>               double primitive77 = isNull76 ? -1.0 : (i.getDouble(6));
>               if (!isNull76) {
>                 primitive73 = primitive75 * primitive77;
>               } else {
>                 isNull72 = true;
>               }
>             }
>             if (!isNull72) {
>               primitive65 = primitive67 + primitive73;
>             } else {
>               isNull64 = true;
>             }
>           }
>           if (isNull64) {
>             isNull62 = true;
>           } else {
>             primitive63 = (double)(primitive65 / primitive79);
>           }
>         }
>         isNull54 = isNull62;
>         primitive55 = primitive63;
>       }
>       isNull46 = isNull54;
>       primitive47 = primitive55;
>     }
>     if (isNull46) {
>       mutableRow.setNullAt(3);
>     } else {
>       mutableRow.setDouble(3, primitive47);
>     }
>     /* if (isnull(input[4, DoubleType])) input[9, DoubleType] else if (isnull(input[9,
DoubleType])) input[4, DoubleType] else ((input[4, DoubleType] + input[9, DoubleType]) + ((((input[8,
DoubleType] - input[2, DoubleType]) * (input[8, DoubleType] - input[2, DoubleType])) * (input[0,
DoubleType] * input[6, DoubleType])) / (input[0, DoubleType] + input[6, DoubleType]))) */
>     /* isnull(input[4, DoubleType]) */
>     /* input[4, DoubleType] */
>     boolean isNull88 = i.isNullAt(4);
>     double primitive89 = isNull88 ? -1.0 : (i.getDouble(4));
>     boolean isNull84 = false;
>     double primitive85 = -1.0;
>     if (!false && isNull88) {
>       /* input[9, DoubleType] */
>       boolean isNull90 = i.isNullAt(9);
>       double primitive91 = isNull90 ? -1.0 : (i.getDouble(9));
>       isNull84 = isNull90;
>       primitive85 = primitive91;
>     } else {
>       /* if (isnull(input[9, DoubleType])) input[4, DoubleType] else ((input[4, DoubleType]
+ input[9, DoubleType]) + ((((input[8, DoubleType] - input[2, DoubleType]) * (input[8, DoubleType]
- input[2, DoubleType])) * (input[0, DoubleType] * input[6, DoubleType])) / (input[0, DoubleType]
+ input[6, DoubleType]))) */
>       /* isnull(input[9, DoubleType]) */
>       /* input[9, DoubleType] */
>       boolean isNull96 = i.isNullAt(9);
>       double primitive97 = isNull96 ? -1.0 : (i.getDouble(9));
>       boolean isNull92 = false;
>       double primitive93 = -1.0;
>       if (!false && isNull96) {
>         /* input[4, DoubleType] */
>         boolean isNull98 = i.isNullAt(4);
>         double primitive99 = isNull98 ? -1.0 : (i.getDouble(4));
>         isNull92 = isNull98;
>         primitive93 = primitive99;
>       } else {
>         /* ((input[4, DoubleType] + input[9, DoubleType]) + ((((input[8, DoubleType]
- input[2, DoubleType]) * (input[8, DoubleType] - input[2, DoubleType])) * (input[0, DoubleType]
* input[6, DoubleType])) / (input[0, DoubleType] + input[6, DoubleType]))) */
>         /* (input[4, DoubleType] + input[9, DoubleType]) */
>         /* input[4, DoubleType] */
>         boolean isNull104 = i.isNullAt(4);
>         double primitive105 = isNull104 ? -1.0 : (i.getDouble(4));
>         boolean isNull102 = isNull104;
>         double primitive103 = -1.0;
>         if (!isNull102) {
>           /* input[9, DoubleType] */
>           boolean isNull106 = i.isNullAt(9);
>           double primitive107 = isNull106 ? -1.0 : (i.getDouble(9));
>           if (!isNull106) {
>             primitive103 = primitive105 + primitive107;
>           } else {
>             isNull102 = true;
>           }
>         }
>         boolean isNull100 = isNull102;
>         double primitive101 = -1.0;
>         if (!isNull100) {
>           /* ((((input[8, DoubleType] - input[2, DoubleType]) * (input[8, DoubleType]
- input[2, DoubleType])) * (input[0, DoubleType] * input[6, DoubleType])) / (input[0, DoubleType]
+ input[6, DoubleType])) */
>           /* (input[0, DoubleType] + input[6, DoubleType]) */
>           /* input[0, DoubleType] */
>           boolean isNull134 = i.isNullAt(0);
>           double primitive135 = isNull134 ? -1.0 : (i.getDouble(0));
>           boolean isNull132 = isNull134;
>           double primitive133 = -1.0;
>           if (!isNull132) {
>             /* input[6, DoubleType] */
>             boolean isNull136 = i.isNullAt(6);
>             double primitive137 = isNull136 ? -1.0 : (i.getDouble(6));
>             if (!isNull136) {
>               primitive133 = primitive135 + primitive137;
>             } else {
>               isNull132 = true;
>             }
>           }
>           boolean isNull108 = false;
>           double primitive109 = -1.0;
>           if (isNull132 || primitive133 == 0) {
>             isNull108 = true;
>           } else {
>             /* (((input[8, DoubleType] - input[2, DoubleType]) * (input[8, DoubleType]
- input[2, DoubleType])) * (input[0, DoubleType] * input[6, DoubleType])) */
>             /* ((input[8, DoubleType] - input[2, DoubleType]) * (input[8, DoubleType]
- input[2, DoubleType])) */
>             /* (input[8, DoubleType] - input[2, DoubleType]) */
>             /* input[8, DoubleType] */
>             boolean isNull116 = i.isNullAt(8);
>             double primitive117 = isNull116 ? -1.0 : (i.getDouble(8));
>             boolean isNull114 = isNull116;
>             double primitive115 = -1.0;
>             if (!isNull114) {
>               /* input[2, DoubleType] */
>               boolean isNull118 = i.isNullAt(2);
>               double primitive119 = isNull118 ? -1.0 : (i.getDouble(2));
>               if (!isNull118) {
>                 primitive115 = primitive117 - primitive119;
>               } else {
>                 isNull114 = true;
>               }
>             }
>             boolean isNull112 = isNull114;
>             double primitive113 = -1.0;
>             if (!isNull112) {
>               /* (input[8, DoubleType] - input[2, DoubleType]) */
>               /* input[8, DoubleType] */
>               boolean isNull122 = i.isNullAt(8);
>               double primitive123 = isNull122 ? -1.0 : (i.getDouble(8));
>               boolean isNull120 = isNull122;
>               double primitive121 = -1.0;
>               if (!isNull120) {
>                 /* input[2, DoubleType] */
>                 boolean isNull124 = i.isNullAt(2);
>                 double primitive125 = isNull124 ? -1.0 : (i.getDouble(2));
>                 if (!isNull124) {
>                   primitive121 = primitive123 - primitive125;
>                 } else {
>                   isNull120 = true;
>                 }
>               }
>               if (!isNull120) {
>                 primitive113 = primitive115 * primitive121;
>               } else {
>                 isNull112 = true;
>               }
>             }
>             boolean isNull110 = isNull112;
>             double primitive111 = -1.0;
>             if (!isNull110) {
>               /* (input[0, DoubleType] * input[6, DoubleType]) */
>               /* input[0, DoubleType] */
>               boolean isNull128 = i.isNullAt(0);
>               double primitive129 = isNull128 ? -1.0 : (i.getDouble(0));
>               boolean isNull126 = isNull128;
>               double primitive127 = -1.0;
>               if (!isNull126) {
>                 /* input[6, DoubleType] */
>                 boolean isNull130 = i.isNullAt(6);
>                 double primitive131 = isNull130 ? -1.0 : (i.getDouble(6));
>                 if (!isNull130) {
>                   primitive127 = primitive129 * primitive131;
>                 } else {
>                   isNull126 = true;
>                 }
>               }
>               if (!isNull126) {
>                 primitive111 = primitive113 * primitive127;
>               } else {
>                 isNull110 = true;
>               }
>             }
>             if (isNull110) {
>               isNull108 = true;
>             } else {
>               primitive109 = (double)(primitive111 / primitive133);
>             }
>           }
>           if (!isNull108) {
>             primitive101 = primitive103 + primitive109;
>           } else {
>             isNull100 = true;
>           }
>         }
>         isNull92 = isNull100;
>         primitive93 = primitive101;
>       }
>       isNull84 = isNull92;
>       primitive85 = primitive93;
>     }
>     if (isNull84) {
>       mutableRow.setNullAt(4);
>     } else {
>       mutableRow.setDouble(4, primitive85);
>     }
>     return mutableRow;
>   }
> }
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org


Mime
View raw message