hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Apache Wiki <wikidi...@apache.org>
Subject [Hadoop Wiki] Update of "Hive/LanguageManual/Transform" by ZhengShao
Date Thu, 16 Apr 2009 21:23:49 GMT
Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Hadoop Wiki" for change notification.

The following page has been changed by ZhengShao:
http://wiki.apache.org/hadoop/Hive/LanguageManual/Transform

------------------------------------------------------------------------------
  
  Note that columns will be transformed to ''STRING'' and delimited by TAB before being fed
to the user script, and the standard output of the user script will be treated as TAB-separated
''STRING'' columns. User scripts can output debug information to standard error, which will
be shown on the task detail page in Hadoop.
  
- In the syntax, both ''MAP'' and ''REDUCE'' can be also written as ''SELECT TRANSFORM''.
 There are actually no difference between these three.
+ In the syntax, both ''MAP ...'' and ''REDUCE ...'' can also be written as ''SELECT TRANSFORM
( ... )''.  There is actually no difference between these three forms.
  Hive runs the reduce script in the reduce task (instead of the map task) because of the
''clusterBy''/''distributeBy''/''sortBy'' clause in the inner query.
  
  Please also see [wiki:Self:Hive/LanguageManual/SortBy Sort By / Cluster By / Distribute
By].
@@ -23, +23 @@

  query:
    FROM (
      FROM src
-     MAP '(' expression (',' expression)* ')'
+     MAP expression (',' expression)*
      USING 'my_map_script'
      ( AS colName (',' colName)* )?
      ( clusterBy? | distributeBy? sortBy? ) src_alias
    )
-   REDUCE '(' expression (, expression)* ')'
+   REDUCE expression (',' expression)*
+     USING 'my_reduce_script'
+     ( AS colName (',' colName)* )?
+ 
+   FROM (
+     FROM src
+     SELECT TRANSFORM '(' expression (',' expression)* ')'
+     USING 'my_map_script'
+     ( AS colName (',' colName)* )?
+     ( clusterBy? | distributeBy? sortBy? ) src_alias
+   )
+   SELECT TRANSFORM '(' expression (',' expression)* ')'
      USING 'my_reduce_script'
      ( AS colName (',' colName)* )?
  }}}
@@ -37, +48 @@

  {{{
    FROM (
      FROM pv_users
-     MAP ( pv_users.userid, pv_users.date )
+     MAP pv_users.userid, pv_users.date
      USING 'map_script'
      AS dt, uid
      CLUSTER BY dt) map_output
    INSERT OVERWRITE TABLE pv_users_reduced
-     REDUCE ( map_output.dt, map_output.uid )
+     REDUCE map_output.dt, map_output.uid
+     USING 'reduce_script'
+     AS date, count;
+   FROM (
+     FROM pv_users
+     SELECT TRANSFORM(pv_users.userid, pv_users.date)
+     USING 'map_script'
+     AS dt, uid
+     CLUSTER BY dt) map_output
+   INSERT OVERWRITE TABLE pv_users_reduced
+     SELECT TRANSFORM(map_output.dt, map_output.uid)
      USING 'reduce_script'
      AS date, count;
  }}}
@@ -54, +75 @@

  {{{
    FROM (
      FROM pv_users
-     MAP ( pv_users.userid, pv_users.date )
+     MAP pv_users.userid, pv_users.date
      USING 'map_script'
      CLUSTER BY key) map_output
    INSERT OVERWRITE TABLE pv_users_reduced
-     REDUCE ( map_output.key, map_output.value )
+     REDUCE map_output.key, map_output.value
      USING 'reduce_script'
      AS date, count;
  }}}

Mime
View raw message