hadoop-hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From zs...@apache.org
Subject svn commit: r752802 - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/udf/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Date Thu, 12 Mar 2009 08:06:32 GMT
Author: zshao
Date: Thu Mar 12 08:06:31 2009
New Revision: 752802

URL: http://svn.apache.org/viewvc?rev=752802&view=rev
Log:
HIVE-313. Add UDF date_add, date_sub, datediff. (zshao)

Added:
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateAdd.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateDiff.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateSub.java
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf9.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf9.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=752802&r1=752801&r2=752802&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Thu Mar 12 08:06:31 2009
@@ -6,6 +6,8 @@
 
   NEW FEATURES
 
+    HIVE-313. Add UDF date_add, date_sub, datediff. (zshao)
+
     HIVE-79. Print number of rows inserted to table(s).
     (Suresh Antony via zshao)
 

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=752802&r1=752801&r2=752802&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Thu
Mar 12 08:06:31 2009
@@ -90,6 +90,10 @@
     registerUDF("from_unixtime", UDFFromUnixTime.class, OperatorType.PREFIX, false);
     registerUDF("to_date", UDFDate.class, OperatorType.PREFIX, false);
 
+    registerUDF("date_add", UDFDateAdd.class, OperatorType.PREFIX, false);
+    registerUDF("date_sub", UDFDateSub.class, OperatorType.PREFIX, false);
+    registerUDF("datediff", UDFDateDiff.class, OperatorType.PREFIX, false);
+    
     registerUDF("get_json_object", UDFJson.class, OperatorType.PREFIX, false);
 
     registerUDF("+", UDFOPPlus.class, OperatorType.INFIX, true);

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateAdd.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateAdd.java?rev=752802&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateAdd.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateAdd.java Thu Mar 12
08:06:31 2009
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.TimeZone;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.UDF;
+
+
+/**
+ * UDF behind date_add: shifts a date string forward by a number of days.
+ */
+public class UDFDateAdd extends UDF {
+
+  private static final Log LOG = LogFactory.getLog(UDFDateAdd.class.getName());
+
+  private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+  private final Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
+
+  public UDFDateAdd() {
+    // The formatter must use the same time zone as the calendar. Otherwise the
+    // string is parsed as local midnight, shifted by whole 24-hour UTC days and
+    // formatted back in local time, which yields the wrong date whenever the
+    // shift crosses a daylight-saving transition of the default time zone.
+    formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+  }
+
+  /**
+   * Add a number of days to the date.
+   * The time part of the string will be ignored.
+   *
+   * NOTE: This is a subset of what MySQL offers as:
+   * http://dev.mysql.com/doc/refman/5.1/en/date-and-time-functions.html#function_date-add
+   *
+   * @param dateString1 the date string in the format of "yyyy-MM-dd HH:mm:ss" or "yyyy-MM-dd".
+   * @param days the number of days to add; a negative value moves the date backwards.
+   * @return the date in the format of "yyyy-MM-dd", or null if dateString1 is
+   *         null or cannot be parsed.
+   */
+  public String evaluate(String dateString1, int days)  {
+    // A null input yields a null result instead of a NullPointerException.
+    if (dateString1 == null) {
+      return null;
+    }
+
+    try {
+      // Only the leading "yyyy-MM-dd" is consumed; trailing text is ignored.
+      calendar.setTime(formatter.parse(dateString1));
+      calendar.add(Calendar.DAY_OF_MONTH, days);
+      Date newDate = calendar.getTime();
+      return formatter.format(newDate);
+    } catch (ParseException e) {
+      // Unparsable input yields a null result.
+      return null;
+    }
+  }
+
+}

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateDiff.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateDiff.java?rev=752802&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateDiff.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateDiff.java Thu Mar 12
08:06:31 2009
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.TimeZone;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.UDF;
+
+
+/**
+ * UDF behind datediff: number of days between two date strings.
+ */
+public class UDFDateDiff extends UDF {
+
+  private static final Log LOG = LogFactory.getLog(UDFDateDiff.class.getName());
+
+  /** Milliseconds in one day: 86400 seconds times 1000. */
+  private static final long MILLIS_PER_DAY = 86400L * 1000L;
+
+  private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+
+  public UDFDateDiff() {
+    // Parse in UTC so every parsed date is an exact multiple of one day
+    // since the epoch.
+    formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+  }
+
+  /**
+   * Calculate the difference in the number of days.
+   * The time part of the string will be ignored.
+   * If dateString1 is earlier than dateString2, then the result is negative.
+   *
+   * @param dateString1 the date string in the format of "yyyy-MM-dd HH:mm:ss" or "yyyy-MM-dd".
+   * @param dateString2 the date string in the format of "yyyy-MM-dd HH:mm:ss" or "yyyy-MM-dd".
+   * @return the difference in days (dateString1 minus dateString2), or null if
+   *         either argument is null or cannot be parsed.
+   */
+  public Integer evaluate(String dateString1, String dateString2)  {
+    // A null input yields a null result instead of a NullPointerException.
+    if (dateString1 == null || dateString2 == null) {
+      return null;
+    }
+
+    try {
+      // NOTE: This implementation avoids the extra-second problem
+      // by comparing with UTC epoch and integer division.
+      long diffInMilliSeconds = formatter.parse(dateString1).getTime()
+          - formatter.parse(dateString2).getTime();
+      return Integer.valueOf((int) (diffInMilliSeconds / MILLIS_PER_DAY));
+    } catch (ParseException e) {
+      // Unparsable input yields a null result.
+      return null;
+    }
+  }
+
+}

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateSub.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateSub.java?rev=752802&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateSub.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateSub.java Thu Mar 12
08:06:31 2009
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.TimeZone;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.UDF;
+
+
+/**
+ * UDF behind date_sub: shifts a date string backward by a number of days.
+ */
+public class UDFDateSub extends UDF {
+
+  private static final Log LOG = LogFactory.getLog(UDFDateSub.class.getName());
+
+  private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+  private final Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
+
+  public UDFDateSub() {
+    // The formatter must use the same time zone as the calendar. Otherwise the
+    // string is parsed as local midnight, shifted by whole 24-hour UTC days and
+    // formatted back in local time, which yields the wrong date whenever the
+    // shift crosses a daylight-saving transition of the default time zone.
+    formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+  }
+
+  /**
+   * Subtract a number of days from the date.
+   * The time part of the string will be ignored.
+   *
+   * NOTE: This is a subset of what MySQL offers as:
+   * http://dev.mysql.com/doc/refman/5.1/en/date-and-time-functions.html#function_date-sub
+   *
+   * @param dateString1 the date string in the format of "yyyy-MM-dd HH:mm:ss" or "yyyy-MM-dd".
+   * @param days the number of days to subtract; a negative value moves the date forwards.
+   * @return the date in the format of "yyyy-MM-dd", or null if dateString1 is
+   *         null or cannot be parsed.
+   */
+  public String evaluate(String dateString1, int days)  {
+    // A null input yields a null result instead of a NullPointerException.
+    if (dateString1 == null) {
+      return null;
+    }
+
+    try {
+      // Only the leading "yyyy-MM-dd" is consumed; trailing text is ignored.
+      calendar.setTime(formatter.parse(dateString1));
+      calendar.add(Calendar.DAY_OF_MONTH, -days);
+      Date newDate = calendar.getTime();
+      return formatter.format(newDate);
+    } catch (ParseException e) {
+      // Unparsable input yields a null result.
+      return null;
+    }
+  }
+
+}

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf9.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf9.q?rev=752802&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf9.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf9.q Thu Mar 12 08:06:31 2009
@@ -0,0 +1,20 @@
+EXPLAIN
+SELECT DATEDIFF('2008-12-31', '2009-01-01'), DATEDIFF('2008-03-01', '2008-02-28'),
+       DATEDIFF('2007-03-01', '2007-01-28'), DATEDIFF('2008-03-01 23:59:59', '2008-03-02
00:00:00'),
+       DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365),
+       DATE_ADD('2008-02-28', 2), DATE_ADD('2009-02-28', 2),
+       DATE_ADD('2007-02-28', 365), DATE_ADD('2007-02-28 23:59:59', 730),
+       DATE_SUB('2009-01-01', 1), DATE_SUB('2009-01-01', 365),
+       DATE_SUB('2008-02-28', 2), DATE_SUB('2009-02-28', 2),
+       DATE_SUB('2007-02-28', 365), DATE_SUB('2007-02-28 01:12:34', 730)
+       FROM src WHERE src.key = 86;
+
+SELECT DATEDIFF('2008-12-31', '2009-01-01'), DATEDIFF('2008-03-01', '2008-02-28'),
+       DATEDIFF('2007-03-01', '2007-01-28'), DATEDIFF('2008-03-01 23:59:59', '2008-03-02
00:00:00'),
+       DATE_ADD('2008-12-31', 1), DATE_ADD('2008-12-31', 365),
+       DATE_ADD('2008-02-28', 2), DATE_ADD('2009-02-28', 2),
+       DATE_ADD('2007-02-28', 365), DATE_ADD('2007-02-28 23:59:59', 730),
+       DATE_SUB('2009-01-01', 1), DATE_SUB('2009-01-01', 365),
+       DATE_SUB('2008-03-01', 2), DATE_SUB('2009-03-01', 2),
+       DATE_SUB('2007-02-28', 365), DATE_SUB('2007-02-28 01:12:34', 730)
+       FROM src WHERE src.key = 86;

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/udf9.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/udf9.q.out?rev=752802&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/udf9.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/udf9.q.out Thu Mar 12 08:06:31 2009
@@ -0,0 +1,67 @@
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE))
(TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION DATEDIFF '2008-12-31' '2009-01-01')) (TOK_SELEXPR (TOK_FUNCTION
DATEDIFF '2008-03-01' '2008-02-28')) (TOK_SELEXPR (TOK_FUNCTION DATEDIFF '2007-03-01' '2007-01-28'))
(TOK_SELEXPR (TOK_FUNCTION DATEDIFF '2008-03-01 23:59:59' '2008-03-02 00:00:00')) (TOK_SELEXPR
(TOK_FUNCTION DATE_ADD '2008-12-31' 1)) (TOK_SELEXPR (TOK_FUNCTION DATE_ADD '2008-12-31' 365))
(TOK_SELEXPR (TOK_FUNCTION DATE_ADD '2008-02-28' 2)) (TOK_SELEXPR (TOK_FUNCTION DATE_ADD '2009-02-28'
2)) (TOK_SELEXPR (TOK_FUNCTION DATE_ADD '2007-02-28' 365)) (TOK_SELEXPR (TOK_FUNCTION DATE_ADD
'2007-02-28 23:59:59' 730)) (TOK_SELEXPR (TOK_FUNCTION DATE_SUB '2009-01-01' 1)) (TOK_SELEXPR
(TOK_FUNCTION DATE_SUB '2009-01-01' 365)) (TOK_SELEXPR (TOK_FUNCTION DATE_SUB '2008-02-28'
2)) (TOK_SELEXPR (TOK_FUNCTION DATE_SUB '2009-02-28' 2)) (TOK_SELEXPR (TOK_FUNCTION DATE_SUB
'2007-02-28' 365)) (TOK_SELEXPR (TOK_FUNCTION DATE_SUB '2007-02-28 01:12:34' 730))) (TOK_WHERE (= (TOK_COLREF src key)
86))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src 
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              Filter Operator
+                predicate:
+                    expr: (UDFToDouble(0) = UDFToDouble(86))
+                    type: boolean
+                Select Operator
+                  expressions:
+                        expr: datediff('2008-12-31', '2009-01-01')
+                        type: int
+                        expr: datediff('2008-03-01', '2008-02-28')
+                        type: int
+                        expr: datediff('2007-03-01', '2007-01-28')
+                        type: int
+                        expr: datediff('2008-03-01 23:59:59', '2008-03-02 00:00:00')
+                        type: int
+                        expr: date_add('2008-12-31', 1)
+                        type: string
+                        expr: date_add('2008-12-31', 365)
+                        type: string
+                        expr: date_add('2008-02-28', 2)
+                        type: string
+                        expr: date_add('2009-02-28', 2)
+                        type: string
+                        expr: date_add('2007-02-28', 365)
+                        type: string
+                        expr: date_add('2007-02-28 23:59:59', 730)
+                        type: string
+                        expr: date_sub('2009-01-01', 1)
+                        type: string
+                        expr: date_sub('2009-01-01', 365)
+                        type: string
+                        expr: date_sub('2008-02-28', 2)
+                        type: string
+                        expr: date_sub('2009-02-28', 2)
+                        type: string
+                        expr: date_sub('2007-02-28', 365)
+                        type: string
+                        expr: date_sub('2007-02-28 01:12:34', 730)
+                        type: string
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+-1	2	32	-1	2009-01-01	2009-12-31	2008-03-01	2009-03-02	2008-02-28	2009-02-27	2008-12-31	2008-01-02
2008-02-28	2009-02-27	2006-02-28	2005-02-28



Mime
View raw message