From: daijy@apache.org
To: pig-commits@incubator.apache.org
Reply-To: pig-dev@hadoop.apache.org
Subject: svn commit: r957100 [2/2] - in /hadoop/pig/trunk: ./ src/org/apache/pig/builtin/ test/org/apache/pig/test/
Date: Wed, 23 Jun 2010 03:37:13 -0000
Message-Id: <20100623033721.8CB3123889B6@eris.apache.org>

Added: hadoop/pig/trunk/src/org/apache/pig/builtin/REGEX_EXTRACT_ALL.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/REGEX_EXTRACT_ALL.java?rev=957100&view=auto
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/builtin/REGEX_EXTRACT_ALL.java (added)
+++ hadoop/pig/trunk/src/org/apache/pig/builtin/REGEX_EXTRACT_ALL.java Wed Jun 23 03:37:11 2010
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.pig.builtin; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.pig.EvalFunc; +import org.apache.pig.FuncSpec; +import org.apache.pig.data.DataType; +import org.apache.pig.data.Tuple; +import org.apache.pig.data.TupleFactory; +import org.apache.pig.impl.logicalLayer.FrontendException; +import org.apache.pig.impl.logicalLayer.schema.Schema; + +/** + *
+ * Syntax:
+ * String RegexExtractAll(String expression, String regex).
+ * Input:
+ * expression - source string.
+ * regex - regular expression.
+ * Output:
+ * A tuple of matched strings.
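+ *
+ * A minimal usage sketch (the relation, field, and pattern are illustrative only, not
+ * part of this patch); note that exec() uses Matcher.matches(), so the pattern must
+ * match the entire input string:
+ *   A = load 'logs' as (line: chararray);
+ *   B = foreach A generate REGEX_EXTRACT_ALL(line, '(\\S+) (\\S+)');
+ *   -- for line = 'hello world' this yields the tuple (hello,world); non-matching lines yield null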
+ */ + +public class REGEX_EXTRACT_ALL extends EvalFunc { + + private static TupleFactory tupleFactory = TupleFactory.getInstance(); + + @Override + public Tuple exec(Tuple input) throws IOException { + if (input.size()!=2) { + String msg = "RegexExtractAll : Only 2 parameters are allowed."; + throw new IOException(msg); + } + + if (input.get(0)==null) + return null; + try { + if (!input.get(1).equals(mExpression)) { + try { + mExpression = (String)input.get(1); + mPattern = Pattern.compile(mExpression); + } catch (Exception e) { + String msg = "RegexExtractAll : Mal-Formed Regular expression : "+input.get(1); + throw new IOException(msg); + } + } + } catch (NullPointerException e) { + String msg = "RegexExtractAll : Regular expression is null"; + throw new IOException(msg); + } + + Matcher m = mPattern.matcher((String)input.get(0)); + if (!m.matches()) { + return null; + } + Tuple result = tupleFactory.newTuple(m.groupCount()); + for (int i = 0; i< m.groupCount(); i++) { + result.set(i, m.group(i+1)); + } + return result; + } + + String mExpression = null; + Pattern mPattern = null; + @Override + public Schema outputSchema(Schema input) { + try { + return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass().getName().toLowerCase(), input), + DataType.TUPLE)); + } catch (Exception e) { + return null; + } + } + + @Override + public List getArgToFuncMapping() throws FrontendException { + List funcList = new ArrayList(); + Schema s = new Schema(); + s.add(new Schema.FieldSchema(null, DataType.CHARARRAY)); + s.add(new Schema.FieldSchema(null, DataType.CHARARRAY)); + funcList.add(new FuncSpec(this.getClass().getName(), s)); + return funcList; + } +} + Added: hadoop/pig/trunk/src/org/apache/pig/builtin/REPLACE.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/REPLACE.java?rev=957100&view=auto ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/builtin/REPLACE.java (added) +++ hadoop/pig/trunk/src/org/apache/pig/builtin/REPLACE.java Wed Jun 23 03:37:11 2010 @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pig.builtin; + +import java.io.IOException; + +import org.apache.pig.EvalFunc; +import org.apache.pig.data.Tuple; +import org.apache.pig.data.DataType; +import org.apache.pig.impl.logicalLayer.schema.Schema; + + +/** + * REPLACE implements eval function to replace part of a string. + * Example: + * A = load 'mydata' as (name); + * B = foreach A generate REPLACE(name, 'blabla', 'bla'); + * The first argument is a string on which to perform the operation. The second argument + * is treated as a regular expression. The third argument is the replacement string. 
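+ * A worked example (input value borrowed from the accompanying TestBuiltin case; the
+ * relation and field names are illustrative): with name = 'Hello World!',
+ *   B = foreach A generate REPLACE(name, 'o', 'a');
+ * yields 'Hella Warld!'. Because the second argument is a regular expression, characters
+ * such as '.' or '*' must be escaped to be replaced literally.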
+ * This is a wrapper around Java's String.replaceAll(String, String); + * + */ +public class REPLACE extends EvalFunc +{ + /** + * Method invoked on every tuple during foreach evaluation + * @param input tuple; first column is assumed to have the column to convert + * @exception java.io.IOException + */ + public String exec(Tuple input) throws IOException { + if (input == null || input.size() < 3) + return null; + + try{ + String source = (String)input.get(0); + String target = (String)input.get(1); + String replacewith = (String)input.get(2); + return source.replaceAll(target, replacewith); + }catch(Exception e){ + log.warn("Failed to process input; error - " + e.getMessage()); + return null; + } + } + + @Override + public Schema outputSchema(Schema input) { + return new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY)); + } + +} \ No newline at end of file Added: hadoop/pig/trunk/src/org/apache/pig/builtin/ROUND.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/ROUND.java?rev=957100&view=auto ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/builtin/ROUND.java (added) +++ hadoop/pig/trunk/src/org/apache/pig/builtin/ROUND.java Wed Jun 23 03:37:11 2010 @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pig.builtin; + +import java.io.IOException; +import java.util.List; +import java.util.ArrayList; + +import org.apache.pig.EvalFunc; +import org.apache.pig.FuncSpec; +import org.apache.pig.data.Tuple; +import org.apache.pig.impl.logicalLayer.schema.Schema; +import org.apache.pig.data.DataType; +import org.apache.pig.impl.logicalLayer.FrontendException; + +/** + * ROUND implements a binding to the Java function + * {@link java.lang.Math#round(double) Math.round(double)}. + * Given a single data atom it Returns the closest long to the argument. 
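+ * A minimal illustration (the field name is hypothetical):
+ *   B = foreach A generate ROUND(price);
+ *   -- price = 3.6 yields 4L, price = 3.4 yields 3L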
+ * + */ +public class ROUND extends EvalFunc{ + /** + * java level API + * @param input expects a single numeric value + * @return output returns a single numeric value, + * the closest long to the argument + */ + @Override + public Long exec(Tuple input) throws IOException { + if (input == null || input.size() == 0) + return null; + + try{ + Double d = DataType.toDouble(input.get(0)); + return Math.round(d); + } catch (NumberFormatException nfe){ + System.err.println("Failed to process input; error - " + nfe.getMessage()); + return null; + } catch (Exception e){ + throw new IOException("Caught exception processing input row ", e); + } + } + + @Override + public Schema outputSchema(Schema input) { + return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass().getName().toLowerCase(), input), DataType.LONG)); + } + + /* (non-Javadoc) + * @see org.apache.pig.EvalFunc#getArgToFuncMapping() + */ + @Override + public List getArgToFuncMapping() throws FrontendException { + List funcList = new ArrayList(); + funcList.add(new FuncSpec(this.getClass().getName(), new Schema(new Schema.FieldSchema(null, DataType.BYTEARRAY)))); + funcList.add(new FuncSpec(DoubleRound.class.getName(), new Schema(new Schema.FieldSchema(null, DataType.DOUBLE)))); + funcList.add(new FuncSpec(FloatRound.class.getName(), new Schema(new Schema.FieldSchema(null, DataType.FLOAT)))); + return funcList; + } +} Added: hadoop/pig/trunk/src/org/apache/pig/builtin/SIN.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/SIN.java?rev=957100&view=auto ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/builtin/SIN.java (added) +++ hadoop/pig/trunk/src/org/apache/pig/builtin/SIN.java Wed Jun 23 03:37:11 2010 @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pig.builtin; + +/** + * SIN implements a binding to the Java function + * {@link java.lang.Math#sin(double) Math.sin(double)}. + * Given a single data atom it Returns the sine of the argument. + * + */ +public class SIN extends DoubleBase{ + Double compute(Double input){ + return Math.sin(input); + + } +} Added: hadoop/pig/trunk/src/org/apache/pig/builtin/SINH.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/SINH.java?rev=957100&view=auto ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/builtin/SINH.java (added) +++ hadoop/pig/trunk/src/org/apache/pig/builtin/SINH.java Wed Jun 23 03:37:11 2010 @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pig.builtin; + +/** + * SINH implements a binding to the Java function + * {@link java.lang.Math#sinh(double) Math.sinh(double)}. + * Given a single data atom it Returns the hyperbolic sine of the argument. + * + */ +public class SINH extends DoubleBase{ + Double compute(Double input){ + return Math.sinh(input); + } +} Added: hadoop/pig/trunk/src/org/apache/pig/builtin/SPLIT.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/SPLIT.java?rev=957100&view=auto ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/builtin/SPLIT.java (added) +++ hadoop/pig/trunk/src/org/apache/pig/builtin/SPLIT.java Wed Jun 23 03:37:11 2010 @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pig.builtin; + +import java.io.IOException; + +import java.util.Arrays; +import java.util.regex.PatternSyntaxException; + +import org.apache.pig.EvalFunc; +import org.apache.pig.data.Tuple; +import org.apache.pig.data.TupleFactory; + +/** + * Wrapper around Java's String.split
+ * input tuple: first column is assumed to have a string to split;
+ * the optional second column is assumed to have the delimiter or regex to split on;
+ * if not provided, it's assumed to be '\s' (any whitespace character)
+ * the optional third column may provide a limit to the number of results.
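+ * Worked examples (values borrowed from the accompanying TestBuiltin case):
+ *   SPLIT('f ooo bar') yields the tuple (f,ooo,bar) using the default '\s' delimiter;
+ *   SPLIT('foo:bar:baz', ':', 2) yields (foo,bar:baz) because the limit caps the result at two fields.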
+ * If limit is not provided, 0 is assumed, as per Java's split(). + */ + +public class SPLIT extends EvalFunc { + + private final static TupleFactory tupleFactory = TupleFactory.getInstance(); + + /** + * Wrapper around Java's String.split + * @param input tuple; first column is assumed to have a string to split; + * the optional second column is assumed to have the delimiter or regex to split on;
+ * if not provided, it's assumed to be '\s' (any whitespace character)
+ * the optional third column may provide a limit to the number of results.
+ * If limit is not provided, 0 is assumed, as per Java's split(). + * @exception java.io.IOException + */ + public Tuple exec(Tuple input) throws IOException { + if (input == null || input.size() < 1) + return null; + try { + String source = (String) input.get(0); + String delim = (input.size() > 1 ) ? (String) input.get(1) : "\\s"; + int length = (input.size() > 2) ? (Integer) input.get(2) : 0; + if (source == null || delim == null) { + return null; + } + String[] splits = source.split(delim, length); + return tupleFactory.newTuple(Arrays.asList(splits)); + } catch (ClassCastException e) { + log.warn("class cast exception at "+e.getStackTrace()[0]); + } catch (PatternSyntaxException e) { + log.warn(e.getMessage()); + } + // this only happens if the try block did not complete normally + return null; + } +} Added: hadoop/pig/trunk/src/org/apache/pig/builtin/SQRT.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/SQRT.java?rev=957100&view=auto ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/builtin/SQRT.java (added) +++ hadoop/pig/trunk/src/org/apache/pig/builtin/SQRT.java Wed Jun 23 03:37:11 2010 @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pig.builtin; +/** + * SQRT implements a binding to the Java function + * {@link java.lang.Math#sqrt(double) Math.sqrt(double)}. + * Given a single data atom it Returns the square root of the argument. + * + */ +public class SQRT extends DoubleBase{ + Double compute(Double input){ + return Math.sqrt(input); + } +} Added: hadoop/pig/trunk/src/org/apache/pig/builtin/SUBSTRING.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/SUBSTRING.java?rev=957100&view=auto ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/builtin/SUBSTRING.java (added) +++ hadoop/pig/trunk/src/org/apache/pig/builtin/SUBSTRING.java Wed Jun 23 03:37:11 2010 @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pig.builtin; + +import java.io.IOException; + +import org.apache.pig.EvalFunc; +import org.apache.pig.data.DataType; +import org.apache.pig.data.Tuple; +import org.apache.pig.impl.logicalLayer.schema.Schema; + +/** + * SUBSTRING implements eval function to get a part of a string. + * Example: + * A = load 'mydata' as (name); + * B = foreach A generate SUBSTRING(name, 10, 12); + * + * First argument is the string to take a substring of.
+ * Second argument is the index of the first character of substring.
+ * Third argument is the index one past the last character of the substring (the end index is exclusive, as in Java's String.substring).
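+ * For example (values borrowed from the accompanying TestBuiltin case),
+ *   SUBSTRING('Hello World!', 1, 5) returns 'ello' (the characters at indexes 1 through 4).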
+ * if the last argument is past the end of the string, substring of (beginIndex, length(str)) is returned. + */ +public class SUBSTRING extends EvalFunc { + + /** + * Method invoked on every tuple during foreach evaluation + * @param input tuple; first column is assumed to have the column to convert + * @exception java.io.IOException + */ + public String exec(Tuple input) throws IOException { + if (input == null || input.size() < 3) { + log.warn("invalid number of arguments to SUBSTRING"); + return null; + } + try { + String source = (String)input.get(0); + Integer beginindex = (Integer)input.get(1); + Integer endindex = (Integer)input.get(2); + return source.substring(beginindex, Math.min(source.length(), endindex)); + } catch (NullPointerException npe) { + log.warn(npe.toString()); + return null; + } catch (ClassCastException e) { + log.warn(e.toString()); + return null; + } + } + + @Override + public Schema outputSchema(Schema input) { + return new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY)); + } + +} \ No newline at end of file Added: hadoop/pig/trunk/src/org/apache/pig/builtin/TAN.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/TAN.java?rev=957100&view=auto ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/builtin/TAN.java (added) +++ hadoop/pig/trunk/src/org/apache/pig/builtin/TAN.java Wed Jun 23 03:37:11 2010 @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pig.builtin; + +/** + * TAN implements a binding to the Java function + * {@link java.lang.Math#tan(double) Math.tan(double)}. + * Given a single data atom it Returns the tangent of the argument. + * + */ +public class TAN extends DoubleBase{ + Double compute(Double input){ + return Math.tan(input); + } +} Added: hadoop/pig/trunk/src/org/apache/pig/builtin/TANH.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/TANH.java?rev=957100&view=auto ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/builtin/TANH.java (added) +++ hadoop/pig/trunk/src/org/apache/pig/builtin/TANH.java Wed Jun 23 03:37:11 2010 @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pig.builtin; + +/** + * TANH implements a binding to the Java function + * {@link java.lang.Math#tanh(double) Math.tanh(double)}. + * Given a single data atom it Returns the hyperbolic tangent + * of the argument. + * + */ +public class TANH extends DoubleBase{ + Double compute(Double input){ + return Math.tanh(input); + } +} Added: hadoop/pig/trunk/src/org/apache/pig/builtin/TOBAG.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/TOBAG.java?rev=957100&view=auto ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/builtin/TOBAG.java (added) +++ hadoop/pig/trunk/src/org/apache/pig/builtin/TOBAG.java Wed Jun 23 03:37:11 2010 @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pig.builtin; + + +import java.io.IOException; + +import org.apache.pig.EvalFunc; +import org.apache.pig.data.BagFactory; +import org.apache.pig.data.DataBag; +import org.apache.pig.data.Tuple; +import org.apache.pig.data.TupleFactory; + +/** + * This class takes a list of items and puts them into a bag + * T = foreach U generate TOBAG($0, $1, $2); + * It's like saying this: + * T = foreach U generate {($0), ($1), ($2)} + */ +public class TOBAG extends EvalFunc { + + @Override + public DataBag exec(Tuple input) throws IOException { + try { + DataBag bag = BagFactory.getInstance().newDefaultBag(); + + for (int i = 0; i < input.size(); ++i) { + final Object object = input.get(i); + if (object != null) { + Tuple tp2 = TupleFactory.getInstance().newTuple(1); + tp2.set(0, object); + bag.add(tp2); + } + } + + return bag; + } catch (Exception ee) { + throw new RuntimeException("Error while creating a bag", ee); + } + } +} Added: hadoop/pig/trunk/src/org/apache/pig/builtin/TOP.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/TOP.java?rev=957100&view=auto ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/builtin/TOP.java (added) +++ hadoop/pig/trunk/src/org/apache/pig/builtin/TOP.java Wed Jun 23 03:37:11 2010 @@ -0,0 +1,349 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pig.builtin; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.PriorityQueue; +import java.util.Random; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pig.Algebraic; +import org.apache.pig.EvalFunc; +import org.apache.pig.FuncSpec; +import org.apache.pig.backend.executionengine.ExecException; +import org.apache.pig.data.BagFactory; +import org.apache.pig.data.DataBag; +import org.apache.pig.data.DataType; +import org.apache.pig.data.Tuple; +import org.apache.pig.data.TupleFactory; +import org.apache.pig.impl.logicalLayer.FrontendException; +import org.apache.pig.impl.logicalLayer.schema.Schema; +import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; + +/** + * Top UDF accepts a bag of tuples and returns top-n tuples depending upon the + * tuple field value of type long. Both n and field number needs to be provided + * to the UDF. The UDF iterates through the input bag and just retains top-n + * tuples by storing them in a priority queue of size n+1 where priority is the + * long field. This is efficient as priority queue provides constant time - O(1) + * removal of the least element and O(log n) time for heap restructuring. The + * UDF is especially helpful for turning the nested grouping operation inside + * out and retaining top-n in a nested group. + * + * Assumes all tuples in the bag contain an element of the same type in the compared column. 
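+ *
+ * The bounded priority-queue pattern described above, shown as a minimal standalone
+ * sketch (placeholder names 'n' and 'values'; the class below applies the same idea to
+ * Tuples via TupleComparator):
+ *   PriorityQueue<Long> store = new PriorityQueue<Long>(n + 1);
+ *   for (Long v : values) {
+ *       store.add(v);
+ *       if (store.size() > n)
+ *           store.poll();   // evict the smallest element so only the n largest remain
+ *   }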
+ * + * Sample usage: + * A = LOAD 'test.tsv' as (first: chararray, second: chararray); + * B = GROUP A BY (first, second); + * C = FOREACH B generate FLATTEN(group), COUNT(*) as count; + * D = GROUP C BY first; // again group by first + * topResults = FOREACH D { + * result = Top(10, 2, C); // and retain top 10 occurrences of 'second' in first + * GENERATE FLATTEN(result); + * } + */ +public class TOP extends EvalFunc implements Algebraic{ + private static final Log log = LogFactory.getLog(TOP.class); + static BagFactory mBagFactory = BagFactory.getInstance(); + static TupleFactory mTupleFactory = TupleFactory.getInstance(); + private Random randomizer = new Random(); + + static class TupleComparator implements Comparator { + private final int fieldNum; + private byte datatype; + private boolean typeFound=false; + + public TupleComparator(int fieldNum) { + this.fieldNum = fieldNum; + } + + /* + * (non-Javadoc) + * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object) + */ + @Override + public int compare(Tuple o1, Tuple o2) { + if (o1 == null) + return -1; + if (o2 == null) + return 1; + try { + Object field1 = o1.get(fieldNum); + Object field2 = o2.get(fieldNum); + if (!typeFound) { + datatype = DataType.findType(field1); + typeFound = true; + } + return DataType.compare(field1, field2, datatype, datatype); + } catch (ExecException e) { + throw new RuntimeException("Error while comparing o1:" + o1 + + " and o2:" + o2, e); + } + } + } + + @Override + public DataBag exec(Tuple tuple) throws IOException { + if (tuple == null || tuple.size() < 3) { + return null; + } + try { + int n = (Integer) tuple.get(0); + int fieldNum = (Integer) tuple.get(1); + DataBag inputBag = (DataBag) tuple.get(2); + PriorityQueue store = new PriorityQueue(n + 1, + new TupleComparator(fieldNum)); + updateTop(store, n, inputBag); + DataBag outputBag = mBagFactory.newDefaultBag(); + for (Tuple t : store) { + outputBag.add(t); + } + if (log.isDebugEnabled()) { + if (randomizer.nextInt(1000) == 1) { + log.debug("outputting a bag: "); + for (Tuple t : outputBag) + log.debug("outputting "+t.toDelimitedString("\t")); + log.debug("=================="); + } + } + return outputBag; + } catch (ExecException e) { + throw new RuntimeException("ExecException executing function: ", e); + } catch (Exception e) { + throw new RuntimeException("General Exception executing function: " + e); + } + } + + protected static void updateTop(PriorityQueue store, int limit, DataBag inputBag) { + Iterator itr = inputBag.iterator(); + while (itr.hasNext()) { + Tuple t = itr.next(); + store.add(t); + if (store.size() > limit) + store.poll(); + } + } + + /* + * (non-Javadoc) + * + * @see org.apache.pig.EvalFunc#getArgToFuncMapping() + */ + @Override + public List getArgToFuncMapping() throws FrontendException { + List fields = new ArrayList(3); + fields.add(new Schema.FieldSchema(null, DataType.INTEGER)); + fields.add(new Schema.FieldSchema(null, DataType.INTEGER)); + fields.add(new Schema.FieldSchema(null, DataType.BAG)); + FuncSpec funcSpec = new FuncSpec(this.getClass().getName(), new Schema(fields)); + List funcSpecs = new ArrayList(1); + funcSpecs.add(funcSpec); + return funcSpecs; + } + + @Override + public Schema outputSchema(Schema input) { + try { + if (input.size() < 3) { + return null; + } + Schema.FieldSchema bagFs = new Schema.FieldSchema(null, + input.getField(2).schema, DataType.BAG); + return new Schema(bagFs); + + } catch (Exception e) { + return null; + } + } + + @Override + public String getInitial() { + return 
Initial.class.getName(); + } + + @Override + public String getIntermed() { + return Intermed.class.getName(); + } + + @Override + public String getFinal() { + return Final.class.getName(); + } + + /* + * Same as normal code-path exec, but outputs a Tuple with the schema + * -- same schema as expected input. + */ + static public class Initial extends EvalFunc { + //private static final Log log = LogFactory.getLog(Initial.class); + //private final Random randomizer = new Random(); + @Override + public Tuple exec(Tuple tuple) throws IOException { + if (tuple == null || tuple.size() < 3) { + return null; + } + + try { + int n = (Integer) tuple.get(0); + int fieldNum = (Integer) tuple.get(1); + DataBag inputBag = (DataBag) tuple.get(2); + Tuple retTuple = mTupleFactory.newTuple(3); + DataBag outputBag = mBagFactory.newDefaultBag(); + // initially, there should only be one, so not much point in doing the priority queue + for (Tuple t : inputBag) { + outputBag.add(t); + } + retTuple.set(0, n); + retTuple.set(1,fieldNum); + retTuple.set(2, outputBag); + return retTuple; + } catch (Exception e) { + throw new RuntimeException("General Exception executing function: " + e); + } + } + } + + static public class Intermed extends EvalFunc { + private static final Log log = LogFactory.getLog(Intermed.class); + private final Random randomizer = new Random(); + /* The input is a tuple that contains a single bag. + * This bag contains outputs of the Initial step -- + * tuples of the format (limit, index, { top_tuples }) + * + * We need to take the top of tops and return a similar tuple. + * + * (non-Javadoc) + * @see org.apache.pig.EvalFunc#exec(org.apache.pig.data.Tuple) + */ + @Override + public Tuple exec(Tuple input) throws IOException { + if (input == null || input.size() < 1) { + return null; + } + try { + DataBag bagOfIntermediates = (DataBag) input.get(0); + Iterator intermediateIterator = bagOfIntermediates.iterator(); + if (!intermediateIterator.hasNext()) { + return null; + } + Tuple peekTuple = intermediateIterator.next(); + if (peekTuple == null || peekTuple.size() < 3 ) return null; + int n = (Integer) peekTuple.get(0); + int fieldNum = (Integer) peekTuple.get(1); + DataBag inputBag = (DataBag) peekTuple.get(2); + + PriorityQueue store = new PriorityQueue(n + 1, + new TupleComparator(fieldNum)); + + updateTop(store, n, inputBag); + + while (intermediateIterator.hasNext()) { + Tuple t = intermediateIterator.next(); + if (t == null || t.size() < 3 ) continue; + updateTop(store, n, (DataBag) t.get(2)); + } + + DataBag outputBag = mBagFactory.newDefaultBag(); + for (Tuple t : store) { + outputBag.add(t); + } + Tuple retTuple = mTupleFactory.newTuple(3); + retTuple.set(0, n); + retTuple.set(1,fieldNum); + retTuple.set(2, outputBag); + if (log.isDebugEnabled()) { + if (randomizer.nextInt(1000) == 1) log.debug("outputting "+retTuple.toDelimitedString("\t")); + } + return retTuple; + } catch (ExecException e) { + throw new RuntimeException("ExecException executing function: ", e); + } catch (Exception e) { + throw new RuntimeException("General Exception executing function: " + e); + } + } + + } + + static public class Final extends EvalFunc { + + private static final Log log = LogFactory.getLog(Final.class); + private final Random randomizer = new Random(); + + + + /* + * The input to this function is a tuple that contains a single bag. 
+ * This bag, in turn, contains outputs of the Intermediate step -- + * tuples of the format (limit, index, { top_tuples } ) + * + * we want to return a bag of top tuples + * + * (non-Javadoc) + * @see org.apache.pig.EvalFunc#exec(org.apache.pig.data.Tuple) + */ + @Override + public DataBag exec(Tuple tuple) throws IOException { + if (tuple == null || tuple.size() < 1) { + return null; + } + try { + DataBag bagOfIntermediates = (DataBag) tuple.get(0); + Iterator intermediateIterator = bagOfIntermediates.iterator(); + if (!intermediateIterator.hasNext()) { + return null; + } + Tuple peekTuple = intermediateIterator.next(); + if (peekTuple == null || peekTuple.size() < 3 ) return null; + int n = (Integer) peekTuple.get(0); + int fieldNum = (Integer) peekTuple.get(1); + DataBag inputBag = (DataBag) peekTuple.get(2); + + PriorityQueue store = new PriorityQueue(n + 1, + new TupleComparator(fieldNum)); + + updateTop(store, n, inputBag); + + while (intermediateIterator.hasNext()) { + Tuple t = intermediateIterator.next(); + if (t == null || t.size() < 3 ) continue; + updateTop(store, n, (DataBag) t.get(2)); + } + + DataBag outputBag = mBagFactory.newDefaultBag(); + for (Tuple t : store) { + outputBag.add(t); + } + if (log.isDebugEnabled()) { + if (randomizer.nextInt(1000) == 1) for (Tuple t : outputBag) log.debug("outputting "+t.toDelimitedString("\t")); + } + return outputBag; + } catch (ExecException e) { + throw new RuntimeException("ExecException executing function: ", e); + } catch (Exception e) { + throw new RuntimeException("General Exception executing function: " + e); + } + } + } +} + Added: hadoop/pig/trunk/src/org/apache/pig/builtin/TOTUPLE.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/TOTUPLE.java?rev=957100&view=auto ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/builtin/TOTUPLE.java (added) +++ hadoop/pig/trunk/src/org/apache/pig/builtin/TOTUPLE.java Wed Jun 23 03:37:11 2010 @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pig.builtin; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.pig.EvalFunc; +import org.apache.pig.data.DataType; +import org.apache.pig.data.Tuple; +import org.apache.pig.data.TupleFactory; +import org.apache.pig.impl.logicalLayer.schema.Schema; + +/** + * This class makes a tuple out of the parameter + * T = foreach U generate TOTUPLE($0, $1, $2); + * It generates a tuple containing $0, $1, and $2 + */ +public class TOTUPLE extends EvalFunc { + + @Override + public Tuple exec(Tuple input) throws IOException { + try { + List items = new ArrayList(); + for (int i = 0; i < input.size(); ++i) { + items.add(input.get(i)); + } + return TupleFactory.getInstance().newTuple(items); + } catch (Exception e) { + throw new RuntimeException("Error while creating a tuple", e); + } + } + + @Override + public Schema outputSchema(Schema input) { + try { + Schema tupleSchema = new Schema(); + for (int i = 0; i < input.size(); ++i) { + tupleSchema.add(input.getField(i)); + } + return new Schema(new Schema.FieldSchema(getSchemaName(this + .getClass().getName().toLowerCase(), input), tupleSchema, + DataType.TUPLE)); + } catch (Exception e) { + return null; + } + } + +} Added: hadoop/pig/trunk/src/org/apache/pig/builtin/TRIM.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/TRIM.java?rev=957100&view=auto ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/builtin/TRIM.java (added) +++ hadoop/pig/trunk/src/org/apache/pig/builtin/TRIM.java Wed Jun 23 03:37:11 2010 @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pig.builtin; + +import java.io.IOException; + +import org.apache.pig.EvalFunc; +import org.apache.pig.backend.executionengine.ExecException; +import org.apache.pig.data.DataType; +import org.apache.pig.data.Tuple; +import org.apache.pig.impl.logicalLayer.schema.Schema; + +/** + * Returns a string, with leading and trailing whitespace omitted. + * Implements a binding to the Java function {@link java.lang.String#trim() String.trim()}. 
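+ * For example (value borrowed from the accompanying TestBuiltin case),
+ * TRIM('Hello World!   ') returns 'Hello World!'.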
+ */ +public class TRIM extends EvalFunc { + @Override + public String exec(Tuple input) throws IOException { + if (input == null || input.size() == 0) { + return null; + } + try { + String str = (String) input.get(0); + if (str == null) return null; + if (str.length() == 0) return str; + return str.trim(); + } catch (ExecException e) { + log.warn("Error reading input: " + e.getMessage()); + return null; + } + } + + @Override + public Schema outputSchema(Schema input) { + return new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY)); + } +} Added: hadoop/pig/trunk/src/org/apache/pig/builtin/UCFIRST.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/UCFIRST.java?rev=957100&view=auto ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/builtin/UCFIRST.java (added) +++ hadoop/pig/trunk/src/org/apache/pig/builtin/UCFIRST.java Wed Jun 23 03:37:11 2010 @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pig.builtin; + +import java.io.IOException; + +import org.apache.pig.EvalFunc; +import org.apache.pig.backend.executionengine.ExecException; +import org.apache.pig.data.DataType; +import org.apache.pig.data.Tuple; +import org.apache.pig.impl.logicalLayer.schema.Schema; + +/** + * upper-case the first character of a string + */ +public class UCFIRST extends EvalFunc { + @Override + public String exec(Tuple input) throws IOException { + if (input == null || input.size() == 0) { + return null; + } + try { + String str = (String) input.get(0); + if (str == null) return null; + if (str.length() == 0) return str; + return Character.toUpperCase(str.charAt(0))+str.substring(1); + } catch (ExecException e) { + log.warn("Error reading input: " + e.getMessage()); + return null; + } + } + + @Override + public Schema outputSchema(Schema input) { + return new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY)); + } +} Added: hadoop/pig/trunk/src/org/apache/pig/builtin/UPPER.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/UPPER.java?rev=957100&view=auto ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/builtin/UPPER.java (added) +++ hadoop/pig/trunk/src/org/apache/pig/builtin/UPPER.java Wed Jun 23 03:37:11 2010 @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pig.builtin; + +import java.io.IOException; +import java.util.List; +import java.util.ArrayList; + +import org.apache.pig.EvalFunc; +import org.apache.pig.PigWarning; +import org.apache.pig.data.Tuple; +import org.apache.pig.data.DataType; +import org.apache.pig.impl.logicalLayer.schema.Schema; +import org.apache.pig.impl.logicalLayer.FrontendException; +import org.apache.pig.FuncSpec; + + +/** + * UPPER implements eval function to convert a string to upper case + * Example: + * A = load 'mydata' as (name); + * B = foreach A generate UPPER(name); + */ +public class UPPER extends EvalFunc { + + /** + * Upper-cases an input string. + * @param input tuple; first column is assumed to have the column to convert + * + * @exception IOException + */ + public String exec(Tuple input) throws IOException { + if (input == null || input.size() == 0 || input.get(0) == null) + return null; + + String str = null; + try { + str = (String)input.get(0); + return str.toUpperCase(); + } + catch (ClassCastException e) { + warn("unable to cast input "+input.get(0)+" of class "+ + input.get(0).getClass()+" to String", PigWarning.UDF_WARNING_1); + return null; + } + catch(Exception e){ + warn("Error processing input "+input.get(0), PigWarning.UDF_WARNING_1); + return null; + } + } + + /** + * This method gives a name to the column. + * @param input - schema of the input data + * @return schema of the input data + */ + @Override + public Schema outputSchema(Schema input) { + return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass().getName().toLowerCase(), input), DataType.CHARARRAY)); + } + + /* (non-Javadoc) + * @see org.apache.pig.EvalFunc#getArgToFuncMapping() + */ + @Override + public List getArgToFuncMapping() throws FrontendException { + List funcList = new ArrayList(); + funcList.add(new FuncSpec(this.getClass().getName(), new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY)))); + + return funcList; + } + +} Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestBuiltin.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestBuiltin.java?rev=957100&r1=957099&r2=957100&view=diff ============================================================================== --- hadoop/pig/trunk/test/org/apache/pig/test/TestBuiltin.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestBuiltin.java Wed Jun 23 03:37:11 2010 @@ -17,12 +17,19 @@ */ package org.apache.pig.test; -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import java.lang.reflect.Method; import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; import java.util.Map; +import java.util.Random; +import java.util.Set; import org.apache.pig.Algebraic; import org.apache.pig.EvalFunc; @@ -34,18 +41,35 @@ import org.apache.pig.backend.hadoop.dat import org.apache.pig.builtin.ARITY; import org.apache.pig.builtin.BagSize; import org.apache.pig.builtin.CONCAT; +import org.apache.pig.builtin.COR; 
import org.apache.pig.builtin.COUNT; import org.apache.pig.builtin.COUNT_STAR; +import org.apache.pig.builtin.COV; import org.apache.pig.builtin.DIFF; import org.apache.pig.builtin.Distinct; +import org.apache.pig.builtin.INDEXOF; +import org.apache.pig.builtin.LAST_INDEX_OF; +import org.apache.pig.builtin.LCFIRST; +import org.apache.pig.builtin.LOWER; import org.apache.pig.builtin.MapSize; import org.apache.pig.builtin.PigStorage; +import org.apache.pig.builtin.REGEX_EXTRACT; +import org.apache.pig.builtin.REGEX_EXTRACT_ALL; +import org.apache.pig.builtin.REPLACE; import org.apache.pig.builtin.SIZE; +import org.apache.pig.builtin.SPLIT; +import org.apache.pig.builtin.SUBSTRING; import org.apache.pig.builtin.StringConcat; import org.apache.pig.builtin.StringSize; +import org.apache.pig.builtin.TOBAG; import org.apache.pig.builtin.TOKENIZE; +import org.apache.pig.builtin.TOP; +import org.apache.pig.builtin.TOTUPLE; +import org.apache.pig.builtin.TRIM; import org.apache.pig.builtin.TextLoader; import org.apache.pig.builtin.TupleSize; +import org.apache.pig.builtin.UCFIRST; +import org.apache.pig.builtin.UPPER; import org.apache.pig.data.BagFactory; import org.apache.pig.data.DataBag; import org.apache.pig.data.DataByteArray; @@ -1106,6 +1130,413 @@ public class TestBuiltin { } + @Test + public void testMathFuncs() throws Exception { + Random generator = new Random(); + generator.setSeed(System.currentTimeMillis()); + Double delta = 0.1; + // We assume that UDFs are stored in org.apache.pig.builtin + // Change this test case if we add more hierarchy later\ + // Also, we assume that we have a function with math function + // associated with these UDF with a lowercase name + String[] mathFuncs = { + "SIN", + "SINH", + "ASIN", + "COS", + "COSH", + "ACOS", + "TAN", + "TANH", + "ATAN", + "LOG", + "LOG10", + "SQRT", + "CEIL", + "EXP", + "FLOOR", + "CBRT" + }; + String udfPackage = "org.apache.pig.builtin."; + //String[] mathNonStdFuncs = {}; + EvalFunc evalFunc; + Tuple tup; + Double input, actual, expected; + Method mathMethod; + String msg; + for(String func: mathFuncs) { + evalFunc = (EvalFunc) Class.forName(udfPackage + func).newInstance(); + tup = DefaultTupleFactory.getInstance().newTuple(1); + // double value between 0.0 and 1.0 + input = generator.nextDouble(); + tup.set(0, input); + mathMethod = Math.class.getDeclaredMethod(func.toLowerCase(), double.class); + actual = evalFunc.exec(tup); + expected = (Double)mathMethod.invoke(null, input); + msg = "[Testing " + func + " on input: " + input + " ( (actual) " + actual + " == " + expected + " (expected) )]"; + assertEquals(msg, actual, expected, delta); + } + } + + @Test + public void testStringFuncs() throws Exception { + // Since String functions are trivial we add test on per case basis + String inputStr = "Hello World!"; + String inputStrLower = "hello world!"; + String inputStrUpper = "HELLO WORLD!"; + String inputStrCamel = "hello World!"; + String inputStroWitha = "Hella Warld!"; + String inpuStrExtra = "Hello World! 
"; + + List l = new LinkedList(); + l.add(inputStr); + l.add("o"); + + String expected = null; + Tuple input; + String output; + Integer intOutput; + EvalFunc strFunc; + EvalFunc intFunc; + + strFunc = new LCFIRST(); + input = DefaultTupleFactory.getInstance().newTuple(inputStr); + expected = inputStrCamel; + output = strFunc.exec(input); + assertTrue(output.equals(expected)); + + strFunc = new UCFIRST(); + input = DefaultTupleFactory.getInstance().newTuple(inputStrCamel); + expected = inputStr; + output = strFunc.exec(input); + assertTrue(output.equals(expected)); + + intFunc = new LAST_INDEX_OF(); + input = DefaultTupleFactory.getInstance().newTuple(l); + intOutput = intFunc.exec(input); + assertTrue(intOutput.intValue()==7); + + intFunc = new INDEXOF(); + input = DefaultTupleFactory.getInstance().newTuple(l); + intOutput = intFunc.exec(input); + assertTrue(intOutput.intValue()==4); + + strFunc = new UPPER(); + input = DefaultTupleFactory.getInstance().newTuple(inputStr); + expected = inputStrUpper; + output = strFunc.exec(input); + assertTrue(output.equals(expected)); + + strFunc = new LOWER(); + input = DefaultTupleFactory.getInstance().newTuple(inputStr); + expected = inputStrLower; + output = strFunc.exec(input); + assertTrue(output.equals(expected)); + + strFunc = new REPLACE(); + l.clear(); + l.add(inputStr); + l.add("o"); + l.add("a"); + input = DefaultTupleFactory.getInstance().newTuple(l); + expected = inputStroWitha; + output = strFunc.exec(input); + assertTrue(output.equals(expected)); + + strFunc = new SUBSTRING(); + l.clear(); + l.add(inputStr); + l.add(1); + l.add(5); + input = DefaultTupleFactory.getInstance().newTuple(l); + expected = "ello"; + output = strFunc.exec(input); + assertTrue(output.equals(expected)); + + strFunc = new TRIM(); + input = DefaultTupleFactory.getInstance().newTuple(inpuStrExtra); + expected = inputStr; + output = strFunc.exec(input); + assertTrue(output.equals(expected)); + + SPLIT splitter = new SPLIT(); + Tuple test1 = TupleFactory.getInstance().newTuple(1); + Tuple test2 = TupleFactory.getInstance().newTuple(2); + Tuple test3 = TupleFactory.getInstance().newTuple(3); + + test2.set(0, "foo"); + test2.set(1, ":"); + Tuple splits = splitter.exec(test2); + assertEquals("no matches should return tuple with original string", 1, splits.size()); + assertEquals("no matches should return tuple with original string", "foo", + splits.get(0)); + + // test default delimiter + test1.set(0, "f ooo bar"); + splits = splitter.exec(test1); + assertEquals("split on default value ", 3, splits.size()); + assertEquals("f", splits.get(0)); + assertEquals("ooo", splits.get(1)); + assertEquals("bar", splits.get(2)); + + // test trimming of whitespace + test1.set(0, "foo bar "); + splits = splitter.exec(test1); + assertEquals("whitespace trimmed if no length arg", 2, splits.size()); + + // test forcing null matches with length param + test3.set(0, "foo bar "); + test3.set(1, "\\s"); + test3.set(2, 10); + splits = splitter.exec(test3); + assertEquals("length forces empty string matches on end", 5, splits.size()); + + // test limiting results with limit + test3.set(0, "foo:bar:baz"); + test3.set(1, ":"); + test3.set(2, 2); + splits = splitter.exec(test3); + assertEquals(2, splits.size()); + assertEquals("foo", splits.get(0)); + assertEquals("bar:baz", splits.get(1)); + + Tuple t1 = TupleFactory.getInstance().newTuple(3); + t1.set(0, "/search/iy/term1/test"); + t1.set(1, "^\\/search\\/iy\\/(.*?)\\/.*"); + t1.set(2, 1); + + Tuple t2 = TupleFactory.getInstance().newTuple(3); 
+ t2.set(0, "/search/iy/term1/test"); + t2.set(1, "^\\/search\\/iy\\/(.*?)\\/.*"); + t2.set(2, 2); + + Tuple t3 = TupleFactory.getInstance().newTuple(3); + t3.set(0, null); + t3.set(1, "^\\/search\\/iy\\/(.*?)\\/.*"); + t3.set(2, 2); + + REGEX_EXTRACT func = new REGEX_EXTRACT(); + String r = func.exec(t1); + assertTrue(r.equals("term1")); + r = func.exec(t2); + assertTrue(r==null); + r = func.exec(t3); + assertTrue(r==null); + + String matchRegex = "^(.+)\\b\\s+is a\\s+\\b(.+)$"; + TupleFactory tupleFactory = TupleFactory.getInstance(); + Tuple te1 = tupleFactory.newTuple(2); + te1.set(0,"this is a match"); + te1.set(1, matchRegex); + + Tuple te2 = tupleFactory.newTuple(2); + te2.set(0, "no match"); + te2.set(1, matchRegex); + + Tuple te3 = tupleFactory.newTuple(2); + te3.set(0, null); + te3.set(1, matchRegex); + + REGEX_EXTRACT_ALL funce = new REGEX_EXTRACT_ALL(); + Tuple re = funce.exec(te1); + assertEquals(re.size(), 2); + assertEquals("this", re.get(0)); + assertEquals("match", re.get(1)); + + re = funce.exec(te2); + assertTrue(re==null); + + re = funce.exec(te3); + assertTrue(re==null); + } + + @Test + public void testStatsFunc() throws Exception { + COV cov = new COV("a","b"); + DataBag dBag = DefaultBagFactory.getInstance().newDefaultBag(); + Tuple tup1 = DefaultTupleFactory.getInstance().newTuple(1); + tup1.set(0, 1.0); + dBag.add(tup1); + tup1 = DefaultTupleFactory.getInstance().newTuple(1); + tup1.set(0, 4.0); + dBag.add(tup1); + tup1 = DefaultTupleFactory.getInstance().newTuple(1); + tup1.set(0, 8.0); + dBag.add(tup1); + tup1 = DefaultTupleFactory.getInstance().newTuple(1); + tup1.set(0, 4.0); + dBag.add(tup1); + tup1 = DefaultTupleFactory.getInstance().newTuple(1); + tup1.set(0, 7.0); + dBag.add(tup1); + tup1 = DefaultTupleFactory.getInstance().newTuple(1); + tup1.set(0, 8.0); + dBag.add(tup1); + DataBag dBag1 = DefaultBagFactory.getInstance().newDefaultBag(); + tup1 = DefaultTupleFactory.getInstance().newTuple(1); + tup1.set(0, 2.0); + dBag1.add(tup1); + tup1 = DefaultTupleFactory.getInstance().newTuple(1); + tup1.set(0, 2.0); + dBag1.add(tup1); + tup1 = DefaultTupleFactory.getInstance().newTuple(1); + tup1.set(0, 3.0); + dBag1.add(tup1); + tup1 = DefaultTupleFactory.getInstance().newTuple(1); + tup1.set(0, 3.0); + dBag1.add(tup1); + tup1 = DefaultTupleFactory.getInstance().newTuple(1); + tup1.set(0, 2.0); + dBag1.add(tup1); + tup1 = DefaultTupleFactory.getInstance().newTuple(1); + tup1.set(0, 4.0); + dBag1.add(tup1); + Tuple input = DefaultTupleFactory.getInstance().newTuple(2); + input.set(0, dBag); + input.set(1, dBag1); + DataBag output = cov.exec(input); + Iterator it = output.iterator(); + Tuple ans = (Tuple)it.next(); + assertEquals((String)ans.get(0),"a"); + assertEquals((String)ans.get(1),"b"); + assertEquals(1.11111, (Double)ans.get(2),0.0005); + + COR cor = new COR("a","b"); + dBag = DefaultBagFactory.getInstance().newDefaultBag(); + tup1 = DefaultTupleFactory.getInstance().newTuple(1); + tup1.set(0, 1.0); + dBag.add(tup1); + tup1 = DefaultTupleFactory.getInstance().newTuple(1); + tup1.set(0, 4.0); + dBag.add(tup1); + tup1 = DefaultTupleFactory.getInstance().newTuple(1); + tup1.set(0, 8.0); + dBag.add(tup1); + tup1 = DefaultTupleFactory.getInstance().newTuple(1); + tup1.set(0, 4.0); + dBag.add(tup1); + tup1 = DefaultTupleFactory.getInstance().newTuple(1); + tup1.set(0, 7.0); + dBag.add(tup1); + tup1 = DefaultTupleFactory.getInstance().newTuple(1); + tup1.set(0, 8.0); + dBag.add(tup1); + dBag1 = DefaultBagFactory.getInstance().newDefaultBag(); + tup1 = 
DefaultTupleFactory.getInstance().newTuple(1); + tup1.set(0, 2.0); + dBag1.add(tup1); + tup1 = DefaultTupleFactory.getInstance().newTuple(1); + tup1.set(0, 2.0); + dBag1.add(tup1); + tup1 = DefaultTupleFactory.getInstance().newTuple(1); + tup1.set(0, 3.0); + dBag1.add(tup1); + tup1 = DefaultTupleFactory.getInstance().newTuple(1); + tup1.set(0, 3.0); + dBag1.add(tup1); + tup1 = DefaultTupleFactory.getInstance().newTuple(1); + tup1.set(0, 2.0); + dBag1.add(tup1); + tup1 = DefaultTupleFactory.getInstance().newTuple(1); + tup1.set(0, 4.0); + dBag1.add(tup1); + input = DefaultTupleFactory.getInstance().newTuple(2); + input.set(0, dBag); + input.set(1, dBag1); + output = cor.exec(input); + it = output.iterator(); + ans = (Tuple) it.next(); + assertEquals((String)ans.get(0),"a"); + assertEquals((String)ans.get(1),"b"); + assertEquals(0.582222509739582, (Double)ans.get(2) ,0.0005); + } + + private void checkItemsGT(Iterable tuples, int field, int limit) throws ExecException { + for (Tuple t : tuples) { + Long val = (Long) t.get(field); + assertTrue("Value "+ val + " exceeded the expected limit", val > limit); + } + } + + @Test + public void testMiscFunc() throws Exception { + TOBAG tb = new TOBAG(); + + Tuple input = TupleFactory.getInstance().newTuple(); + for (int i = 0; i < 100; ++i) { + input.append(i); + } + + Set s = new HashSet(); + DataBag db = tb.exec(input); + for (Tuple t : db) { + s.add((Integer) t.get(0)); + } + + // finally check the bag had everything we put in the tuple. + assertEquals(100, s.size()); + for (int i = 0; i < 100; ++i) { + assertTrue(s.contains(i)); + } + + TOTUPLE tt = new TOTUPLE(); + + input = TupleFactory.getInstance().newTuple(); + for (int i = 0; i < 100; ++i) { + input.append(i); + } + + Tuple output = tt.exec(input); + assertTrue(!(input == output)); + assertEquals(input, output); + + TOP top = new TOP(); + TupleFactory tupleFactory = DefaultTupleFactory.getInstance(); + BagFactory bagFactory = DefaultBagFactory.getInstance(); + Tuple inputTuple = tupleFactory.newTuple(3); + DataBag dBag = bagFactory.newDefaultBag(); + + // set N = 10 i.e retain top 10 tuples + inputTuple.set(0, 10); + // compare tuples by field number 1 + inputTuple.set(1, 1); + // set the data bag containing the tuples + inputTuple.set(2, dBag); + + // generate tuples of the form (group-1, 1), (group-2, 2) ... 
+ for (long i = 0; i < 100; i++) { + Tuple nestedTuple = tupleFactory.newTuple(2); + nestedTuple.set(0, "group-" + i); + nestedTuple.set(1, i); + dBag.add(nestedTuple); + } + + DataBag outBag = top.exec(inputTuple); + assertEquals(outBag.size(), 10L); + checkItemsGT(outBag, 1, 89); + + // two initial results + Tuple init1 = (new TOP.Initial()).exec(inputTuple); + Tuple init2 = (new TOP.Initial()).exec(inputTuple); + // two intermediate results + + DataBag intermedBag = bagFactory.newDefaultBag(); + intermedBag.add(init1); + intermedBag.add(init2); + Tuple intermedInput = tupleFactory.newTuple(intermedBag); + Tuple intermedOutput1 = (new TOP.Intermed()).exec(intermedInput); + Tuple intermedOutput2 = (new TOP.Intermed()).exec(intermedInput); + checkItemsGT((DataBag)intermedOutput1.get(2), 1, 94); + + // final result + DataBag finalInputBag = bagFactory.newDefaultBag(); + finalInputBag.add(intermedOutput1); + finalInputBag.add(intermedOutput2); + Tuple finalInput = tupleFactory.newTuple(finalInputBag); + outBag = (new TOP.Final()).exec(finalInput); + assertEquals(outBag.size(), 10L); + checkItemsGT(outBag, 1, 96); + } @Test public void testDistinct() throws Exception { @@ -1368,7 +1799,7 @@ public class TestBuiltin { assertTrue(f3.size() == arity3); Util.deleteFile(cluster, "input.txt"); } - + /* @Test public void testLFBin() throws Exception {