Return-Path: X-Original-To: apmail-pig-commits-archive@www.apache.org Delivered-To: apmail-pig-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id DE5FA9565 for ; Mon, 14 Nov 2011 07:28:01 +0000 (UTC) Received: (qmail 64749 invoked by uid 500); 14 Nov 2011 07:28:01 -0000 Delivered-To: apmail-pig-commits-archive@pig.apache.org Received: (qmail 64682 invoked by uid 500); 14 Nov 2011 07:28:00 -0000 Mailing-List: contact commits-help@pig.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@pig.apache.org Delivered-To: mailing list commits@pig.apache.org Received: (qmail 64671 invoked by uid 99); 14 Nov 2011 07:28:00 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 14 Nov 2011 07:28:00 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 14 Nov 2011 07:27:56 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 06B4F2388993 for ; Mon, 14 Nov 2011 07:27:35 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1201618 - in /pig/trunk: CHANGES.txt src/org/apache/pig/EvalFunc.java src/org/apache/pig/builtin/OutputSchema.java test/org/apache/pig/test/TestEvalFuncOutputAnnotation.java Date: Mon, 14 Nov 2011 07:27:34 -0000 To: commits@pig.apache.org From: dvryaboy@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20111114072735.06B4F2388993@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: dvryaboy Date: Mon Nov 14 07:27:34 2011 New Revision: 1201618 URL: http://svn.apache.org/viewvc?rev=1201618&view=rev Log: PIG-2151: Add annotation to specify output 
schema in Java UDFs (dvryaboy) Added: pig/trunk/src/org/apache/pig/builtin/OutputSchema.java pig/trunk/test/org/apache/pig/test/TestEvalFuncOutputAnnotation.java Modified: pig/trunk/CHANGES.txt pig/trunk/src/org/apache/pig/EvalFunc.java Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1201618&r1=1201617&r2=1201618&view=diff ============================================================================== --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Mon Nov 14 07:27:34 2011 @@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES IMPROVEMENTS +PIG-2151: Add annotation to specify output schema in Java UDFs (dvryaboy) + PIG-2230: Improved error message for invalid parameter format (xuitingz via olgan) PIG-2328: Add builtin UDFs for building and using bloom filters (gates) Modified: pig/trunk/src/org/apache/pig/EvalFunc.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/EvalFunc.java?rev=1201618&r1=1201617&r2=1201618&view=diff ============================================================================== --- pig/trunk/src/org/apache/pig/EvalFunc.java (original) +++ pig/trunk/src/org/apache/pig/EvalFunc.java Mon Nov 14 07:27:34 2011 @@ -22,12 +22,13 @@ import java.io.IOException; import java.lang.reflect.ParameterizedType; import java.lang.reflect.Type; import java.util.List; -import java.util.Properties; import java.util.Stack; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; - +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PigLogger; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PigProgressable; +import org.apache.pig.builtin.OutputSchema; import org.apache.pig.classification.InterfaceAudience; import org.apache.pig.classification.InterfaceStability; import org.apache.pig.data.Tuple; @@ -35,9 +36,8 @@ import org.apache.pig.impl.PigContext; import org.apache.pig.impl.logicalLayer.FrontendException; import 
org.apache.pig.impl.logicalLayer.schema.Schema; import org.apache.pig.impl.util.UDFContext; -import org.apache.pig.LoadPushDown.RequiredFieldList; -import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PigLogger; -import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PigProgressable; +import org.apache.pig.impl.util.Utils; +import org.apache.pig.parser.ParserException; /** @@ -215,11 +215,20 @@ public abstract class EvalFunc { * Report the schema of the output of this UDF. Pig will make use of * this in error checking, optimization, and planning. The schema * of input data to this UDF is provided. + *

+ * The default implementation interprets the {@link OutputSchema} annotation, + * if one is present. Otherwise, it returns null (no known output schema). + * * @param input Schema of the input * @return Schema of the output */ public Schema outputSchema(Schema input) { - return null; + OutputSchema schema = this.getClass().getAnnotation(OutputSchema.class); + try { + return (schema == null) ? null : Utils.getSchemaFromString(schema.value()); + } catch (ParserException e) { + throw new RuntimeException(e); + } } /** Added: pig/trunk/src/org/apache/pig/builtin/OutputSchema.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/OutputSchema.java?rev=1201618&view=auto ============================================================================== --- pig/trunk/src/org/apache/pig/builtin/OutputSchema.java (added) +++ pig/trunk/src/org/apache/pig/builtin/OutputSchema.java Mon Nov 14 07:27:34 2011 @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pig.builtin; + +import java.lang.annotation.Documented; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; + +import org.apache.pig.EvalFunc; +import org.apache.pig.classification.InterfaceAudience; +import org.apache.pig.classification.InterfaceStability; + +/** + * An EvalFunc can be annotated with an OutputSchema to + * tell Pig what the expected output is. This can be used in place + * of {@link EvalFunc#outputSchema(Schema)} + *

+ * The default implementation of {@link EvalFunc#outputSchema(Schema)} + * will look at this annotation and return an interpreted schema, if the annotation is present. + *

+ * Implementing a custom {@link EvalFunc#outputSchema(Schema)} will + * override the annotation (unless you deal with it explicitly, or by calling super.outputSchema(schema)). + *

+ * Here's an example of a complex schema declared in an annotation: + * @OutputSchema("y:bag{t:tuple(len:int,word:chararray)}") + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +@Documented +@Retention(value=RetentionPolicy.RUNTIME) +public @interface OutputSchema { + String value(); +} Added: pig/trunk/test/org/apache/pig/test/TestEvalFuncOutputAnnotation.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestEvalFuncOutputAnnotation.java?rev=1201618&view=auto ============================================================================== --- pig/trunk/test/org/apache/pig/test/TestEvalFuncOutputAnnotation.java (added) +++ pig/trunk/test/org/apache/pig/test/TestEvalFuncOutputAnnotation.java Mon Nov 14 07:27:34 2011 @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pig.test; + +import static org.junit.Assert.*; +import java.io.IOException; + +import org.apache.pig.EvalFunc; +import org.apache.pig.builtin.OutputSchema; +import org.apache.pig.data.DataBag; +import org.apache.pig.data.DataType; +import org.apache.pig.data.Tuple; +import org.apache.pig.impl.logicalLayer.FrontendException; +import org.apache.pig.impl.logicalLayer.schema.Schema; +import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; +import org.junit.Test; + +import com.google.common.collect.Lists; + +public class TestEvalFuncOutputAnnotation { + + @OutputSchema("foo:chararray") + public static class AnnotatedFunc extends EvalFunc { + @Override + public String exec(Tuple input) throws IOException { + return null; + } + } + + @OutputSchema("foo:chararray") + public static class OverriddenFunc extends EvalFunc { + @Override + public String exec(Tuple input) throws IOException { + return null; + } + @Override + public Schema outputSchema(Schema input) { + return new Schema(new FieldSchema("bar", DataType.CHARARRAY)); + } + } + + // This would give the same result: "y:bag{tuple(len:int,word:chararray)}" + @OutputSchema("y:bag{t:tuple(len:int,word:chararray)}") + public static class ComplexFunc extends EvalFunc { + @Override + public DataBag exec(Tuple input) throws IOException { + return null; + } + } + + public static class UnannotatedFunc extends EvalFunc { + @Override + public DataBag exec(Tuple input) throws IOException { + return null; + } + } + + @Test + public void testSimpleAnnotation() { + EvalFunc myFunc =new AnnotatedFunc(); + Schema s = new Schema(new FieldSchema("foo", DataType.CHARARRAY)); + assertEquals(s, myFunc.outputSchema(null)); + } + + @Test + public void testOverriddenAnnotation() { + EvalFunc myFunc =new OverriddenFunc(); + Schema s = new Schema(new FieldSchema("bar", DataType.CHARARRAY)); + assertEquals(s, myFunc.outputSchema(null)); + } + + @Test + public void testUnannotated() { + EvalFunc myFunc = new 
UnannotatedFunc(); + assertNull(myFunc.outputSchema(null)); + } + + @Test + public void testComplex() throws FrontendException { + EvalFunc myFunc = new ComplexFunc(); + // y:bag{t:tuple(len:int,word:chararray)} + Schema ts = new Schema(Lists.asList(new FieldSchema("len", DataType.INTEGER), + new FieldSchema[] {new FieldSchema("word", DataType.CHARARRAY)})); + // Pig silently drops the name of a tuple the bag holds, since it's more or less invisible. + FieldSchema bfs = new FieldSchema(null, ts, DataType.TUPLE); + Schema bs = new Schema(); + bs.add(bfs); + Schema s = new Schema(); + s.add(new FieldSchema("y", bs, DataType.BAG)); + assertEquals(s, myFunc.outputSchema(null)); + } + + +}