pig-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cheol...@apache.org
Subject svn commit: r1601186 - in /pig/trunk: CHANGES.txt src/docs/src/documentation/content/xdocs/func.xml src/docs/src/documentation/content/xdocs/pig-index.xml src/org/apache/pig/builtin/SPRINTF.java test/org/apache/pig/test/TestBuiltin.java
Date Sat, 07 Jun 2014 23:44:26 GMT
Author: cheolsoo
Date: Sat Jun  7 23:44:25 2014
New Revision: 1601186

URL: http://svn.apache.org/r1601186
Log:
PIG-3939: SPRINTF function to format strings using a printf-style template (mrflip via cheolsoo)

Added:
    pig/trunk/src/org/apache/pig/builtin/SPRINTF.java
Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/docs/src/documentation/content/xdocs/func.xml
    pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml
    pig/trunk/test/org/apache/pig/test/TestBuiltin.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1601186&r1=1601185&r2=1601186&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Sat Jun  7 23:44:25 2014
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
  
 IMPROVEMENTS
 
+PIG-3939: SPRINTF function to format strings using a printf-style template (mrflip via cheolsoo)
+
 PIG-3970: Merge Tez branch into trunk (daijy)
  
 OPTIMIZATIONS

Modified: pig/trunk/src/docs/src/documentation/content/xdocs/func.xml
URL: http://svn.apache.org/viewvc/pig/trunk/src/docs/src/documentation/content/xdocs/func.xml?rev=1601186&r1=1601185&r2=1601186&view=diff
==============================================================================
--- pig/trunk/src/docs/src/documentation/content/xdocs/func.xml (original)
+++ pig/trunk/src/docs/src/documentation/content/xdocs/func.xml Sat Jun  7 23:44:25 2014
@@ -924,37 +924,36 @@ DUMP X;
            <td>
              <p>DEFINE pluck PluckTuple(expression1)</p>
              <p>pluck(expression2)</p>
-            </td>
-          </tr>
-        </table>
+           </td>
+         </tr>
+       </table>
      </section>
-   </section>
-   
-   <section>
-     <title>Terms</title>
-     <table>
-       <tr>
-         <td>
-           <p>expression1</p>
-         </td>
-         <td>
-           <p>A prefix to pluck by</p>
-         </td>
-       </tr>
-       <tr>
-         <td>
-           <p>expression2</p>
-         </td>
-         <td>
-           <p>The fields to apply the pluck to, usually '*'</p>
-         </td>
-       </tr>
-     </table>
-   </section>
-   
-   <section>
-   <title>Usage</title>
-   <p>Example:</p>
+     
+     <section>
+       <title>Terms</title>
+       <table>
+	 <tr>
+           <td>
+             <p>expression1</p>
+           </td>
+           <td>
+             <p>A prefix to pluck by</p>
+           </td>
+	 </tr>
+	 <tr>
+           <td>
+             <p>expression2</p>
+           </td>
+           <td>
+             <p>The fields to apply the pluck to, usually '*'</p>
+           </td>
+	 </tr>
+       </table>
+     </section>
+     
+     <section>
+       <title>Usage</title>
+       <p>Example:</p>
 <source>
 a = load 'a' as (x, y);
 b = load 'b' as (x, y);
@@ -966,6 +965,7 @@ c: {a::x: bytearray,a::y: bytearray,b::x
 describe d;
 d: {plucked::a::x: bytearray,plucked::a::y: bytearray}
 </source>
+     </section>
    </section>
    
      <!-- ++++++++++++++++++++++++++++++++++++++++++++++ -->
@@ -3341,7 +3341,7 @@ Use the RANDOM function to return a pseu
     </p>
     <table>
 
-      <tr><th><p>val</p></th> <th><p>digits</p></th>
<th><p>mode</p></th> <th><p>ROUND(val, digits)</p></th></tr>
+      <tr><th><p>val</p></th> <th><p>digits</p></th>
<th><p>mode</p></th> <th><p>ROUND_TO(val, digits)</p></th></tr>
 
       <tr><td><p>  1234.1789</p> </td> <td><p>
8</p></td> <td><p></p></td> <td><p> 1234.1789</p>
</td> </tr>
       <tr><td><p>  1234.1789</p> </td> <td><p>
4</p></td> <td><p></p></td> <td><p> 1234.1789</p>
</td> </tr>
@@ -4140,6 +4140,104 @@ Use the RTRIM function to remove trailin
 </section>
 </section> 
 
+
+<!-- ======================================================== -->
+<section id="sprintf">
+  <title>SPRINTF</title>
+  <p>Formats a set of values according to a printf-style template, using the <a
href="http://docs.oracle.com/javase/7/docs/api/java/util/Formatter.html">native Java Formatter</a>
library.</p>
+
+  <section>
+    <title>Syntax</title>
+    <table>
+      <tr>
+        <td>
+          <p>SPRINTF(format, [...vals])</p>
+        </td>
+      </tr>
+
+  </table></section>
+
+  <section>
+    <title>Terms</title>
+    <table>
+      <tr>
+        <td>
+          <p>format</p>
+        </td>
+        <td>
+          <p>The printf-style string describing the template.</p>
+        </td>
+      </tr>
+      <tr>
+        <td>
+          <p>vals</p>
+        </td>
+        <td>
+          <p>
+            The values to place in the template.  There must be a tuple element
+            for each formatting placeholder, and it must have the correct type:
+            <code>int</code> or <code>long</code> for integer formats
such as
+            <code>%d</code>; <code>float</code> or <code>double</code>
for
+            decimal formats such as <code>%f</code>; and <code>long</code>
for
+            date/time formats such as <code>%t</code>.
+          </p>
+        </td>
+      </tr>
+    </table>
+  </section>
+
+  <section>
+    <title>Usage</title>
+    <p>
+      Use the SPRINTF function to format a string according to a template. For example, SPRINTF('part-%05d',
69) will return 'part-00069'.
+    </p>
+    <table>
+
+      <tr><th><p>String&nbsp;format&nbsp;specification</p></th>
<th><p>arg1</p></th> <th><p>arg2</p></th>
<th><p>arg3</p></th> <th><p>SPRINTF(format, arg1, arg2, arg3)</p></th>
<th><p>notes</p></th></tr>
+
+      <tr><td><p><code>'%8s|%8d|%-8s'</code></p></td>
+      <td><p><code>1234567</code></p></td>      <td><p><code>1234567</code></p></td>
   <td><p><code>'yay'</code></p></td>
+      <td><p><code>' 1234567| 1234567|yay     '</code></p></td>
+      <td><p>Format strings with %s, integers with %d. Types are converted for
you where reasonable (here, int -&gt; string).</p></td></tr>
+
+      <tr><td><p><code>'%8.3f|%6x'</code></p></td>
+      <td><p><code>123.14159</code></p></td>    <td><p><code>665568</code></p></td>
    <td><p><code></code></p></td>
+      <td><p><code>' 123.142| a27e0'</code></p></td>
+      <td><p>Format floats/doubles with %f, hexadecimal integers with %x (there
are others besides -- see the <a href='http://docs.oracle.com/javase/7/docs/api/java/util/Formatter.html'>Java
docs</a>)</p></td></tr>
+
+      <tr><td><p><code>'%,+10d|%(06d'</code></p></td>
+      <td><p><code>1234567</code></p></td>      <td><p><code>-123</code></p></td>
      <td><p><code></code></p></td>
+      <td><p><code>'+1,234,567|(0123)'</code></p></td>
+      <td><p>Numerics take a prefix modifier: <code>,</code> for
locale-specific thousands-delimiting; <code>0</code> for zero-padding; <code>+</code> to always
show a plus sign for positive numbers; space <code> </code> to allow a space preceding
positive numbers; <code>(</code> to indicate negative numbers with parentheses
(accountant-style).</p></td></tr>
+
+      <tr><td><p><code>'%2$5d: %3$6s %1$3s %2$4x (%&lt;4X)'</code></p></td>
+      <td><p><code>'the'</code></p></td>   <td><p><code>48879</code></p></td>
          <td><p><code>'wheres'</code></p></td>
+      <td><p><code>'48879: wheres the beef (BEEF)'</code></p></td>
+      <td><p>Refer to args positionally and as many times as you like using <code>%(pos)$...</code>.
Use <code>%&lt;...</code> to refer to the previously-specified arg.</p></td></tr>
+
+
+      <tr><td><p><code>'Launch Time: %14d %s'</code></p></td>
+        <td><p><code>ToMilliSeconds(CurrentTime())</code></p></td>
  <td><p><code>ToString(CurrentTime(), 'yyyy-MM-dd HH:mm:ss Z')</code></p></td>
    <td><p><code></code></p></td>
+        <td><p><code>'Launch Time:  1400164132000 2014-05-15 09:28:52 -0500'</code></p></td>
+        <td><p>Rather than <code>%t</code>, use ToString to format the date/time portions and SPRINTF
to lay out the results.</p></td></tr>
+      
+      <tr><td><p><code>'%8s|%-8s'</code></p></td>
                <td><p><code>1234567</code></p></td> <td><p><code></code></p></td>
<td><p><code></code></p></td>
+      <td><p><code>MissingFormatArgumentException: Format specifier '%-8s'
</code></p></td><td><p>You must supply arguments for all specifiers</p></td></tr>
+
+      <tr><td><p><code>'%8s'</code></p></td>  
                   <td><p><code>1234567</code></p></td>
<td><p><code>'ignored'</code></p></td> <td><p><code>'also'</code></p></td>
+      <td><p><code> 1234567</code></p></td>         
           <td><p>It's OK to supply too many, though</p></td></tr>
+
+    </table>
+    <p>
+      <em>Note: although the Java formatter (and thus this function) offers the
+      <code>%t</code> specifier for date/time elements, it's best avoided: it's
+      cumbersome, the output and timezone handling may differ from what you
+      expect, and it doesn't accept datetime objects from pig. Instead, just
+      prepare dates using the ToString UDF as shown.</em>
+    </p>
+  </section>
+</section>
+
 <!-- ======================================================== -->  
 <section id="startswith">
    <title>STARTSWITH</title>

Modified: pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml
URL: http://svn.apache.org/viewvc/pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml?rev=1601186&r1=1601185&r2=1601186&view=diff
==============================================================================
--- pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml (original)
+++ pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml Sat Jun  7 23:44:25 2014
@@ -925,6 +925,8 @@
 
 <p><a href="perf.html#splits">splits</a> (implicit, explicit)</p>
 
+<p><a href="func.html#sprintf">SPRINTF</a> function</p>
+
 <p><a href="func.html#sqrt">SQRT</a> function</p>
 
 <p><a href="basic.html#sexp">star expression</a> ( * )</p>

Added: pig/trunk/src/org/apache/pig/builtin/SPRINTF.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/SPRINTF.java?rev=1601186&view=auto
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/SPRINTF.java (added)
+++ pig/trunk/src/org/apache/pig/builtin/SPRINTF.java Sat Jun  7 23:44:25 2014
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.builtin;
+
+import java.io.IOException;
+
+import java.util.Formatter;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.PigException;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+
+import org.apache.pig.backend.executionengine.ExecException;
+
+/**
+ * Formatted strings using java.util.Formatter
+ *
+ * See http://docs.oracle.com/javase/7/docs/api/java/util/Formatter.html
+ *
+ * ex:
+ *     SPRINTF('%2$10s %1$-17s %2$,10d %2$8x %3$10.3f %4$1TFT%<tT%<tz',
+ *         city, pop_2011, (float)(pop_2011/69.0f), (long)(pop_2011 * 1000000L));
+ *
+ *     -'   8244910 New York           8,244,910   7dceae 119491.453 2231-04-09T23:46:40-0500'
+ */
+public class SPRINTF extends EvalFunc<String> {
+
+    @Override
+    public String exec(Tuple input) throws IOException {
+        StringBuilder sb        = new StringBuilder();
+        Formatter     formatter = new Formatter(sb);
+        try{
+            if (input == null || input.size() == 0) return null;
+
+            String   fmt  = String.valueOf(input.get(0));
+            Object[] args = new Object[input.size()-1];
+            for (int i = 1; i < input.size(); i++) {
+                args[i-1] =  input.get(i);
+            }
+
+            formatter.format(fmt, args);
+            return sb.toString();
+        } catch (ExecException exp) {
+            throw exp;
+        } catch (Exception err) {
+            int errCode = 2106;
+            String msg = "Error while computing string format in " +
+                this.getClass().getSimpleName() + " -- " + err.toString();
+            throw new ExecException(msg, errCode, PigException.BUG, err);
+        } finally {
+            formatter.close();
+        }
+    }
+
+    @Override
+    public Schema outputSchema(Schema input) {
+        return new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY));
+    }
+
+    @Override
+    public SchemaType getSchemaType() {
+        return SchemaType.VARARG;
+    }
+}

Modified: pig/trunk/test/org/apache/pig/test/TestBuiltin.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestBuiltin.java?rev=1601186&r1=1601185&r2=1601186&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestBuiltin.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestBuiltin.java Sat Jun  7 23:44:25 2014
@@ -87,6 +87,7 @@ import org.apache.pig.builtin.ROUND;
 import org.apache.pig.builtin.ROUND_TO;
 import org.apache.pig.builtin.RTRIM;
 import org.apache.pig.builtin.SIZE;
+import org.apache.pig.builtin.SPRINTF;
 import org.apache.pig.builtin.STRSPLIT;
 import org.apache.pig.builtin.SUBSTRING;
 import org.apache.pig.builtin.SecondsBetween;
@@ -2428,7 +2429,59 @@ public class TestBuiltin {
         its = pigServer.openIterator("B");
         t = its.next();
         assertEquals("abcd",t.get(0));
-        
+
+        // Concat on a null value returns null
+        pigServer.registerQuery("B = foreach A generate CONCAT('a', CONCAT('b',Null), 'd');");
+        its = pigServer.openIterator("B");
+        t = its.next();
+        assertNull(t.get(0));
+    }
+
+    @Test
+    public void testSPRINTF() throws Exception {
+        // String Sprintf
+        String  fmt = "%2$10s <%1$-6s< %2$,10d >%1$7s> %2$8x %3$10.3f";
+        String  s1  = "meep";
+        Integer ii  = 665568;
+        Float   ff  = 993.14159265f;
+        String  exp = "    665568 <meep  <    665,568 >   meep>    a27e0    993.142";
+        Tuple   ts;
+        String  res;
+        EvalFunc<String> sprinter = new SPRINTF();
+        //
+        // Formats output, happily navigating strings, numbers, etc
+        ts = TupleFactory.getInstance().newTuple(5);
+        ts.set(0, fmt);
+        ts.set(1, s1);
+        ts.set(2, ii);
+        ts.set(3, ff);
+        ts.set(4, (long)(ii * 1000000L));
+        res = sprinter.exec(ts);
+        assertEquals(exp, res);
+        //
+        // Happy with float/double, int/long
+        ts.set(2, 665568l);
+        ts.set(3, 993.14159265d);
+        res = sprinter.exec(ts);
+        assertEquals(exp, res);
+        //
+        // Works with just one arg
+        ts = TupleFactory.getInstance().newTuple(1);
+        ts.set(0, "meep!");
+        res = sprinter.exec(ts);
+        assertEquals("meep!", res);
+
+        // Test in script
+        //
+        String input = "vararg_sprintf_test_jira_3939.txt";
+        Util.createLocalInputFile(input, new String[]{"dummy"});
+        PigServer pigServer = new PigServer(ExecType.LOCAL);
+        pigServer.registerQuery("A = LOAD '"+input+"' as (x:chararray);");
+        //
+        pigServer.registerQuery("B = foreach A generate SPRINTF('%6s|%-8s|%2$,+12d %2$8x',
'yay', 665568);");
+        Iterator<Tuple> its = pigServer.openIterator("B");
+        Tuple t = its.next();
+        assertEquals("   yay|665568  |    +665,568    a27e0", t.get(0));
     }
 
     @Test



Mime
View raw message