avro-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject svn commit: r1172742 - in /avro/trunk: CHANGES.txt lang/py/scripts/avro lang/py/test/test_script.py
Date Mon, 19 Sep 2011 18:48:55 GMT
Author: cutting
Date: Mon Sep 19 18:48:54 2011
New Revision: 1172742

URL: http://svn.apache.org/viewvc?rev=1172742&view=rev
Log:
AVRO-858. Python: Add --fields option to 'avro cat' command.  Contributed by Miki Tebeka.

Modified:
    avro/trunk/CHANGES.txt
    avro/trunk/lang/py/scripts/avro
    avro/trunk/lang/py/test/test_script.py

Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=1172742&r1=1172741&r2=1172742&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Mon Sep 19 18:48:54 2011
@@ -54,6 +54,9 @@ Avro 1.6.0 (unreleased)
     top-level pom.xml as parent, permitting use of Maven versions
     plugin.  (cutting)
 
+    AVRO-858. Python: Add --fields option to 'avro cat' command.
+    (Miki Tebeka via cutting)
+
   BUG FIXES
 
     AVRO-824. Java: Fix usage message of BinaryFragmentToJsonTool.

Modified: avro/trunk/lang/py/scripts/avro
URL: http://svn.apache.org/viewvc/avro/trunk/lang/py/scripts/avro?rev=1172742&r1=1172741&r2=1172742&view=diff
==============================================================================
--- avro/trunk/lang/py/scripts/avro (original)
+++ avro/trunk/lang/py/scripts/avro Mon Sep 19 18:48:54 2011
@@ -62,6 +62,19 @@ def select_printer(format):
 def record_match(expr, record):
     return eval(expr, None, {"r" : record})
 
+def parse_fields(fields):
+    fields = fields or ''
+    if not fields.strip():
+        return None
+
+    return [field.strip() for field in fields.split(',') if field.strip()]
+
+def field_selector(fields):
+    fields = set(fields)
+    def keys_filter(obj):
+        return dict((k, obj[k]) for k in (set(obj) & fields))
+    return keys_filter
+
 def print_avro(avro, opts):
     if opts.header and (opts.format != "csv"):
         raise AvroError("--header applies only to CSV format")
@@ -76,6 +89,10 @@ def print_avro(avro, opts):
         except StopIteration:
             return
 
+    fields = parse_fields(opts.fields)
+    if fields:
+        avro = imap(field_selector(fields), avro)
+
     printer = select_printer(opts.format)
     for i, record in enumerate(avro):
         if i == 0 and opts.header:
@@ -210,6 +227,8 @@ def main(argv=None):
                     default=None)
     cat_options.add_option("--print-schema", help="print schema",
                       action="store_true", default=False)
+    cat_options.add_option('--fields', default=None,
+                help='fields to show, comma separated (show all by default)')
     parser.add_option_group(cat_options)
 
     # write options

Modified: avro/trunk/lang/py/test/test_script.py
URL: http://svn.apache.org/viewvc/avro/trunk/lang/py/test/test_script.py?rev=1172742&r1=1172741&r2=1172742&view=diff
==============================================================================
--- avro/trunk/lang/py/test/test_script.py (original)
+++ avro/trunk/lang/py/test/test_script.py Mon Sep 19 18:48:54 2011
@@ -151,6 +151,24 @@ class TestCat(unittest.TestCase):
         out = self._run(self.avro_file)
         assert len(out) == 2 * NUM_RECORDS
 
+    def test_fields(self):
+        # One field selection (no comma)
+        out = self._run('--fields', 'last')
+        assert json.loads(out[0]) == {'last': 'duck'}
+
+        # Field selection (with comma and space)
+        out = self._run('--fields', 'first, last')
+        assert json.loads(out[0]) == {'first': 'daffy', 'last': 'duck'}
+
+        # Empty fields should get all
+        out = self._run('--fields', '')
+        assert json.loads(out[0]) == \
+                {'first': 'daffy', 'last': 'duck', 'type': 'duck'}
+
+        # Non existing fields are ignored
+        out = self._run('--fields', 'first,last,age')
+        assert json.loads(out[0]) == {'first': 'daffy', 'last': 'duck'}
+
 class TestWrite(unittest.TestCase):
     def setUp(self):
         self.json_file = tempfile() + ".json"



Mime
View raw message