Return-Path: Delivered-To: apmail-hadoop-avro-commits-archive@minotaur.apache.org Received: (qmail 10094 invoked from network); 15 Apr 2009 05:18:08 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 15 Apr 2009 05:18:08 -0000 Received: (qmail 90974 invoked by uid 500); 15 Apr 2009 05:18:05 -0000 Delivered-To: apmail-hadoop-avro-commits-archive@hadoop.apache.org Received: (qmail 90952 invoked by uid 500); 15 Apr 2009 05:18:05 -0000 Mailing-List: contact avro-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: avro-dev@hadoop.apache.org Delivered-To: mailing list avro-commits@hadoop.apache.org Received: (qmail 90942 invoked by uid 99); 15 Apr 2009 05:18:05 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 15 Apr 2009 05:18:05 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 15 Apr 2009 05:17:58 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 47B762388970; Wed, 15 Apr 2009 05:17:37 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r765052 - in /hadoop/avro/trunk: CHANGES.txt src/py/avro/generic.py src/py/avro/reflect.py Date: Wed, 15 Apr 2009 05:17:36 -0000 To: avro-commits@hadoop.apache.org From: sharad@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20090415051737.47B762388970@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: sharad Date: Wed Apr 15 05:17:36 2009 New Revision: 765052 URL: http://svn.apache.org/viewvc?rev=765052&view=rev Log: AVRO-13. Use dictionary instead of if-else in validate. Modified: hadoop/avro/trunk/CHANGES.txt hadoop/avro/trunk/src/py/avro/generic.py hadoop/avro/trunk/src/py/avro/reflect.py Modified: hadoop/avro/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=765052&r1=765051&r2=765052&view=diff ============================================================================== --- hadoop/avro/trunk/CHANGES.txt (original) +++ hadoop/avro/trunk/CHANGES.txt Wed Apr 15 05:17:36 2009 @@ -15,6 +15,8 @@ AVRO-11. Re-implement specific and reflect datum readers and writers to leverage AVRO-6. (cutting) + AVRO-13. Use dictionary instead of if-else in validate. (sharad) + OPTIMIZATIONS BUG FIXES Modified: hadoop/avro/trunk/src/py/avro/generic.py URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/py/avro/generic.py?rev=765052&r1=765051&r2=765052&view=diff ============================================================================== --- hadoop/avro/trunk/src/py/avro/generic.py (original) +++ hadoop/avro/trunk/src/py/avro/generic.py Wed Apr 15 05:17:36 2009 @@ -34,56 +34,61 @@ import avro.io as io import avro.ipc as ipc -def validate(schm, object): - """Returns True if a python datum matches a schema.""" - - if schm.gettype() == schema.NULL: - return object is None - elif schm.gettype() == schema.STRING: - return isinstance(object, unicode) - elif schm.gettype() == schema.INT: - if ((isinstance(object, long) or isinstance(object, int)) - and io._INT_MIN_VALUE <= object <= io._INT_MAX_VALUE): - return True - elif schm.gettype() == schema.LONG: - if ((isinstance(object, long) or isinstance(object, int)) - and io._LONG_MIN_VALUE <= object <= io._LONG_MAX_VALUE): - return True - elif schm.gettype() == schema.FLOAT: - return isinstance(object, float) - elif schm.gettype() == schema.DOUBLE: - return isinstance(object, float) - elif schm.gettype() == schema.BYTES: - return isinstance(object, str) - elif schm.gettype() == schema.BOOLEAN: - return isinstance(object, bool) - elif schm.gettype() == schema.ARRAY: - if not isinstance(object, list): - return False - for elem in object: - if not validate(schm.getelementtype(), elem): - return False - return True - elif schm.gettype() == schema.MAP: - if not isinstance(object, dict): +def _validatearray(schm, object): + if not isinstance(object, list): + return False + for elem in object: + if not validate(schm.getelementtype(), elem): return False - for k,v in object.items(): - if not (validate(schm.getkeytype(), k) and - validate(schm.getvaluetype(), v)): - return False - return True - elif schm.gettype() == schema.RECORD: - if not isinstance(object, dict): + return True + +def _validatemap(schm, object): + if not isinstance(object, dict): + return False + for k,v in object.items(): + if not (validate(schm.getkeytype(), k) and + validate(schm.getvaluetype(), v)): return False - for field,fieldschema in schm.getfields(): - if not validate(fieldschema, object.get(field)): - return False - return True - elif schm.gettype() == schema.UNION: - for elemtype in schm.getelementtypes(): - if validate(elemtype, object): - return True + return True + +def _validaterecord(schm, object): + if not isinstance(object, dict): return False + for field,fieldschema in schm.getfields(): + if not validate(fieldschema, object.get(field)): + return False + return True + +def _validateunion(schm, object): + for elemtype in schm.getelementtypes(): + if validate(elemtype, object): + return True + return False + +_validatefn = { + schema.NULL : lambda schm, object: object is None, + schema.BOOLEAN : lambda schm, object: isinstance(object, bool), + schema.STRING : lambda schm, object: isinstance(object, unicode), + schema.FLOAT : lambda schm, object: isinstance(object, float), + schema.DOUBLE : lambda schm, object: isinstance(object, float), + schema.BYTES : lambda schm, object: isinstance(object, str), + schema.INT : lambda schm, object: ((isinstance(object, long) or + isinstance(object, int)) and + io._INT_MIN_VALUE <= object <= io._INT_MAX_VALUE), + schema.LONG : lambda schm, object: ((isinstance(object, long) or + isinstance(object, int)) and + io._LONG_MIN_VALUE <= object <= io._LONG_MAX_VALUE), + schema.ARRAY : _validatearray, + schema.MAP : _validatemap, + schema.RECORD : _validaterecord, + schema.UNION : _validateunion + } + +def validate(schm, object): + """Returns True if a python datum matches a schema.""" + fn = _validatefn.get(schm.gettype()) + if fn is not None: + return fn(schm, object) else: return False Modified: hadoop/avro/trunk/src/py/avro/reflect.py URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/py/avro/reflect.py?rev=765052&r1=765051&r2=765052&view=diff ============================================================================== --- hadoop/avro/trunk/src/py/avro/reflect.py (original) +++ hadoop/avro/trunk/src/py/avro/reflect.py Wed Apr 15 05:17:36 2009 @@ -24,56 +24,63 @@ #TODO pkgname should not be passed, instead classes should be constructed #based on schema namespace -def validate(schm, pkgname, object): - """Returns True if a python datum matches a schema.""" - if schm.gettype() == schema.NULL: - return object is None - elif schm.gettype() == schema.STRING: - return isinstance(object, unicode) - elif schm.gettype() == schema.INT: - if ((isinstance(object, long) or isinstance(object, int)) - and io._INT_MIN_VALUE <= object <= io._INT_MAX_VALUE): - return True - elif schm.gettype() == schema.LONG: - if ((isinstance(object, long) or isinstance(object, int)) - and io._LONG_MIN_VALUE <= object <= io._LONG_MAX_VALUE): - return True - elif schm.gettype() == schema.FLOAT: - return isinstance(object, float) - elif schm.gettype() == schema.DOUBLE: - return isinstance(object, float) - elif schm.gettype() == schema.BYTES: - return isinstance(object, str) - elif schm.gettype() == schema.BOOLEAN: - return isinstance(object, bool) - elif schm.gettype() == schema.ARRAY: - if not isinstance(object, list): - return False - for elem in object: - if not validate(schm.getelementtype(), pkgname, elem): - return False - return True - elif schm.gettype() == schema.MAP: - if not isinstance(object, dict): + +def _validatearray(schm, pkgname, object): + if not isinstance(object, list): + return False + for elem in object: + if not validate(schm.getelementtype(), pkgname, elem): return False - for k,v in object.items(): - if not (validate(schm.getkeytype(), pkgname, k) and - validate(schm.getvaluetype(), pkgname, v)): - return False - return True - elif schm.gettype() == schema.RECORD: - if not isinstance(object, gettype(schm.getname(), pkgname)): + return True + +def _validatemap(schm, pkgname, object): + if not isinstance(object, dict): + return False + for k,v in object.items(): + if not (validate(schm.getkeytype(), pkgname, k) and + validate(schm.getvaluetype(), pkgname, v)): return False - for field,fieldschema in schm.getfields(): - data = object.__getattribute__(field) - if not validate(fieldschema, pkgname, data): - return False - return True - elif schm.gettype() == schema.UNION: - for elemtype in schm.getelementtypes(): - if validate(elemtype, pkgname, object): - return True + return True + +def _validaterecord(schm, pkgname, object): + if not isinstance(object, gettype(schm.getname(), pkgname)): return False + for field,fieldschema in schm.getfields(): + data = object.__getattribute__(field) + if not validate(fieldschema, pkgname, data): + return False + return True + +def _validateunion(schm, pkgname, object): + for elemtype in schm.getelementtypes(): + if validate(elemtype, pkgname, object): + return True + return False + +_validatefn = { + schema.NULL : lambda schm, pkgname, object: object is None, + schema.BOOLEAN : lambda schm, pkgname, object: isinstance(object, bool), + schema.STRING : lambda schm, pkgname, object: isinstance(object, unicode), + schema.FLOAT : lambda schm, pkgname, object: isinstance(object, float), + schema.DOUBLE : lambda schm, pkgname, object: isinstance(object, float), + schema.BYTES : lambda schm, pkgname, object: isinstance(object, str), + schema.INT : lambda schm, pkgname, object: ((isinstance(object, long) or + isinstance(object, int)) and + io._INT_MIN_VALUE <= object <= io._INT_MAX_VALUE), + schema.LONG : lambda schm, pkgname, object: ((isinstance(object, long) or + isinstance(object, int)) and + io._LONG_MIN_VALUE <= object <= io._LONG_MAX_VALUE), + schema.ARRAY : _validatearray, + schema.MAP : _validatemap, + schema.RECORD : _validaterecord, + schema.UNION : _validateunion + } + +def validate(schm, pkgname, object): + """Returns True if a python datum matches a schema.""" + fn = _validatefn.get(schm.gettype()) + if fn is not None: + return fn(schm, pkgname, object) else: return False