Return-Path: X-Original-To: apmail-pig-commits-archive@www.apache.org Delivered-To: apmail-pig-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 18620D991 for ; Sun, 2 Sep 2012 22:38:18 +0000 (UTC) Received: (qmail 42572 invoked by uid 500); 2 Sep 2012 22:38:18 -0000 Delivered-To: apmail-pig-commits-archive@pig.apache.org Received: (qmail 42537 invoked by uid 500); 2 Sep 2012 22:38:17 -0000 Mailing-List: contact commits-help@pig.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@pig.apache.org Delivered-To: mailing list commits@pig.apache.org Received: (qmail 42528 invoked by uid 99); 2 Sep 2012 22:38:17 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 02 Sep 2012 22:38:17 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 02 Sep 2012 22:38:15 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 3C5CE2388980 for ; Sun, 2 Sep 2012 22:37:31 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1380078 - in /pig/trunk: CHANGES.txt src/org/apache/pig/builtin/Utf8StorageConverter.java Date: Sun, 02 Sep 2012 22:37:31 -0000 To: commits@pig.apache.org From: dvryaboy@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20120902223731.3C5CE2388980@eris.apache.org> Author: dvryaboy Date: Sun Sep 2 22:37:30 2012 New Revision: 1380078 URL: http://svn.apache.org/viewvc?rev=1380078&view=rev Log: PIG-2835: Optimizing the convertion from bytes to Integer/Long Modified: pig/trunk/CHANGES.txt pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java Modified: 
pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1380078&r1=1380077&r2=1380078&view=diff ============================================================================== --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Sun Sep 2 22:37:30 2012 @@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES IMPROVEMENTS +PIG-2835: Optimizing the convertion from bytes to Integer/Long (jay23jack via dvryaboy) + PIG-2886: Add Scan TimeRange to HBaseStorage (ted.m via dvryaboy) PIG-2895: jodatime jar missing in pig-withouthadoop.jar (thejas) Modified: pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java?rev=1380078&r1=1380077&r2=1380078&view=diff ============================================================================== --- pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java (original) +++ pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java Sun Sep 2 22:37:30 2012 @@ -398,15 +398,41 @@ public class Utf8StorageConverter implem return null; } } + + /** + * Sanity check of whether this number is a valid integer or long. + * @param number the number to check + * @return true if it doesn't contain any invalid characters, i.e. it only contains digits, optionally preceded by a single leading '-' + */ + private static boolean sanityCheckIntegerLong(String number){ + for (int i=0; i < number.length(); i++){ + if (number.charAt(i) >= '0' && number.charAt(i) <='9' || i == 0 && number.charAt(i) == '-'){ + // valid one + } + else{ + // contains invalid characters, so it cannot be an integer or long. + return false; + } + } + return true; + } @Override public Integer bytesToInteger(byte[] b) throws IOException { if(b == null) return null; String s = new String(b); - try { - return Integer.valueOf(s); - } catch (NumberFormatException nfe) { + Integer ret = null; + + // See PIG-2835. Using exception handling to check if it's a double is very expensive. 
+ // So we write our sanity check. + if (sanityCheckIntegerLong(s)){ + try { + ret = Integer.valueOf(s); + } catch (NumberFormatException nfe) { + } + } + if (ret == null){ // It's possible that this field can be interpreted as a double. // Unfortunately Java doesn't handle this in Integer.valueOf. So // we need to try to convert it to a double and if that works then @@ -424,11 +450,12 @@ public class Utf8StorageConverter implem } catch (NumberFormatException nfe2) { LogUtils.warn(this, "Unable to interpret value " + Arrays.toString(b) + " in field being " + "converted to int, caught NumberFormatException <" + - nfe.getMessage() + "> field discarded", + nfe2.getMessage() + "> field discarded", PigWarning.FIELD_DISCARDED_TYPE_CONVERSION_FAILED, mLog); return null; } } + return ret; } @Override @@ -442,9 +469,17 @@ public class Utf8StorageConverter implem s = new String(b); } - try { - return Long.valueOf(s); - } catch (NumberFormatException nfe) { + // See PIG-2835. Using exception handling to check if it's a double is very expensive. + // So we write our sanity check. + Long ret = null; + if (sanityCheckIntegerLong(s)) { + try { + ret = Long.valueOf(s); + } catch (NumberFormatException nfe) { + } + } + + if (ret == null) { // It's possible that this field can be interpreted as a double. // Unfortunately Java doesn't handle this in Long.valueOf. So // we need to try to convert it to a double and if that works then @@ -462,11 +497,12 @@ public class Utf8StorageConverter implem } catch (NumberFormatException nfe2) { LogUtils.warn(this, "Unable to interpret value " + Arrays.toString(b) + " in field being " + "converted to long, caught NumberFormatException <" + - nfe.getMessage() + "> field discarded", + nfe2.getMessage() + "> field discarded", PigWarning.FIELD_DISCARDED_TYPE_CONVERSION_FAILED, mLog); return null; } } + return ret; } @Override