Return-Path: X-Original-To: apmail-flink-commits-archive@minotaur.apache.org Delivered-To: apmail-flink-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 4EA8811C9A for ; Thu, 26 Jun 2014 09:47:24 +0000 (UTC) Received: (qmail 24564 invoked by uid 500); 26 Jun 2014 09:47:24 -0000 Delivered-To: apmail-flink-commits-archive@flink.apache.org Received: (qmail 24545 invoked by uid 500); 26 Jun 2014 09:47:24 -0000 Mailing-List: contact commits-help@flink.incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@flink.incubator.apache.org Delivered-To: mailing list commits@flink.incubator.apache.org Received: (qmail 24533 invoked by uid 99); 26 Jun 2014 09:47:24 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 26 Jun 2014 09:47:24 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED,T_RP_MATCHES_RCVD X-Spam-Check-By: apache.org Received: from [140.211.11.3] (HELO mail.apache.org) (140.211.11.3) by apache.org (qpsmtpd/0.29) with SMTP; Thu, 26 Jun 2014 09:47:00 +0000 Received: (qmail 23404 invoked by uid 99); 26 Jun 2014 09:46:29 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 26 Jun 2014 09:46:29 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id 16CDC834A79; Thu, 26 Jun 2014 09:46:29 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: rmetzger@apache.org To: commits@flink.incubator.apache.org Date: Thu, 26 Jun 2014 09:47:11 -0000 Message-Id: In-Reply-To: <082fb8975e2c47f2b349d6589076af26@git.apache.org> References: <082fb8975e2c47f2b349d6589076af26@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [46/53] [abbrv] git commit: Change string construction to work around a known JVM performance bug still present in some older JVM versions. X-Virus-Checked: Checked by ClamAV on apache.org Change string construction to work around a known JVM performance bug still present in some older JVM versions. Project: http://git-wip-us.apache.org/repos/asf/incubator-flink/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-flink/commit/ef623e9b Tree: http://git-wip-us.apache.org/repos/asf/incubator-flink/tree/ef623e9b Diff: http://git-wip-us.apache.org/repos/asf/incubator-flink/diff/ef623e9b Branch: refs/heads/travis_test Commit: ef623e9b883dea3f57d0fb33c3af3a804dc041e6 Parents: 8431395 Author: Stephan Ewen Authored: Wed Jun 25 16:12:33 2014 +0200 Committer: Stephan Ewen Committed: Wed Jun 25 16:14:18 2014 +0200 ---------------------------------------------------------------------- .../api/common/io/DelimitedInputFormat.java | 2 +- .../types/parser/AsciiStringParser.java | 9 ++---- .../api/java/io/TextInputFormat.java | 29 +++++++------------- 3 files changed, 13 insertions(+), 27 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/ef623e9b/stratosphere-core/src/main/java/eu/stratosphere/api/common/io/DelimitedInputFormat.java ---------------------------------------------------------------------- diff --git a/stratosphere-core/src/main/java/eu/stratosphere/api/common/io/DelimitedInputFormat.java b/stratosphere-core/src/main/java/eu/stratosphere/api/common/io/DelimitedInputFormat.java index 88eee94..185f7f8 100644 --- a/stratosphere-core/src/main/java/eu/stratosphere/api/common/io/DelimitedInputFormat.java +++ b/stratosphere-core/src/main/java/eu/stratosphere/api/common/io/DelimitedInputFormat.java @@ -258,7 +258,7 @@ public abstract class DelimitedInputFormat extends FileInputFormat { * * @return returns whether the record was successfully deserialized or not. */ - public abstract OT readRecord(OT reuse, byte[] bytes, int offset, int numBytes); + public abstract OT readRecord(OT reuse, byte[] bytes, int offset, int numBytes) throws IOException; // -------------------------------------------------------------------------------------------- // Pre-flight: Configuration, Splits, Sampling http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/ef623e9b/stratosphere-core/src/main/java/eu/stratosphere/types/parser/AsciiStringParser.java ---------------------------------------------------------------------- diff --git a/stratosphere-core/src/main/java/eu/stratosphere/types/parser/AsciiStringParser.java b/stratosphere-core/src/main/java/eu/stratosphere/types/parser/AsciiStringParser.java index 381d968..c3e6784 100644 --- a/stratosphere-core/src/main/java/eu/stratosphere/types/parser/AsciiStringParser.java +++ b/stratosphere-core/src/main/java/eu/stratosphere/types/parser/AsciiStringParser.java @@ -13,17 +13,12 @@ package eu.stratosphere.types.parser; -import java.nio.charset.Charset; - /** * Converts a variable length field of a byte array into a {@link String}. The byte contents between * delimiters is interpreted as an ASCII string. The string may be quoted in double quotes. For quoted * strings, whitespaces (space and tab) leading and trailing before and after the quotes are removed. */ public class AsciiStringParser extends FieldParser { - - // the default (ascii style) charset. should be available really everywhere. - private static final Charset CHARSET = Charset.forName("ISO-8859-1"); private static final byte WHITESPACE_SPACE = (byte) ' '; private static final byte WHITESPACE_TAB = (byte) '\t'; @@ -58,7 +53,7 @@ public class AsciiStringParser extends FieldParser { if (i < limit) { // end of the string - this.result = new String(bytes, quoteStart, i-quoteStart, CHARSET); + this.result = new String(bytes, quoteStart, i-quoteStart); i++; // the quote @@ -87,7 +82,7 @@ public class AsciiStringParser extends FieldParser { } // set from the beginning. unquoted strings include the leading whitespaces - this.result = new String(bytes, startPos, i-startPos, CHARSET); + this.result = new String(bytes, startPos, i-startPos); return (i == limit ? limit : i+1); } } http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/ef623e9b/stratosphere-java/src/main/java/eu/stratosphere/api/java/io/TextInputFormat.java ---------------------------------------------------------------------- diff --git a/stratosphere-java/src/main/java/eu/stratosphere/api/java/io/TextInputFormat.java b/stratosphere-java/src/main/java/eu/stratosphere/api/java/io/TextInputFormat.java index 6febb74..759c9e9 100644 --- a/stratosphere-java/src/main/java/eu/stratosphere/api/java/io/TextInputFormat.java +++ b/stratosphere-java/src/main/java/eu/stratosphere/api/java/io/TextInputFormat.java @@ -14,6 +14,7 @@ **********************************************************************************************************************/ package eu.stratosphere.api.java.io; +import java.io.IOException; import java.nio.charset.Charset; import eu.stratosphere.api.common.io.DelimitedInputFormat; @@ -25,12 +26,6 @@ public class TextInputFormat extends DelimitedInputFormat { private static final long serialVersionUID = 1L; - private String charsetName = "UTF-8"; - -// private boolean skipInvalidLines; - - private transient Charset charset; - /** * Code of \r, used to remove \r from a line when the line ends with \r\n */ @@ -40,7 +35,12 @@ public class TextInputFormat extends DelimitedInputFormat { * Code of \n, used to identify if \n is used as delimiter */ private static final byte NEW_LINE = (byte) '\n'; - + + + /** + * The name of the charset to use for decoding. + */ + private String charsetName = "UTF-8"; // -------------------------------------------------------------------------------------------- @@ -62,14 +62,6 @@ public class TextInputFormat extends DelimitedInputFormat { this.charsetName = charsetName; } -// public boolean isSkipInvalidLines() { -// return skipInvalidLines; -// } -// -// public void setSkipInvalidLines(boolean skipInvalidLines) { -// this.skipInvalidLines = skipInvalidLines; -// } - // -------------------------------------------------------------------------------------------- @Override @@ -79,13 +71,12 @@ public class TextInputFormat extends DelimitedInputFormat { if (charsetName == null || !Charset.isSupported(charsetName)) { throw new RuntimeException("Unsupported charset: " + charsetName); } - this.charset = Charset.forName(charsetName); } // -------------------------------------------------------------------------------------------- @Override - public String readRecord(String reusable, byte[] bytes, int offset, int numBytes) { + public String readRecord(String reusable, byte[] bytes, int offset, int numBytes) throws IOException { //Check if \n is used as delimiter and the end of this line is a \r, then remove \r from the line if (this.getDelimiter() != null && this.getDelimiter().length == 1 && this.getDelimiter()[0] == NEW_LINE && offset+numBytes >= 1 @@ -93,13 +84,13 @@ public class TextInputFormat extends DelimitedInputFormat { numBytes -= 1; } - return new String(bytes, offset, numBytes, this.charset); + return new String(bytes, offset, numBytes, this.charsetName); } // -------------------------------------------------------------------------------------------- @Override public String toString() { - return "TextInputFormat (" + getFilePath() + ") - " + this.charsetName; // + (this.skipInvalidLines ? "(skipping invalid lines)" : ""); + return "TextInputFormat (" + getFilePath() + ") - " + this.charsetName; } }