flink-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From se...@apache.org
Subject [1/4] git commit: Change string construction to work around a known JVM performance bug still present in some older JVM versions.
Date Wed, 25 Jun 2014 15:48:27 GMT
Repository: incubator-flink
Updated Branches:
  refs/heads/master e2aabd906 -> 49cd35a76


Change string construction to work around a known JVM performance bug still present in some
older JVM versions.


Project: http://git-wip-us.apache.org/repos/asf/incubator-flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-flink/commit/ef623e9b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-flink/tree/ef623e9b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-flink/diff/ef623e9b

Branch: refs/heads/master
Commit: ef623e9b883dea3f57d0fb33c3af3a804dc041e6
Parents: 8431395
Author: Stephan Ewen <sewen@apache.org>
Authored: Wed Jun 25 16:12:33 2014 +0200
Committer: Stephan Ewen <sewen@apache.org>
Committed: Wed Jun 25 16:14:18 2014 +0200

----------------------------------------------------------------------
 .../api/common/io/DelimitedInputFormat.java     |  2 +-
 .../types/parser/AsciiStringParser.java         |  9 ++----
 .../api/java/io/TextInputFormat.java            | 29 +++++++-------------
 3 files changed, 13 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/ef623e9b/stratosphere-core/src/main/java/eu/stratosphere/api/common/io/DelimitedInputFormat.java
----------------------------------------------------------------------
diff --git a/stratosphere-core/src/main/java/eu/stratosphere/api/common/io/DelimitedInputFormat.java b/stratosphere-core/src/main/java/eu/stratosphere/api/common/io/DelimitedInputFormat.java
index 88eee94..185f7f8 100644
--- a/stratosphere-core/src/main/java/eu/stratosphere/api/common/io/DelimitedInputFormat.java
+++ b/stratosphere-core/src/main/java/eu/stratosphere/api/common/io/DelimitedInputFormat.java
@@ -258,7 +258,7 @@ public abstract class DelimitedInputFormat<OT> extends FileInputFormat<OT> {
 	 * 
 	 * @return returns whether the record was successfully deserialized or not.
 	 */
-	public abstract OT readRecord(OT reuse, byte[] bytes, int offset, int numBytes);
+	public abstract OT readRecord(OT reuse, byte[] bytes, int offset, int numBytes) throws IOException;
 	
 	// --------------------------------------------------------------------------------------------
 	//  Pre-flight: Configuration, Splits, Sampling

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/ef623e9b/stratosphere-core/src/main/java/eu/stratosphere/types/parser/AsciiStringParser.java
----------------------------------------------------------------------
diff --git a/stratosphere-core/src/main/java/eu/stratosphere/types/parser/AsciiStringParser.java b/stratosphere-core/src/main/java/eu/stratosphere/types/parser/AsciiStringParser.java
index 381d968..c3e6784 100644
--- a/stratosphere-core/src/main/java/eu/stratosphere/types/parser/AsciiStringParser.java
+++ b/stratosphere-core/src/main/java/eu/stratosphere/types/parser/AsciiStringParser.java
@@ -13,17 +13,12 @@
 
 package eu.stratosphere.types.parser;
 
-import java.nio.charset.Charset;
-
 /**
  * Converts a variable length field of a byte array into a {@link String}. The byte contents between
  * delimiters is interpreted as an ASCII string. The string may be quoted in double quotes. For quoted
  * strings, whitespaces (space and tab) leading and trailing before and after the quotes are removed.
  */
 public class AsciiStringParser extends FieldParser<String> {
-
-	// the default (ascii style) charset. should be available really everywhere.
-	private static final Charset CHARSET = Charset.forName("ISO-8859-1");
 	
 	private static final byte WHITESPACE_SPACE = (byte) ' ';
 	private static final byte WHITESPACE_TAB = (byte) '\t';
@@ -58,7 +53,7 @@ public class AsciiStringParser extends FieldParser<String> {
 			
 			if (i < limit) {
 				// end of the string
-				this.result = new String(bytes, quoteStart, i-quoteStart, CHARSET);
+				this.result = new String(bytes, quoteStart, i-quoteStart);
 				
 				i++; // the quote
 				
@@ -87,7 +82,7 @@ public class AsciiStringParser extends FieldParser<String> {
 			}
 			
 			// set from the beginning. unquoted strings include the leading whitespaces
-			this.result = new String(bytes, startPos, i-startPos, CHARSET);
+			this.result = new String(bytes, startPos, i-startPos);
 			return (i == limit ? limit : i+1);
 		}
 	}

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/ef623e9b/stratosphere-java/src/main/java/eu/stratosphere/api/java/io/TextInputFormat.java
----------------------------------------------------------------------
diff --git a/stratosphere-java/src/main/java/eu/stratosphere/api/java/io/TextInputFormat.java b/stratosphere-java/src/main/java/eu/stratosphere/api/java/io/TextInputFormat.java
index 6febb74..759c9e9 100644
--- a/stratosphere-java/src/main/java/eu/stratosphere/api/java/io/TextInputFormat.java
+++ b/stratosphere-java/src/main/java/eu/stratosphere/api/java/io/TextInputFormat.java
@@ -14,6 +14,7 @@
  **********************************************************************************************************************/
 package eu.stratosphere.api.java.io;
 
+import java.io.IOException;
 import java.nio.charset.Charset;
 
 import eu.stratosphere.api.common.io.DelimitedInputFormat;
@@ -25,12 +26,6 @@ public class TextInputFormat extends DelimitedInputFormat<String> {
 	
 	private static final long serialVersionUID = 1L;
 	
-	private String charsetName = "UTF-8";
-	
-//	private boolean skipInvalidLines;
-	
-	private transient Charset charset;
-
 	/**
 	 * Code of \r, used to remove \r from a line when the line ends with \r\n
 	 */
@@ -40,7 +35,12 @@ public class TextInputFormat extends DelimitedInputFormat<String> {
 	 * Code of \n, used to identify if \n is used as delimiter
 	 */
 	private static final byte NEW_LINE = (byte) '\n';
-
+	
+	
+	/**
+	 * The name of the charset to use for decoding.
+	 */
+	private String charsetName = "UTF-8";
 	
 	// --------------------------------------------------------------------------------------------
 	
@@ -62,14 +62,6 @@ public class TextInputFormat extends DelimitedInputFormat<String> {
 		this.charsetName = charsetName;
 	}
 	
-//	public boolean isSkipInvalidLines() {
-//		return skipInvalidLines;
-//	}
-//	
-//	public void setSkipInvalidLines(boolean skipInvalidLines) {
-//		this.skipInvalidLines = skipInvalidLines;
-//	}
-	
 	// --------------------------------------------------------------------------------------------
 
 	@Override
@@ -79,13 +71,12 @@ public class TextInputFormat extends DelimitedInputFormat<String> {
 		if (charsetName == null || !Charset.isSupported(charsetName)) {
 			throw new RuntimeException("Unsupported charset: " + charsetName);
 		}
-		this.charset = Charset.forName(charsetName);
 	}
 
 	// --------------------------------------------------------------------------------------------
 
 	@Override
-	public String readRecord(String reusable, byte[] bytes, int offset, int numBytes) {
+	public String readRecord(String reusable, byte[] bytes, int offset, int numBytes) throws IOException {
 		//Check if \n is used as delimiter and the end of this line is a \r, then remove \r from the line
 		if (this.getDelimiter() != null && this.getDelimiter().length == 1 
 				&& this.getDelimiter()[0] == NEW_LINE && offset+numBytes >= 1 
@@ -93,13 +84,13 @@ public class TextInputFormat extends DelimitedInputFormat<String>
{
 			numBytes -= 1;
 		}
 		
-		return new String(bytes, offset, numBytes, this.charset);
+		return new String(bytes, offset, numBytes, this.charsetName);
 	}
 	
 	// --------------------------------------------------------------------------------------------
 	
 	@Override
 	public String toString() {
-		return "TextInputFormat (" + getFilePath() + ") - " + this.charsetName; // + (this.skipInvalidLines ? "(skipping invalid lines)" : "");
+		return "TextInputFormat (" + getFilePath() + ") - " + this.charsetName;
 	}
 }


Mime
View raw message