parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tians...@apache.org
Subject git commit: PARQUET-11: Reduce memory pressure when reading footers
Date Fri, 22 Aug 2014 22:34:59 GMT
Repository: incubator-parquet-format
Updated Branches:
  refs/heads/master a109b970a -> f4064910c


PARQUET-11: Reduce memory pressure when reading footers

based on https://github.com/apache/incubator-parquet-format/pull/2

Author: julien <julien@twitter.com>
Author: Dmitriy Ryaboy <dvryaboy@gmail.com>

Closes #7 from julienledem/reduce_metadata_memory and squashes the following commits:

96ff408 [julien] Merge branch 'master' into reduce_metadata_memory
1c382cc [julien] implement delegate instead
7664919 [Dmitriy Ryaboy] intern parquet metadata strings when reading them


Project: http://git-wip-us.apache.org/repos/asf/incubator-parquet-format/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-parquet-format/commit/f4064910
Tree: http://git-wip-us.apache.org/repos/asf/incubator-parquet-format/tree/f4064910
Diff: http://git-wip-us.apache.org/repos/asf/incubator-parquet-format/diff/f4064910

Branch: refs/heads/master
Commit: f4064910c56fee956e18df19f79b065cc32da0fa
Parents: a109b97
Author: julien <julien@twitter.com>
Authored: Fri Aug 22 15:34:48 2014 -0700
Committer: Tianshuo Deng <tdeng@twitter.com>
Committed: Fri Aug 22 15:34:48 2014 -0700

----------------------------------------------------------------------
 .../java/parquet/format/InterningProtocol.java  | 215 +++++++++++++++++++
 src/main/java/parquet/format/Util.java          |  13 +-
 2 files changed, 224 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-parquet-format/blob/f4064910/src/main/java/parquet/format/InterningProtocol.java
----------------------------------------------------------------------
diff --git a/src/main/java/parquet/format/InterningProtocol.java b/src/main/java/parquet/format/InterningProtocol.java
new file mode 100644
index 0000000..d367896
--- /dev/null
+++ b/src/main/java/parquet/format/InterningProtocol.java
@@ -0,0 +1,215 @@
+package parquet.format;
+
+import java.nio.ByteBuffer;
+
+import org.apache.thrift.TException;
+import org.apache.thrift.protocol.TField;
+import org.apache.thrift.protocol.TList;
+import org.apache.thrift.protocol.TMap;
+import org.apache.thrift.protocol.TMessage;
+import org.apache.thrift.protocol.TProtocol;
+import org.apache.thrift.protocol.TSet;
+import org.apache.thrift.protocol.TStruct;
+import org.apache.thrift.transport.TTransport;
+
+/**
+ * TProtocol that interns the strings.
+ *
+ * @author Julien Le Dem
+ *
+ */
+public class InterningProtocol extends TProtocol {
+
+  private final TProtocol delegate;
+
+  public InterningProtocol(TProtocol delegate) {
+    super(delegate.getTransport());
+    this.delegate = delegate;
+  }
+
+  public TTransport getTransport() {
+    return delegate.getTransport();
+  }
+
+  public void writeMessageBegin(TMessage message) throws TException {
+    delegate.writeMessageBegin(message);
+  }
+
+  public void writeMessageEnd() throws TException {
+    delegate.writeMessageEnd();
+  }
+
+  public int hashCode() {
+    return delegate.hashCode();
+  }
+
+  public void writeStructBegin(TStruct struct) throws TException {
+    delegate.writeStructBegin(struct);
+  }
+
+  public void writeStructEnd() throws TException {
+    delegate.writeStructEnd();
+  }
+
+  public void writeFieldBegin(TField field) throws TException {
+    delegate.writeFieldBegin(field);
+  }
+
+  public void writeFieldEnd() throws TException {
+    delegate.writeFieldEnd();
+  }
+
+  public void writeFieldStop() throws TException {
+    delegate.writeFieldStop();
+  }
+
+  public void writeMapBegin(TMap map) throws TException {
+    delegate.writeMapBegin(map);
+  }
+
+  public void writeMapEnd() throws TException {
+    delegate.writeMapEnd();
+  }
+
+  public void writeListBegin(TList list) throws TException {
+    delegate.writeListBegin(list);
+  }
+
+  public void writeListEnd() throws TException {
+    delegate.writeListEnd();
+  }
+
+  public void writeSetBegin(TSet set) throws TException {
+    delegate.writeSetBegin(set);
+  }
+
+  public void writeSetEnd() throws TException {
+    delegate.writeSetEnd();
+  }
+
+  public void writeBool(boolean b) throws TException {
+    delegate.writeBool(b);
+  }
+
+  public void writeByte(byte b) throws TException {
+    delegate.writeByte(b);
+  }
+
+  public void writeI16(short i16) throws TException {
+    delegate.writeI16(i16);
+  }
+
+  public void writeI32(int i32) throws TException {
+    delegate.writeI32(i32);
+  }
+
+  public void writeI64(long i64) throws TException {
+    delegate.writeI64(i64);
+  }
+
+  public void writeDouble(double dub) throws TException {
+    delegate.writeDouble(dub);
+  }
+
+  public void writeString(String str) throws TException {
+    delegate.writeString(str);
+  }
+
+  public void writeBinary(ByteBuffer buf) throws TException {
+    delegate.writeBinary(buf);
+  }
+
+  public TMessage readMessageBegin() throws TException {
+    return delegate.readMessageBegin();
+  }
+
+  public void readMessageEnd() throws TException {
+    delegate.readMessageEnd();
+  }
+
+  public TStruct readStructBegin() throws TException {
+    return delegate.readStructBegin();
+  }
+
+  public void readStructEnd() throws TException {
+    delegate.readStructEnd();
+  }
+
+  public TField readFieldBegin() throws TException {
+    return delegate.readFieldBegin();
+  }
+
+  public void readFieldEnd() throws TException {
+    delegate.readFieldEnd();
+  }
+
+  public TMap readMapBegin() throws TException {
+    return delegate.readMapBegin();
+  }
+
+  public void readMapEnd() throws TException {
+    delegate.readMapEnd();
+  }
+
+  public TList readListBegin() throws TException {
+    return delegate.readListBegin();
+  }
+
+  public void readListEnd() throws TException {
+    delegate.readListEnd();
+  }
+
+  public TSet readSetBegin() throws TException {
+    return delegate.readSetBegin();
+  }
+
+  public void readSetEnd() throws TException {
+    delegate.readSetEnd();
+  }
+
+  public boolean equals(Object obj) {
+    return delegate.equals(obj);
+  }
+
+  public boolean readBool() throws TException {
+    return delegate.readBool();
+  }
+
+  public byte readByte() throws TException {
+    return delegate.readByte();
+  }
+
+  public short readI16() throws TException {
+    return delegate.readI16();
+  }
+
+  public int readI32() throws TException {
+    return delegate.readI32();
+  }
+
+  public long readI64() throws TException {
+    return delegate.readI64();
+  }
+
+  public double readDouble() throws TException {
+    return delegate.readDouble();
+  }
+
+  public String readString() throws TException {
+    // this is where we intern the strings
+    return delegate.readString().intern();
+  }
+
+  public ByteBuffer readBinary() throws TException {
+    return delegate.readBinary();
+  }
+
+  public void reset() {
+    delegate.reset();
+  }
+
+  public String toString() {
+    return delegate.toString();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-parquet-format/blob/f4064910/src/main/java/parquet/format/Util.java
----------------------------------------------------------------------
diff --git a/src/main/java/parquet/format/Util.java b/src/main/java/parquet/format/Util.java
index 40e3ef5..9210f37 100644
--- a/src/main/java/parquet/format/Util.java
+++ b/src/main/java/parquet/format/Util.java
@@ -7,6 +7,7 @@ import java.io.OutputStream;
 import org.apache.thrift.TBase;
 import org.apache.thrift.TException;
 import org.apache.thrift.protocol.TCompactProtocol;
+import org.apache.thrift.protocol.TProtocol;
 import org.apache.thrift.transport.TIOStreamTransport;
 
 /**
@@ -34,12 +35,16 @@ public class Util {
     return read(from, new FileMetaData());
   }
 
-  private static TCompactProtocol protocol(OutputStream to) {
-    return new TCompactProtocol(new TIOStreamTransport(to));
+  private static TProtocol protocol(OutputStream to) {
+    return protocol(new TIOStreamTransport(to));
   }
 
-  private static TCompactProtocol protocol(InputStream from) {
-    return new TCompactProtocol(new TIOStreamTransport(from));
+  private static TProtocol protocol(InputStream from) {
+    return protocol(new TIOStreamTransport(from));
+  }
+
+  private static InterningProtocol protocol(TIOStreamTransport t) {
+    return new InterningProtocol(new TCompactProtocol(t));
   }
 
   private static <T extends TBase<?,?>> T read(InputStream from, T tbase) throws IOException {


Mime
View raw message