parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From b...@apache.org
Subject parquet-mr git commit: PARQUET-1215: Add getFooter to ParquetWriter.
Date Thu, 15 Feb 2018 17:07:38 GMT
Repository: parquet-mr
Updated Branches:
  refs/heads/master 6a4bbe94a -> 445cb9dc2


PARQUET-1215: Add getFooter to ParquetWriter.

This adds getFooter to ParquetWriter. Once the file has been closed, getFooter returns the
footer that was written to the file.

Author: Ryan Blue <blue@apache.org>

Closes #457 from rdblue/PARQUET-1215-add-footer-accessor-to-writers and squashes the following
commits:

79c5965a1 [Ryan Blue] PARQUET-1215: Add getFooter to ParquetWriter.


Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/445cb9dc
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/445cb9dc
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/445cb9dc

Branch: refs/heads/master
Commit: 445cb9dc2f07553f8e1e5f7c1150f00fbb05c63f
Parents: 6a4bbe9
Author: Ryan Blue <blue@apache.org>
Authored: Thu Feb 15 09:07:29 2018 -0800
Committer: Ryan Blue <blue@apache.org>
Committed: Thu Feb 15 09:07:29 2018 -0800

----------------------------------------------------------------------
 .../parquet/hadoop/InternalParquetRecordWriter.java       |  5 +++++
 .../java/org/apache/parquet/hadoop/ParquetFileWriter.java | 10 +++++++++-
 .../java/org/apache/parquet/hadoop/ParquetWriter.java     |  8 ++++++++
 3 files changed, 22 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/445cb9dc/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
index 2a221ac..d9e9b5e 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
@@ -32,6 +32,7 @@ import org.apache.parquet.column.ParquetProperties;
 import org.apache.parquet.hadoop.CodecFactory.BytesCompressor;
 import org.apache.parquet.hadoop.api.WriteSupport;
 import org.apache.parquet.hadoop.api.WriteSupport.FinalizedWriteContext;
+import org.apache.parquet.hadoop.metadata.ParquetMetadata;
 import org.apache.parquet.io.ColumnIOFactory;
 import org.apache.parquet.io.MessageColumnIO;
 import org.apache.parquet.io.api.RecordConsumer;
@@ -96,6 +97,10 @@ class InternalParquetRecordWriter<T> {
     initStore();
   }
 
+  public ParquetMetadata getFooter() {
+    return parquetFileWriter.getFooter();
+  }
+
   private void initStore() {
     pageStore = new ColumnChunkPageWriteStore(compressor, schema, props.getAllocator());
     columnStore = props.newColumnWriteStore(schema, pageStore);

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/445cb9dc/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
index 285c2db..f94fd9c 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
@@ -121,6 +121,9 @@ public class ParquetFileWriter {
   private long currentChunkFirstDataPage;         // set in startColumn (out.pos())
   private long currentChunkDictionaryPageOffset;  // set in writeDictionaryPage
 
+  // set when end is called
+  private ParquetMetadata footer = null;
+
   /**
    * Captures the order in which methods should be called
    *
@@ -670,7 +673,7 @@ public class ParquetFileWriter {
   public void end(Map<String, String> extraMetaData) throws IOException {
     state = state.end();
     LOG.debug("{}: end", out.getPos());
-    ParquetMetadata footer = new ParquetMetadata(new FileMetaData(schema, extraMetaData, Version.FULL_VERSION), blocks);
+    this.footer = new ParquetMetadata(new FileMetaData(schema, extraMetaData, Version.FULL_VERSION), blocks);
     serializeFooter(footer, out);
     out.close();
   }
@@ -684,6 +687,11 @@ public class ParquetFileWriter {
     out.write(MAGIC);
   }
 
+  public ParquetMetadata getFooter() {
+    Preconditions.checkState(state == STATE.ENDED, "Cannot return unfinished footer.");
+    return footer;
+  }
+
   /**
    * Given a list of metadata files, merge them into a single ParquetMetadata
    * Requires that the schemas be compatible, and the extraMetadata be exactly equal.

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/445cb9dc/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
index bdde70e..1908206 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
@@ -28,6 +28,7 @@ import org.apache.parquet.column.ParquetProperties;
 import org.apache.parquet.column.ParquetProperties.WriterVersion;
 import org.apache.parquet.hadoop.api.WriteSupport;
 import org.apache.parquet.hadoop.metadata.CompressionCodecName;
+import org.apache.parquet.hadoop.metadata.ParquetMetadata;
 import org.apache.parquet.hadoop.util.HadoopOutputFile;
 import org.apache.parquet.io.OutputFile;
 import org.apache.parquet.schema.MessageType;
@@ -311,6 +312,13 @@ public class ParquetWriter<T> implements Closeable {
   }
 
   /**
+   * @return the ParquetMetadata written to the (closed) file.
+   */
+  public ParquetMetadata getFooter() {
+    return writer.getFooter();
+  }
+
+  /**
    * @return the total size of data written to the file and buffered in memory
    */
   public long getDataSize() {


Mime
View raw message