Repository: parquet-mr
Updated Branches:
refs/heads/master 6a4bbe94a -> 445cb9dc2
PARQUET-1215: Add getFooter to ParquetWriter.
This adds getFooter to ParquetWriter, which returns the footer that was written to the file.
The footer is available only after the file has been closed.
Author: Ryan Blue <blue@apache.org>
Closes #457 from rdblue/PARQUET-1215-add-footer-accessor-to-writers and squashes the following
commits:
79c5965a1 [Ryan Blue] PARQUET-1215: Add getFooter to ParquetWriter.
Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/445cb9dc
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/445cb9dc
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/445cb9dc
Branch: refs/heads/master
Commit: 445cb9dc2f07553f8e1e5f7c1150f00fbb05c63f
Parents: 6a4bbe9
Author: Ryan Blue <blue@apache.org>
Authored: Thu Feb 15 09:07:29 2018 -0800
Committer: Ryan Blue <blue@apache.org>
Committed: Thu Feb 15 09:07:29 2018 -0800
----------------------------------------------------------------------
.../parquet/hadoop/InternalParquetRecordWriter.java | 5 +++++
.../java/org/apache/parquet/hadoop/ParquetFileWriter.java | 10 +++++++++-
.../java/org/apache/parquet/hadoop/ParquetWriter.java | 8 ++++++++
3 files changed, 22 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/445cb9dc/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
index 2a221ac..d9e9b5e 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
@@ -32,6 +32,7 @@ import org.apache.parquet.column.ParquetProperties;
import org.apache.parquet.hadoop.CodecFactory.BytesCompressor;
import org.apache.parquet.hadoop.api.WriteSupport;
import org.apache.parquet.hadoop.api.WriteSupport.FinalizedWriteContext;
+import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.io.ColumnIOFactory;
import org.apache.parquet.io.MessageColumnIO;
import org.apache.parquet.io.api.RecordConsumer;
@@ -96,6 +97,10 @@ class InternalParquetRecordWriter<T> {
initStore();
}
+ public ParquetMetadata getFooter() {
+ return parquetFileWriter.getFooter();
+ }
+
private void initStore() {
pageStore = new ColumnChunkPageWriteStore(compressor, schema, props.getAllocator());
columnStore = props.newColumnWriteStore(schema, pageStore);
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/445cb9dc/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
index 285c2db..f94fd9c 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
@@ -121,6 +121,9 @@ public class ParquetFileWriter {
private long currentChunkFirstDataPage; // set in startColumn (out.pos())
private long currentChunkDictionaryPageOffset; // set in writeDictionaryPage
+ // set when end is called
+ private ParquetMetadata footer = null;
+
/**
* Captures the order in which methods should be called
*
@@ -670,7 +673,7 @@ public class ParquetFileWriter {
public void end(Map<String, String> extraMetaData) throws IOException {
state = state.end();
LOG.debug("{}: end", out.getPos());
- ParquetMetadata footer = new ParquetMetadata(new FileMetaData(schema, extraMetaData,
Version.FULL_VERSION), blocks);
+ this.footer = new ParquetMetadata(new FileMetaData(schema, extraMetaData, Version.FULL_VERSION),
blocks);
serializeFooter(footer, out);
out.close();
}
@@ -684,6 +687,11 @@ public class ParquetFileWriter {
out.write(MAGIC);
}
+ public ParquetMetadata getFooter() {
+ Preconditions.checkState(state == STATE.ENDED, "Cannot return unfinished footer.");
+ return footer;
+ }
+
/**
* Given a list of metadata files, merge them into a single ParquetMetadata
* Requires that the schemas be compatible, and the extraMetadata be exactly equal.
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/445cb9dc/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
index bdde70e..1908206 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
@@ -28,6 +28,7 @@ import org.apache.parquet.column.ParquetProperties;
import org.apache.parquet.column.ParquetProperties.WriterVersion;
import org.apache.parquet.hadoop.api.WriteSupport;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
+import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.hadoop.util.HadoopOutputFile;
import org.apache.parquet.io.OutputFile;
import org.apache.parquet.schema.MessageType;
@@ -311,6 +312,13 @@ public class ParquetWriter<T> implements Closeable {
}
/**
+ * @return the ParquetMetadata written to the (closed) file.
+ */
+ public ParquetMetadata getFooter() {
+ return writer.getFooter();
+ }
+
+ /**
* @return the total size of data written to the file and buffered in memory
*/
public long getDataSize() {
|