parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From b...@apache.org
Subject incubator-parquet-mr git commit: PARQUET-208: Revert PARQUET-197
Date Sat, 07 Mar 2015 00:38:57 GMT
Repository: incubator-parquet-mr
Updated Branches:
  refs/heads/master a7155a8d7 -> 12ee6b442


PARQUET-208: Revert PARQUET-197

Revert "PARQUET-197 : fix parquet-cascading not writing parquet metadata...

Author: Tianshuo Deng <tdeng@twitter.com>

Closes #139 from tsdeng/revert_parquet_197 and squashes the following commits:

a74b5c8 [Tianshuo Deng] Revert "PARQUET-197 : fix parquet-cascading not writing parquet metadata file"


Project: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/commit/12ee6b44
Tree: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/tree/12ee6b44
Diff: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/diff/12ee6b44

Branch: refs/heads/master
Commit: 12ee6b442bbf6557c06ecd7c1f7ae2fceeae55d6
Parents: a7155a8
Author: Tianshuo Deng <tdeng@twitter.com>
Authored: Fri Mar 6 16:38:49 2015 -0800
Committer: Ryan Blue <blue@apache.org>
Committed: Fri Mar 6 16:38:49 2015 -0800

----------------------------------------------------------------------
 .../parquet/cascading/ParquetTBaseScheme.java   |  2 +-
 .../parquet/cascading/ParquetTupleScheme.java   |  2 +-
 .../cascading/TestParquetTBaseScheme.java       |  3 --
 .../parquet/hadoop/ParquetOutputCommitter.java  |  4 --
 .../mapred/DeprecatedParquetOutputFormat.java   |  5 ---
 .../mapred/MapredParquetOutputCommitter.java    | 42 --------------------
 .../parquet/scrooge/ParquetScroogeScheme.java   |  5 ++-
 7 files changed, 6 insertions(+), 57 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/12ee6b44/parquet-cascading/src/main/java/parquet/cascading/ParquetTBaseScheme.java
----------------------------------------------------------------------
diff --git a/parquet-cascading/src/main/java/parquet/cascading/ParquetTBaseScheme.java b/parquet-cascading/src/main/java/parquet/cascading/ParquetTBaseScheme.java
index ab84749..41f8c4f 100644
--- a/parquet-cascading/src/main/java/parquet/cascading/ParquetTBaseScheme.java
+++ b/parquet-cascading/src/main/java/parquet/cascading/ParquetTBaseScheme.java
@@ -73,7 +73,7 @@ public class ParquetTBaseScheme<T extends TBase<?,?>> extends ParquetValueScheme
       throw new IllegalArgumentException("To use ParquetTBaseScheme as a sink, you must specify a thrift class in the constructor");
     }
 
-    DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);
+    jobConf.setOutputFormat(DeprecatedParquetOutputFormat.class);
     DeprecatedParquetOutputFormat.setWriteSupportClass(jobConf, TBaseWriteSupport.class);
     TBaseWriteSupport.<T>setThriftClass(jobConf, this.config.getKlass());
   }

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/12ee6b44/parquet-cascading/src/main/java/parquet/cascading/ParquetTupleScheme.java
----------------------------------------------------------------------
diff --git a/parquet-cascading/src/main/java/parquet/cascading/ParquetTupleScheme.java b/parquet-cascading/src/main/java/parquet/cascading/ParquetTupleScheme.java
index 7f6ac3a..ea0a953 100644
--- a/parquet-cascading/src/main/java/parquet/cascading/ParquetTupleScheme.java
+++ b/parquet-cascading/src/main/java/parquet/cascading/ParquetTupleScheme.java
@@ -171,7 +171,7 @@ public class ParquetTupleScheme extends Scheme<JobConf, RecordReader, OutputColl
   @Override
   public void sinkConfInit(FlowProcess<JobConf> fp,
           Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
-    DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);
+    jobConf.setOutputFormat(DeprecatedParquetOutputFormat.class);
     jobConf.set(TupleWriteSupport.PARQUET_CASCADING_SCHEMA, parquetSchema);
     ParquetOutputFormat.setWriteSupportClass(jobConf, TupleWriteSupport.class);
   }

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/12ee6b44/parquet-cascading/src/test/java/parquet/cascading/TestParquetTBaseScheme.java
----------------------------------------------------------------------
diff --git a/parquet-cascading/src/test/java/parquet/cascading/TestParquetTBaseScheme.java b/parquet-cascading/src/test/java/parquet/cascading/TestParquetTBaseScheme.java
index 7d1454c..11a5b5e 100644
--- a/parquet-cascading/src/test/java/parquet/cascading/TestParquetTBaseScheme.java
+++ b/parquet-cascading/src/test/java/parquet/cascading/TestParquetTBaseScheme.java
@@ -78,9 +78,6 @@ public class TestParquetTBaseScheme {
     Flow flow  = new HadoopFlowConnector().connect("namecp", source, sink, assembly);
 
     flow.complete();
-    assertTrue(fs.exists(new Path(parquetOutputPath)));
-    assertTrue(fs.exists(new Path(parquetOutputPath + "/_metadata")));
-    assertTrue(fs.exists(new Path(parquetOutputPath + "/_common_metadata")));
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/12ee6b44/parquet-hadoop/src/main/java/parquet/hadoop/ParquetOutputCommitter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/parquet/hadoop/ParquetOutputCommitter.java b/parquet-hadoop/src/main/java/parquet/hadoop/ParquetOutputCommitter.java
index 0e0ce42..841c211 100644
--- a/parquet-hadoop/src/main/java/parquet/hadoop/ParquetOutputCommitter.java
+++ b/parquet-hadoop/src/main/java/parquet/hadoop/ParquetOutputCommitter.java
@@ -45,10 +45,6 @@ public class ParquetOutputCommitter extends FileOutputCommitter {
   public void commitJob(JobContext jobContext) throws IOException {
     super.commitJob(jobContext);
     Configuration configuration = ContextUtil.getConfiguration(jobContext);
-    writeMetaDataFile(configuration,outputPath);
-  }
-
-  public static void writeMetaDataFile(Configuration configuration, Path outputPath) {
     if (configuration.getBoolean(ParquetOutputFormat.ENABLE_JOB_SUMMARY, true)) {
       try {
         final FileSystem fileSystem = outputPath.getFileSystem(configuration);

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/12ee6b44/parquet-hadoop/src/main/java/parquet/hadoop/mapred/DeprecatedParquetOutputFormat.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/parquet/hadoop/mapred/DeprecatedParquetOutputFormat.java b/parquet-hadoop/src/main/java/parquet/hadoop/mapred/DeprecatedParquetOutputFormat.java
index c0defb1..5b84e54 100644
--- a/parquet-hadoop/src/main/java/parquet/hadoop/mapred/DeprecatedParquetOutputFormat.java
+++ b/parquet-hadoop/src/main/java/parquet/hadoop/mapred/DeprecatedParquetOutputFormat.java
@@ -55,11 +55,6 @@ public class DeprecatedParquetOutputFormat<V> extends org.apache.hadoop.mapred.F
     configuration.setBoolean(ParquetOutputFormat.ENABLE_DICTIONARY, enableDictionary);
   }
 
-  public static void setAsOutputFormat(JobConf jobConf) {
-    jobConf.setOutputFormat(DeprecatedParquetOutputFormat.class);
-    jobConf.setOutputCommitter(MapredParquetOutputCommitter.class);
-  }
-
   private CompressionCodecName getCodec(final JobConf conf) {
     return CodecConfig.from(conf).getCodec();
   }

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/12ee6b44/parquet-hadoop/src/main/java/parquet/hadoop/mapred/MapredParquetOutputCommitter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/parquet/hadoop/mapred/MapredParquetOutputCommitter.java b/parquet-hadoop/src/main/java/parquet/hadoop/mapred/MapredParquetOutputCommitter.java
deleted file mode 100644
index eb97c09..0000000
--- a/parquet-hadoop/src/main/java/parquet/hadoop/mapred/MapredParquetOutputCommitter.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package parquet.hadoop.mapred;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.*;
-import parquet.hadoop.ParquetOutputCommitter;
-
-import java.io.IOException;
-
-/**
- *
- * Adapter for supporting ParquetOutputCommitter in mapred API
- *
- * @author Tianshuo Deng
- */
-public class MapredParquetOutputCommitter extends FileOutputCommitter {
-
-  @Override
-  public void commitJob(JobContext jobContext) throws IOException {
-    JobConf jobConf = jobContext.getJobConf();
-    Path outputPath = FileOutputFormat.getOutputPath(jobConf);
-    ParquetOutputCommitter.writeMetaDataFile(jobConf, outputPath);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/12ee6b44/parquet-scrooge/src/main/java/parquet/scrooge/ParquetScroogeScheme.java
----------------------------------------------------------------------
diff --git a/parquet-scrooge/src/main/java/parquet/scrooge/ParquetScroogeScheme.java b/parquet-scrooge/src/main/java/parquet/scrooge/ParquetScroogeScheme.java
index 2745307..3abe957 100644
--- a/parquet-scrooge/src/main/java/parquet/scrooge/ParquetScroogeScheme.java
+++ b/parquet-scrooge/src/main/java/parquet/scrooge/ParquetScroogeScheme.java
@@ -18,6 +18,8 @@
  */
 package parquet.scrooge;
 
+import java.io.IOException;
+
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.RecordReader;
@@ -25,6 +27,7 @@ import org.apache.hadoop.mapred.RecordReader;
 import com.twitter.scrooge.ThriftStruct;
 
 import cascading.flow.FlowProcess;
+import cascading.scheme.SinkCall;
 import cascading.tap.Tap;
 import parquet.cascading.ParquetValueScheme;
 import parquet.filter2.predicate.FilterPredicate;
@@ -53,7 +56,7 @@ public class ParquetScroogeScheme<T extends ThriftStruct> extends ParquetValueSc
   @Override
   public void sinkConfInit(FlowProcess<JobConf> fp,
       Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
-    DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);
+    jobConf.setOutputFormat(DeprecatedParquetOutputFormat.class);
     ParquetOutputFormat.setWriteSupportClass(jobConf, ScroogeWriteSupport.class);
     ScroogeWriteSupport.setScroogeClass(jobConf, this.config.getKlass());
   }


Mime
View raw message