parquet-commits mailing list archives

From b...@apache.org
Subject [46/51] [partial] parquet-mr git commit: PARQUET-23: Rename to org.apache.parquet.
Date Mon, 27 Apr 2015 23:12:43 GMT
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-avro/src/test/java/parquet/avro/TestReadWrite.java
----------------------------------------------------------------------
diff --git a/parquet-avro/src/test/java/parquet/avro/TestReadWrite.java b/parquet-avro/src/test/java/parquet/avro/TestReadWrite.java
deleted file mode 100644
index 7467378..0000000
--- a/parquet-avro/src/test/java/parquet/avro/TestReadWrite.java
+++ /dev/null
@@ -1,460 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.avro;
-
-import com.google.common.base.Charsets;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Lists;
-import com.google.common.io.Resources;
-import java.io.File;
-import java.nio.ByteBuffer;
-import java.util.*;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericData.Fixed;
-import org.apache.avro.generic.GenericFixed;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.GenericRecordBuilder;
-import org.apache.avro.util.Utf8;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.codehaus.jackson.node.NullNode;
-import org.junit.Test;
-import parquet.hadoop.ParquetWriter;
-import parquet.hadoop.api.WriteSupport;
-import parquet.io.api.Binary;
-import parquet.io.api.RecordConsumer;
-import parquet.schema.MessageTypeParser;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertNotNull;
-
-public class TestReadWrite {
-
-  @Test
-  public void testEmptyArray() throws Exception {
-    Schema schema = new Schema.Parser().parse(
-        Resources.getResource("array.avsc").openStream());
-
-    File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
-    tmp.deleteOnExit();
-    tmp.delete();
-    Path file = new Path(tmp.getPath());
-
-    AvroParquetWriter<GenericRecord> writer = 
-        new AvroParquetWriter<GenericRecord>(file, schema);
-
-    // Write a record with an empty array.
-    List<Integer> emptyArray = new ArrayList<Integer>();
-    GenericData.Record record = new GenericRecordBuilder(schema)
-        .set("myarray", emptyArray).build();
-    writer.write(record);
-    writer.close();
-
-    AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(file);
-    GenericRecord nextRecord = reader.read();
-
-    assertNotNull(nextRecord);
-    assertEquals(emptyArray, nextRecord.get("myarray"));
-  }
-
-  @Test
-  public void testEmptyMap() throws Exception {
-    Schema schema = new Schema.Parser().parse(
-        Resources.getResource("map.avsc").openStream());
-
-    File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
-    tmp.deleteOnExit();
-    tmp.delete();
-    Path file = new Path(tmp.getPath());
-
-    AvroParquetWriter<GenericRecord> writer = 
-        new AvroParquetWriter<GenericRecord>(file, schema);
-
-    // Write a record with an empty map.
-    ImmutableMap emptyMap = new ImmutableMap.Builder<String, Integer>().build();
-    GenericData.Record record = new GenericRecordBuilder(schema)
-        .set("mymap", emptyMap).build();
-    writer.write(record);
-    writer.close();
-
-    AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(file);
-    GenericRecord nextRecord = reader.read();
-
-    assertNotNull(nextRecord);
-    assertEquals(emptyMap, nextRecord.get("mymap"));
-  }
-
-  @Test
-  public void testMapWithNulls() throws Exception {
-    Schema schema = new Schema.Parser().parse(
-        Resources.getResource("map_with_nulls.avsc").openStream());
-
-    File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
-    tmp.deleteOnExit();
-    tmp.delete();
-    Path file = new Path(tmp.getPath());
-
-    AvroParquetWriter<GenericRecord> writer =
-        new AvroParquetWriter<GenericRecord>(file, schema);
-
-    // Write a record with a null value
-    Map<String, Integer> map = new HashMap<String, Integer>();
-    map.put("thirty-four", 34);
-    map.put("eleventy-one", null);
-    map.put("one-hundred", 100);
-
-    GenericData.Record record = new GenericRecordBuilder(schema)
-        .set("mymap", map).build();
-    writer.write(record);
-    writer.close();
-
-    AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(file);
-    GenericRecord nextRecord = reader.read();
-
-    assertNotNull(nextRecord);
-    assertEquals(map, nextRecord.get("mymap"));
-  }
-
-  @Test(expected=RuntimeException.class)
-  public void testMapRequiredValueWithNull() throws Exception {
-    Schema schema = Schema.createRecord("record1", null, null, false);
-    schema.setFields(Lists.newArrayList(
-        new Schema.Field("mymap", Schema.createMap(Schema.create(Schema.Type.INT)), null, null)));
-
-    File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
-    tmp.deleteOnExit();
-    tmp.delete();
-    Path file = new Path(tmp.getPath());
-
-    AvroParquetWriter<GenericRecord> writer =
-        new AvroParquetWriter<GenericRecord>(file, schema);
-
-    // Write a record with a null value
-    Map<String, Integer> map = new HashMap<String, Integer>();
-    map.put("thirty-four", 34);
-    map.put("eleventy-one", null);
-    map.put("one-hundred", 100);
-
-    GenericData.Record record = new GenericRecordBuilder(schema)
-        .set("mymap", map).build();
-    writer.write(record);
-  }
-
-  @Test
-  public void testMapWithUtf8Key() throws Exception {
-    Schema schema = new Schema.Parser().parse(
-        Resources.getResource("map.avsc").openStream());
-
-    File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
-    tmp.deleteOnExit();
-    tmp.delete();
-    Path file = new Path(tmp.getPath());
-
-    AvroParquetWriter<GenericRecord> writer = 
-        new AvroParquetWriter<GenericRecord>(file, schema);
-
-    // Write a record with a map with Utf8 keys.
-    GenericData.Record record = new GenericRecordBuilder(schema)
-        .set("mymap", ImmutableMap.of(new Utf8("a"), 1, new Utf8("b"), 2))
-        .build();
-    writer.write(record);
-    writer.close();
-
-    AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(file);
-    GenericRecord nextRecord = reader.read();
-
-    assertNotNull(nextRecord);
-    assertEquals(ImmutableMap.of("a", 1, "b", 2), nextRecord.get("mymap"));
-  }
-
-  @Test
-  public void testAll() throws Exception {
-    Schema schema = new Schema.Parser().parse(
-        Resources.getResource("all.avsc").openStream());
-
-    File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
-    tmp.deleteOnExit();
-    tmp.delete();
-    Path file = new Path(tmp.getPath());
-    
-    AvroParquetWriter<GenericRecord> writer = new
-        AvroParquetWriter<GenericRecord>(file, schema);
-
-    GenericData.Record nestedRecord = new GenericRecordBuilder(
-        schema.getField("mynestedrecord").schema())
-            .set("mynestedint", 1).build();
-
-    List<Integer> integerArray = Arrays.asList(1, 2, 3);
-    GenericData.Array<Integer> genericIntegerArray = new GenericData.Array<Integer>(
-        Schema.createArray(Schema.create(Schema.Type.INT)), integerArray);
-
-    GenericFixed genericFixed = new GenericData.Fixed(
-        Schema.createFixed("fixed", null, null, 1), new byte[] { (byte) 65 });
-
-    List<Integer> emptyArray = new ArrayList<Integer>();
-    ImmutableMap emptyMap = new ImmutableMap.Builder<String, Integer>().build();
-
-    GenericData.Record record = new GenericRecordBuilder(schema)
-        .set("mynull", null)
-        .set("myboolean", true)
-        .set("myint", 1)
-        .set("mylong", 2L)
-        .set("myfloat", 3.1f)
-        .set("mydouble", 4.1)
-        .set("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)))
-        .set("mystring", "hello")
-        .set("mynestedrecord", nestedRecord)
-        .set("myenum", "a")
-        .set("myarray", genericIntegerArray)
-        .set("myemptyarray", emptyArray)
-        .set("myoptionalarray", genericIntegerArray)
-        .set("mymap", ImmutableMap.of("a", 1, "b", 2))
-        .set("myemptymap", emptyMap)
-        .set("myfixed", genericFixed)
-        .build();
-
-    writer.write(record);
-    writer.close();
-
-    AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(file);
-    GenericRecord nextRecord = reader.read();
-
-    assertNotNull(nextRecord);
-    assertEquals(null, nextRecord.get("mynull"));
-    assertEquals(true, nextRecord.get("myboolean"));
-    assertEquals(1, nextRecord.get("myint"));
-    assertEquals(2L, nextRecord.get("mylong"));
-    assertEquals(3.1f, nextRecord.get("myfloat"));
-    assertEquals(4.1, nextRecord.get("mydouble"));
-    assertEquals(ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)), nextRecord.get("mybytes"));
-    assertEquals("hello", nextRecord.get("mystring"));
-    assertEquals("a", nextRecord.get("myenum"));
-    assertEquals(nestedRecord, nextRecord.get("mynestedrecord"));
-    assertEquals(integerArray, nextRecord.get("myarray"));
-    assertEquals(emptyArray, nextRecord.get("myemptyarray"));
-    assertEquals(integerArray, nextRecord.get("myoptionalarray"));
-    assertEquals(ImmutableMap.of("a", 1, "b", 2), nextRecord.get("mymap"));
-    assertEquals(emptyMap, nextRecord.get("myemptymap"));
-    assertEquals(genericFixed, nextRecord.get("myfixed"));
-  }
-
-  @Test
-  public void testAllUsingDefaultAvroSchema() throws Exception {
-    File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
-    tmp.deleteOnExit();
-    tmp.delete();
-    Path file = new Path(tmp.getPath());
-
-    // write file using Parquet APIs
-    ParquetWriter<Map<String, Object>> parquetWriter = new ParquetWriter<Map<String, Object>>(file,
-        new WriteSupport<Map<String, Object>>() {
-
-      private RecordConsumer recordConsumer;
-
-      @Override
-      public WriteContext init(Configuration configuration) {
-        return new WriteContext(MessageTypeParser.parseMessageType(TestAvroSchemaConverter.ALL_PARQUET_SCHEMA),
-            new HashMap<String, String>());
-      }
-
-      @Override
-      public void prepareForWrite(RecordConsumer recordConsumer) {
-        this.recordConsumer = recordConsumer;
-      }
-
-      @Override
-      public void write(Map<String, Object> record) {
-        recordConsumer.startMessage();
-
-        int index = 0;
-
-        recordConsumer.startField("myboolean", index);
-        recordConsumer.addBoolean((Boolean) record.get("myboolean"));
-        recordConsumer.endField("myboolean", index++);
-
-        recordConsumer.startField("myint", index);
-        recordConsumer.addInteger((Integer) record.get("myint"));
-        recordConsumer.endField("myint", index++);
-
-        recordConsumer.startField("mylong", index);
-        recordConsumer.addLong((Long) record.get("mylong"));
-        recordConsumer.endField("mylong", index++);
-
-        recordConsumer.startField("myfloat", index);
-        recordConsumer.addFloat((Float) record.get("myfloat"));
-        recordConsumer.endField("myfloat", index++);
-
-        recordConsumer.startField("mydouble", index);
-        recordConsumer.addDouble((Double) record.get("mydouble"));
-        recordConsumer.endField("mydouble", index++);
-
-        recordConsumer.startField("mybytes", index);
-        recordConsumer.addBinary(Binary.fromByteBuffer((ByteBuffer) record.get("mybytes")));
-        recordConsumer.endField("mybytes", index++);
-
-        recordConsumer.startField("mystring", index);
-        recordConsumer.addBinary(Binary.fromString((String) record.get("mystring")));
-        recordConsumer.endField("mystring", index++);
-
-        recordConsumer.startField("mynestedrecord", index);
-        recordConsumer.startGroup();
-        recordConsumer.startField("mynestedint", 0);
-        recordConsumer.addInteger((Integer) record.get("mynestedint"));
-        recordConsumer.endField("mynestedint", 0);
-        recordConsumer.endGroup();
-        recordConsumer.endField("mynestedrecord", index++);
-
-        recordConsumer.startField("myenum", index);
-        recordConsumer.addBinary(Binary.fromString((String) record.get("myenum")));
-        recordConsumer.endField("myenum", index++);
-
-        recordConsumer.startField("myarray", index);
-        recordConsumer.startGroup();
-        recordConsumer.startField("array", 0);
-        for (int val : (int[]) record.get("myarray")) {
-          recordConsumer.addInteger(val);
-        }
-        recordConsumer.endField("array", 0);
-        recordConsumer.endGroup();
-        recordConsumer.endField("myarray", index++);
-
-        recordConsumer.startField("myoptionalarray", index);
-        recordConsumer.startGroup();
-        recordConsumer.startField("array", 0);
-        for (int val : (int[]) record.get("myoptionalarray")) {
-          recordConsumer.addInteger(val);
-        }
-        recordConsumer.endField("array", 0);
-        recordConsumer.endGroup();
-        recordConsumer.endField("myoptionalarray", index++);
-
-        recordConsumer.startField("myrecordarray", index);
-        recordConsumer.startGroup();
-        recordConsumer.startField("array", 0);
-        recordConsumer.startGroup();
-        recordConsumer.startField("a", 0);
-        for (int val : (int[]) record.get("myrecordarraya")) {
-          recordConsumer.addInteger(val);
-        }
-        recordConsumer.endField("a", 0);
-        recordConsumer.startField("b", 1);
-        for (int val : (int[]) record.get("myrecordarrayb")) {
-          recordConsumer.addInteger(val);
-        }
-        recordConsumer.endField("b", 1);
-        recordConsumer.endGroup();
-        recordConsumer.endField("array", 0);
-        recordConsumer.endGroup();
-        recordConsumer.endField("myrecordarray", index++);
-
-        recordConsumer.startField("mymap", index);
-        recordConsumer.startGroup();
-        recordConsumer.startField("map", 0);
-        recordConsumer.startGroup();
-        Map<String, Integer> mymap = (Map<String, Integer>) record.get("mymap");
-        recordConsumer.startField("key", 0);
-        for (String key : mymap.keySet()) {
-          recordConsumer.addBinary(Binary.fromString(key));
-        }
-        recordConsumer.endField("key", 0);
-        recordConsumer.startField("value", 1);
-        for (int val : mymap.values()) {
-          recordConsumer.addInteger(val);
-        }
-        recordConsumer.endField("value", 1);
-        recordConsumer.endGroup();
-        recordConsumer.endField("map", 0);
-        recordConsumer.endGroup();
-        recordConsumer.endField("mymap", index++);
-
-        recordConsumer.startField("myfixed", index);
-        recordConsumer.addBinary(Binary.fromByteArray((byte[]) record.get("myfixed")));
-        recordConsumer.endField("myfixed", index++);
-
-        recordConsumer.endMessage();
-      }
-    });
-    Map<String, Object> record = new HashMap<String, Object>();
-    record.put("myboolean", true);
-    record.put("myint", 1);
-    record.put("mylong", 2L);
-    record.put("myfloat", 3.1f);
-    record.put("mydouble", 4.1);
-    record.put("mybytes", ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)));
-    record.put("mystring", "hello");
-    record.put("myenum", "a");
-    record.put("mynestedint", 1);
-    record.put("myarray", new int[] {1, 2, 3});
-    record.put("myoptionalarray", new int[]{1, 2, 3});
-    record.put("myrecordarraya", new int[] {1, 2, 3});
-    record.put("myrecordarrayb", new int[] {4, 5, 6});
-    record.put("mymap", ImmutableMap.of("a", 1, "b", 2));
-    record.put("myfixed", new byte[] { (byte) 65 });
-    parquetWriter.write(record);
-    parquetWriter.close();
-
-    Schema nestedRecordSchema = Schema.createRecord("mynestedrecord", null, null, false);
-    nestedRecordSchema.setFields(Arrays.asList(
-        new Schema.Field("mynestedint", Schema.create(Schema.Type.INT), null, null)
-    ));
-    GenericData.Record nestedRecord = new GenericRecordBuilder(nestedRecordSchema)
-        .set("mynestedint", 1).build();
-
-    List<Integer> integerArray = Arrays.asList(1, 2, 3);
-
-    Schema recordArraySchema = Schema.createRecord("array", null, null, false);
-    recordArraySchema.setFields(Arrays.asList(
-        new Schema.Field("a", Schema.create(Schema.Type.INT), null, null),
-        new Schema.Field("b", Schema.create(Schema.Type.INT), null, null)
-    ));
-    GenericRecordBuilder builder = new GenericRecordBuilder(recordArraySchema);
-    List<GenericData.Record> recordArray = new ArrayList<GenericData.Record>();
-    recordArray.add(builder.set("a", 1).set("b", 4).build());
-    recordArray.add(builder.set("a", 2).set("b", 5).build());
-    recordArray.add(builder.set("a", 3).set("b", 6).build());
-    GenericData.Array<GenericData.Record> genericRecordArray = new GenericData.Array<GenericData.Record>(
-        Schema.createArray(recordArraySchema), recordArray);
-
-    GenericFixed genericFixed = new GenericData.Fixed(
-        Schema.createFixed("fixed", null, null, 1), new byte[] { (byte) 65 });
-
-    AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(file);
-    GenericRecord nextRecord = reader.read();
-    assertNotNull(nextRecord);
-    assertEquals(true, nextRecord.get("myboolean"));
-    assertEquals(1, nextRecord.get("myint"));
-    assertEquals(2L, nextRecord.get("mylong"));
-    assertEquals(3.1f, nextRecord.get("myfloat"));
-    assertEquals(4.1, nextRecord.get("mydouble"));
-    assertEquals(ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8)), nextRecord.get("mybytes"));
-    assertEquals("hello", nextRecord.get("mystring"));
-    assertEquals("a", nextRecord.get("myenum"));
-    assertEquals(nestedRecord, nextRecord.get("mynestedrecord"));
-    assertEquals(integerArray, nextRecord.get("myarray"));
-    assertEquals(integerArray, nextRecord.get("myoptionalarray"));
-    assertEquals(genericRecordArray, nextRecord.get("myrecordarray"));
-    assertEquals(ImmutableMap.of("a", 1, "b", 2), nextRecord.get("mymap"));
-    assertEquals(genericFixed, nextRecord.get("myfixed"));
-
-  }
-
-}

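For readers following the rename, the round-trip pattern these deleted tests exercise is unchanged apart from the package prefix. A minimal sketch against the new org.apache.parquet.avro namespace, assuming the renamed AvroParquetWriter/AvroParquetReader keep the pre-rename constructors (the inline schema and file path are illustrative):

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetReader;
import org.apache.parquet.avro.AvroParquetWriter;

public class RoundTripSketch {
  public static void main(String[] args) throws Exception {
    Schema schema = new Schema.Parser().parse(
        "{\"type\": \"record\", \"name\": \"r\", \"fields\": "
        + "[{\"name\": \"myint\", \"type\": \"int\"}]}");
    Path file = new Path("/tmp/round-trip.parquet"); // illustrative path

    // Write a single record, then read it back, as the tests above do.
    AvroParquetWriter<GenericRecord> writer =
        new AvroParquetWriter<GenericRecord>(file, schema);
    writer.write(new GenericRecordBuilder(schema).set("myint", 1).build());
    writer.close();

    AvroParquetReader<GenericRecord> reader =
        new AvroParquetReader<GenericRecord>(file);
    System.out.println(reader.read().get("myint")); // prints 1
  }
}
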
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-avro/src/test/java/parquet/avro/TestSpecificInputOutputFormat.java
----------------------------------------------------------------------
diff --git a/parquet-avro/src/test/java/parquet/avro/TestSpecificInputOutputFormat.java b/parquet-avro/src/test/java/parquet/avro/TestSpecificInputOutputFormat.java
deleted file mode 100644
index 2cfe13e..0000000
--- a/parquet-avro/src/test/java/parquet/avro/TestSpecificInputOutputFormat.java
+++ /dev/null
@@ -1,286 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.avro;
-
-import static java.lang.Thread.sleep;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import com.google.common.collect.Lists;
-import java.io.IOException;
-import java.util.List;
-import org.apache.avro.Schema;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-import parquet.Log;
-import parquet.column.ColumnReader;
-import parquet.filter.ColumnPredicates;
-import parquet.filter.ColumnRecordFilter;
-import parquet.filter.RecordFilter;
-import parquet.filter.UnboundRecordFilter;
-
-public class TestSpecificInputOutputFormat {
-  private static final Log LOG = Log.getLog(TestSpecificInputOutputFormat.class);
-
-  public static Car nextRecord(int i) {
-    String vin = "1VXBR12EXCP000000";
-    Car.Builder carBuilder = Car.newBuilder()
-        .setDoors(2)
-        .setMake("Tesla")
-        .setModel(String.format("Model X v%d", i % 2))
-        .setVin(new Vin(vin.getBytes()))
-        .setYear(2014 + i)
-        .setOptionalExtra(LeatherTrim.newBuilder().setColour("black").build())
-        .setRegistration("California");
-    Engine.Builder engineBuilder = Engine.newBuilder()
-        .setCapacity(85.0f)
-        .setHasTurboCharger(false);
-    if (i % 2 == 0) {
-      engineBuilder.setType(EngineType.ELECTRIC);
-    } else {
-      engineBuilder.setType(EngineType.PETROL);
-    }
-    carBuilder.setEngine(engineBuilder.build());
-    if (i % 4 == 0) {
-      List<Service> serviceList = Lists.newArrayList();
-      serviceList.add(Service.newBuilder()
-          .setDate(1374084640)
-          .setMechanic("Elon Musk").build());
-      carBuilder.setServiceHistory(serviceList);
-    }
-    return carBuilder.build();
-  }
-
-  public static class MyMapper extends Mapper<LongWritable, Text, Void, Car> {
-
-    @Override
-    public void run(Context context) throws IOException, InterruptedException {
-      for (int i = 0; i < 10; i++) {
-        context.write(null, nextRecord(i));
-      }
-    }
-  }
-
-  public static class MyMapper2 extends Mapper<Void, Car, Void, Car> {
-    @Override
-    protected void map(Void key, Car car, Context context) throws IOException, InterruptedException {
-      // Note: Car can be null because of predicate pushdown defined by an UnboundRecordFilter (see below)
-      if (car != null) {
-        context.write(null, car);
-      }
-    }
-
-  }
-
-  public static class MyMapperShort extends
-      Mapper<Void, ShortCar, Void, ShortCar> {
-    @Override
-    protected void map(Void key, ShortCar car, Context context)
-        throws IOException, InterruptedException {
-      // Note: Car can be null because of predicate pushdown defined by an
-      // UnboundRecordFilter (see below)
-      if (car != null) {
-        context.write(null, car);
-      }
-    }
-
-  }
-
-  public static class ElectricCarFilter implements UnboundRecordFilter {
-
-    private final UnboundRecordFilter filter;
-
-    public ElectricCarFilter() {
-      filter = ColumnRecordFilter.column("engine.type", ColumnPredicates.equalTo(EngineType.ELECTRIC));
-    }
-
-    @Override
-    public RecordFilter bind(Iterable<ColumnReader> readers) {
-      return filter.bind(readers);
-    }
-  }
-
-  final Configuration conf = new Configuration();
-  final Path inputPath = new Path("src/test/java/parquet/avro/TestSpecificInputOutputFormat.java");
-  final Path parquetPath = new Path("target/test/hadoop/TestSpecificInputOutputFormat/parquet");
-  final Path outputPath = new Path("target/test/hadoop/TestSpecificInputOutputFormat/out");
-
-  @Before
-  public void createParquetFile() throws Exception {
-    final FileSystem fileSystem = parquetPath.getFileSystem(conf);
-    fileSystem.delete(parquetPath, true);
-    fileSystem.delete(outputPath, true);
-    {
-      final Job job = new Job(conf, "write");
-
-      // input not really used
-      TextInputFormat.addInputPath(job, inputPath);
-      job.setInputFormatClass(TextInputFormat.class);
-
-      job.setMapperClass(TestSpecificInputOutputFormat.MyMapper.class);
-      job.setNumReduceTasks(0);
-
-      job.setOutputFormatClass(AvroParquetOutputFormat.class);
-      AvroParquetOutputFormat.setOutputPath(job, parquetPath);
-      AvroParquetOutputFormat.setSchema(job, Car.SCHEMA$);
-
-      waitForJob(job);
-    }
-  }
-  
-  @Test
-  public void testReadWrite() throws Exception {
-
-    final Job job = new Job(conf, "read");
-    job.setInputFormatClass(AvroParquetInputFormat.class);
-    AvroParquetInputFormat.setInputPaths(job, parquetPath);
-    // Test push-down predicates by using an electric car filter
-    AvroParquetInputFormat.setUnboundRecordFilter(job, ElectricCarFilter.class);
-
-    // Test schema projection by dropping the optional extras
-    Schema projection = Schema.createRecord(Car.SCHEMA$.getName(),
-        Car.SCHEMA$.getDoc(), Car.SCHEMA$.getNamespace(), false);
-    List<Schema.Field> fields = Lists.newArrayList();
-    for (Schema.Field field : Car.SCHEMA$.getFields()) {
-      if (!"optionalExtra".equals(field.name())) {
-        fields.add(new Schema.Field(field.name(), field.schema(), field.doc(),
-            field.defaultValue(), field.order()));
-      }
-    }
-    projection.setFields(fields);
-    AvroParquetInputFormat.setRequestedProjection(job, projection);
-
-    job.setMapperClass(TestSpecificInputOutputFormat.MyMapper2.class);
-    job.setNumReduceTasks(0);
-
-    job.setOutputFormatClass(AvroParquetOutputFormat.class);
-    AvroParquetOutputFormat.setOutputPath(job, outputPath);
-    AvroParquetOutputFormat.setSchema(job, Car.SCHEMA$);
-
-    waitForJob(job);
-
-    final Path mapperOutput = new Path(outputPath.toString(),
-        "part-m-00000.parquet");
-    final AvroParquetReader<Car> out = new AvroParquetReader<Car>(mapperOutput);
-    Car car;
-    Car previousCar = null;
-    int lineNumber = 0;
-    while ((car = out.read()) != null) {
-      if (previousCar != null) {
-         // Testing reference equality here. The "model" field should be dictionary-encoded.
-         assertTrue(car.getModel() == previousCar.getModel());
-      }
-      // Make sure that predicate push down worked as expected
-      if (car.getEngine().getType() == EngineType.PETROL) {
-        fail("UnboundRecordFilter failed to remove cars with PETROL engines");
-      }
-      // Note we use lineNumber * 2 because of predicate push down
-      Car expectedCar = nextRecord(lineNumber * 2);
-      // We removed the optional extra field using projection so we shouldn't
-      // see it here...
-      expectedCar.setOptionalExtra(null);
-      assertEquals("line " + lineNumber, expectedCar, car);
-      ++lineNumber;
-      previousCar = car;
-    }
-    out.close();
-  }
-
-  @Test
-  public void testReadWriteChangedCar() throws Exception {
-
-    final Job job = new Job(conf, "read changed/short");
-    job.setInputFormatClass(AvroParquetInputFormat.class);
-    AvroParquetInputFormat.setInputPaths(job, parquetPath);
-    // Test push-down predicates by using an electric car filter
-    AvroParquetInputFormat.setUnboundRecordFilter(job, ElectricCarFilter.class);
-
-    // Test schema projection by keeping only the engine, year, and vin
-    // (the ShortCar fields minus make, which then reads back as null)
-    Schema projection = Schema.createRecord(Car.SCHEMA$.getName(),
-        Car.SCHEMA$.getDoc(), Car.SCHEMA$.getNamespace(), false);
-    List<Schema.Field> fields = Lists.newArrayList();
-    for (Schema.Field field : Car.SCHEMA$.getFields()) {
-      // No make!
-      if ("engine".equals(field.name()) || "year".equals(field.name()) || "vin".equals(field.name())) {
-        fields.add(new Schema.Field(field.name(), field.schema(), field.doc(),
-            field.defaultValue(), field.order()));
-      }
-    }
-    projection.setFields(fields);
-    AvroParquetInputFormat.setRequestedProjection(job, projection);
-    AvroParquetInputFormat.setAvroReadSchema(job, ShortCar.SCHEMA$);
-
-    job.setMapperClass(TestSpecificInputOutputFormat.MyMapperShort.class);
-    job.setNumReduceTasks(0);
-
-    job.setOutputFormatClass(AvroParquetOutputFormat.class);
-    AvroParquetOutputFormat.setOutputPath(job, outputPath);
-    AvroParquetOutputFormat.setSchema(job, ShortCar.SCHEMA$);
-
-    waitForJob(job);
-
-    final Path mapperOutput = new Path(outputPath.toString(), "part-m-00000.parquet");
-    final AvroParquetReader<ShortCar> out = new AvroParquetReader<ShortCar>(mapperOutput);
-    ShortCar car;
-    int lineNumber = 0;
-    while ((car = out.read()) != null) {
-      // Make sure that predicate push down worked as expected
-      // Note we use lineNumber * 2 because of predicate push down
-      Car expectedCar = nextRecord(lineNumber * 2);
-      // We dropped the make field using projection, so it should read back as null
-      assertNull(car.getMake());
-      assertEquals(car.getEngine(), expectedCar.getEngine());
-      assertEquals(car.getYear(), expectedCar.getYear());
-      assertEquals(car.getVin(), expectedCar.getVin());
-      ++lineNumber;
-    }
-    out.close();
-  }
-
-  private void waitForJob(Job job) throws Exception {
-    job.submit();
-    while (!job.isComplete()) {
-      LOG.debug("waiting for job " + job.getJobName());
-      sleep(100);
-    }
-    LOG.info("status for job " + job.getJobName() + ": " + (job.isSuccessful() ? "SUCCESS" : "FAILURE"));
-    if (!job.isSuccessful()) {
-      throw new RuntimeException("job failed " + job.getJobName());
-    }
-  }
-
-  @After
-  public void deleteOutputFile() throws IOException {
-    final FileSystem fileSystem = parquetPath.getFileSystem(conf);
-    fileSystem.delete(parquetPath, true);
-    fileSystem.delete(outputPath, true);
-  }
-}

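The ElectricCarFilter pattern above survives the rename with only import changes: an UnboundRecordFilter wraps a column predicate that gets bound to the column readers at read time. A minimal sketch under the new org.apache.parquet.filter namespace (EngineType is the test's generated Avro enum, assumed to be on the classpath):

import org.apache.parquet.column.ColumnReader;
import org.apache.parquet.filter.ColumnPredicates;
import org.apache.parquet.filter.ColumnRecordFilter;
import org.apache.parquet.filter.RecordFilter;
import org.apache.parquet.filter.UnboundRecordFilter;

public class ElectricCarFilter implements UnboundRecordFilter {

  // Keep only records whose engine.type column equals ELECTRIC.
  private final UnboundRecordFilter filter = ColumnRecordFilter.column(
      "engine.type", ColumnPredicates.equalTo(EngineType.ELECTRIC));

  @Override
  public RecordFilter bind(Iterable<ColumnReader> readers) {
    // Delegate binding to the wrapped column filter.
    return filter.bind(readers);
  }
}
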
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-avro/src/test/java/parquet/avro/TestSpecificReadWrite.java
----------------------------------------------------------------------
diff --git a/parquet-avro/src/test/java/parquet/avro/TestSpecificReadWrite.java b/parquet-avro/src/test/java/parquet/avro/TestSpecificReadWrite.java
deleted file mode 100644
index 6e29dfb..0000000
--- a/parquet-avro/src/test/java/parquet/avro/TestSpecificReadWrite.java
+++ /dev/null
@@ -1,288 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.avro;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static parquet.filter.ColumnPredicates.equalTo;
-import static parquet.filter.ColumnRecordFilter.column;
-import static parquet.hadoop.ParquetWriter.DEFAULT_BLOCK_SIZE;
-import static parquet.hadoop.ParquetWriter.DEFAULT_PAGE_SIZE;
-
-import com.google.common.collect.ImmutableList;
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import org.apache.avro.Schema;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.junit.Test;
-import parquet.hadoop.ParquetReader;
-import parquet.hadoop.ParquetWriter;
-import parquet.hadoop.metadata.CompressionCodecName;
-
-/**
- * Other tests exercise the use of Avro Generic, a dynamic data representation. This class focuses
- * on Avro Specific, whose schemas are pre-compiled to POJOs with built-in SerDe for faster serialization.
- */
-public class TestSpecificReadWrite {
-
-  @Test
-  public void testReadWriteSpecific() throws IOException {
-    Path path = writeCarsToParquetFile(10, CompressionCodecName.UNCOMPRESSED, false);
-    ParquetReader<Car> reader = new AvroParquetReader<Car>(path);
-    for (int i = 0; i < 10; i++) {
-      assertEquals(getVwPolo().toString(), reader.read().toString());
-      assertEquals(getVwPassat().toString(), reader.read().toString());
-      assertEquals(getBmwMini().toString(), reader.read().toString());
-    }
-    assertNull(reader.read());
-  }
-
-  @Test
-  public void testReadWriteSpecificWithDictionary() throws IOException {
-    Path path = writeCarsToParquetFile(10, CompressionCodecName.UNCOMPRESSED, true);
-    ParquetReader<Car> reader = new AvroParquetReader<Car>(path);
-    for (int i = 0; i < 10; i++) {
-      assertEquals(getVwPolo().toString(), reader.read().toString());
-      assertEquals(getVwPassat().toString(), reader.read().toString());
-      assertEquals(getBmwMini().toString(), reader.read().toString());
-    }
-    assertNull(reader.read());
-  }
-
-  @Test
-  public void testFilterMatchesMultiple() throws IOException {
-    Path path = writeCarsToParquetFile(10, CompressionCodecName.UNCOMPRESSED, false);
-    ParquetReader<Car> reader = new AvroParquetReader<Car>(path, column("make", equalTo("Volkswagen")));
-    for (int i = 0; i < 10; i++) {
-      assertEquals(getVwPolo().toString(), reader.read().toString());
-      assertEquals(getVwPassat().toString(), reader.read().toString());
-    }
-    assertNull(reader.read());
-  }
-
-  @Test
-  public void testFilterMatchesMultipleBlocks() throws IOException {
-    Path path = writeCarsToParquetFile(10000, CompressionCodecName.UNCOMPRESSED, false, DEFAULT_BLOCK_SIZE/64, DEFAULT_PAGE_SIZE/64);
-    ParquetReader<Car> reader = new AvroParquetReader<Car>(path, column("make", equalTo("Volkswagen")));
-    for (int i = 0; i < 10000; i++) {
-      assertEquals(getVwPolo().toString(), reader.read().toString());
-      assertEquals(getVwPassat().toString(), reader.read().toString());
-    }
-    assertNull(reader.read());
-  }
-
-  @Test
-  public void testFilterMatchesNoBlocks() throws IOException {
-    Path path = writeCarsToParquetFile(10000, CompressionCodecName.UNCOMPRESSED, false, DEFAULT_BLOCK_SIZE/64, DEFAULT_PAGE_SIZE/64);
-    ParquetReader<Car> reader = new AvroParquetReader<Car>(path, column("make", equalTo("Bogus")));
-    assertNull(reader.read());
-  }
-
-  @Test
-  public void testFilterMatchesFinalBlockOnly() throws IOException {
-    File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
-    tmp.deleteOnExit();
-    tmp.delete();
-    Path path = new Path(tmp.getPath());
-
-    Car vwPolo   = getVwPolo();
-    Car vwPassat = getVwPassat();
-    Car bmwMini  = getBmwMini();
-
-    ParquetWriter<Car> writer = new AvroParquetWriter<Car>(path, Car.SCHEMA$,
-        CompressionCodecName.UNCOMPRESSED, DEFAULT_BLOCK_SIZE/128, DEFAULT_PAGE_SIZE/128,
-        false);
-    for (int i = 0; i < 10000; i++) {
-      writer.write(vwPolo);
-      writer.write(vwPassat);
-      writer.write(vwPolo);
-    }
-    writer.write(bmwMini); // only write BMW in last block
-    writer.close();
-
-    ParquetReader<Car> reader = new AvroParquetReader<Car>(path, column("make",
-        equalTo("BMW")));
-    assertEquals(getBmwMini().toString(), reader.read().toString());
-    assertNull(reader.read());
-  }
-
-  @Test
-  public void testFilterWithDictionary() throws IOException {
-    Path path = writeCarsToParquetFile(1, CompressionCodecName.UNCOMPRESSED, true);
-    ParquetReader<Car> reader = new AvroParquetReader<Car>(path, column("make", equalTo("Volkswagen")));
-    assertEquals(getVwPolo().toString(), reader.read().toString());
-    assertEquals(getVwPassat().toString(), reader.read().toString());
-    assertNull(reader.read());
-  }
-
-  @Test
-  public void testFilterOnSubAttribute() throws IOException {
-    Path path = writeCarsToParquetFile(1, CompressionCodecName.UNCOMPRESSED, false);
-    
-    ParquetReader<Car> reader = new AvroParquetReader<Car>(path, column("engine.type", equalTo(EngineType.DIESEL)));
-    assertEquals(reader.read().toString(), getVwPassat().toString());
-    assertNull(reader.read());
-
-    reader = new AvroParquetReader<Car>(path, column("engine.capacity", equalTo(1.4f)));
-    assertEquals(getVwPolo().toString(), reader.read().toString());
-    assertNull(reader.read());
-
-    reader = new AvroParquetReader<Car>(path, column("engine.hasTurboCharger", equalTo(true)));
-    assertEquals(getBmwMini().toString(), reader.read().toString());
-    assertNull(reader.read());
-  }
-
-  @Test
-  public void testProjection() throws IOException {
-    Path path = writeCarsToParquetFile(1, CompressionCodecName.UNCOMPRESSED, false);
-    Configuration conf = new Configuration();
-
-    Schema schema = Car.getClassSchema();
-    List<Schema.Field> fields = schema.getFields();
-
-    //Schema.Parser parser = new Schema.Parser();
-    List<Schema.Field> projectedFields = new ArrayList<Schema.Field>();
-    for (Schema.Field field : fields) {
-      String name = field.name();
-      if ("optionalExtra".equals(name) ||
-          "serviceHistory".equals(name)) {
-        continue;
-      }
-
-      //Schema schemaClone = parser.parse(field.schema().toString(false));
-      Schema.Field fieldClone = new Schema.Field(name, field.schema(), field.doc(), field.defaultValue());
-      projectedFields.add(fieldClone);
-    }
-
-    Schema projectedSchema = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.isError());
-    projectedSchema.setFields(projectedFields);
-    AvroReadSupport.setRequestedProjection(conf, projectedSchema);
-
-    ParquetReader<Car> reader = new AvroParquetReader<Car>(conf, path);
-    for (Car car = reader.read(); car != null; car = reader.read()) {
-      assertEquals(car.getDoors() != null, true);
-      assertEquals(car.getEngine() != null, true);
-      assertEquals(car.getMake() != null, true);
-      assertEquals(car.getModel() != null, true);
-      assertEquals(car.getYear() != null, true);
-      assertEquals(car.getVin() != null, true);
-      assertNull(car.getOptionalExtra());
-      assertNull(car.getServiceHistory());
-    }
-  }
-
-  @Test
-  public void testAvroReadSchema() throws IOException {
-    Path path = writeCarsToParquetFile(1, CompressionCodecName.UNCOMPRESSED, false);
-    Configuration conf = new Configuration();
-    AvroReadSupport.setAvroReadSchema(conf, NewCar.SCHEMA$);
-
-    ParquetReader<NewCar> reader = new AvroParquetReader<NewCar>(conf, path);
-    for (NewCar car = reader.read(); car != null; car = reader.read()) {
-      assertEquals(car.getEngine() != null, true);
-      assertEquals(car.getBrand() != null, true);
-      assertEquals(car.getYear() != null, true);
-      assertEquals(car.getVin() != null, true);
-      assertEquals(car.getDescription() == null, true);
-      assertEquals(car.getOpt() == 5, true);
-    }
-  }
-
-  private Path writeCarsToParquetFile(int num, CompressionCodecName compression, boolean enableDictionary) throws IOException {
-    return writeCarsToParquetFile(num, compression, enableDictionary, DEFAULT_BLOCK_SIZE, DEFAULT_PAGE_SIZE);
-  }
-
-  private Path writeCarsToParquetFile(int num, CompressionCodecName compression, boolean enableDictionary, int blockSize, int pageSize) throws IOException {
-    File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
-    tmp.deleteOnExit();
-    tmp.delete();
-    Path path = new Path(tmp.getPath());
-
-    Car vwPolo   = getVwPolo();
-    Car vwPassat = getVwPassat();
-    Car bmwMini  = getBmwMini();
-
-    ParquetWriter<Car> writer = new AvroParquetWriter<Car>(path, Car.SCHEMA$, compression,
-        blockSize, pageSize, enableDictionary);
-    for (int i = 0; i < num; i++) {
-      writer.write(vwPolo);
-      writer.write(vwPassat);
-      writer.write(bmwMini);
-    }
-    writer.close();
-    return path;
-  }
-
-  public static Car getVwPolo() {
-    String vin = "WVWDB4505LK000001";
-    return Car.newBuilder()
-        .setYear(2010)
-        .setRegistration("A123 GTR")
-        .setMake("Volkswagen")
-        .setModel("Polo")
-        .setVin(new Vin(vin.getBytes()))
-        .setDoors(4)
-        .setEngine(Engine.newBuilder().setType(EngineType.PETROL)
-                  .setCapacity(1.4f).setHasTurboCharger(false).build())
-        .setOptionalExtra(
-            Stereo.newBuilder().setMake("Blaupunkt").setSpeakers(4).build())
-        .setServiceHistory(ImmutableList.of(
-            Service.newBuilder().setDate(1325376000l).setMechanic("Jim").build(),
-            Service.newBuilder().setDate(1356998400l).setMechanic("Mike").build()))
-        .build();
-  }
-
-  public static Car getVwPassat() {
-    String vin = "WVWDB4505LK000002";
-    return Car.newBuilder()
-        .setYear(2010)
-        .setRegistration("A123 GXR")
-        .setMake("Volkswagen")
-        .setModel("Passat")
-        .setVin(new Vin(vin.getBytes()))
-        .setDoors(5)
-        .setEngine(Engine.newBuilder().setType(EngineType.DIESEL)
-            .setCapacity(2.0f).setHasTurboCharger(false).build())
-        .setOptionalExtra(
-            LeatherTrim.newBuilder().setColour("Black").build())
-        .setServiceHistory(ImmutableList.of(
-            Service.newBuilder().setDate(1325376000l).setMechanic("Jim").build()))
-        .build();
-  }
-
-  public static Car getBmwMini() {
-    String vin = "WBABA91060AL00003";
-    return Car.newBuilder()
-        .setYear(2010)
-        .setRegistration("A124 GSR")
-        .setMake("BMW")
-        .setModel("Mini")
-        .setVin(new Vin(vin.getBytes()))
-        .setDoors(4)
-        .setEngine(Engine.newBuilder().setType(EngineType.PETROL)
-            .setCapacity(1.6f).setHasTurboCharger(true).build())
-        .setOptionalExtra(null)
-        .setServiceHistory(ImmutableList.of(
-            Service.newBuilder().setDate(1356998400l).setMechanic("Mike").build()))
-        .build();
-  }
-}

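Likewise, the projection idiom in testProjection above maps one-to-one onto the new packages. A minimal sketch, assuming the generated Car class and the renamed org.apache.parquet.avro.AvroReadSupport; fields omitted from the projection are materialized as null:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetReader;
import org.apache.parquet.avro.AvroReadSupport;
import org.apache.parquet.hadoop.ParquetReader;

public class ProjectionSketch {

  // Open a reader that materializes only the named Car fields.
  public static ParquetReader<Car> openProjected(Path path, List<String> keep)
      throws IOException {
    Schema full = Car.getClassSchema(); // Car is the generated Avro class
    List<Schema.Field> fields = new ArrayList<Schema.Field>();
    for (Schema.Field f : full.getFields()) {
      if (keep.contains(f.name())) {
        // Field objects cannot be reused across schemas, so clone each one.
        fields.add(new Schema.Field(f.name(), f.schema(), f.doc(), f.defaultValue()));
      }
    }
    Schema projection = Schema.createRecord(
        full.getName(), full.getDoc(), full.getNamespace(), full.isError());
    projection.setFields(fields);

    Configuration conf = new Configuration();
    AvroReadSupport.setRequestedProjection(conf, projection);
    return new AvroParquetReader<Car>(conf, path);
  }
}
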
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-avro/src/test/resources/all.avsc
----------------------------------------------------------------------
diff --git a/parquet-avro/src/test/resources/all.avsc b/parquet-avro/src/test/resources/all.avsc
index 0e78894..59fd91c 100644
--- a/parquet-avro/src/test/resources/all.avsc
+++ b/parquet-avro/src/test/resources/all.avsc
@@ -1,6 +1,6 @@
 {
   "name" : "myrecord",
-  "namespace": "parquet.avro",
+  "namespace": "org.apache.parquet.avro",
   "type" : "record",
   "fields" : [ {
     "name" : "mynull",

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-avro/src/test/resources/allFromParquet.avsc
----------------------------------------------------------------------
diff --git a/parquet-avro/src/test/resources/allFromParquet.avsc b/parquet-avro/src/test/resources/allFromParquet.avsc
index 12e6f04..6f6f97f 100644
--- a/parquet-avro/src/test/resources/allFromParquet.avsc
+++ b/parquet-avro/src/test/resources/allFromParquet.avsc
@@ -1,6 +1,6 @@
 {
   "name" : "myrecord",
-  "namespace": "parquet.avro",
+  "namespace": "org.apache.parquet.avro",
   "type" : "record",
   "fields" : [ {
     "name" : "myboolean",

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-avro/src/test/resources/car.avdl
----------------------------------------------------------------------
diff --git a/parquet-avro/src/test/resources/car.avdl b/parquet-avro/src/test/resources/car.avdl
index 17b530e..b848da5 100644
--- a/parquet-avro/src/test/resources/car.avdl
+++ b/parquet-avro/src/test/resources/car.avdl
@@ -17,7 +17,7 @@
  * under the License.
  */
 
-@namespace("parquet.avro")
+@namespace("org.apache.parquet.avro")
 protocol Cars {
 
     record Service {

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-benchmarks/pom.xml
----------------------------------------------------------------------
diff --git a/parquet-benchmarks/pom.xml b/parquet-benchmarks/pom.xml
index a3989ec..25779d4 100644
--- a/parquet-benchmarks/pom.xml
+++ b/parquet-benchmarks/pom.xml
@@ -18,7 +18,7 @@
   -->
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
   <parent>
-    <groupId>com.twitter</groupId>
+    <groupId>org.apache.parquet</groupId>
     <artifactId>parquet</artifactId>
     <relativePath>../pom.xml</relativePath>
     <version>1.7.0-incubating-SNAPSHOT</version>
@@ -28,7 +28,7 @@
 
   <artifactId>parquet-benchmarks</artifactId>
   <packaging>jar</packaging>
-  <name>Parquet Benchmarks</name>
+  <name>Apache Parquet Benchmarks</name>
   <url>https://github.com/Parquet/parquet-mr</url>
 
   <properties>
@@ -38,12 +38,12 @@
 
   <dependencies>
     <dependency>
-      <groupId>com.twitter</groupId>
+      <groupId>org.apache.parquet</groupId>
       <artifactId>parquet-encoding</artifactId>
       <version>${project.version}</version>
     </dependency>
     <dependency>
-       <groupId>com.twitter</groupId>
+       <groupId>org.apache.parquet</groupId>
        <artifactId>parquet-hadoop</artifactId>
        <version>${project.version}</version>
     </dependency>

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/BenchmarkConstants.java
----------------------------------------------------------------------
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/BenchmarkConstants.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/BenchmarkConstants.java
new file mode 100644
index 0000000..384a200
--- /dev/null
+++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/BenchmarkConstants.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.benchmarks;
+
+import static org.apache.parquet.hadoop.ParquetWriter.DEFAULT_BLOCK_SIZE;
+import static org.apache.parquet.hadoop.ParquetWriter.DEFAULT_PAGE_SIZE;
+
+public class BenchmarkConstants {
+  public static final int ONE_K = 1000;
+  public static final int FIVE_K = 5 * ONE_K;
+  public static final int TEN_K = 2 * FIVE_K;
+  public static final int HUNDRED_K = 10 * TEN_K;
+  public static final int ONE_MILLION = 10 * HUNDRED_K;
+
+  public static final int FIXED_LEN_BYTEARRAY_SIZE = 1024;
+
+  public static final int BLOCK_SIZE_DEFAULT = DEFAULT_BLOCK_SIZE;
+  public static final int BLOCK_SIZE_256M = 256 * 1024 * 1024;
+  public static final int BLOCK_SIZE_512M = 512 * 1024 * 1024;
+
+  public static final int PAGE_SIZE_DEFAULT = DEFAULT_PAGE_SIZE;
+  public static final int PAGE_SIZE_4M = 4 * 1024 * 1024;
+  public static final int PAGE_SIZE_8M = 8 * 1024 * 1024;
+
+  public static final int DICT_PAGE_SIZE = 512;
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/BenchmarkFiles.java
----------------------------------------------------------------------
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/BenchmarkFiles.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/BenchmarkFiles.java
new file mode 100644
index 0000000..d9ef4fd
--- /dev/null
+++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/BenchmarkFiles.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.benchmarks;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+
+public class BenchmarkFiles {
+  public static final Configuration configuration = new Configuration();
+
+  public static final String TARGET_DIR = "target/tests/ParquetBenchmarks";
+  public static final Path file_1M = new Path(TARGET_DIR + "/PARQUET-1M");
+
+  //different block and page sizes
+  public static final Path file_1M_BS256M_PS4M = new Path(TARGET_DIR + "/PARQUET-1M-BS256M_PS4M");
+  public static final Path file_1M_BS256M_PS8M = new Path(TARGET_DIR + "/PARQUET-1M-BS256M_PS8M");
+  public static final Path file_1M_BS512M_PS4M = new Path(TARGET_DIR + "/PARQUET-1M-BS512M_PS4M");
+  public static final Path file_1M_BS512M_PS8M = new Path(TARGET_DIR + "/PARQUET-1M-BS512M_PS8M");
+
+  //different compression codecs
+//  public final Path parquetFile_1M_LZO = new Path("target/tests/ParquetBenchmarks/PARQUET-1M-LZO");
+  public static final Path file_1M_SNAPPY = new Path(TARGET_DIR + "/PARQUET-1M-SNAPPY");
+  public static final Path file_1M_GZIP = new Path(TARGET_DIR + "/PARQUET-1M-GZIP");
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/BenchmarkUtils.java
----------------------------------------------------------------------
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/BenchmarkUtils.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/BenchmarkUtils.java
new file mode 100644
index 0000000..4bd9e6e
--- /dev/null
+++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/BenchmarkUtils.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.benchmarks;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class BenchmarkUtils {
+  public static void deleteIfExists(Configuration conf, Path path) {
+    try {
+      FileSystem fs = path.getFileSystem(conf);
+      if (fs.exists(path)) {
+        if (!fs.delete(path, true)) {
+          System.err.println("Couldn't delete " + path);
+        }
+      }
+    } catch (IOException e) {
+      System.err.println("Couldn't delete " + path);
+      e.printStackTrace();
+    }
+  }
+
+  public static boolean exists(Configuration conf, Path path) throws IOException {
+    FileSystem fs = path.getFileSystem(conf);
+    return fs.exists(path);
+  }
+}

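DataGenerator below leans on these helpers to make benchmark runs idempotent. A hypothetical standalone cleanup, reusing the configuration and path constants from BenchmarkFiles above, could look like:

import org.apache.hadoop.fs.Path;
import static org.apache.parquet.benchmarks.BenchmarkFiles.TARGET_DIR;
import static org.apache.parquet.benchmarks.BenchmarkFiles.configuration;
import static org.apache.parquet.benchmarks.BenchmarkUtils.deleteIfExists;

public class CleanupSketch {
  public static void main(String[] args) {
    // Remove a previous benchmark output so the next run regenerates it.
    deleteIfExists(configuration, new Path(TARGET_DIR + "/PARQUET-1M"));
  }
}
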
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/DataGenerator.java
----------------------------------------------------------------------
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/DataGenerator.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/DataGenerator.java
new file mode 100644
index 0000000..05c35bd
--- /dev/null
+++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/DataGenerator.java
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.benchmarks;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.parquet.column.ParquetProperties;
+import org.apache.parquet.example.data.Group;
+import org.apache.parquet.example.data.simple.SimpleGroupFactory;
+import org.apache.parquet.hadoop.ParquetWriter;
+import org.apache.parquet.hadoop.example.GroupWriteSupport;
+import org.apache.parquet.hadoop.metadata.CompressionCodecName;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.MessageType;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import static java.util.UUID.randomUUID;
+import static org.apache.parquet.benchmarks.BenchmarkUtils.deleteIfExists;
+import static org.apache.parquet.benchmarks.BenchmarkUtils.exists;
+import static org.apache.parquet.column.ParquetProperties.WriterVersion.PARQUET_2_0;
+import static org.apache.parquet.hadoop.metadata.CompressionCodecName.GZIP;
+import static org.apache.parquet.hadoop.metadata.CompressionCodecName.SNAPPY;
+import static org.apache.parquet.hadoop.metadata.CompressionCodecName.UNCOMPRESSED;
+import static org.apache.parquet.schema.MessageTypeParser.parseMessageType;
+import static org.apache.parquet.benchmarks.BenchmarkConstants.*;
+import static org.apache.parquet.benchmarks.BenchmarkFiles.*;
+
+public class DataGenerator {
+
+  public void generateAll() {
+    try {
+      generateData(file_1M, configuration, PARQUET_2_0, BLOCK_SIZE_DEFAULT, PAGE_SIZE_DEFAULT, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, ONE_MILLION);
+
+      //generate data for different block and page sizes
+      generateData(file_1M_BS256M_PS4M, configuration, PARQUET_2_0, BLOCK_SIZE_256M, PAGE_SIZE_4M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, ONE_MILLION);
+      generateData(file_1M_BS256M_PS8M, configuration, PARQUET_2_0, BLOCK_SIZE_256M, PAGE_SIZE_8M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, ONE_MILLION);
+      generateData(file_1M_BS512M_PS4M, configuration, PARQUET_2_0, BLOCK_SIZE_512M, PAGE_SIZE_4M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, ONE_MILLION);
+      generateData(file_1M_BS512M_PS8M, configuration, PARQUET_2_0, BLOCK_SIZE_512M, PAGE_SIZE_8M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, ONE_MILLION);
+
+      //generate data for different codecs
+//      generateData(parquetFile_1M_LZO, configuration, PARQUET_2_0, BLOCK_SIZE_DEFAULT, PAGE_SIZE_DEFAULT, FIXED_LEN_BYTEARRAY_SIZE, LZO, ONE_MILLION);
+      generateData(file_1M_SNAPPY, configuration, PARQUET_2_0, BLOCK_SIZE_DEFAULT, PAGE_SIZE_DEFAULT, FIXED_LEN_BYTEARRAY_SIZE, SNAPPY, ONE_MILLION);
+      generateData(file_1M_GZIP, configuration, PARQUET_2_0, BLOCK_SIZE_DEFAULT, PAGE_SIZE_DEFAULT, FIXED_LEN_BYTEARRAY_SIZE, GZIP, ONE_MILLION);
+    }
+    catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  public void generateData(Path outFile, Configuration configuration, ParquetProperties.WriterVersion version,
+                           int blockSize, int pageSize, int fixedLenByteArraySize, CompressionCodecName codec, int nRows)
+          throws IOException
+  {
+    if (exists(configuration, outFile)) {
+      System.out.println("File already exists " + outFile);
+      return;
+    }
+
+    System.out.println("Generating data @ " + outFile);
+
+    MessageType schema = parseMessageType(
+            "message test { "
+                    + "required binary binary_field; "
+                    + "required int32 int32_field; "
+                    + "required int64 int64_field; "
+                    + "required boolean boolean_field; "
+                    + "required float float_field; "
+                    + "required double double_field; "
+                    + "required fixed_len_byte_array(" + fixedLenByteArraySize +") flba_field; "
+                    + "required int96 int96_field; "
+                    + "} ");
+
+    GroupWriteSupport.setSchema(schema, configuration);
+    SimpleGroupFactory f = new SimpleGroupFactory(schema);
+    ParquetWriter<Group> writer = new ParquetWriter<Group>(outFile, new GroupWriteSupport(), codec, blockSize,
+                                                           pageSize, DICT_PAGE_SIZE, true, false, version, configuration);
+
+    //generate some data for the fixed len byte array field
+    char[] chars = new char[fixedLenByteArraySize];
+    Arrays.fill(chars, '*');
+
+    for (int i = 0; i < nRows; i++) {
+      writer.write(
+        f.newGroup()
+          .append("binary_field", randomUUID().toString())
+          .append("int32_field", i)
+          .append("int64_field", 64l)
+          .append("boolean_field", true)
+          .append("float_field", 1.0f)
+          .append("double_field", 2.0d)
+          .append("flba_field", new String(chars))
+          .append("int96_field", Binary.fromByteArray(new byte[12]))
+      );
+    }
+    writer.close();
+  }
+
+  public void cleanup()
+  {
+    deleteIfExists(configuration, file_1M);
+    deleteIfExists(configuration, file_1M_BS256M_PS4M);
+    deleteIfExists(configuration, file_1M_BS256M_PS8M);
+    deleteIfExists(configuration, file_1M_BS512M_PS4M);
+    deleteIfExists(configuration, file_1M_BS512M_PS8M);
+//    deleteIfExists(configuration, parquetFile_1M_LZO);
+    deleteIfExists(configuration, file_1M_SNAPPY);
+    deleteIfExists(configuration, file_1M_GZIP);
+  }
+
+  public static void main(String[] args) {
+    DataGenerator generator = new DataGenerator();
+    if (args.length < 1) {
+      System.err.println("Please specify a command (generate|cleanup).");
+      System.exit(1);
+    }
+
+    String command = args[0];
+    if (command.equalsIgnoreCase("generate")) {
+      generator.generateAll();
+    } else if (command.equalsIgnoreCase("cleanup")) {
+      generator.cleanup();
+    } else {
+      throw new IllegalArgumentException("invalid command " + command);
+    }
+  }
+}

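The new DataGenerator doubles as a small CLI (generate|cleanup), but it can also be driven programmatically. A minimal sketch, assuming the parquet-benchmarks classes above are on the classpath; the wrapper class name is hypothetical:

    import org.apache.parquet.benchmarks.DataGenerator;

    public class GenerateBenchmarkData {
      public static void main(String[] args) {
        // Same effect as invoking DataGenerator.main with "generate":
        // writes the benchmark files under target/tests/ParquetBenchmarks,
        // skipping any that already exist.
        new DataGenerator().generateAll();
      }
    }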
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java
----------------------------------------------------------------------
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java
new file mode 100644
index 0000000..dba5544
--- /dev/null
+++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.benchmarks;
+
+import org.apache.hadoop.fs.Path;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.infra.Blackhole;
+import org.apache.parquet.example.data.Group;
+import org.apache.parquet.hadoop.ParquetReader;
+import org.apache.parquet.hadoop.example.GroupReadSupport;
+import static org.apache.parquet.benchmarks.BenchmarkConstants.*;
+import static org.apache.parquet.benchmarks.BenchmarkFiles.*;
+
+import java.io.IOException;
+
+public class ReadBenchmarks {
+  private void read(Path parquetFile, int nRows, Blackhole blackhole) throws IOException
+  {
+    ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), parquetFile).withConf(configuration).build();
+    for (int i = 0; i < nRows; i++) {
+      Group group = reader.read();
+      blackhole.consume(group.getBinary("binary_field", 0));
+      blackhole.consume(group.getInteger("int32_field", 0));
+      blackhole.consume(group.getLong("int64_field", 0));
+      blackhole.consume(group.getBoolean("boolean_field", 0));
+      blackhole.consume(group.getFloat("float_field", 0));
+      blackhole.consume(group.getDouble("double_field", 0));
+      blackhole.consume(group.getBinary("flba_field", 0));
+      blackhole.consume(group.getInt96("int96_field", 0));
+    }
+    reader.close();
+  }
+
+  @Benchmark
+  public void read1MRowsDefaultBlockAndPageSizeUncompressed(Blackhole blackhole)
+          throws IOException
+  {
+    read(file_1M, ONE_MILLION, blackhole);
+  }
+
+  @Benchmark
+  public void read1MRowsBS256MPS4MUncompressed(Blackhole blackhole)
+          throws IOException
+  {
+    read(file_1M_BS256M_PS4M, ONE_MILLION, blackhole);
+  }
+
+  @Benchmark
+  public void read1MRowsBS256MPS8MUncompressed(Blackhole blackhole)
+          throws IOException
+  {
+    read(file_1M_BS256M_PS8M, ONE_MILLION, blackhole);
+  }
+
+  @Benchmark
+  public void read1MRowsBS512MPS4MUncompressed(Blackhole blackhole)
+          throws IOException
+  {
+    read(file_1M_BS512M_PS4M, ONE_MILLION, blackhole);
+  }
+
+  @Benchmark
+  public void read1MRowsBS512MPS8MUncompressed(Blackhole blackhole)
+          throws IOException
+  {
+    read(file_1M_BS512M_PS8M, ONE_MILLION, blackhole);
+  }
+
+  //TODO how to handle lzo jar?
+//  @Benchmark
+//  public void read1MRowsDefaultBlockAndPageSizeLZO(Blackhole blackhole)
+//          throws IOException
+//  {
+//    read(parquetFile_1M_LZO, ONE_MILLION, blackhole);
+//  }
+
+  @Benchmark
+  public void read1MRowsDefaultBlockAndPageSizeSNAPPY(Blackhole blackhole)
+          throws IOException
+  {
+    read(file_1M_SNAPPY, ONE_MILLION, blackhole);
+  }
+
+  @Benchmark
+  public void read1MRowsDefaultBlockAndPageSizeGZIP(Blackhole blackhole)
+          throws IOException
+  {
+    read(file_1M_GZIP, ONE_MILLION, blackhole);
+  }
+}

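ReadBenchmarks carries JMH @Benchmark annotations, so it is meant to run under the JMH harness rather than as a plain main class, and it assumes DataGenerator has already produced the input files. A minimal sketch of launching it through JMH's programmatic API; the wrapper class and the fork/iteration counts are illustrative, not project defaults:

    import org.openjdk.jmh.runner.Runner;
    import org.openjdk.jmh.runner.RunnerException;
    import org.openjdk.jmh.runner.options.Options;
    import org.openjdk.jmh.runner.options.OptionsBuilder;

    public class RunReadBenchmarks {
      public static void main(String[] args) throws RunnerException {
        // Match all benchmark methods in ReadBenchmarks and keep the run short.
        Options opt = new OptionsBuilder()
            .include("ReadBenchmarks")
            .forks(1)
            .warmupIterations(2)
            .measurementIterations(3)
            .build();
        new Runner(opt).run();
      }
    }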
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
----------------------------------------------------------------------
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
new file mode 100644
index 0000000..5c26a84
--- /dev/null
+++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.benchmarks;
+
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+
+import static org.openjdk.jmh.annotations.Scope.Thread;
+import static org.apache.parquet.benchmarks.BenchmarkConstants.*;
+import static org.apache.parquet.benchmarks.BenchmarkFiles.*;
+
+import java.io.IOException;
+
+import static org.apache.parquet.column.ParquetProperties.WriterVersion.PARQUET_2_0;
+import static org.apache.parquet.hadoop.metadata.CompressionCodecName.GZIP;
+import static org.apache.parquet.hadoop.metadata.CompressionCodecName.SNAPPY;
+import static org.apache.parquet.hadoop.metadata.CompressionCodecName.UNCOMPRESSED;
+
+@State(Thread)
+public class WriteBenchmarks {
+  private DataGenerator dataGenerator = new DataGenerator();
+
+  @Setup(Level.Iteration)
+  public void cleanup() {
+    //clean existing test data at the beginning of each iteration
+    dataGenerator.cleanup();
+  }
+
+  @Benchmark
+  public void write1MRowsDefaultBlockAndPageSizeUncompressed()
+          throws IOException
+  {
+    dataGenerator.generateData(file_1M,
+                               configuration,
+                               PARQUET_2_0,
+                               BLOCK_SIZE_DEFAULT,
+                               PAGE_SIZE_DEFAULT,
+                               FIXED_LEN_BYTEARRAY_SIZE,
+                               UNCOMPRESSED,
+                               ONE_MILLION);
+  }
+
+  @Benchmark
+  public void write1MRowsBS256MPS4MUncompressed()
+          throws IOException
+  {
+    dataGenerator.generateData(file_1M_BS256M_PS4M,
+                               configuration,
+                               PARQUET_2_0,
+                               BLOCK_SIZE_256M,
+                               PAGE_SIZE_4M,
+                               FIXED_LEN_BYTEARRAY_SIZE,
+                               UNCOMPRESSED,
+                               ONE_MILLION);
+  }
+
+  @Benchmark
+  public void write1MRowsBS256MPS8MUncompressed()
+          throws IOException
+  {
+    dataGenerator.generateData(file_1M_BS256M_PS8M,
+                               configuration,
+                               PARQUET_2_0,
+                               BLOCK_SIZE_256M,
+                               PAGE_SIZE_8M,
+                               FIXED_LEN_BYTEARRAY_SIZE,
+                               UNCOMPRESSED,
+                               ONE_MILLION);
+  }
+
+  @Benchmark
+  public void write1MRowsBS512MPS4MUncompressed()
+          throws IOException
+  {
+    dataGenerator.generateData(file_1M_BS512M_PS4M,
+                               configuration,
+                               PARQUET_2_0,
+                               BLOCK_SIZE_512M,
+                               PAGE_SIZE_4M,
+                               FIXED_LEN_BYTEARRAY_SIZE,
+                               UNCOMPRESSED,
+                               ONE_MILLION);
+  }
+
+  @Benchmark
+  public void write1MRowsBS512MPS8MUncompressed()
+          throws IOException
+  {
+    dataGenerator.generateData(file_1M_BS512M_PS8M,
+                               configuration,
+                               PARQUET_2_0,
+                               BLOCK_SIZE_512M,
+                               PAGE_SIZE_8M,
+                               FIXED_LEN_BYTEARRAY_SIZE,
+                               UNCOMPRESSED,
+                               ONE_MILLION);
+  }
+
+  //TODO how to handle lzo jar?
+//  @Benchmark
+//  public void write1MRowsDefaultBlockAndPageSizeLZO()
+//          throws IOException
+//  {
+//    dataGenerator.generateData(parquetFile_1M_LZO,
+//            configuration,
+//            WriterVersion.PARQUET_2_0,
+//            BLOCK_SIZE_DEFAULT,
+//            PAGE_SIZE_DEFAULT,
+//            FIXED_LEN_BYTEARRAY_SIZE,
+//            LZO,
+//            ONE_MILLION);
+//  }
+
+  @Benchmark
+  public void write1MRowsDefaultBlockAndPageSizeSNAPPY()
+          throws IOException
+  {
+    dataGenerator.generateData(file_1M_SNAPPY,
+                               configuration,
+                               PARQUET_2_0,
+                               BLOCK_SIZE_DEFAULT,
+                               PAGE_SIZE_DEFAULT,
+                               FIXED_LEN_BYTEARRAY_SIZE,
+                               SNAPPY,
+                               ONE_MILLION);
+  }
+
+  @Benchmark
+  public void write1MRowsDefaultBlockAndPageSizeGZIP()
+          throws IOException
+  {
+    dataGenerator.generateData(file_1M_GZIP,
+                               configuration,
+                               PARQUET_2_0,
+                               BLOCK_SIZE_DEFAULT,
+                               PAGE_SIZE_DEFAULT,
+                               FIXED_LEN_BYTEARRAY_SIZE,
+                               GZIP,
+                               ONE_MILLION);
+  }
+}

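One subtlety in WriteBenchmarks: @Setup(Level.Iteration) deletes the output files once per measurement iteration, while JMH invokes each benchmark method many times within an iteration, so every invocation after the first hits generateData's early return once the file exists. If each invocation should pay the full write cost, a per-invocation setup is one option; a hypothetical alternative, not part of this commit (JMH warns that Level.Invocation adds per-call overhead, though that is negligible next to writing a million rows):

    import org.openjdk.jmh.annotations.Level;
    import org.openjdk.jmh.annotations.Setup;

    // Hypothetical replacement for the Level.Iteration setup above:
    // clean the target files before every invocation so each write starts cold.
    @Setup(Level.Invocation)
    public void cleanupBeforeInvocation() {
      dataGenerator.cleanup();
    }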
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-benchmarks/src/main/java/parquet/benchmarks/BenchmarkConstants.java
----------------------------------------------------------------------
diff --git a/parquet-benchmarks/src/main/java/parquet/benchmarks/BenchmarkConstants.java b/parquet-benchmarks/src/main/java/parquet/benchmarks/BenchmarkConstants.java
deleted file mode 100644
index 4f66ccb..0000000
--- a/parquet-benchmarks/src/main/java/parquet/benchmarks/BenchmarkConstants.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.benchmarks;
-
-import static parquet.hadoop.ParquetWriter.DEFAULT_BLOCK_SIZE;
-import static parquet.hadoop.ParquetWriter.DEFAULT_PAGE_SIZE;
-
-public class BenchmarkConstants {
-  public static final int ONE_K = 1000;
-  public static final int FIVE_K = 5 * ONE_K;
-  public static final int TEN_K = 2 * FIVE_K;
-  public static final int HUNDRED_K = 10 * TEN_K;
-  public static final int ONE_MILLION = 10 * HUNDRED_K;
-
-  public static final int FIXED_LEN_BYTEARRAY_SIZE = 1024;
-
-  public static final int BLOCK_SIZE_DEFAULT = DEFAULT_BLOCK_SIZE;
-  public static final int BLOCK_SIZE_256M = 256 * 1024 * 1024;
-  public static final int BLOCK_SIZE_512M = 512 * 1024 * 1024;
-
-  public static final int PAGE_SIZE_DEFAULT = DEFAULT_PAGE_SIZE;
-  public static final int PAGE_SIZE_4M = 4 * 1024 * 1024;
-  public static final int PAGE_SIZE_8M = 8 * 1024 * 1024;
-
-  public static final int DICT_PAGE_SIZE = 512;
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-benchmarks/src/main/java/parquet/benchmarks/BenchmarkFiles.java
----------------------------------------------------------------------
diff --git a/parquet-benchmarks/src/main/java/parquet/benchmarks/BenchmarkFiles.java b/parquet-benchmarks/src/main/java/parquet/benchmarks/BenchmarkFiles.java
deleted file mode 100644
index 1e57ca2..0000000
--- a/parquet-benchmarks/src/main/java/parquet/benchmarks/BenchmarkFiles.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.benchmarks;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-
-public class BenchmarkFiles {
-  public static final Configuration configuration = new Configuration();
-
-  public static final String TARGET_DIR = "target/tests/ParquetBenchmarks";
-  public static final Path file_1M = new Path(TARGET_DIR + "/PARQUET-1M");
-
-  //different block and page sizes
-  public static final Path file_1M_BS256M_PS4M = new Path(TARGET_DIR + "/PARQUET-1M-BS256M_PS4M");
-  public static final Path file_1M_BS256M_PS8M = new Path(TARGET_DIR + "/PARQUET-1M-BS256M_PS8M");
-  public static final Path file_1M_BS512M_PS4M = new Path(TARGET_DIR + "/PARQUET-1M-BS512M_PS4M");
-  public static final Path file_1M_BS512M_PS8M = new Path(TARGET_DIR + "/PARQUET-1M-BS512M_PS8M");
-
-  //different compression codecs
-//  public final Path parquetFile_1M_LZO = new Path("target/tests/ParquetBenchmarks/PARQUET-1M-LZO");
-  public static final Path file_1M_SNAPPY = new Path(TARGET_DIR + "/PARQUET-1M-SNAPPY");
-  public static final Path file_1M_GZIP = new Path(TARGET_DIR + "/PARQUET-1M-GZIP");
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-benchmarks/src/main/java/parquet/benchmarks/BenchmarkUtils.java
----------------------------------------------------------------------
diff --git a/parquet-benchmarks/src/main/java/parquet/benchmarks/BenchmarkUtils.java b/parquet-benchmarks/src/main/java/parquet/benchmarks/BenchmarkUtils.java
deleted file mode 100644
index 9400bc1..0000000
--- a/parquet-benchmarks/src/main/java/parquet/benchmarks/BenchmarkUtils.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.benchmarks;
-
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-
-public class BenchmarkUtils {
-  public static void deleteIfExists(Configuration conf, Path path) {
-    try {
-      FileSystem fs = path.getFileSystem(conf);
-      if (fs.exists(path)) {
-        if (!fs.delete(path, true)) {
-          System.err.println("Couldn't delete " + path);
-        }
-      }
-    } catch (IOException e) {
-      System.err.println("Couldn't delete " + path);
-      e.printStackTrace();
-    }
-  }
-
-  public static boolean exists(Configuration conf, Path path) throws IOException {
-    FileSystem fs = path.getFileSystem(conf);
-    return fs.exists(path);
-  }
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-benchmarks/src/main/java/parquet/benchmarks/DataGenerator.java
----------------------------------------------------------------------
diff --git a/parquet-benchmarks/src/main/java/parquet/benchmarks/DataGenerator.java b/parquet-benchmarks/src/main/java/parquet/benchmarks/DataGenerator.java
deleted file mode 100644
index f1af4f9..0000000
--- a/parquet-benchmarks/src/main/java/parquet/benchmarks/DataGenerator.java
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.benchmarks;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import parquet.column.ParquetProperties;
-import parquet.example.data.Group;
-import parquet.example.data.simple.SimpleGroupFactory;
-import parquet.hadoop.ParquetWriter;
-import parquet.hadoop.example.GroupWriteSupport;
-import parquet.hadoop.metadata.CompressionCodecName;
-import parquet.io.api.Binary;
-import parquet.schema.MessageType;
-
-import java.io.IOException;
-import java.util.Arrays;
-
-import static java.util.UUID.randomUUID;
-import static parquet.benchmarks.BenchmarkUtils.deleteIfExists;
-import static parquet.benchmarks.BenchmarkUtils.exists;
-import static parquet.column.ParquetProperties.WriterVersion.PARQUET_2_0;
-import static parquet.hadoop.metadata.CompressionCodecName.GZIP;
-import static parquet.hadoop.metadata.CompressionCodecName.SNAPPY;
-import static parquet.hadoop.metadata.CompressionCodecName.UNCOMPRESSED;
-import static parquet.schema.MessageTypeParser.parseMessageType;
-import static parquet.benchmarks.BenchmarkConstants.*;
-import static parquet.benchmarks.BenchmarkFiles.*;
-
-public class DataGenerator {
-
-  public void generateAll() {
-    try {
-      generateData(file_1M, configuration, PARQUET_2_0, BLOCK_SIZE_DEFAULT, PAGE_SIZE_DEFAULT, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, ONE_MILLION);
-
-      //generate data for different block and page sizes
-      generateData(file_1M_BS256M_PS4M, configuration, PARQUET_2_0, BLOCK_SIZE_256M, PAGE_SIZE_4M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, ONE_MILLION);
-      generateData(file_1M_BS256M_PS8M, configuration, PARQUET_2_0, BLOCK_SIZE_256M, PAGE_SIZE_8M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, ONE_MILLION);
-      generateData(file_1M_BS512M_PS4M, configuration, PARQUET_2_0, BLOCK_SIZE_512M, PAGE_SIZE_4M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, ONE_MILLION);
-      generateData(file_1M_BS512M_PS8M, configuration, PARQUET_2_0, BLOCK_SIZE_512M, PAGE_SIZE_8M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, ONE_MILLION);
-
-      //generate data for different codecs
-//      generateData(parquetFile_1M_LZO, configuration, PARQUET_2_0, BLOCK_SIZE_DEFAULT, PAGE_SIZE_DEFAULT, FIXED_LEN_BYTEARRAY_SIZE, LZO, ONE_MILLION);
-      generateData(file_1M_SNAPPY, configuration, PARQUET_2_0, BLOCK_SIZE_DEFAULT, PAGE_SIZE_DEFAULT, FIXED_LEN_BYTEARRAY_SIZE, SNAPPY, ONE_MILLION);
-      generateData(file_1M_GZIP, configuration, PARQUET_2_0, BLOCK_SIZE_DEFAULT, PAGE_SIZE_DEFAULT, FIXED_LEN_BYTEARRAY_SIZE, GZIP, ONE_MILLION);
-    }
-    catch (IOException e) {
-      throw new RuntimeException(e);
-    }
-  }
-
-  public void generateData(Path outFile, Configuration configuration, ParquetProperties.WriterVersion version,
-                           int blockSize, int pageSize, int fixedLenByteArraySize, CompressionCodecName codec, int nRows)
-          throws IOException
-  {
-    if (exists(configuration, outFile)) {
-      System.out.println("File already exists " + outFile);
-      return;
-    }
-
-    System.out.println("Generating data @ " + outFile);
-
-    MessageType schema = parseMessageType(
-            "message test { "
-                    + "required binary binary_field; "
-                    + "required int32 int32_field; "
-                    + "required int64 int64_field; "
-                    + "required boolean boolean_field; "
-                    + "required float float_field; "
-                    + "required double double_field; "
-                    + "required fixed_len_byte_array(" + fixedLenByteArraySize +") flba_field; "
-                    + "required int96 int96_field; "
-                    + "} ");
-
-    GroupWriteSupport.setSchema(schema, configuration);
-    SimpleGroupFactory f = new SimpleGroupFactory(schema);
-    ParquetWriter<Group> writer = new ParquetWriter<Group>(outFile, new GroupWriteSupport(), codec, blockSize,
-                                                           pageSize, DICT_PAGE_SIZE, true, false, version, configuration);
-
-    //generate some data for the fixed len byte array field
-    char[] chars = new char[fixedLenByteArraySize];
-    Arrays.fill(chars, '*');
-
-    for (int i = 0; i < nRows; i++) {
-      writer.write(
-        f.newGroup()
-          .append("binary_field", randomUUID().toString())
-          .append("int32_field", i)
-          .append("int64_field", 64l)
-          .append("boolean_field", true)
-          .append("float_field", 1.0f)
-          .append("double_field", 2.0d)
-          .append("flba_field", new String(chars))
-          .append("int96_field", Binary.fromByteArray(new byte[12]))
-      );
-    }
-    writer.close();
-  }
-
-  public void cleanup()
-  {
-    deleteIfExists(configuration, file_1M);
-    deleteIfExists(configuration, file_1M_BS256M_PS4M);
-    deleteIfExists(configuration, file_1M_BS256M_PS8M);
-    deleteIfExists(configuration, file_1M_BS512M_PS4M);
-    deleteIfExists(configuration, file_1M_BS512M_PS8M);
-//    deleteIfExists(configuration, parquetFile_1M_LZO);
-    deleteIfExists(configuration, file_1M_SNAPPY);
-    deleteIfExists(configuration, file_1M_GZIP);
-  }
-
-  public static void main(String[] args) {
-    DataGenerator generator = new DataGenerator();
-    if (args.length < 1) {
-      System.err.println("Please specify a command (generate|cleanup).");
-      System.exit(1);
-    }
-
-    String command = args[0];
-    if (command.equalsIgnoreCase("generate")) {
-      generator.generateAll();
-    } else if (command.equalsIgnoreCase("cleanup")) {
-      generator.cleanup();
-    } else {
-      throw new IllegalArgumentException("invalid command " + command);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-benchmarks/src/main/java/parquet/benchmarks/ReadBenchmarks.java
----------------------------------------------------------------------
diff --git a/parquet-benchmarks/src/main/java/parquet/benchmarks/ReadBenchmarks.java b/parquet-benchmarks/src/main/java/parquet/benchmarks/ReadBenchmarks.java
deleted file mode 100644
index e308e88..0000000
--- a/parquet-benchmarks/src/main/java/parquet/benchmarks/ReadBenchmarks.java
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.benchmarks;
-
-import org.apache.hadoop.fs.Path;
-import org.openjdk.jmh.annotations.Benchmark;
-import org.openjdk.jmh.infra.Blackhole;
-import parquet.example.data.Group;
-import parquet.hadoop.ParquetReader;
-import parquet.hadoop.example.GroupReadSupport;
-import static parquet.benchmarks.BenchmarkConstants.*;
-import static parquet.benchmarks.BenchmarkFiles.*;
-
-import java.io.IOException;
-
-public class ReadBenchmarks {
-  private void read(Path parquetFile, int nRows, Blackhole blackhole) throws IOException
-  {
-    ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), parquetFile).withConf(configuration).build();
-    for (int i = 0; i < nRows; i++) {
-      Group group = reader.read();
-      blackhole.consume(group.getBinary("binary_field", 0));
-      blackhole.consume(group.getInteger("int32_field", 0));
-      blackhole.consume(group.getLong("int64_field", 0));
-      blackhole.consume(group.getBoolean("boolean_field", 0));
-      blackhole.consume(group.getFloat("float_field", 0));
-      blackhole.consume(group.getDouble("double_field", 0));
-      blackhole.consume(group.getBinary("flba_field", 0));
-      blackhole.consume(group.getInt96("int96_field", 0));
-    }
-    reader.close();
-  }
-
-  @Benchmark
-  public void read1MRowsDefaultBlockAndPageSizeUncompressed(Blackhole blackhole)
-          throws IOException
-  {
-    read(file_1M, ONE_MILLION, blackhole);
-  }
-
-  @Benchmark
-  public void read1MRowsBS256MPS4MUncompressed(Blackhole blackhole)
-          throws IOException
-  {
-    read(file_1M_BS256M_PS4M, ONE_MILLION, blackhole);
-  }
-
-  @Benchmark
-  public void read1MRowsBS256MPS8MUncompressed(Blackhole blackhole)
-          throws IOException
-  {
-    read(file_1M_BS256M_PS8M, ONE_MILLION, blackhole);
-  }
-
-  @Benchmark
-  public void read1MRowsBS512MPS4MUncompressed(Blackhole blackhole)
-          throws IOException
-  {
-    read(file_1M_BS512M_PS4M, ONE_MILLION, blackhole);
-  }
-
-  @Benchmark
-  public void read1MRowsBS512MPS8MUncompressed(Blackhole blackhole)
-          throws IOException
-  {
-    read(file_1M_BS512M_PS8M, ONE_MILLION, blackhole);
-  }
-
-  //TODO how to handle lzo jar?
-//  @Benchmark
-//  public void read1MRowsDefaultBlockAndPageSizeLZO(Blackhole blackhole)
-//          throws IOException
-//  {
-//    read(parquetFile_1M_LZO, ONE_MILLION, blackhole);
-//  }
-
-  @Benchmark
-  public void read1MRowsDefaultBlockAndPageSizeSNAPPY(Blackhole blackhole)
-          throws IOException
-  {
-    read(file_1M_SNAPPY, ONE_MILLION, blackhole);
-  }
-
-  @Benchmark
-  public void read1MRowsDefaultBlockAndPageSizeGZIP(Blackhole blackhole)
-          throws IOException
-  {
-    read(file_1M_GZIP, ONE_MILLION, blackhole);
-  }
-}
