giraph-commits mailing list archives

From ni...@apache.org
Subject [31/51] [partial] GIRAPH-457: update module names (nitay)
Date Thu, 20 Dec 2012 04:25:32 GMT
http://git-wip-us.apache.org/repos/asf/giraph/blob/45851391/giraph-formats-contrib/src/main/java/org/apache/giraph/io/hcatalog/HCatalogVertexOutputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-formats-contrib/src/main/java/org/apache/giraph/io/hcatalog/HCatalogVertexOutputFormat.java b/giraph-formats-contrib/src/main/java/org/apache/giraph/io/hcatalog/HCatalogVertexOutputFormat.java
deleted file mode 100644
index 94c7b85..0000000
--- a/giraph-formats-contrib/src/main/java/org/apache/giraph/io/hcatalog/HCatalogVertexOutputFormat.java
+++ /dev/null
@@ -1,215 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.giraph.io.hcatalog;
-
-import org.apache.giraph.graph.Vertex;
-import org.apache.giraph.graph.VertexOutputFormat;
-import org.apache.giraph.graph.VertexWriter;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.OutputCommitter;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hcatalog.data.DefaultHCatRecord;
-import org.apache.hcatalog.data.HCatRecord;
-import org.apache.hcatalog.mapreduce.HCatOutputFormat;
-
-import java.io.IOException;
-
-/**
- * Abstract class that users should subclass to store data to a Hive or
- * Pig table. You can easily implement a {@link HCatalogVertexWriter} by
- * extending {@link SingleRowHCatalogVertexWriter} or
- * {@link MultiRowHCatalogVertexWriter}, depending on how you want to fit
- * your vertices into the output table.
- * <p>
- * The desired database and table name to store to can be specified via
- * {@link HCatOutputFormat#setOutput(org.apache.hadoop.mapreduce.Job,
- * org.apache.hcatalog.mapreduce.OutputJobInfo)}
- * as you set up your vertex output format with
- * {@link org.apache.giraph.conf.GiraphConfiguration#setVertexOutputFormatClass(Class)}.
- * The output table must be created beforehand.
- *
- * @param <I> Vertex id
- * @param <V> Vertex value
- * @param <E> Edge value
- */
-@SuppressWarnings("rawtypes")
-public abstract class HCatalogVertexOutputFormat<
-        I extends WritableComparable,
-        V extends Writable,
-        E extends Writable>
-        extends VertexOutputFormat<I, V, E> {
-  /**
-  * hcat output format
-  */
-  protected HCatOutputFormat hCatOutputFormat = new HCatOutputFormat();
-
-  @Override
-  public final void checkOutputSpecs(JobContext context) throws IOException,
-      InterruptedException {
-    hCatOutputFormat.checkOutputSpecs(context);
-  }
-
-  @Override
-  public final OutputCommitter getOutputCommitter(TaskAttemptContext context)
-    throws IOException, InterruptedException {
-    return hCatOutputFormat.getOutputCommitter(context);
-  }
-
-  /**
-  * Abstract class that users should
-  * subclass based on their specific vertex
-  * output. Users should implement
-  * writeVertex to create an HCatRecord that is
-  * valid for writing by the HCatalog record writer.
-  */
-  protected abstract class HCatalogVertexWriter implements
-            VertexWriter<I, V, E> {
-
-    /** Internal HCatRecordWriter */
-    private RecordWriter<WritableComparable<?>, HCatRecord> hCatRecordWriter;
-    /** Context passed to initialize */
-    private TaskAttemptContext context;
-
-    /**
-    * Initialize with the HCatRecordWriter
-    * @param hCatRecordWriter
-    *            Internal writer
-    */
-    private void initialize(
-                    RecordWriter<WritableComparable<?>,
-                    HCatRecord> hCatRecordWriter) {
-      this.hCatRecordWriter = hCatRecordWriter;
-    }
-
-    /**
-    * Get the record writer.
-    * @return Record writer to be used for writing.
-    */
-    protected RecordWriter<WritableComparable<?>,
-            HCatRecord> getRecordWriter() {
-      return hCatRecordWriter;
-    }
-
-    /**
-    * Get the context.
-    *
-    * @return Context passed to initialize.
-    */
-    protected TaskAttemptContext getContext() {
-      return context;
-    }
-
-    @Override
-    public void initialize(TaskAttemptContext context) throws IOException {
-      this.context = context;
-    }
-
-    @Override
-    public void close(TaskAttemptContext context) throws IOException,
-        InterruptedException {
-      hCatRecordWriter.close(context);
-    }
-
-  }
-
-  /**
-  * create vertex writer.
-  * @return HCatalogVertexWriter
-  */
-  protected abstract HCatalogVertexWriter createVertexWriter();
-
-  @Override
-  public final VertexWriter<I, V, E> createVertexWriter(
-    TaskAttemptContext context) throws IOException,
-    InterruptedException {
-    HCatalogVertexWriter writer = createVertexWriter();
-    writer.initialize(hCatOutputFormat.getRecordWriter(context));
-    return writer;
-  }
-
-  /**
-  * HCatalogVertexWriter that writes each vertex to a single row.
-  */
-  protected abstract class SingleRowHCatalogVertexWriter extends
-            HCatalogVertexWriter {
-    /**
-    * Get the number of columns in the output table.
-    * @return Number of columns
-    */
-    protected abstract int getNumColumns();
-
-    /**
-    * Fill the record with data from the vertex.
-    * @param record Record to fill
-    * @param vertex Vertex to read data from
-    */
-    protected abstract void fillRecord(HCatRecord record,
-                                    Vertex<I, V, E, ?> vertex);
-
-    /**
-    * Create a record from the vertex.
-    * @param vertex Vertex to read data from
-    * @return Newly created HCatRecord
-    */
-    protected HCatRecord createRecord(Vertex<I, V, E, ?> vertex) {
-      HCatRecord record = new DefaultHCatRecord(getNumColumns());
-      fillRecord(record, vertex);
-      return record;
-    }
-
-    @Override
-    // XXX It is important not to put generic type signature <I,V,E,?> after
-    // Vertex. Otherwise, any class that extends this will not compile
-    // because of not implementing the VertexWriter#writeVertex. Mystery of
-    // Java Generics :(
-    @SuppressWarnings("unchecked")
-    public final void writeVertex(Vertex vertex) throws IOException,
-        InterruptedException {
-      getRecordWriter().write(null, createRecord(vertex));
-    }
-
-  }
-
-  /**
-  * HCatalogVertexWriter that writes each vertex to multiple rows.
-  */
-  public abstract class MultiRowHCatalogVertexWriter extends
-    HCatalogVertexWriter {
-    /**
-    * Create records from the vertex.
-    * @param vertex Vertex to read data from
-    * @return Iterable of records to write
-    */
-    protected abstract Iterable<HCatRecord> createRecords(
-        Vertex<I, V, E, ?> vertex);
-
-    @Override
-    // XXX Same thing here. No Generics for Vertex here.
-    @SuppressWarnings("unchecked")
-    public final void writeVertex(Vertex vertex) throws IOException,
-        InterruptedException {
-      Iterable<HCatRecord> records = createRecords(vertex);
-      for (HCatRecord record : records) {
-        getRecordWriter().write(null, record);
-      }
-    }
-  }
-}
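
For context on how the (now relocated) class above is meant to be used, per
its javadoc: a minimal sketch of a one-row-per-vertex subclass. The class
name, column layout, and Writable types below are illustrative assumptions,
not part of this commit.

    import org.apache.giraph.graph.Vertex;
    import org.apache.hadoop.io.DoubleWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hcatalog.data.HCatRecord;

    // Hypothetical subclass: stores each vertex as one (id, value) row.
    public class VertexValueHCatOutputFormat extends
        HCatalogVertexOutputFormat<Text, DoubleWritable, Text> {
      @Override
      protected HCatalogVertexWriter createVertexWriter() {
        return new SingleRowHCatalogVertexWriter() {
          @Override
          protected int getNumColumns() {
            return 2; // column 0: vertex id, column 1: vertex value
          }

          @Override
          protected void fillRecord(HCatRecord record,
              Vertex<Text, DoubleWritable, Text, ?> vertex) {
            record.set(0, vertex.getId().toString());
            record.set(1, vertex.getValue().get());
          }
        };
      }
    }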

http://git-wip-us.apache.org/repos/asf/giraph/blob/45851391/giraph-formats-contrib/src/main/java/org/apache/giraph/io/hcatalog/HCatalogVertexValueInputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-formats-contrib/src/main/java/org/apache/giraph/io/hcatalog/HCatalogVertexValueInputFormat.java b/giraph-formats-contrib/src/main/java/org/apache/giraph/io/hcatalog/HCatalogVertexValueInputFormat.java
deleted file mode 100644
index d08179d..0000000
--- a/giraph-formats-contrib/src/main/java/org/apache/giraph/io/hcatalog/HCatalogVertexValueInputFormat.java
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.giraph.io.hcatalog;
-
-import org.apache.giraph.graph.VertexValueInputFormat;
-import org.apache.giraph.graph.VertexValueReader;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hcatalog.data.HCatRecord;
-
-import java.io.IOException;
-import java.util.List;
-
-/**
- * HCatalog {@link VertexValueInputFormat} for reading vertex values from
- * Hive/Pig.
- *
- * @param <I> Vertex id
- * @param <V> Vertex value
- * @param <E> Edge value
- * @param <M> Message data
- */
-public abstract class HCatalogVertexValueInputFormat<I extends
-    WritableComparable,
-    V extends Writable,
-    E extends Writable,
-    M extends Writable>
-    extends VertexValueInputFormat<I, V, E, M> {
-  /**
-   * HCatalog input format.
-   */
-  private GiraphHCatInputFormat hCatInputFormat = new GiraphHCatInputFormat();
-
-  @Override
-  public List<InputSplit> getSplits(JobContext context, int numWorkers)
-    throws IOException, InterruptedException {
-    return hCatInputFormat.getVertexSplits(context);
-  }
-
-  /**
-   * {@link VertexValueReader} for {@link HCatalogVertexValueInputFormat}.
-   */
-  protected abstract class HCatalogVertexValueReader
-      extends VertexValueReader<I, V, E, M> {
-    /** Internal {@link RecordReader}. */
-    private RecordReader<WritableComparable, HCatRecord> hCatRecordReader;
-    /** Context passed to initialize. */
-    private TaskAttemptContext context;
-
-    @Override
-    public final void initialize(InputSplit inputSplit,
-                                 TaskAttemptContext context)
-      throws IOException, InterruptedException {
-      super.initialize(inputSplit, context);
-      hCatRecordReader =
-          hCatInputFormat.createVertexRecordReader(inputSplit, context);
-      hCatRecordReader.initialize(inputSplit, context);
-      this.context = context;
-    }
-
-    @Override
-    public boolean nextVertex() throws IOException, InterruptedException {
-      return hCatRecordReader.nextKeyValue();
-    }
-
-    @Override
-    public final void close() throws IOException {
-      hCatRecordReader.close();
-    }
-
-    @Override
-    public final float getProgress() throws IOException, InterruptedException {
-      return hCatRecordReader.getProgress();
-    }
-
-    /**
-     * Get the record reader.
-     *
-     * @return Record reader to be used for reading.
-     */
-    protected final RecordReader<WritableComparable, HCatRecord>
-    getRecordReader() {
-      return hCatRecordReader;
-    }
-
-    /**
-     * Get the context.
-     *
-     * @return Context passed to initialize.
-     */
-    protected final TaskAttemptContext getContext() {
-      return context;
-    }
-  }
-
-  /**
-   * Create {@link VertexValueReader}.
-   *
-   * @return {@link HCatalogVertexValueReader} instance.
-   */
-  protected abstract HCatalogVertexValueReader createVertexValueReader();
-
-  @Override
-  public final VertexValueReader<I, V, E, M>
-  createVertexValueReader(InputSplit split, TaskAttemptContext context)
-    throws IOException {
-    try {
-      HCatalogVertexValueReader reader = createVertexValueReader();
-      reader.initialize(split, context);
-      return reader;
-    } catch (InterruptedException e) {
-      throw new IllegalStateException(
-          "createVertexValueReader: Interrupted creating reader.", e);
-    }
-  }
-
-  /**
-   * {@link HCatalogVertexValueReader} for tables holding a complete vertex
-   * value in each row.
-   */
-  protected abstract class SingleRowHCatalogVertexValueReader
-      extends HCatalogVertexValueReader {
-    /**
-     * Get vertex id from a record.
-     *
-     * @param record Input record
-     * @return I Vertex id
-     */
-    protected abstract I getVertexId(HCatRecord record);
-
-    /**
-     * Get vertex value from a record.
-     *
-     * @param record Input record
-     * @return V Vertex value
-     */
-    protected abstract V getVertexValue(HCatRecord record);
-
-    @Override
-    public final I getCurrentVertexId() throws IOException,
-        InterruptedException {
-      return getVertexId(getRecordReader().getCurrentValue());
-    }
-
-    @Override
-    public final V getCurrentVertexValue() throws IOException,
-        InterruptedException {
-      return getVertexValue(getRecordReader().getCurrentValue());
-    }
-  }
-}
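
As with the output format, the class above is used by subclassing. A minimal
sketch, assuming a table whose column 0 holds the vertex id and column 1 a
DOUBLE vertex value; the class name and types are assumptions for
illustration only.

    import org.apache.hadoop.io.DoubleWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hcatalog.data.HCatRecord;

    // Hypothetical subclass reading (id, value) rows as vertex values.
    public class VertexValueHCatInputFormat extends
        HCatalogVertexValueInputFormat<Text, DoubleWritable, Text, Text> {
      @Override
      protected HCatalogVertexValueReader createVertexValueReader() {
        return new SingleRowHCatalogVertexValueReader() {
          @Override
          protected Text getVertexId(HCatRecord record) {
            return new Text(record.get(0).toString());
          }

          @Override
          protected DoubleWritable getVertexValue(HCatRecord record) {
            // Assumes the value column is a Hive DOUBLE.
            return new DoubleWritable((Double) record.get(1));
          }
        };
      }
    }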

http://git-wip-us.apache.org/repos/asf/giraph/blob/45851391/giraph-formats-contrib/src/main/java/org/apache/giraph/io/hcatalog/HiveGiraphRunner.java
----------------------------------------------------------------------
diff --git a/giraph-formats-contrib/src/main/java/org/apache/giraph/io/hcatalog/HiveGiraphRunner.java b/giraph-formats-contrib/src/main/java/org/apache/giraph/io/hcatalog/HiveGiraphRunner.java
deleted file mode 100644
index 7a7c2f8..0000000
--- a/giraph-formats-contrib/src/main/java/org/apache/giraph/io/hcatalog/HiveGiraphRunner.java
+++ /dev/null
@@ -1,490 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.giraph.io.hcatalog;
-
-import org.apache.commons.cli.CommandLine;
-import org.apache.commons.cli.CommandLineParser;
-import org.apache.commons.cli.GnuParser;
-import org.apache.commons.cli.HelpFormatter;
-import org.apache.commons.cli.Options;
-import org.apache.commons.cli.ParseException;
-import org.apache.giraph.graph.EdgeInputFormat;
-import org.apache.giraph.graph.GiraphJob;
-import org.apache.giraph.graph.Vertex;
-import org.apache.giraph.graph.VertexInputFormat;
-import org.apache.giraph.graph.VertexOutputFormat;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.hcatalog.mapreduce.HCatOutputFormat;
-import org.apache.hcatalog.mapreduce.InputJobInfo;
-import org.apache.hcatalog.mapreduce.OutputJobInfo;
-import org.apache.log4j.Logger;
-
-import com.google.common.collect.Lists;
-
-import java.io.File;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-
-/**
- * Hive Giraph Runner
- */
-public class HiveGiraphRunner implements Tool {
-  /**
-   * logger
-   */
-  private static final Logger LOG = Logger.getLogger(HiveGiraphRunner.class);
-  /**
-   * workers
-   */
-  protected int workers;
-  /**
-   * is verbose
-   */
-  protected boolean isVerbose;
-  /**
-   * output table partitions
-   */
-  protected Map<String, String> outputTablePartitionValues;
-  /**
-   * dbName
-   */
-  protected String dbName;
-  /**
-   * vertex input table name
-   */
-  protected String vertexInputTableName;
-  /**
-   * vertex input table filter
-   */
-  protected String vertexInputTableFilterExpr;
-  /**
-   * edge input table name
-   */
-  protected String edgeInputTableName;
-  /**
-   * edge input table filter
-   */
-  protected String edgeInputTableFilterExpr;
-  /**
-   * output table name
-   */
-  protected String outputTableName;
-  /** Configuration */
-  private Configuration conf;
-  /** Skip output? (Useful for testing without writing) */
-  private boolean skipOutput = false;
-
-  /**
-  * vertex class.
-  */
-  private Class<? extends Vertex> vertexClass;
-  /**
-   * vertex input format internal.
-   */
-  private Class<? extends VertexInputFormat> vertexInputFormatClass;
-  /**
-   * edge input format internal.
-   */
-  private Class<? extends EdgeInputFormat> edgeInputFormatClass;
-  /**
-  * vertex output format internal.
-  */
-  private Class<? extends VertexOutputFormat> vertexOutputFormatClass;
-
-  /**
-  * Create a HiveGiraphRunner with the given Giraph classes.
-   *
-  * @param vertexClass Vertex class
-  * @param vertexInputFormatClass Vertex input format
-  * @param edgeInputFormatClass Edge input format
-  * @param vertexOutputFormatClass Output format
-  */
-  protected HiveGiraphRunner(
-      Class<? extends Vertex> vertexClass,
-      Class<? extends VertexInputFormat> vertexInputFormatClass,
-      Class<? extends EdgeInputFormat> edgeInputFormatClass,
-      Class<? extends VertexOutputFormat> vertexOutputFormatClass) {
-    this.vertexClass = vertexClass;
-    this.vertexInputFormatClass = vertexInputFormatClass;
-    this.edgeInputFormatClass = edgeInputFormatClass;
-    this.vertexOutputFormatClass = vertexOutputFormatClass;
-    this.conf = new HiveConf(getClass());
-  }
-
-  /**
-  * main method
-  * @param args system arguments
-  * @throws Exception any errors from Hive Giraph Runner
-  */
-  public static void main(String[] args) throws Exception {
-    System.exit(ToolRunner.run(
-        new HiveGiraphRunner(null, null, null, null), args));
-  }
-
-  @Override
-  public final int run(String[] args) throws Exception {
-    // process args
-    try {
-      processArguments(args);
-    } catch (InterruptedException e) {
-      return 0;
-    } catch (IllegalArgumentException e) {
-      System.err.println(e.getMessage());
-      return -1;
-    }
-
-    // additional configuration for Hive
-    adjustConfigurationForHive(getConf());
-
-    // setup GiraphJob
-    GiraphJob job = new GiraphJob(getConf(), getClass().getName());
-    job.getConfiguration().setVertexClass(vertexClass);
-
-    // setup input from Hive
-    if (vertexInputFormatClass != null) {
-      InputJobInfo vertexInputJobInfo = InputJobInfo.create(dbName,
-          vertexInputTableName, vertexInputTableFilterExpr);
-      GiraphHCatInputFormat.setVertexInput(job.getInternalJob(),
-          vertexInputJobInfo);
-      job.getConfiguration().setVertexInputFormatClass(vertexInputFormatClass);
-    }
-    if (edgeInputFormatClass != null) {
-      InputJobInfo edgeInputJobInfo = InputJobInfo.create(dbName,
-          edgeInputTableName, edgeInputTableFilterExpr);
-      GiraphHCatInputFormat.setEdgeInput(job.getInternalJob(),
-          edgeInputJobInfo);
-      job.getConfiguration().setEdgeInputFormatClass(edgeInputFormatClass);
-    }
-
-    // setup output to Hive
-    HCatOutputFormat.setOutput(job.getInternalJob(), OutputJobInfo.create(
-        dbName, outputTableName, outputTablePartitionValues));
-    HCatOutputFormat.setSchema(job.getInternalJob(),
-        HCatOutputFormat.getTableSchema(job.getInternalJob()));
-    if (skipOutput) {
-      LOG.warn("run: Warning - Output will be skipped!");
-    } else {
-      job.getConfiguration().setVertexOutputFormatClass(
-          vertexOutputFormatClass);
-    }
-
-    job.getConfiguration().setWorkerConfiguration(workers, workers, 100.0f);
-    initGiraphJob(job);
-
-    return job.run(isVerbose) ? 0 : -1;
-  }
-
-  /**
-  * Adjust the given configuration for running with Hive.
-  * @param conf Configuration argument
-  */
-  private static void adjustConfigurationForHive(Configuration conf) {
-    // When output partitions are used, workers register them with the
-    // metastore at the cleanup stage, and HiveConf looks for hive-site.xml
-    // on initialization, so ship hive-site.xml to the workers.
-    addToStringCollection(conf, "tmpfiles", conf.getClassLoader()
-        .getResource("hive-site.xml").toString());
-
-    // Also, you need hive.aux.jars as well
-    // addToStringCollection(conf, "tmpjars",
-    // conf.getStringCollection("hive.aux.jars.path"));
-
-    // Or, more effectively, we can ship all the jars the client needs
-    // to the workers as well
-    String hadoopClasspath = System.getenv("HADOOP_CLASSPATH");
-    if (hadoopClasspath == null) {
-      return;
-    }
-    String[] hadoopJars = hadoopClasspath.split(File.pathSeparator);
-    List<String> hadoopJarURLs = Lists.newArrayList();
-    for (String jarPath : hadoopJars) {
-      File file = new File(jarPath);
-      if (file.exists() && file.isFile()) {
-        String jarURL = file.toURI().toString();
-        hadoopJarURLs.add(jarURL);
-      }
-    }
-    addToStringCollection(conf, "tmpjars", hadoopJarURLs);
-  }
-
-  /**
-  * process arguments
-  * @param args to process
-  * @return CommandLine instance
-  * @throws ParseException error parsing arguments
-  * @throws InterruptedException interrupted
-  */
-  private CommandLine processArguments(String[] args) throws ParseException,
-            InterruptedException {
-    Options options = new Options();
-    options.addOption("h", "help", false, "Help");
-    options.addOption("v", "verbose", false, "Verbose");
-    options.addOption("D", "hiveconf", true,
-                "property=value for Hive/Hadoop configuration");
-    options.addOption("w", "workers", true, "Number of workers");
-    if (vertexClass == null) {
-      options.addOption(null, "vertexClass", true,
-          "Giraph Vertex class to use");
-    }
-    if (vertexInputFormatClass == null) {
-      options.addOption(null, "vertexInputFormatClass", true,
-          "Giraph HCatalogVertexInputFormat class to use");
-    }
-    if (edgeInputFormatClass == null) {
-      options.addOption(null, "edgeInputFormatClass", true,
-          "Giraph HCatalogEdgeInputFormat class to use");
-    }
-
-    if (vertexOutputFormatClass == null) {
-      options.addOption(null, "vertexOutputFormatClass", true,
-          "Giraph HCatalogVertexOutputFormat class to use");
-    }
-
-    options.addOption("db", "database", true, "Hive database name");
-    options.addOption("vi", "vertexInputTable", true,
-        "Vertex input table name");
-    options.addOption("VI", "vertexInputFilter", true,
-        "Vertex input table filter expression (e.g., \"a<2 AND b='two'\"");
-    options.addOption("ei", "edgeInputTable", true,
-        "Edge input table name");
-    options.addOption("EI", "edgeInputFilter", true,
-        "Edge input table filter expression (e.g., \"a<2 AND b='two'\"");
-    options.addOption("o", "outputTable", true, "Output table name");
-    options.addOption("O", "outputPartition", true,
-        "Output table partition values (e.g., \"a=1,b=two\")");
-    options.addOption("s", "skipOutput", false, "Skip output?");
-
-    addMoreOptions(options);
-
-    CommandLineParser parser = new GnuParser();
-    final CommandLine cmdln = parser.parse(options, args);
-    if (args.length == 0 || cmdln.hasOption("help")) {
-      new HelpFormatter().printHelp(getClass().getName(), options, true);
-      throw new InterruptedException();
-    }
-
-    // Giraph classes
-    if (cmdln.hasOption("vertexClass")) {
-      vertexClass = findClass(cmdln.getOptionValue("vertexClass"),
-          Vertex.class);
-    }
-    if (cmdln.hasOption("vertexInputFormatClass")) {
-      vertexInputFormatClass = findClass(
-          cmdln.getOptionValue("vertexInputFormatClass"),
-          HCatalogVertexInputFormat.class);
-    }
-    if (cmdln.hasOption("edgeInputFormatClass")) {
-      edgeInputFormatClass = findClass(
-          cmdln.getOptionValue("edgeInputFormatClass"),
-          HCatalogEdgeInputFormat.class);
-    }
-
-    if (cmdln.hasOption("vertexOutputFormatClass")) {
-      vertexOutputFormatClass = findClass(
-          cmdln.getOptionValue("vertexOutputFormatClass"),
-          HCatalogVertexOutputFormat.class);
-    }
-
-    if (cmdln.hasOption("skipOutput")) {
-      skipOutput = true;
-    }
-
-    if (vertexClass == null) {
-      throw new IllegalArgumentException(
-          "Need the Giraph Vertex class name (-vertexClass) to use");
-    }
-    if (vertexInputFormatClass == null && edgeInputFormatClass == null) {
-      throw new IllegalArgumentException(
-          "Need at least one of Giraph VertexInputFormat " +
-              "class name (-vertexInputFormatClass) and " +
-              "EdgeInputFormat class name (-edgeInputFormatClass)");
-    }
-    if (vertexOutputFormatClass == null) {
-      throw new IllegalArgumentException(
-          "Need the Giraph VertexOutputFormat " +
-              "class name (-vertexOutputFormatClass) to use");
-    }
-    if (!cmdln.hasOption("workers")) {
-      throw new IllegalArgumentException(
-          "Need to choose the number of workers (-w)");
-    }
-    if (!cmdln.hasOption("vertexInputTable") &&
-        vertexInputFormatClass != null) {
-      throw new IllegalArgumentException(
-          "Need to set the vertex input table name (-vi)");
-    }
-    if (!cmdln.hasOption("edgeInputTable") &&
-        edgeInputFormatClass != null) {
-      throw new IllegalArgumentException(
-          "Need to set the edge input table name (-ei)");
-    }
-    if (!cmdln.hasOption("outputTable")) {
-      throw new IllegalArgumentException(
-          "Need to set the output table name (-o)");
-    }
-    dbName = cmdln.getOptionValue("dbName", "default");
-    vertexInputTableName = cmdln.getOptionValue("vertexInputTable");
-    vertexInputTableFilterExpr = cmdln.getOptionValue("vertexInputFilter");
-    edgeInputTableName = cmdln.getOptionValue("edgeInputTable");
-    edgeInputTableFilterExpr = cmdln.getOptionValue("edgeInputFilter");
-    outputTableName = cmdln.getOptionValue("outputTable");
-    outputTablePartitionValues = HiveUtils.parsePartitionValues(cmdln
-                .getOptionValue("outputPartition"));
-    workers = Integer.parseInt(cmdln.getOptionValue("workers"));
-    isVerbose = cmdln.hasOption("verbose");
-
-    // pick up -hiveconf arguments (may be absent)
-    String[] hiveconfArgs = cmdln.getOptionValues("hiveconf");
-    if (hiveconfArgs != null) {
-      for (String hiveconf : hiveconfArgs) {
-        String[] keyval = hiveconf.split("=", 2);
-        if (keyval.length == 2) {
-          String name = keyval[0];
-          String value = keyval[1];
-          if (name.equals("tmpjars") || name.equals("tmpfiles")) {
-            addToStringCollection(conf, name, value);
-          } else {
-            conf.set(name, value);
-          }
-        }
-      }
-    }
-
-    processMoreArguments(cmdln);
-
-    return cmdln;
-  }
-
-  /**
-  * Add values to a string collection property.
-  * @param conf Configuration
-  * @param name Property name
-  * @param values Values to add
-  */
-  private static void addToStringCollection(Configuration conf, String name,
-                                              String... values) {
-    addToStringCollection(conf, name, Arrays.asList(values));
-  }
-
-  /**
-  * Add values to a string collection property.
-  * @param conf Configuration
-  * @param name Property name
-  * @param values Values to add
-  */
-  private static void addToStringCollection(
-          Configuration conf, String name, Collection
-          <? extends String> values) {
-    Collection<String> tmpfiles = conf.getStringCollection(name);
-    tmpfiles.addAll(values);
-    conf.setStrings(name, tmpfiles.toArray(new String[tmpfiles.size()]));
-  }
-
-  /**
-  * Find a class by name, checking that it extends the given base class.
-  * @param className Class name to find
-  * @param base Base class
-  * @param <T> Class type found
-  * @return The class as a subclass of base, or null if not assignable
-  */
-  private <T> Class<? extends T> findClass(String className, Class<T> base) {
-    try {
-      Class<?> cls = Class.forName(className);
-      if (base.isAssignableFrom(cls)) {
-        return cls.asSubclass(base);
-      }
-      return null;
-    } catch (ClassNotFoundException e) {
-      throw new IllegalArgumentException(className + ": Invalid class name");
-    }
-  }
-
-  @Override
-  public final Configuration getConf() {
-    return conf;
-  }
-
-  @Override
-  public final void setConf(Configuration conf) {
-    this.conf = conf;
-  }
-
-  /**
-  * Override this method to add more command-line options. You can process
-  * them by also overriding {@link #processMoreArguments(CommandLine)}.
-  *
-  * @param options Options
-  */
-  protected void addMoreOptions(Options options) {
-  }
-
-  /**
-  * Override this method to process additional command-line arguments. You
-  * may want to declare additional options by also overriding
-  * {@link #addMoreOptions(Options)}.
-  *
-  * @param cmd Command
-  */
-  protected void processMoreArguments(CommandLine cmd) {
-  }
-
-  /**
-  * Override this method to do additional setup with the GiraphJob that will
-  * run.
-  *
-  * @param job
-  *            GiraphJob that is going to run
-  */
-  protected void initGiraphJob(GiraphJob job) {
-    LOG.info(getClass().getSimpleName() + " with");
-    String prefix = "\t";
-    LOG.info(prefix + "-vertexClass=" +
-         vertexClass.getCanonicalName());
-    if (vertexInputFormatClass != null) {
-      LOG.info(prefix + "-vertexInputFormatClass=" +
-          vertexInputFormatClass.getCanonicalName());
-    }
-    if (edgeInputFormatClass != null) {
-      LOG.info(prefix + "-edgeInputFormatClass=" +
-          edgeInputFormatClass.getCanonicalName());
-    }
-    LOG.info(prefix + "-vertexOutputFormatClass=" +
-        vertexOutputFormatClass.getCanonicalName());
-    if (vertexInputTableName != null) {
-      LOG.info(prefix + "-vertexInputTable=" + vertexInputTableName);
-    }
-    if (vertexInputTableFilterExpr != null) {
-      LOG.info(prefix + "-vertexInputFilter=\"" +
-          vertexInputTableFilterExpr + "\"");
-    }
-    if (edgeInputTableName != null) {
-      LOG.info(prefix + "-edgeInputTable=" + edgeInputTableName);
-    }
-    if (edgeInputTableFilterExpr != null) {
-      LOG.info(prefix + "-edgeInputFilter=\"" +
-          edgeInputTableFilterExpr + "\"");
-    }
-    LOG.info(prefix + "-outputTable=" + outputTableName);
-    if (outputTablePartitionValues != null) {
-      LOG.info(prefix + "-outputPartition=\"" +
-          outputTablePartitionValues + "\"");
-    }
-    LOG.info(prefix + "-workers=" + workers);
-  }
-}
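
The runner above exposes three extension hooks: addMoreOptions,
processMoreArguments and initGiraphJob. A sketch of a subclass wiring in one
extra option; the "maxSupersteps" option and "my.max.supersteps" property
are made up for illustration.

    import org.apache.commons.cli.CommandLine;
    import org.apache.commons.cli.Options;
    import org.apache.giraph.graph.GiraphJob;
    import org.apache.hadoop.util.ToolRunner;

    // Hypothetical subclass of HiveGiraphRunner.
    public class MyHiveRunner extends HiveGiraphRunner {
      /** Maximum number of supersteps (hypothetical extra option). */
      private int maxSupersteps;

      public MyHiveRunner() {
        // Null classes are picked up from -vertexClass etc. on the
        // command line, exactly as HiveGiraphRunner.main does.
        super(null, null, null, null);
      }

      @Override
      protected void addMoreOptions(Options options) {
        options.addOption("ms", "maxSupersteps", true, "Maximum supersteps");
      }

      @Override
      protected void processMoreArguments(CommandLine cmd) {
        maxSupersteps =
            Integer.parseInt(cmd.getOptionValue("maxSupersteps", "10"));
      }

      @Override
      protected void initGiraphJob(GiraphJob job) {
        super.initGiraphJob(job);
        job.getConfiguration().setInt("my.max.supersteps", maxSupersteps);
      }

      public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new MyHiveRunner(), args));
      }
    }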

http://git-wip-us.apache.org/repos/asf/giraph/blob/45851391/giraph-formats-contrib/src/main/java/org/apache/giraph/io/hcatalog/HiveUtils.java
----------------------------------------------------------------------
diff --git a/giraph-formats-contrib/src/main/java/org/apache/giraph/io/hcatalog/HiveUtils.java b/giraph-formats-contrib/src/main/java/org/apache/giraph/io/hcatalog/HiveUtils.java
deleted file mode 100644
index c1f76f1..0000000
--- a/giraph-formats-contrib/src/main/java/org/apache/giraph/io/hcatalog/HiveUtils.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.giraph.io.hcatalog;
-
-import com.google.common.base.Splitter;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-
-import java.util.List;
-import java.util.Map;
-
-/**
- * Utilities and helpers for working with Hive tables.
- */
-public class HiveUtils {
-  // TODO: use a Hive utility class if one already provides this
-
-  /**
-   * Private constructor for helper class.
-   */
-  private HiveUtils() {
-    // Do nothing.
-  }
-
-  /**
-  * Parse a partition-values string such as "a=1,b=two" into a map.
-  * @param outputTablePartitionString Table partition string
-  * @return Map of partition keys to values, or null for null input
-  */
-  public static Map<String, String> parsePartitionValues(
-            String outputTablePartitionString) {
-    if (outputTablePartitionString == null) {
-      return null;
-    }
-    Splitter commaSplitter = Splitter.on(',').omitEmptyStrings().trimResults();
-    Splitter equalSplitter = Splitter.on('=').omitEmptyStrings().trimResults();
-    Map<String, String> partitionValues = Maps.newHashMap();
-    for (String keyValStr : commaSplitter.split(outputTablePartitionString)) {
-      List<String> keyVal = Lists.newArrayList(equalSplitter.split(keyValStr));
-      if (keyVal.size() != 2) {
-        throw new IllegalArgumentException(
-            "Unrecognized partition value format: " +
-            outputTablePartitionString);
-      }
-      partitionValues.put(keyVal.get(0), keyVal.get(1));
-    }
-    return partitionValues;
-  }
-}
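
A short usage sketch of the parser above; PartitionValuesDemo is a
hypothetical class, and the "a=1,b=two" format is the one documented for
HiveGiraphRunner's -outputPartition option.

    import java.util.Map;

    public class PartitionValuesDemo {
      public static void main(String[] args) {
        // Whitespace is trimmed and empty entries dropped by the splitters.
        Map<String, String> parts =
            HiveUtils.parsePartitionValues("a=1, b=two");
        System.out.println(parts.get("a") + " " + parts.get("b")); // 1 two
      }
    }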

http://git-wip-us.apache.org/repos/asf/giraph/blob/45851391/giraph-formats-contrib/src/main/java/org/apache/giraph/io/hcatalog/package-info.java
----------------------------------------------------------------------
diff --git a/giraph-formats-contrib/src/main/java/org/apache/giraph/io/hcatalog/package-info.java b/giraph-formats-contrib/src/main/java/org/apache/giraph/io/hcatalog/package-info.java
deleted file mode 100644
index b01e254..0000000
--- a/giraph-formats-contrib/src/main/java/org/apache/giraph/io/hcatalog/package-info.java
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * Package of input and output format classes
- * for loading and storing Hive/Pig data using HCatalog.
- */
-package org.apache.giraph.io.hcatalog;
-

http://git-wip-us.apache.org/repos/asf/giraph/blob/45851391/giraph-formats-contrib/src/main/java/org/apache/hcatalog/mapreduce/HCatUtils.java
----------------------------------------------------------------------
diff --git a/giraph-formats-contrib/src/main/java/org/apache/hcatalog/mapreduce/HCatUtils.java b/giraph-formats-contrib/src/main/java/org/apache/hcatalog/mapreduce/HCatUtils.java
deleted file mode 100644
index 1f25709..0000000
--- a/giraph-formats-contrib/src/main/java/org/apache/hcatalog/mapreduce/HCatUtils.java
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hcatalog.mapreduce;
-
-import org.apache.giraph.io.hcatalog.GiraphHCatInputFormat;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
-import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
-import org.apache.hadoop.hive.metastore.api.Partition;
-import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.Table;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hcatalog.common.ErrorType;
-import org.apache.hcatalog.common.HCatException;
-import org.apache.hcatalog.common.HCatUtil;
-import org.apache.hcatalog.data.schema.HCatSchema;
-import org.apache.thrift.TException;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-
-/**
- * Utility methods copied from HCatalog because of visibility restrictions.
- */
-public class HCatUtils {
-  /**
-   * Don't instantiate.
-   */
-  private HCatUtils() { }
-
-  /**
-   * Returns the given InputJobInfo after populating it with data queried
-   * from the metadata service.
-   *
-   * @param conf Configuration
-   * @param inputJobInfo Input job info
-   * @return Populated input job info
-   * @throws IOException
-   */
-  public static InputJobInfo getInputJobInfo(
-      Configuration conf, InputJobInfo inputJobInfo)
-    throws IOException {
-    HiveMetaStoreClient client = null;
-    HiveConf hiveConf;
-    try {
-      if (conf != null) {
-        hiveConf = HCatUtil.getHiveConf(conf);
-      } else {
-        hiveConf = new HiveConf(GiraphHCatInputFormat.class);
-      }
-      client = HCatUtil.getHiveClient(hiveConf);
-      Table table = HCatUtil.getTable(client, inputJobInfo.getDatabaseName(),
-          inputJobInfo.getTableName());
-
-      List<PartInfo> partInfoList = new ArrayList<PartInfo>();
-
-      inputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable()));
-      if (table.getPartitionKeys().size() != 0) {
-        // Partitioned table
-        List<Partition> parts = client.listPartitionsByFilter(
-            inputJobInfo.getDatabaseName(),
-            inputJobInfo.getTableName(),
-            inputJobInfo.getFilter(),
-            (short) -1);
-
-        if (parts != null) {
-          // Default to 100,000 partitions if hcat.metastore.maxpartitions is
-          // not defined
-          int maxPart = hiveConf.getInt("hcat.metastore.maxpartitions", 100000);
-          if (parts.size() > maxPart) {
-            throw new HCatException(ErrorType.ERROR_EXCEED_MAXPART,
-                "total number of partitions is " + parts.size());
-          }
-
-          // Populate partition info
-          for (Partition ptn : parts) {
-            HCatSchema schema = HCatUtil.extractSchema(
-                new org.apache.hadoop.hive.ql.metadata.Partition(table, ptn));
-            PartInfo partInfo = extractPartInfo(schema, ptn.getSd(),
-                ptn.getParameters(), conf, inputJobInfo);
-            partInfo.setPartitionValues(InternalUtil.createPtnKeyValueMap(table,
-                ptn));
-            partInfoList.add(partInfo);
-          }
-        }
-      } else {
-        // Non partitioned table
-        HCatSchema schema = HCatUtil.extractSchema(table);
-        PartInfo partInfo = extractPartInfo(schema, table.getTTable().getSd(),
-            table.getParameters(), conf, inputJobInfo);
-        partInfo.setPartitionValues(new HashMap<String, String>());
-        partInfoList.add(partInfo);
-      }
-      inputJobInfo.setPartitions(partInfoList);
-    } catch (MetaException e) {
-      throw new IOException("Got MetaException", e);
-    } catch (NoSuchObjectException e) {
-      throw new IOException("Got NoSuchObjectException", e);
-    } catch (TException e) {
-      throw new IOException("Got TException", e);
-    } catch (HiveException e) {
-      throw new IOException("Got HiveException", e);
-    } finally {
-      HCatUtil.closeHiveClientQuietly(client);
-    }
-    return inputJobInfo;
-  }
-
-  /**
-   * Extract partition info.
-   *
-   * @param schema Table schema
-   * @param sd Storage descriptor
-   * @param parameters Parameters
-   * @param conf Configuration
-   * @param inputJobInfo Input job info
-   * @return Partition info
-   * @throws IOException
-   */
-  private static PartInfo extractPartInfo(
-      HCatSchema schema, StorageDescriptor sd, Map<String, String> parameters,
-      Configuration conf, InputJobInfo inputJobInfo) throws IOException {
-    StorerInfo storerInfo = InternalUtil.extractStorerInfo(sd, parameters);
-
-    Properties hcatProperties = new Properties();
-    HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(conf,
-        storerInfo);
-
-    // Copy the properties from storageHandler to jobProperties
-    Map<String, String> jobProperties =
-        HCatUtil.getInputJobProperties(storageHandler, inputJobInfo);
-
-    for (Map.Entry<String, String> param : parameters.entrySet()) {
-      hcatProperties.put(param.getKey(), param.getValue());
-    }
-
-    return new PartInfo(schema, storageHandler, sd.getLocation(),
-        hcatProperties, jobProperties, inputJobInfo.getTableInfo());
-  }
-
-  /**
-   * Create a new {@link HCatRecordReader}.
-   *
-   * @param storageHandler Storage handler
-   * @param valuesNotInDataCols Values not in data columns
-   * @return Record reader
-   */
-  public static RecordReader newHCatReader(
-      HCatStorageHandler storageHandler,
-      Map<String, String> valuesNotInDataCols) {
-    return new HCatRecordReader(storageHandler, valuesNotInDataCols);
-  }
-
-  /**
-   * Cast an {@link InputSplit} to {@link HCatSplit}.
-   *
-   * @param split Input split
-   * @return {@link HCatSplit}
-   * @throws IOException
-   */
-  public static HCatSplit castToHCatSplit(InputSplit split)
-    throws IOException {
-    return InternalUtil.castToHCatSplit(split);
-  }
-}
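
A sketch of how a client might call getInputJobInfo above; the demo class,
database and table names are placeholders, not something this commit
prescribes.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hcatalog.mapreduce.InputJobInfo;

    public class InputJobInfoDemo {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Null filter selects all partitions of the (placeholder) table.
        InputJobInfo info = InputJobInfo.create("default", "my_table", null);
        info = HCatUtils.getInputJobInfo(conf, info);
        System.out.println(info.getPartitions().size() + " partition(s)");
      }
    }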

http://git-wip-us.apache.org/repos/asf/giraph/blob/45851391/giraph-formats-contrib/src/main/java/org/apache/hcatalog/mapreduce/package-info.java
----------------------------------------------------------------------
diff --git a/giraph-formats-contrib/src/main/java/org/apache/hcatalog/mapreduce/package-info.java b/giraph-formats-contrib/src/main/java/org/apache/hcatalog/mapreduce/package-info.java
deleted file mode 100644
index e236aaf..0000000
--- a/giraph-formats-contrib/src/main/java/org/apache/hcatalog/mapreduce/package-info.java
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * Package for HCatalog helper utilities.
- */
-package org.apache.hcatalog.mapreduce;

http://git-wip-us.apache.org/repos/asf/giraph/blob/45851391/giraph-formats-contrib/src/test/java/org/apache/giraph/io/accumulo/TestAccumuloVertexFormat.java
----------------------------------------------------------------------
diff --git a/giraph-formats-contrib/src/test/java/org/apache/giraph/io/accumulo/TestAccumuloVertexFormat.java b/giraph-formats-contrib/src/test/java/org/apache/giraph/io/accumulo/TestAccumuloVertexFormat.java
deleted file mode 100644
index 5885b64..0000000
--- a/giraph-formats-contrib/src/test/java/org/apache/giraph/io/accumulo/TestAccumuloVertexFormat.java
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.giraph.io.accumulo;
-
-import org.apache.accumulo.core.client.BatchWriter;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.Scanner;
-import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
-import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.core.util.ByteBufferUtil;
-import org.apache.accumulo.core.util.Pair;
-import org.apache.giraph.BspCase;
-import org.apache.giraph.conf.GiraphConfiguration;
-import org.apache.giraph.io.accumulo.edgemarker.AccumuloEdgeInputFormat;
-import org.apache.giraph.io.accumulo.edgemarker.AccumuloEdgeOutputFormat;
-import org.apache.giraph.graph.EdgeListVertex;
-import org.apache.giraph.graph.GiraphJob;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Text;
-import org.apache.log4j.Logger;
-
-import java.io.File;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.HashSet;
-import java.util.Map;
-import org.junit.Test;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-/*
-    Test class for Accumulo vertex input/output formats.
- */
-public class TestAccumuloVertexFormat extends BspCase {
-
-    private final String TABLE_NAME = "simple_graph";
-    private final String INSTANCE_NAME = "instance";
-    private final Text FAMILY = new Text("cf");
-    private final Text CHILDREN = new Text("children");
-    private final String USER = "root";
-    private final byte[] PASSWORD = new byte[] {};
-    private final Text OUTPUT_FIELD = new Text("parent");
-
-
-    private final Logger log = Logger.getLogger(TestAccumuloVertexFormat.class);
-
-    /**
-     * Create the test case
-     */
-    public TestAccumuloVertexFormat() {
-        super(TestAccumuloVertexFormat.class.getName());
-    }
-
-    /*
-     Write a simple parent-child directed graph to Accumulo.
-     Run a job which reads the values
-     into subclasses that extend AccumuloVertex I/O formats.
-     Check the output after the job.
-     */
-    @Test
-    public void testAccumuloInputOutput() throws Exception {
-        if (System.getProperty("prop.mapred.job.tracker") != null) {
-            if(log.isInfoEnabled())
-                log.info("testAccumuloInputOutput: " +
-                        "Ignore this test if not local mode.");
-            return;
-        }
-
-        File jarTest = new File(System.getProperty("prop.jarLocation"));
-        if(!jarTest.exists()) {
-            fail("Could not find Giraph jar at " +
-                    "location specified by 'prop.jarLocation'. " +
-                    "Make sure you built the main Giraph artifact?.");
-        }
-
-        //Write vertices and edges out to a mock instance.
-        MockInstance mockInstance = new MockInstance(INSTANCE_NAME);
-        Connector c = mockInstance.getConnector("root", new byte[] {});
-        c.tableOperations().create(TABLE_NAME);
-        BatchWriter bw = c.createBatchWriter(TABLE_NAME, 10000L, 1000L, 4);
-
-        Mutation m1 = new Mutation(new Text("0001"));
-        m1.put(FAMILY, CHILDREN, new Value("0002".getBytes()));
-        bw.addMutation(m1);
-
-        Mutation m2 = new Mutation(new Text("0002"));
-        m2.put(FAMILY, CHILDREN, new Value("0003".getBytes()));
-        bw.addMutation(m2);
-        if(log.isInfoEnabled())
-            log.info("Writing mutations to Accumulo table");
-        bw.close();
-
-        Configuration conf = new Configuration();
-        conf.set(AccumuloVertexOutputFormat.OUTPUT_TABLE, TABLE_NAME);
-
-        /*
-        Very important to initialize the formats before
-        sending configuration to the GiraphJob. Otherwise
-        the internally constructed Job in GiraphJob will
-        not have the proper context initialization.
-         */
-        AccumuloInputFormat.setInputInfo(conf, USER, "".getBytes(),
-                TABLE_NAME, new Authorizations());
-        AccumuloInputFormat.setMockInstance(conf, INSTANCE_NAME);
-
-        AccumuloOutputFormat.setOutputInfo(conf, USER, PASSWORD, true, null);
-        AccumuloOutputFormat.setMockInstance(conf, INSTANCE_NAME);
-
-        GiraphJob job = new GiraphJob(conf, getCallingMethodName());
-        setupConfiguration(job);
-        GiraphConfiguration giraphConf = job.getConfiguration();
-        giraphConf.setVertexClass(EdgeNotification.class);
-        giraphConf.setVertexInputFormatClass(AccumuloEdgeInputFormat.class);
-        giraphConf.setVertexOutputFormatClass(AccumuloEdgeOutputFormat.class);
-
-        HashSet<Pair<Text, Text>> columnsToFetch = new HashSet<Pair<Text,Text>>();
-        columnsToFetch.add(new Pair<Text, Text>(FAMILY, CHILDREN));
-        AccumuloInputFormat.fetchColumns(job.getConfiguration(), columnsToFetch);
-
-        if(log.isInfoEnabled())
-            log.info("Running edge notification job using Accumulo input");
-        assertTrue(job.run(true));
-        Scanner scanner = c.createScanner(TABLE_NAME, new Authorizations());
-        scanner.setRange(new Range("0002", "0002"));
-        scanner.fetchColumn(FAMILY, OUTPUT_FIELD);
-        boolean foundColumn = false;
-
-        if(log.isInfoEnabled())
-            log.info("Verify job output persisted correctly.");
-        //make sure we found the qualifier.
-        assertTrue(scanner.iterator().hasNext());
-
-
-        //now we check to make sure the expected value from the job persisted correctly.
-        for(Map.Entry<Key,Value> entry : scanner) {
-            Text row = entry.getKey().getRow();
-            assertEquals("0002", row.toString());
-            Value value = entry.getValue();
-            assertEquals("0001", ByteBufferUtil.toString(
-                    ByteBuffer.wrap(value.get())));
-            foundColumn = true;
-        }
-        assertTrue(foundColumn);
-    }
-    /*
-    Test compute method that sends each edge a notification of its parents.
-    The test set only has a 1-1 parent-to-child ratio for this unit test.
-     */
-    public static class EdgeNotification
-            extends EdgeListVertex<Text, Text, Text, Text> {
-        @Override
-        public void compute(Iterable<Text> messages) throws IOException {
-          for (Text message : messages) {
-            getValue().set(message);
-          }
-          if(getSuperstep() == 0) {
-            sendMessageToAllEdges(getId());
-          }
-          voteToHalt();
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/giraph/blob/45851391/giraph-formats-contrib/src/test/java/org/apache/giraph/io/accumulo/edgemarker/AccumuloEdgeInputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-formats-contrib/src/test/java/org/apache/giraph/io/accumulo/edgemarker/AccumuloEdgeInputFormat.java b/giraph-formats-contrib/src/test/java/org/apache/giraph/io/accumulo/edgemarker/AccumuloEdgeInputFormat.java
deleted file mode 100644
index b670144..0000000
--- a/giraph-formats-contrib/src/test/java/org/apache/giraph/io/accumulo/edgemarker/AccumuloEdgeInputFormat.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.giraph.io.accumulo.edgemarker;
-
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Value;
-import org.apache.giraph.graph.Edge;
-import org.apache.giraph.graph.Vertex;
-import org.apache.giraph.graph.VertexReader;
-import org.apache.giraph.io.accumulo.AccumuloVertexInputFormat;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-import com.google.common.collect.Lists;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.regex.Pattern;
-
-/*
- Example subclass which reads in Key/Value pairs to construct vertex objects.
- */
-public class AccumuloEdgeInputFormat
-        extends AccumuloVertexInputFormat<Text, Text, Text, Text> {
-
-    private static final Text uselessEdgeValue = new Text();
-    private Configuration conf;
-    public VertexReader<Text, Text, Text, Text>
-    createVertexReader(InputSplit split, TaskAttemptContext context)
-            throws IOException {
-        try {
-            return new AccumuloEdgeVertexReader(
-                    accumuloInputFormat.createRecordReader(split, context));
-        } catch (InterruptedException e) {
-            throw new IOException(e);
-        }
-    }
-    /*
-        Reader takes Key/Value pairs from the underlying input format.
-     */
-    public static class AccumuloEdgeVertexReader
-            extends AccumuloVertexReader<Text, Text, Text, Text> {
-
-        public static final Pattern commaPattern = Pattern.compile("[,]");
-
-        public AccumuloEdgeVertexReader(RecordReader<Key, Value> recordReader) {
-            super(recordReader);
-        }
-
-
-        public boolean nextVertex() throws IOException, InterruptedException {
-            return getRecordReader().nextKeyValue();
-        }
-
-        /*
-         Each Key/Value contains the information needed to construct the vertices.
-         */
-        public Vertex<Text, Text, Text, Text> getCurrentVertex()
-                throws IOException, InterruptedException {
-            Key key = getRecordReader().getCurrentKey();
-            Value value = getRecordReader().getCurrentValue();
-            Vertex<Text, Text, Text, Text> vertex =
-                getConfiguration().createVertex();
-            Text vertexId = key.getRow();
-            List<Edge<Text, Text>> edges = Lists.newLinkedList();
-            String edge = new String(value.get());
-            Text edgeId = new Text(edge);
-            edges.add(new Edge<Text, Text>(edgeId, uselessEdgeValue));
-            vertex.initialize(vertexId, new Text(), edges);
-            return vertex;
-        }
-    }
-}
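
For orientation, an input format like the one removed above would be registered with a job via
GiraphConfiguration, mirroring the setters used by the HBase test later in this commit. A
minimal, hedged sketch (the no-arg construction and the MyVertex compute class are illustrative
assumptions, not part of this commit):

    // Register the Accumulo-backed vertex input format with a Giraph job.
    GiraphConfiguration conf = new GiraphConfiguration();
    conf.setVertexClass(MyVertex.class);  // hypothetical compute class
    conf.setVertexInputFormatClass(AccumuloEdgeInputFormat.class);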

http://git-wip-us.apache.org/repos/asf/giraph/blob/45851391/giraph-formats-contrib/src/test/java/org/apache/giraph/io/accumulo/edgemarker/AccumuloEdgeOutputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-formats-contrib/src/test/java/org/apache/giraph/io/accumulo/edgemarker/AccumuloEdgeOutputFormat.java b/giraph-formats-contrib/src/test/java/org/apache/giraph/io/accumulo/edgemarker/AccumuloEdgeOutputFormat.java
deleted file mode 100644
index ff00fd6..0000000
--- a/giraph-formats-contrib/src/test/java/org/apache/giraph/io/accumulo/edgemarker/AccumuloEdgeOutputFormat.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.giraph.io.accumulo.edgemarker;
-
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Value;
-import org.apache.giraph.io.accumulo.AccumuloVertexOutputFormat;
-import org.apache.giraph.graph.Vertex;
-import org.apache.giraph.graph.VertexWriter;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-import java.io.IOException;
-
-/*
- Example subclass for writing vertices back to Accumulo.
- */
-public class AccumuloEdgeOutputFormat
-        extends AccumuloVertexOutputFormat<Text, Text, Text> {
-
-    public VertexWriter<Text, Text, Text>
-    createVertexWriter(TaskAttemptContext context)
-            throws IOException, InterruptedException {
-        RecordWriter<Text, Mutation> writer =
-                accumuloOutputFormat.getRecordWriter(context);
-        String tableName = context.getConfiguration().get(OUTPUT_TABLE);
-        if (tableName == null) {
-            throw new IOException("Output table name not set; " +
-                    "configure AccumuloVertexOutputFormat.OUTPUT_TABLE");
-        }
-        return new AccumuloEdgeVertexWriter(writer, tableName);
-    }
-
-    /*
-    Wraps RecordWriter for writing Mutations back to the configured Accumulo Table.
-     */
-    public static class AccumuloEdgeVertexWriter
-            extends AccumuloVertexWriter<Text, Text, Text> {
-
-        private final Text CF = new Text("cf");
-        private final Text PARENT =  new Text("parent");
-        private Text tableName;
-
-        public AccumuloEdgeVertexWriter(
-                RecordWriter<Text, Mutation> writer, String tableName) {
-            super(writer);
-            this.tableName = new Text(tableName);
-        }
-        /*
-         Write back a mutation that adds a 'parent' qualifier whose cell value
-         is the vertex value. The vertex ID is used as the row key.
-         */
-        public void writeVertex(Vertex<Text, Text, Text, ?> vertex)
-                throws IOException, InterruptedException {
-            RecordWriter<Text, Mutation> writer = getRecordWriter();
-            Mutation mt = new Mutation(vertex.getId());
-            mt.put(CF, PARENT, new Value(
-                vertex.getValue().toString().getBytes()));
-            writer.write(tableName, mt);
-        }
-    }
-}
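
Because createVertexWriter() above fails fast when the output table is unset, callers must
configure AccumuloVertexOutputFormat.OUTPUT_TABLE before submitting. A hedged sketch
("my_table" is a placeholder):

    // Name the target table under the key checked in createVertexWriter().
    GiraphConfiguration conf = new GiraphConfiguration();
    conf.setVertexOutputFormatClass(AccumuloEdgeOutputFormat.class);
    conf.set(AccumuloVertexOutputFormat.OUTPUT_TABLE, "my_table");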

http://git-wip-us.apache.org/repos/asf/giraph/blob/45851391/giraph-formats-contrib/src/test/java/org/apache/giraph/io/hbase/TestHBaseRootMarkerVertextFormat.java
----------------------------------------------------------------------
diff --git a/giraph-formats-contrib/src/test/java/org/apache/giraph/io/hbase/TestHBaseRootMarkerVertextFormat.java b/giraph-formats-contrib/src/test/java/org/apache/giraph/io/hbase/TestHBaseRootMarkerVertextFormat.java
deleted file mode 100644
index c09913d..0000000
--- a/giraph-formats-contrib/src/test/java/org/apache/giraph/io/hbase/TestHBaseRootMarkerVertextFormat.java
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.giraph.io.hbase;
-
-
-import org.apache.giraph.BspCase;
-import org.apache.giraph.conf.GiraphConfiguration;
-import org.apache.giraph.graph.EdgeListVertex;
-import org.apache.giraph.graph.GiraphJob;
-import org.apache.giraph.io.hbase.edgemarker.TableEdgeInputFormat;
-import org.apache.giraph.io.hbase.edgemarker.TableEdgeOutputFormat;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.MiniHBaseCluster;
-import org.apache.hadoop.hbase.client.Get;
-import org.apache.hadoop.hbase.client.HBaseAdmin;
-import org.apache.hadoop.hbase.client.HTable;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.mapreduce.ImportTsv;
-import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
-import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.util.GenericOptionsParser;
-import org.apache.log4j.Logger;
-import org.junit.Test;
-
-import java.io.File;
-import java.io.IOException;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-/*
- Test case for reading and writing vertices against an HBase mini-cluster.
- */
-public class TestHBaseRootMarkerVertextFormat extends BspCase {
-
-    private final HBaseTestingUtility testUtil = new HBaseTestingUtility();
-    private final Logger log =
-        Logger.getLogger(TestHBaseRootMarkerVertextFormat.class);
-
-    private final String TABLE_NAME = "simple_graph";
-    private final String FAMILY = "cf";
-    private final String QUALIFIER = "children";
-    private final String OUTPUT_FIELD = "parent";
-
-    public TestHBaseRootMarkerVertextFormat() {
-        super(TestHBaseRootMarkerVertextFormat.class.getName());
-    }
-
-    @Test
-    public void testHBaseInputOutput() throws Exception{
-
-        if (System.getProperty("prop.mapred.job.tracker") != null) {
-            if(log.isInfoEnabled())
-                log.info("testHBaseInputOutput: Ignore this test if not local mode.");
-            return;
-        }
-
-        File jarTest = new File(System.getProperty("prop.jarLocation"));
-        if(!jarTest.exists()) {
-            fail("Could not find Giraph jar at " +
-                    "location specified by 'prop.jarLocation'. " +
-                    "Make sure you built the main Giraph artifact?.");
-        }
-
-        String INPUT_FILE = "graph.csv";
-        //First let's load some data using ImportTsv into our mock table.
-        String[] args = new String[] {
-                "-Dimporttsv.columns=HBASE_ROW_KEY,cf:"+QUALIFER,
-                "-Dimporttsv.separator=" + "\u002c",
-                TABLE_NAME,
-                INPUT_FILE
-        };
-
-
-        MiniHBaseCluster cluster = testUtil.startMiniCluster();
-
-        GenericOptionsParser opts =
-                new GenericOptionsParser(cluster.getConfiguration(), args);
-        Configuration conf = opts.getConfiguration();
-        args = opts.getRemainingArgs();
-
-        try {
-
-            FileSystem fs = FileSystem.get(conf);
-            FSDataOutputStream op = fs.create(new Path(INPUT_FILE), true);
-            String line1 = "0001,0002\n";
-            String line2 = "0002,0004\n";
-            String line3 = "0003,0005\n";
-            String line4 = "0004,-1\n";
-            String line5 = "0005,-1\n";
-            op.write(line1.getBytes());
-            op.write(line2.getBytes());
-            op.write(line3.getBytes());
-            op.write(line4.getBytes());
-            op.write(line5.getBytes());
-            op.close();
-
-            final byte[] FAM = Bytes.toBytes(FAMILY);
-            final byte[] TAB = Bytes.toBytes(TABLE_NAME);
-
-            HTableDescriptor desc = new HTableDescriptor(TAB);
-            desc.addFamily(new HColumnDescriptor(FAM));
-            new HBaseAdmin(conf).createTable(desc);
-
-            Job job = ImportTsv.createSubmittableJob(conf, args);
-            job.waitForCompletion(false);
-            assertTrue(job.isSuccessful());
-            if(log.isInfoEnabled())
-                log.info("ImportTsv successful. Running HBase Giraph job.");
-
-            //now operate over HBase using Vertex I/O formats
-            conf.set(TableInputFormat.INPUT_TABLE, TABLE_NAME);
-            conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE_NAME);
-
-            GiraphJob giraphJob = new GiraphJob(conf, getCallingMethodName());
-            GiraphConfiguration giraphConf = giraphJob.getConfiguration();
-            giraphConf.setZooKeeperConfiguration(
-                    cluster.getMaster().getZooKeeper().getQuorum());
-            setupConfiguration(giraphJob);
-            giraphConf.setVertexClass(EdgeNotification.class);
-            giraphConf.setVertexInputFormatClass(TableEdgeInputFormat.class);
-            giraphConf.setVertexOutputFormatClass(TableEdgeOutputFormat.class);
-
-            assertTrue(giraphJob.run(true));
-            if(log.isInfoEnabled())
-                log.info("Giraph job successful. Checking output qualifier.");
-
-            //Do a get on row 0002, it should have a parent of 0001
-            //if the outputFormat worked.
-            HTable table = new HTable(conf, TABLE_NAME);
-            Result result = table.get(new Get("0002".getBytes()));
-            byte[] parentBytes = result.getValue(FAMILY.getBytes(),
-                    OUTPUT_FIELD.getBytes());
-            assertNotNull(parentBytes);
-            assertTrue(parentBytes.length > 0);
-            assertEquals("0001", Bytes.toString(parentBytes));
-
-        }   finally {
-            cluster.shutdown();
-        }
-    }
-
-    /*
-     Test compute method that notifies each child vertex of its parent by
-     sending this vertex's ID along all out-edges. The test data has a
-     1-to-1 parent-to-child ratio.
-     */
-    public static class EdgeNotification
-            extends EdgeListVertex<Text, Text, Text, Text> {
-        @Override
-        public void compute(Iterable<Text> messages) throws IOException {
-          for (Text message : messages) {
-            getValue().set(message);
-          }
-          if(getSuperstep() == 0) {
-            sendMessageToAllEdges(getId());
-          }
-          voteToHalt();
-        }
-    }
-}
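
To make the final assertion concrete: given the input line "0002,0004", a before/after view of
row 0002 in the 'simple_graph' table (layout inferred from the ImportTsv columns and the
qualifier checked at the end of the test):

    before the job:  row=0002  cf:children=0004
    after the job:   row=0002  cf:children=0004  cf:parent=0001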

http://git-wip-us.apache.org/repos/asf/giraph/blob/45851391/giraph-formats-contrib/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeInputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-formats-contrib/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeInputFormat.java b/giraph-formats-contrib/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeInputFormat.java
deleted file mode 100644
index e4e08d6..0000000
--- a/giraph-formats-contrib/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeInputFormat.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.giraph.io.hbase.edgemarker;
-
-import org.apache.giraph.graph.Edge;
-import org.apache.giraph.graph.Vertex;
-import org.apache.giraph.graph.VertexReader;
-import org.apache.giraph.io.hbase.HBaseVertexInputFormat;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.log4j.Logger;
-
-import com.google.common.collect.Lists;
-
-import java.io.IOException;
-import java.util.List;
-
-/*
-  Test subclass for HBaseVertexInputFormat. Reads the 'children'
-  qualifier from each row to create a single edge.
- */
-public class TableEdgeInputFormat extends
-        HBaseVertexInputFormat<Text, Text, Text, Text> {
-
-    private static final Logger log =
-            Logger.getLogger(TableEdgeInputFormat.class);
-    private static final Text uselessEdgeValue = new Text();
-
-    public VertexReader<Text, Text, Text, Text>
-            createVertexReader(InputSplit split,
-                               TaskAttemptContext context) throws IOException {
-        return new TableEdgeVertexReader(split, context);
-    }
-
-    /*
-     Uses the RecordReader to return HBase rows.
-     */
-    public static class TableEdgeVertexReader
-            extends HBaseVertexReader<Text, Text, Text, Text> {
-
-        private final byte[] CF = Bytes.toBytes("cf");
-        private final byte[] CHILDREN = Bytes.toBytes("children");
-
-        public TableEdgeVertexReader(InputSplit split, TaskAttemptContext context) throws IOException {
-            super(split, context);
-        }
-
-
-        public boolean nextVertex() throws IOException,
-                InterruptedException {
-            return getRecordReader().nextKeyValue();
-        }
-
-        /*
-         For each row, create a vertex whose ID is the row key as a Text,
-         and whose single edge comes from the row's 'children' qualifier.
-         */
-        public Vertex<Text, Text, Text, Text>
-                    getCurrentVertex()
-                throws IOException, InterruptedException {
-            Result row = getRecordReader().getCurrentValue();
-            Vertex<Text, Text, Text, Text> vertex =
-                getConfiguration().createVertex();
-            Text vertexId = new Text(Bytes.toString(row.getRow()));
-            List<Edge<Text, Text>> edges = Lists.newLinkedList();
-            String edge = Bytes.toString(row.getValue(CF, CHILDREN));
-            Text vertexValue = new Text();
-            Text edgeId = new Text(edge);
-            edges.add(new Edge<Text, Text>(edgeId, uselessEdgeValue));
-            vertex.initialize(vertexId, vertexValue, edges);
-
-            return vertex;
-        }
-    }
-}
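
The row-to-vertex mapping performed by getCurrentVertex() above, illustrated with a row of the
test data from this commit:

    HBase row:      key=0001  cf:children=0002
    Giraph vertex:  id=0001, value="" (empty Text), edges=[(0002, "")]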

http://git-wip-us.apache.org/repos/asf/giraph/blob/45851391/giraph-formats-contrib/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeOutputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-formats-contrib/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeOutputFormat.java b/giraph-formats-contrib/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeOutputFormat.java
deleted file mode 100644
index 169fd88..0000000
--- a/giraph-formats-contrib/src/test/java/org/apache/giraph/io/hbase/edgemarker/TableEdgeOutputFormat.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.giraph.io.hbase.edgemarker;
-
-import org.apache.giraph.io.hbase.HBaseVertexOutputFormat;
-import org.apache.giraph.graph.Vertex;
-import org.apache.giraph.graph.VertexWriter;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-import java.io.IOException;
-/*
- Test subclass for HBaseVertexOutputFormat
- */
-public class TableEdgeOutputFormat
-        extends HBaseVertexOutputFormat<Text, Text, Text> {
-
-
-    public VertexWriter<Text, Text, Text>
-    createVertexWriter(TaskAttemptContext context)
-            throws IOException, InterruptedException {
-        return new TableEdgeVertexWriter(context);
-    }
-
-    /*
-     For each vertex, write back to the configured table using
-     the vertex id as the row key bytes.
-     */
-    public static class TableEdgeVertexWriter
-            extends HBaseVertexWriter<Text, Text, Text> {
-
-        private final byte[] CF = Bytes.toBytes("cf");
-        private final byte[] PARENT =  Bytes.toBytes("parent");
-
-        public TableEdgeVertexWriter(TaskAttemptContext context)
-          throws IOException, InterruptedException  {
-            super(context);
-        }
-        /*
-         Record the vertex value as the value of a new 'parent' qualifier.
-         */
-        public void writeVertex(
-                Vertex<Text, Text, Text, ?> vertex)
-                throws IOException, InterruptedException {
-            RecordWriter<ImmutableBytesWritable, Writable> writer =
-                getRecordWriter();
-            // Copy only the valid bytes: Text.getBytes() can return a backing
-            // array that is longer than Text.getLength().
-            byte[] rowBytes = Bytes.toBytes(vertex.getId().toString());
-            Put put = new Put(rowBytes);
-            Text value = vertex.getValue();
-            if (value.getLength() > 0) {
-                put.add(CF, PARENT, Bytes.toBytes(value.toString()));
-                writer.write(new ImmutableBytesWritable(rowBytes), put);
-            }
-        }
-    }
-}
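
Putting the two HBase test formats together, a hedged sketch of the job wiring, condensed from
TestHBaseRootMarkerVertextFormat earlier in this commit (the job name string is arbitrary):

    // Point both table-backed formats at the same table, then register them.
    Configuration conf = HBaseConfiguration.create();
    conf.set(TableInputFormat.INPUT_TABLE, "simple_graph");
    conf.set(TableOutputFormat.OUTPUT_TABLE, "simple_graph");
    GiraphJob job = new GiraphJob(conf, "hbase-edge-marker");
    GiraphConfiguration giraphConf = job.getConfiguration();
    giraphConf.setVertexClass(EdgeNotification.class);  // from the test above
    giraphConf.setVertexInputFormatClass(TableEdgeInputFormat.class);
    giraphConf.setVertexOutputFormatClass(TableEdgeOutputFormat.class);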

http://git-wip-us.apache.org/repos/asf/giraph/blob/45851391/giraph-formats-contrib/src/test/java/org/apache/giraph/io/hcatalog/TestHiveUtils.java
----------------------------------------------------------------------
diff --git a/giraph-formats-contrib/src/test/java/org/apache/giraph/io/hcatalog/TestHiveUtils.java b/giraph-formats-contrib/src/test/java/org/apache/giraph/io/hcatalog/TestHiveUtils.java
deleted file mode 100644
index 421cc28..0000000
--- a/giraph-formats-contrib/src/test/java/org/apache/giraph/io/hcatalog/TestHiveUtils.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.giraph.io.hcatalog;
-
-import junit.framework.TestCase;
-
-import java.util.Map;
-import org.junit.Test;
-
-public class TestHiveUtils extends TestCase {
-  @Test
-  public void testParsePartition() {
-    String partitionStr = "feature1=2012-10-09, feature2=a1+b2, feature3=ff-gg";
-    Map<String, String> partition = HiveUtils.parsePartitionValues(partitionStr);
-    assertEquals(3, partition.size());
-    assertEquals("2012-10-09", partition.get("feature1"));
-    assertEquals("a1+b2", partition.get("feature2"));
-    assertEquals("ff-gg", partition.get("feature3"));
-  }
-}

http://git-wip-us.apache.org/repos/asf/giraph/blob/45851391/giraph-formats/README
----------------------------------------------------------------------
diff --git a/giraph-formats/README b/giraph-formats/README
new file mode 100644
index 0000000..527b237
--- /dev/null
+++ b/giraph-formats/README
@@ -0,0 +1,16 @@
+This module hosts additional custom input/output formats that depend on bulky or heavy-weight
+libraries and therefore can't be bundled directly in the main giraph jar.
+
+To build this module, the following prerequisites must be met.
+
+1) You must 'mvn install' the latest giraph maven artifact into your local m2 repo. In future
+releases this will not be required, as the artifact will be available from maven central.
+
+2) A build of the core giraph jar must be available at the following path:
+    ${giraph.trunk.base}/target/giraph-${project.version}-jar-with-dependencies.jar
+
+A default checkout sets ${giraph.trunk.base} to the parent directory '..', a relative path that resolves to the trunk directory.
+
+Once these conditions have been met, you can compile and test giraph-formats.jar.
+
+See https://cwiki.apache.org/confluence/display/GIRAPH/Giraph+formats+contrib for usage information.
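
As a concrete (but hedged) illustration of those steps (the profile name is taken from the
pom.xml below; exact paths may differ per checkout):

    # from the trunk checkout: install giraph artifacts into the local m2 repo
    mvn install
    # then build and test the formats module against the default Hadoop profile
    cd giraph-formats && mvn -Phadoop_0.20.203 test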

http://git-wip-us.apache.org/repos/asf/giraph/blob/45851391/giraph-formats/pom.xml
----------------------------------------------------------------------
diff --git a/giraph-formats/pom.xml b/giraph-formats/pom.xml
new file mode 100644
index 0000000..e34ca95
--- /dev/null
+++ b/giraph-formats/pom.xml
@@ -0,0 +1,248 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.giraph</groupId>
+    <artifactId>giraph-parent</artifactId>
+    <version>0.2-SNAPSHOT</version>
+  </parent>
+  <artifactId>giraph-formats</artifactId>
+  <packaging>jar</packaging>
+
+  <name>Apache Giraph Formats</name>
+
+  <properties>
+    <top.dir>${project.basedir}/..</top.dir>
+  </properties>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-checkstyle-plugin</artifactId>
+        <version>2.9</version>
+        <configuration>
+          <configLocation>${top.dir}/checkstyle.xml</configLocation>
+          <headerLocation>${top.dir}/license-header.txt</headerLocation>
+          <enableRulesSummary>false</enableRulesSummary>
+          <failOnError>true</failOnError>
+          <includeTestSourceDirectory>false</includeTestSourceDirectory>
+        </configuration>
+        <executions>
+          <execution>
+            <phase>verify</phase>
+            <goals>
+               <goal>check</goal>
+             </goals>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <version>2.6</version>
+        <configuration>
+          <systemProperties>
+            <property>
+              <name>prop.jarLocation</name>
+              <value>${top.dir}/giraph-core/target/giraph-${project.version}-${forHadoop}-jar-with-dependencies.jar</value>
+            </property>
+          </systemProperties>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>findbugs-maven-plugin</artifactId>
+        <version>2.5.1</version>
+        <configuration>
+          <xmlOutput>true</xmlOutput>
+          <findbugsXmlOutput>false</findbugsXmlOutput>
+          <excludeFilterFile>${top.dir}/findbugs-exclude.xml</excludeFilterFile>
+        </configuration>
+        <executions>
+          <execution>
+            <phase>verify</phase>
+            <goals>
+              <goal>check</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+
+  <repositories>
+    <!-- This is the main maven repository. Normally we wouldn't need to put
+       it here when it's the only one being used, but since we need to add
+       special repositories to get hcatalog we need to mention this one
+       specifically otherwise it won't be included. -->
+    <repository>
+      <id>central</id>
+      <name>Maven Repository</name>
+      <url>http://repo1.maven.org/maven2</url>
+      <releases>
+        <enabled>true</enabled>
+      </releases>
+    </repository>
+    <!-- This is necessary for hcatalog. -->
+    <repository>
+      <id>apache</id>
+      <name>Apache Repository</name>
+      <url>https://repository.apache.org/content/repositories/snapshots</url>
+      <snapshots>
+        <enabled>true</enabled>
+      </snapshots>
+    </repository>
+    <!-- This is necessary for hive-metastore dependencies for hcatalog. -->
+    <repository>
+      <id>datanucleus</id>
+      <name>datanucleus maven repository</name>
+      <url>http://www.datanucleus.org/downloads/maven2</url>
+      <layout>default</layout>
+      <releases>
+        <enabled>true</enabled>
+        <checksumPolicy>warn</checksumPolicy>
+      </releases>
+    </repository>
+  </repositories>
+
+  <profiles>
+    <profile>
+      <id>hadoop_0.20.203</id>
+      <activation>
+        <activeByDefault>true</activeByDefault>
+      </activation>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-test</artifactId>
+          <version>${hadoop.version}</version>
+          <scope>test</scope>
+        </dependency>
+      </dependencies>
+    </profile>
+
+    <profile>
+      <id>hadoop_1.0</id>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-test</artifactId>
+          <version>${hadoop.version}</version>
+          <scope>test</scope>
+        </dependency>
+      </dependencies>
+    </profile>
+
+    <profile>
+      <id>hadoop_non_secure</id>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-test</artifactId>
+          <version>${hadoop.version}</version>
+          <scope>test</scope>
+        </dependency>
+      </dependencies>
+    </profile>
+
+    <profile>
+      <id>hadoop_facebook</id>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-test</artifactId>
+          <version>${hadoop.version}</version>
+          <scope>system</scope>
+          <systemPath>${lib.dir}/facebook-hadoop-0.20-test.jar</systemPath>
+        </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
+  <dependencies>
+    <!-- compile dependencies. sorted lexicographically. -->
+    <dependency>
+      <groupId>commons-cli</groupId>
+      <artifactId>commons-cli</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.giraph</groupId>
+      <artifactId>giraph-core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hcatalog</groupId>
+      <artifactId>hcatalog-core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-common</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-exec</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-metastore</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.zookeeper</groupId>
+      <artifactId>zookeeper</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.giraph</groupId>
+      <artifactId>giraph</artifactId>
+      <version>0.2-SNAPSHOT</version>
+      <type>test-jar</type>
+    </dependency>
+
+    <!-- provided dependencies. sorted lexicographically. -->
+    <dependency>
+      <groupId>org.apache.accumulo</groupId>
+      <artifactId>accumulo-core</artifactId>
+      <scope>provided</scope>
+    </dependency>
+
+    <!-- test dependencies. sorted lexicographically. -->
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+</project>

