accumulo-commits mailing list archives

From mwa...@apache.org
Subject [5/7] accumulo-examples git commit: ACCUMULO-4511 Adding examples from Accumulo repo
Date Fri, 09 Dec 2016 17:12:18 GMT
http://git-wip-us.apache.org/repos/asf/accumulo-examples/blob/d96c6d96/src/main/java/org/apache/accumulo/examples/client/RowOperations.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/accumulo/examples/client/RowOperations.java b/src/main/java/org/apache/accumulo/examples/client/RowOperations.java
new file mode 100644
index 0000000..4081971
--- /dev/null
+++ b/src/main/java/org/apache/accumulo/examples/client/RowOperations.java
@@ -0,0 +1,215 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.client;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.cli.BatchWriterOpts;
+import org.apache.accumulo.core.cli.ClientOpts;
+import org.apache.accumulo.core.cli.ScannerOpts;
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.MutationsRejectedException;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.TableExistsException;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.hadoop.io.Text;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A demonstration of reading entire rows and deleting entire rows.
+ */
+public class RowOperations {
+
+  private static final Logger log = LoggerFactory.getLogger(RowOperations.class);
+
+  private static Connector connector;
+  private static String tableName = "example";
+  private static BatchWriter bw;
+
+  public static void main(String[] args) throws AccumuloException, AccumuloSecurityException, TableExistsException, TableNotFoundException,
+      MutationsRejectedException {
+
+    ClientOpts opts = new ClientOpts();
+    ScannerOpts scanOpts = new ScannerOpts();
+    BatchWriterOpts bwOpts = new BatchWriterOpts();
+    opts.parseArgs(RowOperations.class.getName(), args, scanOpts, bwOpts);
+
+    // First the setup work
+    connector = opts.getConnector();
+
+    // let's create an example table
+    connector.tableOperations().create(tableName);
+
+    // let's create 3 rows of information
+    Text row1 = new Text("row1");
+    Text row2 = new Text("row2");
+    Text row3 = new Text("row3");
+
+    // Which means 3 different mutations
+    Mutation mut1 = new Mutation(row1);
+    Mutation mut2 = new Mutation(row2);
+    Mutation mut3 = new Mutation(row3);
+
+    // And we'll put 4 columns in each row
+    Text col1 = new Text("1");
+    Text col2 = new Text("2");
+    Text col3 = new Text("3");
+    Text col4 = new Text("4");
+
+    // Now we'll add the four columns to each of the mutations
+    Value value = new Value("This is the value for this key".getBytes(UTF_8));
+    for (Mutation m : new Mutation[] {mut1, mut2, mut3}) {
+      for (Text col : new Text[] {col1, col2, col3, col4}) {
+        m.put(new Text("column"), col, System.currentTimeMillis(), value);
+      }
+    }
+
+    // Now we'll make a BatchWriter
+    bw = connector.createBatchWriter(tableName, bwOpts.getBatchWriterConfig());
+
+    // And add the mutations
+    bw.addMutation(mut1);
+    bw.addMutation(mut2);
+    bw.addMutation(mut3);
+
+    // Force a send
+    bw.flush();
+
+    // Now let's look at the rows
+    Scanner rowThree = getRow(scanOpts, new Text("row3"));
+    Scanner rowTwo = getRow(scanOpts, new Text("row2"));
+    Scanner rowOne = getRow(scanOpts, new Text("row1"));
+
+    // And print them
+    log.info("This is everything");
+    printRow(rowOne);
+    printRow(rowTwo);
+    printRow(rowThree);
+    System.out.flush();
+
+    // Now let's delete rowTwo with the iterator
+    rowTwo = getRow(scanOpts, new Text("row2"));
+    deleteRow(rowTwo);
+
+    // Now let's look at the rows again
+    rowThree = getRow(scanOpts, new Text("row3"));
+    rowTwo = getRow(scanOpts, new Text("row2"));
+    rowOne = getRow(scanOpts, new Text("row1"));
+
+    // And print them
+    log.info("This is row1 and row3");
+    printRow(rowOne);
+    printRow(rowTwo);
+    printRow(rowThree);
+    System.out.flush();
+
+    // Should only see the two remaining rows
+    // Now let's delete rowOne using the overload that builds its own Scanner
+
+    deleteRow(scanOpts, row1);
+
+    // Now let's look at the rows one last time
+    rowThree = getRow(scanOpts, new Text("row3"));
+    rowTwo = getRow(scanOpts, new Text("row2"));
+    rowOne = getRow(scanOpts, new Text("row1"));
+
+    // And print them
+    log.info("This is just row3");
+    printRow(rowOne);
+    printRow(rowTwo);
+    printRow(rowThree);
+    System.out.flush();
+
+    // Should only see rowThree
+
+    // Always close your batchwriter
+
+    bw.close();
+
+    // and let's clean up our mess
+    connector.tableOperations().delete(tableName);
+
+    // fin~
+
+  }
+
+  /**
+   * Deletes a row given a Text object
+   */
+  private static void deleteRow(ScannerOpts scanOpts, Text row) throws AccumuloException, AccumuloSecurityException, TableNotFoundException {
+    deleteRow(getRow(scanOpts, row));
+  }
+
+  /**
+   * Deletes a row, given a Scanner of JUST that row
+   */
+  private static void deleteRow(Scanner scanner) throws MutationsRejectedException {
+    Mutation deleter = null;
+    // iterate through the keys
+    for (Entry<Key,Value> entry : scanner) {
+      // create a mutation for the row
+      if (deleter == null)
+        deleter = new Mutation(entry.getKey().getRow());
+      // putDelete adds the key with the delete flag set to true
+      deleter.putDelete(entry.getKey().getColumnFamily(), entry.getKey().getColumnQualifier());
+    }
+    // guard against an empty row; passing null to addMutation would throw
+    if (deleter != null) {
+      bw.addMutation(deleter);
+      bw.flush();
+    }
+  }
+
+  /**
+   * A generic print method for the entries of a Scanner. Despite the name, it is not limited to a single row.
+   */
+  private static void printRow(Scanner scanner) {
+    // iterates through and prints
+    for (Entry<Key,Value> entry : scanner)
+      log.info("Key: " + entry.getKey().toString() + " Value: " + entry.getValue().toString());
+  }
+
+  /**
+   * Gets a scanner over one row
+   */
+  private static Scanner getRow(ScannerOpts scanOpts, Text row) throws AccumuloException, AccumuloSecurityException, TableNotFoundException {
+    // Create a scanner
+    Scanner scanner = connector.createScanner(tableName, Authorizations.EMPTY);
+    scanner.setBatchSize(scanOpts.scanBatchSize);
+    // A Range built from a single row covers exactly that row:
+    // from its first key up to, but not including, the row that immediately follows
+    scanner.setRange(new Range(row));
+    return scanner;
+  }
+
+}
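
One plausible way to run this example, assuming the example jar is on the Accumulo classpath and using the standard ClientOpts connection flags (all values below are placeholders):

    accumulo org.apache.accumulo.examples.client.RowOperations -i instance -z localhost:2181 -u root -p secret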

http://git-wip-us.apache.org/repos/asf/accumulo-examples/blob/d96c6d96/src/main/java/org/apache/accumulo/examples/client/SequentialBatchWriter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/accumulo/examples/client/SequentialBatchWriter.java b/src/main/java/org/apache/accumulo/examples/client/SequentialBatchWriter.java
new file mode 100644
index 0000000..56eaa84
--- /dev/null
+++ b/src/main/java/org/apache/accumulo/examples/client/SequentialBatchWriter.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.client;
+
+import org.apache.accumulo.core.cli.BatchWriterOpts;
+import org.apache.accumulo.core.cli.ClientOnRequiredTable;
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.MutationsRejectedException;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.security.ColumnVisibility;
+
+import com.beust.jcommander.Parameter;
+
+/**
+ * Simple example for writing random data in sequential order to Accumulo.
+ */
+public class SequentialBatchWriter {
+
+  static class Opts extends ClientOnRequiredTable {
+    @Parameter(names = "--start")
+    long start = 0;
+    @Parameter(names = "--num", required = true)
+    long num = 0;
+    @Parameter(names = "--size", required = true, description = "size of the value to write")
+    int valueSize = 0;
+    @Parameter(names = "--vis", converter = VisibilityConverter.class)
+    ColumnVisibility vis = new ColumnVisibility();
+  }
+
+  /**
+   * Writes a specified number of entries to Accumulo using a {@link BatchWriter}. The rows of the entries will be sequential starting at a specified number.
+   * The column family will be "foo" and the column qualifier will be "1". The values will be random byte arrays of a specified size.
+   */
+  public static void main(String[] args) throws AccumuloException, AccumuloSecurityException, TableNotFoundException, MutationsRejectedException {
+    Opts opts = new Opts();
+    BatchWriterOpts bwOpts = new BatchWriterOpts();
+    opts.parseArgs(SequentialBatchWriter.class.getName(), args, bwOpts);
+    Connector connector = opts.getConnector();
+    BatchWriter bw = connector.createBatchWriter(opts.getTableName(), bwOpts.getBatchWriterConfig());
+
+    long end = opts.start + opts.num;
+
+    for (long i = opts.start; i < end; i++) {
+      Mutation m = RandomBatchWriter.createMutation(i, opts.valueSize, opts.vis);
+      bw.addMutation(m);
+    }
+
+    bw.close();
+  }
+}
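
A plausible invocation, assuming the standard ClientOnRequiredTable flags plus the options defined above (placeholder values); this would write 1000 sequential rows with 50-byte random values:

    accumulo org.apache.accumulo.examples.client.SequentialBatchWriter -i instance -z localhost:2181 -u root -p secret -t seqTable --start 0 --num 1000 --size 50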

http://git-wip-us.apache.org/repos/asf/accumulo-examples/blob/d96c6d96/src/main/java/org/apache/accumulo/examples/client/TraceDumpExample.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/accumulo/examples/client/TraceDumpExample.java b/src/main/java/org/apache/accumulo/examples/client/TraceDumpExample.java
new file mode 100644
index 0000000..7261637
--- /dev/null
+++ b/src/main/java/org/apache/accumulo/examples/client/TraceDumpExample.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.client;
+
+import org.apache.accumulo.core.cli.ClientOnDefaultTable;
+import org.apache.accumulo.core.cli.ScannerOpts;
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.security.TablePermission;
+import org.apache.accumulo.tracer.TraceDump;
+import org.apache.accumulo.tracer.TraceDump.Printer;
+import org.apache.hadoop.io.Text;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.beust.jcommander.Parameter;
+
+/**
+ * Example of using the TraceDump class to print a formatted view of a Trace
+ *
+ */
+public class TraceDumpExample {
+  private static final Logger log = LoggerFactory.getLogger(TraceDumpExample.class);
+
+  static class Opts extends ClientOnDefaultTable {
+    public Opts() {
+      super("trace");
+    }
+
+    @Parameter(names = {"--traceid"}, description = "The hex string id of a given trace, for example 16cfbbd7beec4ae3")
+    public String traceId = "";
+  }
+
+  public void dump(Opts opts) throws TableNotFoundException, AccumuloException, AccumuloSecurityException {
+
+    if (opts.traceId.isEmpty()) {
+      throw new IllegalArgumentException("--traceid option is required");
+    }
+
+    final Connector conn = opts.getConnector();
+    final String principal = opts.getPrincipal();
+    final String table = opts.getTableName();
+    if (!conn.securityOperations().hasTablePermission(principal, table, TablePermission.READ)) {
+      conn.securityOperations().grantTablePermission(principal, table, TablePermission.READ);
+      try {
+        Thread.sleep(1000);
+      } catch (InterruptedException e) {
+        Thread.currentThread().interrupt();
+        throw new RuntimeException(e);
+      }
+      while (!conn.securityOperations().hasTablePermission(principal, table, TablePermission.READ)) {
+        log.info("{} didn't propagate read permission on {}", principal, table);
+        try {
+          Thread.sleep(1000);
+        } catch (InterruptedException e) {
+          Thread.currentThread().interrupt();
+          throw new RuntimeException(e);
+        }
+      }
+    }
+    Scanner scanner = conn.createScanner(table, opts.auths);
+    scanner.setRange(new Range(new Text(opts.traceId)));
+    TraceDump.printTrace(scanner, new Printer() {
+      @Override
+      public void print(String line) {
+        System.out.println(line);
+      }
+    });
+  }
+
+  public static void main(String[] args) throws TableNotFoundException, AccumuloException, AccumuloSecurityException {
+    TraceDumpExample traceDumpExample = new TraceDumpExample();
+    Opts opts = new Opts();
+    ScannerOpts scannerOpts = new ScannerOpts();
+    opts.parseArgs(TraceDumpExample.class.getName(), args, scannerOpts);
+
+    traceDumpExample.dump(opts);
+  }
+
+}
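
A plausible invocation, assuming the standard connection flags (placeholder values) and a trace id obtained from a traced client such as the TracingExample below:

    accumulo org.apache.accumulo.examples.client.TraceDumpExample -i instance -z localhost:2181 -u root -p secret --traceid 16cfbbd7beec4ae3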

http://git-wip-us.apache.org/repos/asf/accumulo-examples/blob/d96c6d96/src/main/java/org/apache/accumulo/examples/client/TracingExample.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/accumulo/examples/client/TracingExample.java b/src/main/java/org/apache/accumulo/examples/client/TracingExample.java
new file mode 100644
index 0000000..9b05a14
--- /dev/null
+++ b/src/main/java/org/apache/accumulo/examples/client/TracingExample.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.examples.client;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.cli.ClientOnDefaultTable;
+import org.apache.accumulo.core.cli.ScannerOpts;
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.BatchWriterConfig;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.TableExistsException;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.trace.DistributedTrace;
+import org.apache.htrace.Sampler;
+import org.apache.htrace.Trace;
+import org.apache.htrace.TraceScope;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.beust.jcommander.Parameter;
+
+/**
+ * A simple example showing how to use the distributed tracing API in client code
+ *
+ */
+public class TracingExample {
+  private static final Logger log = LoggerFactory.getLogger(TracingExample.class);
+  private static final String DEFAULT_TABLE_NAME = "test";
+
+  static class Opts extends ClientOnDefaultTable {
+    @Parameter(names = {"-C", "--createtable"}, description = "create table before doing anything")
+    boolean createtable = false;
+    @Parameter(names = {"-D", "--deletetable"}, description = "delete table when finished")
+    boolean deletetable = false;
+    @Parameter(names = {"-c", "--create"}, description = "create entries before any deletes")
+    boolean createEntries = false;
+    @Parameter(names = {"-r", "--read"}, description = "read entries after any creates/deletes")
+    boolean readEntries = false;
+
+    public Opts() {
+      super(DEFAULT_TABLE_NAME);
+      auths = new Authorizations();
+    }
+  }
+
+  public void enableTracing(Opts opts) throws Exception {
+    DistributedTrace.enable("myHost", "myApp");
+  }
+
+  public void execute(Opts opts) throws TableNotFoundException, InterruptedException, AccumuloException, AccumuloSecurityException, TableExistsException {
+
+    if (opts.createtable) {
+      opts.getConnector().tableOperations().create(opts.getTableName());
+    }
+
+    if (opts.createEntries) {
+      createEntries(opts);
+    }
+
+    if (opts.readEntries) {
+      readEntries(opts);
+    }
+
+    if (opts.deletetable) {
+      opts.getConnector().tableOperations().delete(opts.getTableName());
+    }
+  }
+
+  private void createEntries(Opts opts) throws TableNotFoundException, AccumuloException, AccumuloSecurityException {
+
+    // Trace the write operation. Note, unless you flush the BatchWriter, you will not capture
+    // the write operation, as it occurs asynchronously. You can optionally create additional Spans
+    // within a given Trace, as seen below around the flush.
+    TraceScope scope = Trace.startSpan("Client Write", Sampler.ALWAYS);
+
+    System.out.println("TraceID: " + Long.toHexString(scope.getSpan().getTraceId()));
+    BatchWriter batchWriter = opts.getConnector().createBatchWriter(opts.getTableName(), new BatchWriterConfig());
+
+    Mutation m = new Mutation("row");
+    m.put("cf", "cq", "value");
+
+    batchWriter.addMutation(m);
+    // You can add timeline annotations to Spans which will be able to be viewed in the Monitor
+    scope.getSpan().addTimelineAnnotation("Initiating Flush");
+    batchWriter.flush();
+
+    batchWriter.close();
+    scope.close();
+  }
+
+  private void readEntries(Opts opts) throws TableNotFoundException, AccumuloException, AccumuloSecurityException {
+
+    Scanner scanner = opts.getConnector().createScanner(opts.getTableName(), opts.auths);
+
+    // Trace the read operation.
+    TraceScope readScope = Trace.startSpan("Client Read", Sampler.ALWAYS);
+    System.out.println("TraceID: " + Long.toHexString(readScope.getSpan().getTraceId()));
+
+    int numberOfEntriesRead = 0;
+    for (Entry<Key,Value> entry : scanner) {
+      System.out.println(entry.getKey().toString() + " -> " + entry.getValue().toString());
+      ++numberOfEntriesRead;
+    }
+    // You can add additional metadata (key, values) to Spans which will be able to be viewed in the Monitor
+    readScope.getSpan().addKVAnnotation("Number of Entries Read".getBytes(UTF_8), String.valueOf(numberOfEntriesRead).getBytes(UTF_8));
+
+    readScope.close();
+  }
+
+  public static void main(String[] args) throws Exception {
+    try {
+      TracingExample tracingExample = new TracingExample();
+      Opts opts = new Opts();
+      ScannerOpts scannerOpts = new ScannerOpts();
+      opts.parseArgs(TracingExample.class.getName(), args, scannerOpts);
+
+      tracingExample.enableTracing(opts);
+      tracingExample.execute(opts);
+    } catch (Exception e) {
+      log.error("Caught exception running TraceExample", e);
+      System.exit(1);
+    }
+  }
+
+}
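
Since TraceScope in HTrace 3.x implements Closeable, the span handling in createEntries can also be written with try-with-resources, which guarantees the span is closed even if the write fails. A minimal sketch under that assumption (createEntriesSafely is a hypothetical variant, not part of this commit):

    private void createEntriesSafely(Opts opts) throws TableNotFoundException, AccumuloException, AccumuloSecurityException {
      // the span closes automatically when the try block exits, even on exception
      try (TraceScope scope = Trace.startSpan("Client Write", Sampler.ALWAYS)) {
        BatchWriter batchWriter = opts.getConnector().createBatchWriter(opts.getTableName(), new BatchWriterConfig());
        Mutation m = new Mutation("row");
        m.put("cf", "cq", "value");
        batchWriter.addMutation(m);
        batchWriter.close();
      }
    }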

http://git-wip-us.apache.org/repos/asf/accumulo-examples/blob/d96c6d96/src/main/java/org/apache/accumulo/examples/combiner/StatsCombiner.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/accumulo/examples/combiner/StatsCombiner.java b/src/main/java/org/apache/accumulo/examples/combiner/StatsCombiner.java
new file mode 100644
index 0000000..cfa9b3a
--- /dev/null
+++ b/src/main/java/org/apache/accumulo/examples/combiner/StatsCombiner.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.combiner;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Map;
+
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.Combiner;
+import org.apache.accumulo.core.iterators.IteratorEnvironment;
+import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
+
+/**
+ * This combiner calculates the max, min, sum, and count of long integers represented as strings in values. It stores the result in a comma-separated value of
+ * the form min,max,sum,count. If such a value is encountered while combining, its information is incorporated into the running calculations of min, max, sum,
+ * and count. See {@link Combiner} for more information on which values are combined together.
+ */
+public class StatsCombiner extends Combiner {
+
+  public static final String RADIX_OPTION = "radix";
+
+  private int radix = 10;
+
+  @Override
+  public Value reduce(Key key, Iterator<Value> iter) {
+
+    long min = Long.MAX_VALUE;
+    long max = Long.MIN_VALUE;
+    long sum = 0;
+    long count = 0;
+
+    while (iter.hasNext()) {
+      String stats[] = iter.next().toString().split(",");
+
+      if (stats.length == 1) {
+        long val = Long.parseLong(stats[0], radix);
+        min = Math.min(val, min);
+        max = Math.max(val, max);
+        sum += val;
+        count += 1;
+      } else {
+        min = Math.min(Long.parseLong(stats[0], radix), min);
+        max = Math.max(Long.parseLong(stats[1], radix), max);
+        sum += Long.parseLong(stats[2], radix);
+        count += Long.parseLong(stats[3], radix);
+      }
+    }
+
+    String ret = Long.toString(min, radix) + "," + Long.toString(max, radix) + "," + Long.toString(sum, radix) + "," + Long.toString(count, radix);
+    return new Value(ret.getBytes());
+  }
+
+  @Override
+  public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
+    super.init(source, options, env);
+
+    if (options.containsKey(RADIX_OPTION))
+      radix = Integer.parseInt(options.get(RADIX_OPTION));
+    else
+      radix = 10;
+  }
+
+  @Override
+  public IteratorOptions describeOptions() {
+    IteratorOptions io = super.describeOptions();
+    io.setName("statsCombiner");
+    io.setDescription("Combiner that keeps track of min, max, sum, and count");
+    io.addNamedOption(RADIX_OPTION, "radix/base of the numbers");
+    return io;
+  }
+
+  @Override
+  public boolean validateOptions(Map<String,String> options) {
+    if (!super.validateOptions(options))
+      return false;
+
+    if (options.containsKey(RADIX_OPTION) && !options.get(RADIX_OPTION).matches("\\d+"))
+      throw new IllegalArgumentException("invalid option " + RADIX_OPTION + ":" + options.get(RADIX_OPTION));
+
+    return true;
+  }
+
+  /**
+   * A convenience method for setting the expected base/radix of the numbers
+   *
+   * @param iterConfig
+   *          Iterator settings to configure
+   * @param base
+   *          The expected base/radix of the numbers.
+   */
+  public static void setRadix(IteratorSetting iterConfig, int base) {
+    iterConfig.addOption(RADIX_OPTION, base + "");
+  }
+}
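
A hedged sketch of configuring this combiner on a table (the connector, table name, and column family below are placeholders; Combiner.setColumns restricts the combiner to the listed columns):

    IteratorSetting setting = new IteratorSetting(10, "stats", StatsCombiner.class);
    StatsCombiner.setRadix(setting, 10);
    Combiner.setColumns(setting, Collections.singletonList(new IteratorSetting.Column("data")));
    connector.tableOperations().attachIterator("statsTable", setting);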

http://git-wip-us.apache.org/repos/asf/accumulo-examples/blob/d96c6d96/src/main/java/org/apache/accumulo/examples/constraints/AlphaNumKeyConstraint.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/accumulo/examples/constraints/AlphaNumKeyConstraint.java b/src/main/java/org/apache/accumulo/examples/constraints/AlphaNumKeyConstraint.java
new file mode 100644
index 0000000..7077722
--- /dev/null
+++ b/src/main/java/org/apache/accumulo/examples/constraints/AlphaNumKeyConstraint.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.constraints;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.accumulo.core.constraints.Constraint;
+import org.apache.accumulo.core.data.ColumnUpdate;
+import org.apache.accumulo.core.data.Mutation;
+
+/**
+ * This class is an Accumulo constraint that ensures all fields of a key are alphanumeric.
+ */
+public class AlphaNumKeyConstraint implements Constraint {
+
+  static final short NON_ALPHA_NUM_ROW = 1;
+  static final short NON_ALPHA_NUM_COLF = 2;
+  static final short NON_ALPHA_NUM_COLQ = 3;
+
+  static final String ROW_VIOLATION_MESSAGE = "Row was not alphanumeric";
+  static final String COLF_VIOLATION_MESSAGE = "Column family was not alphanumeric";
+  static final String COLQ_VIOLATION_MESSAGE = "Column qualifier was not alphanumeric";
+
+  private boolean isAlphaNum(byte bytes[]) {
+    for (byte b : bytes) {
+      boolean ok = ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || (b >= '0' && b <= '9'));
+      if (!ok)
+        return false;
+    }
+
+    return true;
+  }
+
+  private Set<Short> addViolation(Set<Short> violations, short violation) {
+    if (violations == null)
+      violations = new LinkedHashSet<>();
+    // Set.add is a no-op when the violation is already present
+    violations.add(violation);
+    return violations;
+  }
+
+  @Override
+  public List<Short> check(Environment env, Mutation mutation) {
+    Set<Short> violations = null;
+
+    if (!isAlphaNum(mutation.getRow()))
+      violations = addViolation(violations, NON_ALPHA_NUM_ROW);
+
+    Collection<ColumnUpdate> updates = mutation.getUpdates();
+    for (ColumnUpdate columnUpdate : updates) {
+      if (!isAlphaNum(columnUpdate.getColumnFamily()))
+        violations = addViolation(violations, NON_ALPHA_NUM_COLF);
+
+      if (!isAlphaNum(columnUpdate.getColumnQualifier()))
+        violations = addViolation(violations, NON_ALPHA_NUM_COLQ);
+    }
+
+    return null == violations ? null : new ArrayList<>(violations);
+  }
+
+  @Override
+  public String getViolationDescription(short violationCode) {
+
+    switch (violationCode) {
+      case NON_ALPHA_NUM_ROW:
+        return ROW_VIOLATION_MESSAGE;
+      case NON_ALPHA_NUM_COLF:
+        return COLF_VIOLATION_MESSAGE;
+      case NON_ALPHA_NUM_COLQ:
+        return COLQ_VIOLATION_MESSAGE;
+    }
+
+    return null;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/accumulo-examples/blob/d96c6d96/src/main/java/org/apache/accumulo/examples/constraints/MaxMutationSize.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/accumulo/examples/constraints/MaxMutationSize.java b/src/main/java/org/apache/accumulo/examples/constraints/MaxMutationSize.java
new file mode 100644
index 0000000..c7094c6
--- /dev/null
+++ b/src/main/java/org/apache/accumulo/examples/constraints/MaxMutationSize.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.constraints;
+
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.accumulo.core.constraints.Constraint;
+import org.apache.accumulo.core.data.Mutation;
+
+/**
+ * Ensure that mutations are a reasonable size: we must be able to fit several in memory at a time.
+ */
+public class MaxMutationSize implements Constraint {
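+  // MAX_SIZE is 1/256th of the JVM's maximum heap, so several mutations can be held in memory at once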
+  static final long MAX_SIZE = Runtime.getRuntime().maxMemory() >> 8;
+  static final List<Short> empty = Collections.emptyList();
+  static final List<Short> violations = Collections.singletonList(Short.valueOf((short) 0));
+
+  @Override
+  public String getViolationDescription(short violationCode) {
+    return String.format("mutation exceeded maximum size of %d", MAX_SIZE);
+  }
+
+  @Override
+  public List<Short> check(Environment env, Mutation mutation) {
+    if (mutation.estimatedMemoryUsed() < MAX_SIZE)
+      return empty;
+    return violations;
+  }
+}

http://git-wip-us.apache.org/repos/asf/accumulo-examples/blob/d96c6d96/src/main/java/org/apache/accumulo/examples/constraints/NumericValueConstraint.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/accumulo/examples/constraints/NumericValueConstraint.java b/src/main/java/org/apache/accumulo/examples/constraints/NumericValueConstraint.java
new file mode 100644
index 0000000..0a8842d
--- /dev/null
+++ b/src/main/java/org/apache/accumulo/examples/constraints/NumericValueConstraint.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.constraints;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.accumulo.core.constraints.Constraint;
+import org.apache.accumulo.core.data.ColumnUpdate;
+import org.apache.accumulo.core.data.Mutation;
+
+/**
+ * This class is an Accumulo constraint that ensures values are numeric strings.
+ */
+public class NumericValueConstraint implements Constraint {
+
+  static final short NON_NUMERIC_VALUE = 1;
+  static final String VIOLATION_MESSAGE = "Value is not numeric";
+
+  private static final List<Short> VIOLATION_LIST = Collections.unmodifiableList(Arrays.asList(NON_NUMERIC_VALUE));
+
+  private boolean isNumeric(byte bytes[]) {
+    for (byte b : bytes) {
+      boolean ok = (b >= '0' && b <= '9');
+      if (!ok)
+        return false;
+    }
+
+    return true;
+  }
+
+  @Override
+  public List<Short> check(Environment env, Mutation mutation) {
+    Collection<ColumnUpdate> updates = mutation.getUpdates();
+
+    for (ColumnUpdate columnUpdate : updates) {
+      if (!isNumeric(columnUpdate.getValue()))
+        return VIOLATION_LIST;
+    }
+
+    return null;
+  }
+
+  @Override
+  public String getViolationDescription(short violationCode) {
+
+    switch (violationCode) {
+      case NON_NUMERIC_VALUE:
+        return VIOLATION_MESSAGE;
+    }
+
+    return null;
+  }
+
+}
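
A hedged sketch of enabling these constraints on a table (the connector and table name are placeholders); addConstraint returns the constraint number assigned:

    connector.tableOperations().addConstraint("exampleTable", NumericValueConstraint.class.getName());
    connector.tableOperations().addConstraint("exampleTable", AlphaNumKeyConstraint.class.getName());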

http://git-wip-us.apache.org/repos/asf/accumulo-examples/blob/d96c6d96/src/main/java/org/apache/accumulo/examples/dirlist/FileCount.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/accumulo/examples/dirlist/FileCount.java b/src/main/java/org/apache/accumulo/examples/dirlist/FileCount.java
new file mode 100644
index 0000000..0b9e7a3
--- /dev/null
+++ b/src/main/java/org/apache/accumulo/examples/dirlist/FileCount.java
@@ -0,0 +1,290 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.dirlist;
+
+import java.util.Iterator;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.cli.BatchWriterOpts;
+import org.apache.accumulo.core.cli.ClientOnRequiredTable;
+import org.apache.accumulo.core.cli.ScannerOpts;
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.ColumnVisibility;
+import org.apache.hadoop.io.Text;
+
+import com.beust.jcommander.Parameter;
+
+/**
+ * Computes recursive counts over file system information and stores them back into the same Accumulo table.
+ */
+public class FileCount {
+
+  private int entriesScanned;
+  private int inserts;
+
+  private Opts opts;
+  private ScannerOpts scanOpts;
+  private BatchWriterOpts bwOpts;
+
+  private static class CountValue {
+    int dirCount = 0;
+    int fileCount = 0;
+    int recursiveDirCount = 0;
+    int recursiveFileCount = 0;
+
+    void set(Value val) {
+      String sa[] = val.toString().split(",");
+      dirCount = Integer.parseInt(sa[0]);
+      fileCount = Integer.parseInt(sa[1]);
+      recursiveDirCount = Integer.parseInt(sa[2]);
+      recursiveFileCount = Integer.parseInt(sa[3]);
+    }
+
+    Value toValue() {
+      return new Value((dirCount + "," + fileCount + "," + recursiveDirCount + "," + recursiveFileCount).getBytes());
+    }
+
+    void incrementFiles() {
+      fileCount++;
+      recursiveFileCount++;
+    }
+
+    void incrementDirs() {
+      dirCount++;
+      recursiveDirCount++;
+    }
+
+    public void clear() {
+      dirCount = 0;
+      fileCount = 0;
+      recursiveDirCount = 0;
+      recursiveFileCount = 0;
+    }
+
+    public void incrementRecursive(CountValue other) {
+      recursiveDirCount += other.recursiveDirCount;
+      recursiveFileCount += other.recursiveFileCount;
+    }
+  }
+
+  private int findMaxDepth(Scanner scanner, int min, int max) {
+    int mid = min + (max - min) / 2;
+    return findMaxDepth(scanner, min, mid, max);
+  }
+
+  private int findMaxDepth(Scanner scanner, int min, int mid, int max) {
+    // check to see if the mid point exists
+    if (max < min)
+      return -1;
+
+    scanner.setRange(new Range(String.format("%03d", mid), true, String.format("%03d", mid + 1), false));
+
+    if (scanner.iterator().hasNext()) {
+      // this depth exists, check to see if a larger depth exists
+      int ret = findMaxDepth(scanner, mid + 1, max);
+      if (ret == -1)
+        return mid; // this must be the max
+      else
+        return ret;
+    } else {
+      // this depth does not exist, look lower
+      return findMaxDepth(scanner, min, mid - 1);
+    }
+
+  }
+
+  private int findMaxDepth(Scanner scanner) {
+    // do binary search to find max depth
+    int origBatchSize = scanner.getBatchSize();
+    scanner.setBatchSize(100);
+    int depth = findMaxDepth(scanner, 0, 64, 999);
+    scanner.setBatchSize(origBatchSize);
+    return depth;
+  }
+
+  // find the count column and consume a row
+  private Entry<Key,Value> findCount(Entry<Key,Value> entry, Iterator<Entry<Key,Value>> iterator, CountValue cv) {
+
+    Key key = entry.getKey();
+    Text currentRow = key.getRow();
+
+    if (key.compareColumnQualifier(QueryUtil.COUNTS_COLQ) == 0)
+      cv.set(entry.getValue());
+
+    while (iterator.hasNext()) {
+      entry = iterator.next();
+      entriesScanned++;
+      key = entry.getKey();
+
+      if (key.compareRow(currentRow) != 0)
+        return entry;
+
+      if (key.compareColumnFamily(QueryUtil.DIR_COLF) == 0 && key.compareColumnQualifier(QueryUtil.COUNTS_COLQ) == 0) {
+        cv.set(entry.getValue());
+      }
+
+    }
+
+    return null;
+  }
+
+  private Entry<Key,Value> consumeRow(Entry<Key,Value> entry, Iterator<Entry<Key,Value>> iterator) {
+    Key key = entry.getKey();
+    Text currentRow = key.getRow();
+
+    while (iterator.hasNext()) {
+      entry = iterator.next();
+      entriesScanned++;
+      key = entry.getKey();
+
+      if (key.compareRow(currentRow) != 0)
+        return entry;
+    }
+
+    return null;
+  }
+
+  private String extractDir(Key key) {
+    String row = key.getRowData().toString();
+    return row.substring(3, row.lastIndexOf('/'));
+  }
+
+  private Mutation createMutation(int depth, String dir, CountValue countVal) {
+    Mutation m = new Mutation(String.format("%03d%s", depth, dir));
+    m.put(QueryUtil.DIR_COLF, QueryUtil.COUNTS_COLQ, opts.visibility, countVal.toValue());
+    return m;
+  }
+
+  private void calculateCounts(Scanner scanner, int depth, BatchWriter batchWriter) throws Exception {
+
+    scanner.setRange(new Range(String.format("%03d", depth), true, String.format("%03d", depth + 1), false));
+
+    CountValue countVal = new CountValue();
+
+    Iterator<Entry<Key,Value>> iterator = scanner.iterator();
+
+    String currentDir = null;
+
+    Entry<Key,Value> entry = null;
+    if (iterator.hasNext()) {
+      entry = iterator.next();
+      entriesScanned++;
+    }
+
+    while (entry != null) {
+      Key key = entry.getKey();
+
+      String dir = extractDir(key);
+
+      if (currentDir == null) {
+        currentDir = dir;
+      } else if (!currentDir.equals(dir)) {
+        batchWriter.addMutation(createMutation(depth - 1, currentDir, countVal));
+        inserts++;
+        currentDir = dir;
+        countVal.clear();
+      }
+
+      // process a whole row
+      if (key.compareColumnFamily(QueryUtil.DIR_COLF) == 0) {
+        CountValue tmpCount = new CountValue();
+        entry = findCount(entry, iterator, tmpCount);
+
+        if (tmpCount.dirCount == 0 && tmpCount.fileCount == 0) {
+          // in this case the higher depth will not insert anything if the
+          // dir has no children, so insert something here
+          Mutation m = new Mutation(key.getRow());
+          m.put(QueryUtil.DIR_COLF, QueryUtil.COUNTS_COLQ, opts.visibility, tmpCount.toValue());
+          batchWriter.addMutation(m);
+          inserts++;
+        }
+
+        countVal.incrementRecursive(tmpCount);
+        countVal.incrementDirs();
+      } else {
+        entry = consumeRow(entry, iterator);
+        countVal.incrementFiles();
+      }
+    }
+
+    if (currentDir != null) {
+      batchWriter.addMutation(createMutation(depth - 1, currentDir, countVal));
+      inserts++;
+    }
+  }
+
+  public FileCount(Opts opts, ScannerOpts scanOpts, BatchWriterOpts bwOpts) throws Exception {
+    this.opts = opts;
+    this.scanOpts = scanOpts;
+    this.bwOpts = bwOpts;
+  }
+
+  public void run() throws Exception {
+
+    entriesScanned = 0;
+    inserts = 0;
+
+    Connector conn = opts.getConnector();
+    Scanner scanner = conn.createScanner(opts.getTableName(), opts.auths);
+    scanner.setBatchSize(scanOpts.scanBatchSize);
+    BatchWriter bw = conn.createBatchWriter(opts.getTableName(), bwOpts.getBatchWriterConfig());
+
+    long t1 = System.currentTimeMillis();
+
+    int depth = findMaxDepth(scanner);
+
+    long t2 = System.currentTimeMillis();
+
+    for (int d = depth; d > 0; d--) {
+      calculateCounts(scanner, d, bw);
+      // must flush so next depth can read what prev depth wrote
+      bw.flush();
+    }
+
+    bw.close();
+
+    long t3 = System.currentTimeMillis();
+
+    System.out.printf("Max depth              : %d%n", depth);
+    System.out.printf("Time to find max depth : %,d ms%n", (t2 - t1));
+    System.out.printf("Time to compute counts : %,d ms%n", (t3 - t2));
+    System.out.printf("Entries scanned        : %,d %n", entriesScanned);
+    System.out.printf("Counts inserted        : %,d %n", inserts);
+  }
+
+  public static class Opts extends ClientOnRequiredTable {
+    @Parameter(names = "--vis", description = "use a given visibility for the new counts", converter = VisibilityConverter.class)
+    ColumnVisibility visibility = new ColumnVisibility();
+  }
+
+  public static void main(String[] args) throws Exception {
+    Opts opts = new Opts();
+    ScannerOpts scanOpts = new ScannerOpts();
+    BatchWriterOpts bwOpts = new BatchWriterOpts();
+    String programName = FileCount.class.getName();
+    opts.parseArgs(programName, args, scanOpts, bwOpts);
+
+    FileCount fileCount = new FileCount(opts, scanOpts, bwOpts);
+    fileCount.run();
+  }
+}
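
A plausible invocation, assuming the directory table was populated by the Ingest example below (placeholder connection values):

    accumulo org.apache.accumulo.examples.dirlist.FileCount -i instance -z localhost:2181 -u root -p secret -t dirTable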

http://git-wip-us.apache.org/repos/asf/accumulo-examples/blob/d96c6d96/src/main/java/org/apache/accumulo/examples/dirlist/Ingest.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/accumulo/examples/dirlist/Ingest.java b/src/main/java/org/apache/accumulo/examples/dirlist/Ingest.java
new file mode 100644
index 0000000..b55ba71
--- /dev/null
+++ b/src/main/java/org/apache/accumulo/examples/dirlist/Ingest.java
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.dirlist;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.accumulo.core.cli.BatchWriterOpts;
+import org.apache.accumulo.core.cli.ClientOpts;
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.LongCombiner;
+import org.apache.accumulo.core.iterators.TypedValueCombiner.Encoder;
+import org.apache.accumulo.core.security.ColumnVisibility;
+import org.apache.accumulo.examples.filedata.ChunkCombiner;
+import org.apache.accumulo.examples.filedata.FileDataIngest;
+import org.apache.hadoop.io.Text;
+
+import com.beust.jcommander.Parameter;
+
+/**
+ * Recursively lists the files and directories under a given path, ingests their names and file info into one Accumulo table, indexes the file names in a
+ * separate table, and ingests the file data into a third table.
+ */
+public class Ingest {
+  static final Value nullValue = new Value(new byte[0]);
+  public static final String LENGTH_CQ = "length";
+  public static final String HIDDEN_CQ = "hidden";
+  public static final String EXEC_CQ = "exec";
+  public static final String LASTMOD_CQ = "lastmod";
+  public static final String HASH_CQ = "md5";
+  public static final Encoder<Long> encoder = LongCombiner.FIXED_LEN_ENCODER;
+
+  public static Mutation buildMutation(ColumnVisibility cv, String path, boolean isDir, boolean isHidden, boolean canExec, long length, long lastmod,
+      String hash) {
+    if (path.equals("/"))
+      path = "";
+    Mutation m = new Mutation(QueryUtil.getRow(path));
+    Text colf = null;
+    if (isDir)
+      colf = QueryUtil.DIR_COLF;
+    else
+      colf = new Text(encoder.encode(Long.MAX_VALUE - lastmod));
+    m.put(colf, new Text(LENGTH_CQ), cv, new Value(Long.toString(length).getBytes()));
+    m.put(colf, new Text(HIDDEN_CQ), cv, new Value(Boolean.toString(isHidden).getBytes()));
+    m.put(colf, new Text(EXEC_CQ), cv, new Value(Boolean.toString(canExec).getBytes()));
+    m.put(colf, new Text(LASTMOD_CQ), cv, new Value(Long.toString(lastmod).getBytes()));
+    if (hash != null && hash.length() > 0)
+      m.put(colf, new Text(HASH_CQ), cv, new Value(hash.getBytes()));
+    return m;
+  }
+
+  private static void ingest(File src, ColumnVisibility cv, BatchWriter dirBW, BatchWriter indexBW, FileDataIngest fdi, BatchWriter data) throws Exception {
+    // build main table entry
+    String path = null;
+    try {
+      path = src.getCanonicalPath();
+    } catch (IOException e) {
+      path = src.getAbsolutePath();
+    }
+    System.out.println(path);
+
+    String hash = null;
+    if (!src.isDirectory()) {
+      try {
+        hash = fdi.insertFileData(path, data);
+      } catch (Exception e) {
+        // if something goes wrong, just skip this one
+        return;
+      }
+    }
+
+    dirBW.addMutation(buildMutation(cv, path, src.isDirectory(), src.isHidden(), src.canExecute(), src.length(), src.lastModified(), hash));
+
+    // build index table entries
+    Text row = QueryUtil.getForwardIndex(path);
+    if (row != null) {
+      Text p = new Text(QueryUtil.getRow(path));
+      Mutation m = new Mutation(row);
+      m.put(QueryUtil.INDEX_COLF, p, cv, nullValue);
+      indexBW.addMutation(m);
+
+      row = QueryUtil.getReverseIndex(path);
+      m = new Mutation(row);
+      m.put(QueryUtil.INDEX_COLF, p, cv, nullValue);
+      indexBW.addMutation(m);
+    }
+  }
+
+  private static void recurse(File src, ColumnVisibility cv, BatchWriter dirBW, BatchWriter indexBW, FileDataIngest fdi, BatchWriter data) throws Exception {
+    // ingest this File
+    ingest(src, cv, dirBW, indexBW, fdi, data);
+    // recurse into subdirectories
+    if (src.isDirectory()) {
+      File[] files = src.listFiles();
+      if (files == null)
+        return;
+      for (File child : files) {
+        recurse(child, cv, dirBW, indexBW, fdi, data);
+      }
+    }
+  }
+
+  static class Opts extends ClientOpts {
+    @Parameter(names = "--dirTable", description = "a table to hold the directory information")
+    String nameTable = "dirTable";
+    @Parameter(names = "--indexTable", description = "an index over the ingested data")
+    String indexTable = "indexTable";
+    @Parameter(names = "--dataTable", description = "the file data, chunked into parts")
+    String dataTable = "dataTable";
+    @Parameter(names = "--vis", description = "the visibility to mark the data", converter = VisibilityConverter.class)
+    ColumnVisibility visibility = new ColumnVisibility();
+    @Parameter(names = "--chunkSize", description = "the size of chunks when breaking down files")
+    int chunkSize = 100000;
+    @Parameter(description = "<dir> { <dir> ... }")
+    List<String> directories = new ArrayList<>();
+  }
+
+  public static void main(String[] args) throws Exception {
+    Opts opts = new Opts();
+    BatchWriterOpts bwOpts = new BatchWriterOpts();
+    opts.parseArgs(Ingest.class.getName(), args, bwOpts);
+
+    Connector conn = opts.getConnector();
+    if (!conn.tableOperations().exists(opts.nameTable))
+      conn.tableOperations().create(opts.nameTable);
+    if (!conn.tableOperations().exists(opts.indexTable))
+      conn.tableOperations().create(opts.indexTable);
+    if (!conn.tableOperations().exists(opts.dataTable)) {
+      conn.tableOperations().create(opts.dataTable);
+      conn.tableOperations().attachIterator(opts.dataTable, new IteratorSetting(1, ChunkCombiner.class));
+    }
+
+    BatchWriter dirBW = conn.createBatchWriter(opts.nameTable, bwOpts.getBatchWriterConfig());
+    BatchWriter indexBW = conn.createBatchWriter(opts.indexTable, bwOpts.getBatchWriterConfig());
+    BatchWriter dataBW = conn.createBatchWriter(opts.dataTable, bwOpts.getBatchWriterConfig());
+    FileDataIngest fdi = new FileDataIngest(opts.chunkSize, opts.visibility);
+    for (String dir : opts.directories) {
+      recurse(new File(dir), opts.visibility, dirBW, indexBW, fdi, dataBW);
+
+      // fill in parent directory info
+      int slashIndex = -1;
+      while ((slashIndex = dir.lastIndexOf("/")) > 0) {
+        dir = dir.substring(0, slashIndex);
+        ingest(new File(dir), opts.visibility, dirBW, indexBW, fdi, dataBW);
+      }
+    }
+    ingest(new File("/"), opts.visibility, dirBW, indexBW, fdi, dataBW);
+
+    dirBW.close();
+    indexBW.close();
+    dataBW.close();
+  }
+}
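
A plausible invocation that ingests /local/user into the three tables named by the options above (placeholder connection values):

    accumulo org.apache.accumulo.examples.dirlist.Ingest -i instance -z localhost:2181 -u root -p secret --dirTable dirTable --indexTable indexTable --dataTable dataTable /local/user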

http://git-wip-us.apache.org/repos/asf/accumulo-examples/blob/d96c6d96/src/main/java/org/apache/accumulo/examples/dirlist/QueryUtil.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/accumulo/examples/dirlist/QueryUtil.java b/src/main/java/org/apache/accumulo/examples/dirlist/QueryUtil.java
new file mode 100644
index 0000000..d54d39a
--- /dev/null
+++ b/src/main/java/org/apache/accumulo/examples/dirlist/QueryUtil.java
@@ -0,0 +1,279 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.dirlist;
+
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.TreeMap;
+
+import org.apache.accumulo.core.cli.ClientOnRequiredTable;
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.user.RegExFilter;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.hadoop.io.Text;
+
+import com.beust.jcommander.Parameter;
+
+/**
+ * Provides utility methods for getting the info for a file, listing the contents of a directory, and performing single-wildcard searches on file or directory
+ * names.
+ */
+public class QueryUtil {
+  private Connector conn = null;
+  private String tableName;
+  private Authorizations auths;
+  public static final Text DIR_COLF = new Text("dir");
+  public static final Text FORWARD_PREFIX = new Text("f");
+  public static final Text REVERSE_PREFIX = new Text("r");
+  public static final Text INDEX_COLF = new Text("i");
+  public static final Text COUNTS_COLQ = new Text("counts");
+
+  public QueryUtil(Opts opts) throws AccumuloException, AccumuloSecurityException {
+    conn = opts.getConnector();
+    this.tableName = opts.getTableName();
+    this.auths = opts.auths;
+  }
+
+  /**
+   * Calculates the depth of a path, i.e. the number of forward slashes in the path name.
+   *
+   * @param path
+   *          the full path of a file or directory
+   * @return the depth of the path
+   */
+  public static int getDepth(String path) {
+    int numSlashes = 0;
+    int index = -1;
+    while ((index = path.indexOf("/", index + 1)) >= 0)
+      numSlashes++;
+    return numSlashes;
+  }
+
+  /**
+   * Given a path, construct an accumulo row prepended with the path's depth for the directory table.
+   *
+   * @param path
+   *          the full path of a file or directory
+   * @return the accumulo row associated with this path
+   */
+  public static Text getRow(String path) {
+    Text row = new Text(String.format("%03d", getDepth(path)));
+    row.append(path.getBytes(), 0, path.length());
+    return row;
+  }
+
+  /**
+   * Given a path, construct an accumulo row prepended with the {@link #FORWARD_PREFIX} for the index table.
+   *
+   * @param path
+   *          the full path of a file or directory
+   * @return the accumulo row associated with this path
+   */
+  public static Text getForwardIndex(String path) {
+    String part = path.substring(path.lastIndexOf("/") + 1);
+    if (part.length() == 0)
+      return null;
+    Text row = new Text(FORWARD_PREFIX);
+    row.append(part.getBytes(), 0, part.length());
+    return row;
+  }
+
+  /**
+   * Given a path, construct an accumulo row prepended with the {@link #REVERSE_PREFIX}, containing the last path component reversed, for the index table.
+   *
+   * @param path
+   *          the full path of a file or directory
+   * @return the accumulo row associated with this path
+   */
+  public static Text getReverseIndex(String path) {
+    String part = path.substring(path.lastIndexOf("/") + 1);
+    if (part.length() == 0)
+      return null;
+    byte[] rev = new byte[part.length()];
+    int i = part.length() - 1;
+    for (byte b : part.getBytes())
+      rev[i--] = b;
+    Text row = new Text(REVERSE_PREFIX);
+    row.append(rev, 0, rev.length);
+    return row;
+  }
+
+  /**
+   * Returns either the {@link #DIR_COLF} string or the long decoded from the colf, in either case with a trailing ':'.
+   *
+   * @param colf
+   *          the column family
+   */
+  public static String getType(Text colf) {
+    if (colf.equals(DIR_COLF))
+      return colf.toString() + ":";
+    return Long.toString(Ingest.encoder.decode(colf.getBytes())) + ":";
+  }
+
+  /**
+   * Scans over the directory table and pulls out stat information about a path.
+   *
+   * @param path
+   *          the full path of a file or directory
+   */
+  public Map<String,String> getData(String path) throws TableNotFoundException {
+    if (path.endsWith("/"))
+      path = path.substring(0, path.length() - 1);
+    Scanner scanner = conn.createScanner(tableName, auths);
+    scanner.setRange(new Range(getRow(path)));
+    Map<String,String> data = new TreeMap<>();
+    for (Entry<Key,Value> e : scanner) {
+      String type = getType(e.getKey().getColumnFamily());
+      data.put("fullname", e.getKey().getRow().toString().substring(3));
+      data.put(type + e.getKey().getColumnQualifier().toString() + ":" + e.getKey().getColumnVisibility().toString(), new String(e.getValue().get()));
+    }
+    return data;
+  }
+
+  /**
+   * Uses the directory table to list the contents of a directory.
+   *
+   * @param path
+   *          the full path of a directory
+   */
+  public Map<String,Map<String,String>> getDirList(String path) throws TableNotFoundException {
+    if (!path.endsWith("/"))
+      path = path + "/";
+    Map<String,Map<String,String>> fim = new TreeMap<>();
+    Scanner scanner = conn.createScanner(tableName, auths);
+    scanner.setRange(Range.prefix(getRow(path)));
+    for (Entry<Key,Value> e : scanner) {
+      String name = e.getKey().getRow().toString();
+      name = name.substring(name.lastIndexOf("/") + 1);
+      String type = getType(e.getKey().getColumnFamily());
+      if (!fim.containsKey(name)) {
+        fim.put(name, new TreeMap<String,String>());
+        fim.get(name).put("fullname", e.getKey().getRow().toString().substring(3));
+      }
+      fim.get(name).put(type + e.getKey().getColumnQualifier().toString() + ":" + e.getKey().getColumnVisibility().toString(), new String(e.getValue().get()));
+    }
+    return fim;
+  }
+
+  /**
+   * Scans over the index table for files or directories with a given name.
+   *
+   * @param term
+   *          the name of a file or directory to search for
+   */
+  public Iterable<Entry<Key,Value>> exactTermSearch(String term) throws Exception {
+    System.out.println("executing exactTermSearch for " + term);
+    Scanner scanner = conn.createScanner(tableName, auths);
+    scanner.setRange(new Range(getForwardIndex(term)));
+    return scanner;
+  }
+
+  /**
+   * Scans over the index table for files or directories with a given name, prefix, or suffix (indicated by a wildcard '*' at the beginning or end of the term).
+   *
+   * @param exp
+   *          the name of a file or directory to search for, with an optional wildcard '*' at the beginning or end
+   */
+  public Iterable<Entry<Key,Value>> singleRestrictedWildCardSearch(String exp) throws Exception {
+    if (exp.indexOf("/") >= 0)
+      throw new Exception("this method only works with unqualified names");
+
+    Scanner scanner = conn.createScanner(tableName, auths);
+    if (exp.startsWith("*")) {
+      System.out.println("executing beginning wildcard search for " + exp);
+      exp = exp.substring(1);
+      scanner.setRange(Range.prefix(getReverseIndex(exp)));
+    } else if (exp.endsWith("*")) {
+      System.out.println("executing ending wildcard search for " + exp);
+      exp = exp.substring(0, exp.length() - 1);
+      scanner.setRange(Range.prefix(getForwardIndex(exp)));
+    } else if (exp.indexOf("*") >= 0) {
+      throw new Exception("this method only works for beginning or ending wild cards");
+    } else {
+      return exactTermSearch(exp);
+    }
+    return scanner;
+  }
+
+  /**
+   * Scans over the index table for files or directories with a given name that can contain a single wildcard '*' anywhere in the term.
+   *
+   * @param exp
+   *          the name of a file or directory to search for, with one optional wildcard '*'
+   */
+  public Iterable<Entry<Key,Value>> singleWildCardSearch(String exp) throws Exception {
+    int starIndex = exp.indexOf("*");
+    if (exp.indexOf("*", starIndex + 1) >= 0)
+      throw new Exception("only one wild card for search");
+
+    if (starIndex < 0) {
+      return exactTermSearch(exp);
+    } else if (starIndex == 0 || starIndex == exp.length() - 1) {
+      return singleRestrictedWildCardSearch(exp);
+    }
+
+    String firstPart = exp.substring(0, starIndex);
+    String lastPart = exp.substring(starIndex + 1);
+    String regexString = ".*/" + exp.replace("*", "[^/]*");
+
+    Scanner scanner = conn.createScanner(tableName, auths);
+    if (firstPart.length() >= lastPart.length()) {
+      System.out.println("executing middle wildcard search for " + regexString + " from entries starting with " + firstPart);
+      scanner.setRange(Range.prefix(getForwardIndex(firstPart)));
+    } else {
+      System.out.println("executing middle wildcard search for " + regexString + " from entries ending with " + lastPart);
+      scanner.setRange(Range.prefix(getReverseIndex(lastPart)));
+    }
+    IteratorSetting regex = new IteratorSetting(50, "regex", RegExFilter.class);
+    RegExFilter.setRegexs(regex, null, null, regexString, null, false);
+    scanner.addScanIterator(regex);
+    return scanner;
+  }
+
+  public static class Opts extends ClientOnRequiredTable {
+    @Parameter(names = "--path", description = "the directory to list")
+    String path = "/";
+    @Parameter(names = "--search", description = "find a file or directory with the given name")
+    boolean search = false;
+  }
+
+  /**
+   * Lists the contents of a directory using the directory table, or searches for file or directory names (if the --search flag is included).
+   */
+  public static void main(String[] args) throws Exception {
+    Opts opts = new Opts();
+    opts.parseArgs(QueryUtil.class.getName(), args);
+    QueryUtil q = new QueryUtil(opts);
+    if (opts.search) {
+      for (Entry<Key,Value> e : q.singleWildCardSearch(opts.path)) {
+        System.out.println(e.getKey().getColumnQualifier());
+      }
+    } else {
+      for (Entry<String,Map<String,String>> e : q.getDirList(opts.path).entrySet()) {
+        System.out.println(e);
+      }
+    }
+  }
+}
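
To make the table layout concrete, here is how the helpers above encode a sample path. The expected values are worked out by reading the methods, not captured from a run, and "/local/user" is a hypothetical path:

    QueryUtil.getDepth("/local/user");        // 2 (number of forward slashes)
    QueryUtil.getRow("/local/user");          // Text "002/local/user" (zero-padded depth + path)
    QueryUtil.getForwardIndex("/local/user"); // Text "fuser" (FORWARD_PREFIX + last component)
    QueryUtil.getReverseIndex("/local/user"); // Text "rresu" (REVERSE_PREFIX + reversed component)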

http://git-wip-us.apache.org/repos/asf/accumulo-examples/blob/d96c6d96/src/main/java/org/apache/accumulo/examples/dirlist/Viewer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/accumulo/examples/dirlist/Viewer.java b/src/main/java/org/apache/accumulo/examples/dirlist/Viewer.java
new file mode 100644
index 0000000..2cc2298
--- /dev/null
+++ b/src/main/java/org/apache/accumulo/examples/dirlist/Viewer.java
@@ -0,0 +1,217 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.dirlist;
+
+import java.awt.BorderLayout;
+import java.io.IOException;
+import java.util.Enumeration;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import javax.swing.JFrame;
+import javax.swing.JScrollPane;
+import javax.swing.JSplitPane;
+import javax.swing.JTextArea;
+import javax.swing.JTree;
+import javax.swing.event.TreeExpansionEvent;
+import javax.swing.event.TreeExpansionListener;
+import javax.swing.event.TreeSelectionEvent;
+import javax.swing.event.TreeSelectionListener;
+import javax.swing.tree.DefaultMutableTreeNode;
+import javax.swing.tree.DefaultTreeModel;
+import javax.swing.tree.TreePath;
+
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.examples.filedata.FileDataQuery;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.beust.jcommander.Parameter;
+
+/**
+ * Provides a GUI for browsing the file system information stored in Accumulo.
+ */
+@SuppressWarnings("serial")
+public class Viewer extends JFrame implements TreeSelectionListener, TreeExpansionListener {
+  private static final Logger log = LoggerFactory.getLogger(Viewer.class);
+
+  JTree tree;
+  DefaultTreeModel treeModel;
+  QueryUtil q;
+  FileDataQuery fdq;
+  String topPath;
+  Map<String,DefaultMutableTreeNode> nodeNameMap;
+  JTextArea text;
+  JTextArea data;
+  JScrollPane dataPane;
+
+  public static class NodeInfo {
+    private String name;
+    private Map<String,String> data;
+
+    public NodeInfo(String name, Map<String,String> data) {
+      this.name = name;
+      this.data = data;
+    }
+
+    public String getName() {
+      return name;
+    }
+
+    public String getFullName() {
+      String fn = data.get("fullname");
+      if (fn == null)
+        return name;
+      return fn;
+    }
+
+    public Map<String,String> getData() {
+      return data;
+    }
+
+    @Override
+    public String toString() {
+      return getName();
+    }
+
+    public String getHash() {
+      for (String k : data.keySet()) {
+        String[] parts = k.split(":");
+        if (parts.length >= 2 && parts[1].equals("md5")) {
+          return data.get(k);
+        }
+      }
+      return null;
+    }
+  }
+
+  public Viewer(Opts opts) throws Exception {
+    super("File Viewer");
+    setSize(1000, 800);
+    setDefaultCloseOperation(EXIT_ON_CLOSE);
+    q = new QueryUtil(opts);
+    fdq = new FileDataQuery(opts.instance, opts.zookeepers, opts.getPrincipal(), opts.getToken(), opts.dataTable, opts.auths);
+    this.topPath = opts.path;
+  }
+
+  public void populate(DefaultMutableTreeNode node) throws TableNotFoundException {
+    String path = ((NodeInfo) node.getUserObject()).getFullName();
+    log.debug("listing " + path);
+    for (Entry<String,Map<String,String>> e : q.getDirList(path).entrySet()) {
+      log.debug("got child for " + node.getUserObject() + ": " + e.getKey());
+      node.add(new DefaultMutableTreeNode(new NodeInfo(e.getKey(), e.getValue())));
+    }
+  }
+
+  public void populateChildren(DefaultMutableTreeNode node) throws TableNotFoundException {
+    @SuppressWarnings("unchecked")
+    Enumeration<DefaultMutableTreeNode> children = node.children();
+    while (children.hasMoreElements()) {
+      populate(children.nextElement());
+    }
+  }
+
+  public void init() throws TableNotFoundException {
+    DefaultMutableTreeNode root = new DefaultMutableTreeNode(new NodeInfo(topPath, q.getData(topPath)));
+    populate(root);
+    populateChildren(root);
+
+    treeModel = new DefaultTreeModel(root);
+    tree = new JTree(treeModel);
+    tree.addTreeExpansionListener(this);
+    tree.addTreeSelectionListener(this);
+    text = new JTextArea(getText(q.getData(topPath)));
+    data = new JTextArea("");
+    JScrollPane treePane = new JScrollPane(tree);
+    JScrollPane textPane = new JScrollPane(text);
+    dataPane = new JScrollPane(data);
+    JSplitPane infoSplitPane = new JSplitPane(JSplitPane.VERTICAL_SPLIT, textPane, dataPane);
+    JSplitPane mainSplitPane = new JSplitPane(JSplitPane.HORIZONTAL_SPLIT, treePane, infoSplitPane);
+    mainSplitPane.setDividerLocation(300);
+    infoSplitPane.setDividerLocation(150);
+    getContentPane().add(mainSplitPane, BorderLayout.CENTER);
+  }
+
+  public static String getText(DefaultMutableTreeNode node) {
+    return getText(((NodeInfo) node.getUserObject()).getData());
+  }
+
+  public static String getText(Map<String,String> data) {
+    StringBuilder sb = new StringBuilder();
+    for (String name : data.keySet()) {
+      sb.append(name);
+      sb.append(" : ");
+      sb.append(data.get(name));
+      sb.append('\n');
+    }
+    return sb.toString();
+  }
+
+  @Override
+  public void treeExpanded(TreeExpansionEvent event) {
+    try {
+      populateChildren((DefaultMutableTreeNode) event.getPath().getLastPathComponent());
+    } catch (TableNotFoundException e) {
+      log.error("Could not find table.", e);
+    }
+  }
+
+  @Override
+  public void treeCollapsed(TreeExpansionEvent event) {
+    DefaultMutableTreeNode node = (DefaultMutableTreeNode) event.getPath().getLastPathComponent();
+    @SuppressWarnings("unchecked")
+    Enumeration<DefaultMutableTreeNode> children = node.children();
+    while (children.hasMoreElements()) {
+      DefaultMutableTreeNode child = children.nextElement();
+      log.debug("removing children of " + ((NodeInfo) child.getUserObject()).getFullName());
+      child.removeAllChildren();
+    }
+  }
+
+  @Override
+  public void valueChanged(TreeSelectionEvent e) {
+    TreePath selected = e.getNewLeadSelectionPath();
+    if (selected == null)
+      return;
+    DefaultMutableTreeNode node = (DefaultMutableTreeNode) selected.getLastPathComponent();
+    text.setText(getText(node));
+    try {
+      String hash = ((NodeInfo) node.getUserObject()).getHash();
+      if (hash != null) {
+        data.setText(fdq.getSomeData(hash, 10000));
+      } else {
+        data.setText("");
+      }
+    } catch (IOException e1) {
+      log.error("Could not get data from FileDataQuery.", e1);
+    }
+  }
+
+  static class Opts extends QueryUtil.Opts {
+    @Parameter(names = "--dataTable")
+    String dataTable = "dataTable";
+  }
+
+  public static void main(String[] args) throws Exception {
+    Opts opts = new Opts();
+    opts.parseArgs(Viewer.class.getName(), args);
+
+    Viewer v = new Viewer(opts);
+    v.init();
+    v.setVisible(true);
+  }
+}
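
Since Viewer.Opts inherits QueryUtil.Opts (and through it the connection and table flags of ClientOnRequiredTable), the viewer can also be launched programmatically. A hedged sketch, assuming it runs in the same package and with placeholder connection values:

    // Every argument value below is a placeholder.
    Viewer.Opts opts = new Viewer.Opts();
    opts.parseArgs(Viewer.class.getName(), new String[] {
        "-i", "instance", "-z", "zk1:2181", "-u", "username", "-p", "password",
        "-t", "dirTable", "--dataTable", "dataTable", "--path", "/local/user"});
    Viewer v = new Viewer(opts);
    v.init();
    v.setVisible(true);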

http://git-wip-us.apache.org/repos/asf/accumulo-examples/blob/d96c6d96/src/main/java/org/apache/accumulo/examples/filedata/CharacterHistogram.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/accumulo/examples/filedata/CharacterHistogram.java b/src/main/java/org/apache/accumulo/examples/filedata/CharacterHistogram.java
new file mode 100644
index 0000000..5058f6d
--- /dev/null
+++ b/src/main/java/org/apache/accumulo/examples/filedata/CharacterHistogram.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.filedata;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.cli.MapReduceClientOnRequiredTable;
+import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.user.SummingArrayCombiner;
+import org.apache.accumulo.core.security.ColumnVisibility;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import com.beust.jcommander.Parameter;
+
+/**
+ * A MapReduce job that computes a histogram of byte frequency for each file and stores the histogram alongside the file data. The {@link ChunkInputFormat} is used
+ * to read the file data from Accumulo.
+ */
+public class CharacterHistogram extends Configured implements Tool {
+  public static final String VIS = "vis";
+
+  public static void main(String[] args) throws Exception {
+    System.exit(ToolRunner.run(new Configuration(), new CharacterHistogram(), args));
+  }
+
+  public static class HistMapper extends Mapper<List<Entry<Key,Value>>,InputStream,Text,Mutation> {
+    private ColumnVisibility cv;
+
+    @Override
+    public void map(List<Entry<Key,Value>> k, InputStream v, Context context) throws IOException, InterruptedException {
+      Long[] hist = new Long[256];
+      for (int i = 0; i < hist.length; i++)
+        hist[i] = 0L;
+      int b = v.read();
+      while (b >= 0) {
+        hist[b] += 1L;
+        b = v.read();
+      }
+      v.close();
+      Mutation m = new Mutation(k.get(0).getKey().getRow());
+      m.put("info", "hist", cv, new Value(SummingArrayCombiner.STRING_ARRAY_ENCODER.encode(Arrays.asList(hist))));
+      context.write(new Text(), m);
+    }
+
+    @Override
+    protected void setup(Context context) throws IOException, InterruptedException {
+      cv = new ColumnVisibility(context.getConfiguration().get(VIS, ""));
+    }
+  }
+
+  static class Opts extends MapReduceClientOnRequiredTable {
+    @Parameter(names = "--vis")
+    String visibilities = "";
+  }
+
+  @Override
+  public int run(String[] args) throws Exception {
+    Job job = Job.getInstance(getConf());
+    job.setJobName(this.getClass().getSimpleName());
+    job.setJarByClass(this.getClass());
+
+    Opts opts = new Opts();
+    opts.parseArgs(CharacterHistogram.class.getName(), args);
+
+    job.setInputFormatClass(ChunkInputFormat.class);
+    opts.setAccumuloConfigs(job);
+    job.getConfiguration().set(VIS, opts.visibilities);
+
+    job.setMapperClass(HistMapper.class);
+    job.setMapOutputKeyClass(Text.class);
+    job.setMapOutputValueClass(Mutation.class);
+
+    job.setNumReduceTasks(0);
+
+    job.setOutputFormatClass(AccumuloOutputFormat.class);
+
+    job.waitForCompletion(true);
+    return job.isSuccessful() ? 0 : 1;
+  }
+}
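
HistMapper stores each histogram with SummingArrayCombiner's string-array encoding, which round-trips a List<Long>, so a reader can decode the stored value directly. A minimal sketch, where 'value' is a hypothetical Value read from the table's info:hist column:

    // Decode the histogram written by HistMapper above; 'value' is a
    // hypothetical org.apache.accumulo.core.data.Value from a Scanner.
    List<Long> hist = SummingArrayCombiner.STRING_ARRAY_ENCODER.decode(value.get());
    long newlines = hist.get('\n');   // count of byte 0x0A in the file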

http://git-wip-us.apache.org/repos/asf/accumulo-examples/blob/d96c6d96/src/main/java/org/apache/accumulo/examples/filedata/ChunkCombiner.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/accumulo/examples/filedata/ChunkCombiner.java b/src/main/java/org/apache/accumulo/examples/filedata/ChunkCombiner.java
new file mode 100644
index 0000000..15c44d0
--- /dev/null
+++ b/src/main/java/org/apache/accumulo/examples/filedata/ChunkCombiner.java
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.filedata;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Map;
+
+import org.apache.accumulo.core.data.ByteSequence;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.PartialKey;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.IteratorEnvironment;
+import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
+import org.apache.hadoop.io.Text;
+
+/**
+ * This iterator dedupes chunks and sets their visibilities to the combined visibility of the refs columns. For example, it would combine
+ *
+ * <pre>
+ *    row1 refs uid1\0a A&amp;B V0
+ *    row1 refs uid2\0b C&amp;D V0
+ *    row1 ~chunk 0 A&amp;B V1
+ *    row1 ~chunk 0 C&amp;D V1
+ *    row1 ~chunk 0 E&amp;F V1
+ *    row1 ~chunk 0 G&amp;H V1
+ * </pre>
+ *
+ * into the following
+ *
+ * <pre>
+ *    row1 refs uid1\0a A&amp;B V0
+ *    row1 refs uid2\0b C&amp;D V0
+ *    row1 ~chunk 0 (A&amp;B)|(C&amp;D) V1
+ * </pre>
+ *
+ * {@link VisibilityCombiner} is used to combine the visibilities.
+ */
+
+public class ChunkCombiner implements SortedKeyValueIterator<Key,Value> {
+
+  private SortedKeyValueIterator<Key,Value> source;
+  private SortedKeyValueIterator<Key,Value> refsSource;
+  private static final Collection<ByteSequence> refsColf = Collections.singleton(FileDataIngest.REFS_CF_BS);
+  private Map<Text,byte[]> lastRowVC = Collections.emptyMap();
+
+  private Key topKey = null;
+  private Value topValue = null;
+
+  public ChunkCombiner() {}
+
+  @Override
+  public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
+    this.source = source;
+    this.refsSource = source.deepCopy(env);
+  }
+
+  @Override
+  public boolean hasTop() {
+    return topKey != null;
+  }
+
+  @Override
+  public void next() throws IOException {
+    findTop();
+  }
+
+  @Override
+  public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
+    source.seek(range, columnFamilies, inclusive);
+    findTop();
+  }
+
+  private void findTop() throws IOException {
+    do {
+      topKey = null;
+      topValue = null;
+    } while (source.hasTop() && _findTop() == null);
+  }
+
+  private byte[] _findTop() throws IOException {
+    long maxTS;
+
+    topKey = new Key(source.getTopKey());
+    topValue = new Value(source.getTopValue());
+    source.next();
+
+    if (!topKey.getColumnFamilyData().equals(FileDataIngest.CHUNK_CF_BS))
+      return topKey.getColumnVisibility().getBytes();
+
+    maxTS = topKey.getTimestamp();
+
+    while (source.hasTop() && source.getTopKey().equals(topKey, PartialKey.ROW_COLFAM_COLQUAL)) {
+      if (source.getTopKey().getTimestamp() > maxTS)
+        maxTS = source.getTopKey().getTimestamp();
+
+      if (!topValue.equals(source.getTopValue()))
+        throw new RuntimeException("values not equals " + topKey + " " + source.getTopKey() + " : " + diffInfo(topValue, source.getTopValue()));
+
+      source.next();
+    }
+
+    byte[] vis = getVisFromRefs();
+    if (vis != null) {
+      topKey = new Key(topKey.getRowData().toArray(), topKey.getColumnFamilyData().toArray(), topKey.getColumnQualifierData().toArray(), vis, maxTS);
+    }
+    return vis;
+  }
+
+  private byte[] getVisFromRefs() throws IOException {
+    Text row = topKey.getRow();
+    if (lastRowVC.containsKey(row))
+      return lastRowVC.get(row);
+    Range range = new Range(row);
+    refsSource.seek(range, refsColf, true);
+    VisibilityCombiner vc = null;
+    while (refsSource.hasTop()) {
+      if (vc == null)
+        vc = new VisibilityCombiner();
+      vc.add(refsSource.getTopKey().getColumnVisibilityData());
+      refsSource.next();
+    }
+    if (vc == null) {
+      lastRowVC = Collections.singletonMap(row, null);
+      return null;
+    }
+    lastRowVC = Collections.singletonMap(row, vc.get());
+    return vc.get();
+  }
+
+  private String diffInfo(Value v1, Value v2) {
+    if (v1.getSize() != v2.getSize()) {
+      return "val len not equal " + v1.getSize() + "!=" + v2.getSize();
+    }
+
+    byte[] vb1 = v1.get();
+    byte[] vb2 = v2.get();
+
+    for (int i = 0; i < vb1.length; i++) {
+      if (vb1[i] != vb2[i]) {
+        return String.format("first diff at offset %,d 0x%02x != 0x%02x", i, 0xff & vb1[i], 0xff & vb2[i]);
+      }
+    }
+
+    return null;
+  }
+
+  @Override
+  public Key getTopKey() {
+    return topKey;
+  }
+
+  @Override
+  public Value getTopValue() {
+    return topValue;
+  }
+
+  @Override
+  public SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
+    ChunkCombiner cc = new ChunkCombiner();
+    try {
+      cc.init(source.deepCopy(env), null, env);
+    } catch (IOException e) {
+      throw new IllegalArgumentException(e);
+    }
+    return cc;
+  }
+}
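
Ingest.main earlier in this commit attaches ChunkCombiner to the data table at creation time; it can equally be applied for a single scan. A minimal sketch, assuming 'scanner' is an existing Scanner over a data table that does not already have the iterator configured:

    // Apply ChunkCombiner at scan time; priority 1 mirrors the table-level
    // IteratorSetting used by Ingest.
    scanner.addScanIterator(new IteratorSetting(1, "chunkCombiner", ChunkCombiner.class));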

http://git-wip-us.apache.org/repos/asf/accumulo-examples/blob/d96c6d96/src/main/java/org/apache/accumulo/examples/filedata/ChunkInputFormat.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/accumulo/examples/filedata/ChunkInputFormat.java b/src/main/java/org/apache/accumulo/examples/filedata/ChunkInputFormat.java
new file mode 100644
index 0000000..62be1f4
--- /dev/null
+++ b/src/main/java/org/apache/accumulo/examples/filedata/ChunkInputFormat.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.filedata;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.client.mapreduce.InputFormatBase;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.util.PeekingIterator;
+import org.apache.accumulo.core.util.format.DefaultFormatter;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+/**
+ * An InputFormat that turns the file data ingested with {@link FileDataIngest} into an InputStream using {@link ChunkInputStream}. Mappers used with this
+ * InputFormat must close the InputStream.
+ */
+public class ChunkInputFormat extends InputFormatBase<List<Entry<Key,Value>>,InputStream> {
+  @Override
+  public RecordReader<List<Entry<Key,Value>>,InputStream> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException,
+      InterruptedException {
+    return new RecordReaderBase<List<Entry<Key,Value>>,InputStream>() {
+      private PeekingIterator<Entry<Key,Value>> peekingScannerIterator;
+
+      @Override
+      public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IOException {
+        super.initialize(inSplit, attempt);
+        peekingScannerIterator = new PeekingIterator<>(scannerIterator);
+        currentK = new ArrayList<>();
+        currentV = new ChunkInputStream();
+      }
+
+      @Override
+      public boolean nextKeyValue() throws IOException, InterruptedException {
+        currentK.clear();
+        if (peekingScannerIterator.hasNext()) {
+          ++numKeysRead;
+          Entry<Key,Value> entry = peekingScannerIterator.peek();
+          while (!entry.getKey().getColumnFamily().equals(FileDataIngest.CHUNK_CF)) {
+            currentK.add(entry);
+            peekingScannerIterator.next();
+            if (!peekingScannerIterator.hasNext())
+              return true;
+            entry = peekingScannerIterator.peek();
+          }
+          currentKey = entry.getKey();
+          ((ChunkInputStream) currentV).setSource(peekingScannerIterator);
+          if (log.isTraceEnabled())
+            log.trace("Processing key/value pair: " + DefaultFormatter.formatEntry(entry, true));
+          return true;
+        }
+        return false;
+      }
+    };
+  }
+}
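
CharacterHistogram.run above shows the full job wiring; condensed to just the input side, a hedged sketch follows. All connection values are placeholders, and the setters are the static configuration methods inherited from the Accumulo mapreduce input-format base classes:

    Job job = Job.getInstance(new Configuration());
    job.setInputFormatClass(ChunkInputFormat.class);
    ChunkInputFormat.setConnectorInfo(job, "username", new PasswordToken("password"));
    ChunkInputFormat.setZooKeeperInstance(job,
        ClientConfiguration.loadDefault().withInstance("instance").withZkHosts("zk1:2181"));
    ChunkInputFormat.setInputTableName(job, "dataTable");
    ChunkInputFormat.setScanAuthorizations(job, new Authorizations("exampleVis"));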

