accumulo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From e..@apache.org
Subject svn commit: r1230608 [3/16] - in /incubator/accumulo/trunk: ./ contrib/accumulo_sample/ src/assemble/ src/core/ src/core/src/main/java/org/apache/accumulo/core/client/impl/thrift/ src/core/src/main/java/org/apache/accumulo/core/master/thrift/ src/core/...
Date Thu, 12 Jan 2012 16:06:20 GMT
Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithBatchWriter.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithBatchWriter.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithBatchWriter.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithBatchWriter.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.helloworld;
+
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.MultiTableBatchWriter;
+import org.apache.accumulo.core.client.MutationsRejectedException;
+import org.apache.accumulo.core.client.TableExistsException;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.hadoop.io.Text;
+
+/**
+ * Command-line example that writes 10000 rows of five columns each into a single table through a {@link MultiTableBatchWriter}, creating the table first when
+ * it does not already exist.
+ */
+public class InsertWithBatchWriter {
+  /**
+   * Connects to the given instance and inserts the sample rows.
+   * 
+   * @param args
+   *          exactly five values, in order: instance name, zookeepers, table name, username, password. Any other count prints the usage line and exits with
+   *          status 1.
+   */
+  public static void main(String[] args) throws AccumuloException, AccumuloSecurityException, MutationsRejectedException, TableExistsException,
+      TableNotFoundException {
+    if (args.length != 5) {
+      System.out
+          .println("Usage: bin/tool.sh accumulo-examplesjar accumulo.examples.helloworld.InsertWithBatchWriter <instance name> <zoo keepers> <tableName> <username> <password>");
+      System.exit(1);
+    }
+    
+    String instanceName = args[0];
+    String zooKeepers = args[1];
+    String tableName = args[2];
+    String user = args[3];
+    // NOTE(review): getBytes() without a charset uses the platform default encoding
+    byte[] pass = args[4].getBytes();
+    
+    ZooKeeperInstance instance = new ZooKeeperInstance(instanceName, zooKeepers);
+    Connector connector = instance.getConnector(user, pass);
+    // presumably (max memory in bytes, max latency, write threads) -- TODO confirm against the Connector API
+    MultiTableBatchWriter mtbw = connector.createMultiTableBatchWriter(200000l, 300, 4);
+    
+    BatchWriter bw = null;
+    
+    // the table must exist before a writer for it is requested
+    if (!connector.tableOperations().exists(tableName))
+      connector.tableOperations().create(tableName);
+    bw = mtbw.getBatchWriter(tableName);
+    
+    Text colf = new Text("colfam");
+    System.out.println("writing ...");
+    // one mutation per row "row_<i>", each carrying columns colqual_0 .. colqual_4 with value "value_<i>_<j>"
+    for (int i = 0; i < 10000; i++) {
+      Mutation m = new Mutation(new Text(String.format("row_%d", i)));
+      for (int j = 0; j < 5; j++) {
+        m.put(colf, new Text(String.format("colqual_%d", j)), new Value((String.format("value_%d_%d", i, j)).getBytes()));
+      }
+      bw.addMutation(m);
+      // progress output every 100 rows
+      if (i % 100 == 0)
+        System.out.println(i);
+    }
+    
+    // the writer is closed through the MultiTableBatchWriter it was obtained from
+    mtbw.close();
+  }
+  
+}

Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithBatchWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithOutputFormat.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithOutputFormat.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithOutputFormat.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithOutputFormat.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.helloworld;
+
+import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * Example that writes 10000 rows of five columns each to a table by driving an {@link AccumuloOutputFormat} record writer directly, without running a MapReduce
+ * job.
+ */
+public class InsertWithOutputFormat extends Configured implements Tool {
+  // this is a tool because when you run a mapreduce, you will need to use the
+  // ToolRunner
+  // if you want libjars to be passed properly to the map and reduce tasks
+  // even though this class isn't a mapreduce
+  /**
+   * Configures the output format and writes the sample mutations.
+   * 
+   * @param args
+   *          exactly five values, in order: instance name, zookeepers, table name, username, password
+   * @return 1 when the argument count is wrong (after printing the usage line), 0 after all rows were written
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+    if (args.length != 5) {
+      System.out.println("Usage: accumulo " + this.getClass().getName() + " <instance name> <zoo keepers> <tablename> <username> <password>");
+      return 1;
+    }
+    Text tableName = new Text(args[2]);
+    Job job = new Job(getConf());
+    Configuration conf = job.getConfiguration();
+    AccumuloOutputFormat.setZooKeeperInstance(job, args[0], args[1]);
+    // the trailing (true, null) presumably mean "create tables" and "no default table" -- TODO confirm against AccumuloOutputFormat
+    AccumuloOutputFormat.setOutputInfo(job, args[3], args[4].getBytes(), true, null);
+    job.setOutputFormatClass(AccumuloOutputFormat.class);
+    
+    // when running a mapreduce, you won't need to instantiate the output
+    // format and record writer
+    // mapreduce will do that for you, and you will just use
+    // output.collect(tableName, mutation)
+    TaskAttemptContext context = new TaskAttemptContext(conf, new TaskAttemptID());
+    RecordWriter<Text,Mutation> rw = new AccumuloOutputFormat().getRecordWriter(context);
+    
+    Text colf = new Text("colfam");
+    System.out.println("writing ...");
+    // one mutation per row "row_<i>", each carrying columns colqual_0 .. colqual_4 with value "value_<i>_<j>"
+    for (int i = 0; i < 10000; i++) {
+      Mutation m = new Mutation(new Text(String.format("row_%d", i)));
+      for (int j = 0; j < 5; j++) {
+        m.put(colf, new Text(String.format("colqual_%d", j)), new Value((String.format("value_%d_%d", i, j)).getBytes()));
+      }
+      rw.write(tableName, m); // repeat until done
+      if (i % 100 == 0)
+        System.out.println(i);
+    }
+    
+    rw.close(context); // close when done
+    return 0;
+  }
+  
+  /**
+   * Runs this tool through {@link ToolRunner} and exits the JVM with the status returned by {@link #run(String[])}.
+   */
+  public static void main(String[] args) throws Exception {
+    System.exit(ToolRunner.run(CachedConfiguration.getInstance(), new InsertWithOutputFormat(), args));
+  }
+}

Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/InsertWithOutputFormat.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/ReadData.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/ReadData.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/ReadData.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/ReadData.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.helloworld;
+
+import java.util.Iterator;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.Constants;
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.hadoop.io.Text;
+
+/**
+ * Command-line example that scans a table, optionally restricted to a start and end key, and prints row, column family, column qualifier and value of every
+ * entry to standard output.
+ */
+public class ReadData {
+  /**
+   * Connects to the given instance and prints the scanned entries.
+   * 
+   * @param args
+   *          five to seven values, in order: instance name, zookeepers, table name, username, password, optional start row, optional end row. Any other count
+   *          prints the usage line and exits with status 1.
+   */
+  public static void main(String[] args) throws AccumuloException, AccumuloSecurityException, TableNotFoundException {
+    if (args.length < 5 || args.length > 7) {
+      System.out
+          .println("bin/accumulo accumulo.examples.helloworld.ReadData <instance name> <zoo keepers> <tablename> <username> <password> [startkey [endkey]]");
+      System.exit(1);
+    }
+    
+    String instanceName = args[0];
+    String zooKeepers = args[1];
+    String tableName = args[2];
+    String user = args[3];
+    byte[] pass = args[4].getBytes();
+    
+    ZooKeeperInstance instance = new ZooKeeperInstance(instanceName, zooKeepers);
+    Connector connector = instance.getConnector(user, pass);
+    
+    Scanner scan = connector.createScanner(tableName, Constants.NO_AUTHS);
+    // start/end stay null when the optional arguments are absent; a null bound presumably means "unbounded" -- TODO confirm against Range
+    Key start = null;
+    if (args.length > 5)
+      start = new Key(new Text(args[5]));
+    Key end = null;
+    if (args.length > 6)
+      end = new Key(new Text(args[6]));
+    scan.setRange(new Range(start, end));
+    Iterator<Entry<Key,Value>> iter = scan.iterator();
+    
+    // print one line per entry
+    while (iter.hasNext()) {
+      Entry<Key,Value> e = iter.next();
+      Text colf = e.getKey().getColumnFamily();
+      Text colq = e.getKey().getColumnQualifier();
+      System.out.print("row: " + e.getKey().getRow() + ", colf: " + colf + ", colq: " + colq);
+      System.out.println(", value: " + e.getValue().toString());
+    }
+  }
+}

Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/helloworld/ReadData.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/isolation/InterferenceTest.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/isolation/InterferenceTest.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/isolation/InterferenceTest.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/isolation/InterferenceTest.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.isolation;
+
+import java.util.HashSet;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.Constants;
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.IsolatedScanner;
+import org.apache.accumulo.core.client.MutationsRejectedException;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.data.ByteSequence;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.hadoop.io.Text;
+import org.apache.log4j.Logger;
+
+/**
+ * This example shows how a concurrent reader and writer can interfere with each other. It creates two threads that run forever reading and writing to the same
+ * table.
+ * 
+ * When the example is run with isolation enabled, no interference will be observed.
+ * 
+ * When the example is run without isolation, the reader will see partial mutations of a row.
+ * 
+ */
+
+public class InterferenceTest {
+  
+  private static final int NUM_ROWS = 500;
+  private static final int NUM_COLUMNS = 113; // scanner batches 1000 by default, so make num columns not a multiple of 10
+  private static long iterations;
+  private static final Logger log = Logger.getLogger(InterferenceTest.class);
+  
+  /**
+   * Writes {@code iterations} mutations, cycling through NUM_ROWS row ids. Every mutation sets all NUM_COLUMNS columns of its row to the same counter value, so
+   * a reader that sees a row atomically sees a single value across the row.
+   */
+  static class Writer implements Runnable {
+    
+    private BatchWriter bw;
+    
+    Writer(BatchWriter bw) {
+      this.bw = bw;
+    }
+    
+    @Override
+    public void run() {
+      int row = 0;
+      int value = 0;
+      
+      for (long i = 0; i < iterations; i++) {
+        Mutation m = new Mutation(new Text(String.format("%03d", row)));
+        // advance to the next row id, wrapping around after NUM_ROWS
+        row = (row + 1) % NUM_ROWS;
+        
+        // every column of this mutation gets the same value
+        for (int cq = 0; cq < NUM_COLUMNS; cq++)
+          m.put(new Text("000"), new Text(String.format("%04d", cq)), new Value(("" + value).getBytes()));
+        
+        value++;
+        
+        try {
+          bw.addMutation(m);
+        } catch (MutationsRejectedException e) {
+          // a rejected mutation ends the whole process with status -1
+          e.printStackTrace();
+          System.exit(-1);
+        }
+      }
+      try {
+        bw.close();
+      } catch (MutationsRejectedException e) {
+        // a failure on close is only logged
+        log.error(e, e);
+      }
+    }
+  }
+  
+  /**
+   * Repeatedly iterates over the supplied scanner and checks that every row is observed atomically: each row must contain exactly NUM_COLUMNS entries and all
+   * of them must carry the same value. Violations are printed to System.err. Scanning repeats until {@link #stopNow()} is called.
+   */
+  static class Reader implements Runnable {
+    
+    private Scanner scanner;
+    // set by the thread that calls stopNow() and read by the scanning thread, hence volatile
+    volatile boolean stop = false;
+    
+    Reader(Scanner scanner) {
+      this.scanner = scanner;
+    }
+    
+    @Override
+    public void run() {
+      // keep scanning until stopNow() raises the flag; stop starts out false, so the loop has to test !stop or it would never execute
+      while (!stop) {
+        ByteSequence row = null;
+        int count = 0;
+        
+        // all columns in a row should have the same value,
+        // use this hash set to track that
+        HashSet<String> values = new HashSet<String>();
+        
+        for (Entry<Key,Value> entry : scanner) {
+          if (row == null)
+            row = entry.getKey().getRowData();
+          
+          // the row id changed: validate the row that just ended, then reset the per-row state
+          if (!row.equals(entry.getKey().getRowData())) {
+            if (count != NUM_COLUMNS)
+              System.err.println("ERROR Did not see " + NUM_COLUMNS + " columns in row " + row);
+            
+            if (values.size() > 1)
+              System.err.println("ERROR Columns in row " + row + " had multiple values " + values);
+            
+            row = entry.getKey().getRowData();
+            count = 0;
+            values.clear();
+          }
+          
+          count++;
+          
+          values.add(entry.getValue().toString());
+        }
+        
+        // validate the last row of the pass; count is 0 only when the scan returned nothing
+        if (count > 0 && count != NUM_COLUMNS)
+          System.err.println("ERROR Did not see " + NUM_COLUMNS + " columns in row " + row);
+        
+        if (values.size() > 1)
+          System.err.println("ERROR Columns in row " + row + " had multiple values " + values);
+      }
+    }
+    
+    /**
+     * Asks the reader to finish; the flag is checked between passes over the scanner, so the pass in progress completes first.
+     */
+    public void stopNow() {
+      stop = true;
+    }
+  }
+  
+  /**
+   * Starts one writer thread and one reader thread against the same table, waits for the writer to finish, then stops the reader.
+   * 
+   * @param args
+   *          exactly seven values, in order: instance name, zookeepers, user, password, table, iterations, and true|false selecting whether the reader uses an
+   *          {@link IsolatedScanner}. Any other count prints the usage text and returns.
+   */
+  public static void main(String[] args) throws Exception {
+    
+    if (args.length != 7) {
+      System.out.println("Usage : " + InterferenceTest.class.getName() + " <instance name> <zookeepers> <user> <password> <table> <iterations> true|false");
+      System.out.println("          The last argument determines if scans should be isolated.  When false, expect to see errors");
+      return;
+    }
+    
+    ZooKeeperInstance zki = new ZooKeeperInstance(args[0], args[1]);
+    Connector conn = zki.getConnector(args[2], args[3].getBytes());
+    
+    String table = args[4];
+    iterations = Long.parseLong(args[5]);
+    // an iteration count below 1 means "effectively unbounded"
+    if (iterations < 1)
+      iterations = Long.MAX_VALUE;
+    if (!conn.tableOperations().exists(table))
+      conn.tableOperations().create(table);
+    
+    // presumably (max memory in bytes, max latency, write threads) -- TODO confirm against the Connector API
+    Thread writer = new Thread(new Writer(conn.createBatchWriter(table, 10000000, 60000l, 3)));
+    writer.start();
+    Reader r;
+    // the last argument chooses between an isolated scanner and a plain one
+    if (Boolean.parseBoolean(args[6]))
+      r = new Reader(new IsolatedScanner(conn.createScanner(table, Constants.NO_AUTHS)));
+    else
+      r = new Reader(conn.createScanner(table, Constants.NO_AUTHS));
+    Thread reader;
+    reader = new Thread(r);
+    reader.start();
+    // the reader is only told to stop after the writer has completed all its iterations
+    writer.join();
+    r.stopNow();
+    reader.join();
+    System.out.println("finished");
+  }
+}

Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/isolation/InterferenceTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RegexExample.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RegexExample.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RegexExample.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RegexExample.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.user.RegExFilter;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * Map-only job that reads a table through {@link AccumuloInputFormat} with a {@link RegExFilter} iterator attached and writes the entries that come back to
+ * text files.
+ */
+public class RegexExample extends Configured implements Tool {
+  /**
+   * Identity mapper: emits every key/value pair it receives unchanged.
+   */
+  public static class RegexMapper extends Mapper<Key,Value,Key,Value> {
+    public void map(Key row, Value data, Context context) throws IOException, InterruptedException {
+      context.write(row, data);
+    }
+  }
+  
+  /**
+   * Configures and runs the job.
+   * 
+   * @param args
+   *          ten values: args[0] and args[1] are the instance name and zookeepers; args[2] to args[4] are passed to setInputInfo (presumably user, password and
+   *          table -- TODO confirm); args[5] to args[8] are the row, column family, column qualifier and value regular expressions; args[9] is the output
+   *          path
+   * @return 0 when the job succeeded, 1 otherwise
+   */
+  public int run(String[] args) throws Exception {
+    // NOTE(review): args is not length-checked; fewer than ten arguments ends in an ArrayIndexOutOfBoundsException
+    Job job = new Job(getConf(), this.getClass().getSimpleName());
+    job.setJarByClass(this.getClass());
+    
+    job.setInputFormatClass(AccumuloInputFormat.class);
+    AccumuloInputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
+    AccumuloInputFormat.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), args[4], new Authorizations());
+    
+    // the meaning of the priority 50 and of the trailing false is not visible here -- TODO confirm against IteratorSetting / RegExFilter.setRegexs
+    IteratorSetting regex = new IteratorSetting(50, "regex", RegExFilter.class);
+    RegExFilter.setRegexs(regex, args[5], args[6], args[7], args[8], false);
+    AccumuloInputFormat.addIterator(job.getConfiguration(), regex);
+    
+    job.setMapperClass(RegexMapper.class);
+    job.setMapOutputKeyClass(Key.class);
+    job.setMapOutputValueClass(Value.class);
+    
+    // map-only job: no reducers
+    job.setNumReduceTasks(0);
+    
+    job.setOutputFormatClass(TextOutputFormat.class);
+    TextOutputFormat.setOutputPath(job, new Path(args[9]));
+    
+    System.out.println("setRowRegex: " + args[5]);
+    System.out.println("setColumnFamilyRegex: " + args[6]);
+    System.out.println("setColumnQualifierRegex: " + args[7]);
+    System.out.println("setValueRegex: " + args[8]);
+    
+    job.waitForCompletion(true);
+    return job.isSuccessful() ? 0 : 1;
+  }
+  
+  /**
+   * Runs this tool through {@link ToolRunner}; calls System.exit only when the result is non-zero.
+   */
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(CachedConfiguration.getInstance(), new RegexExample(), args);
+    if (res != 0)
+      System.exit(res);
+  }
+}

Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RegexExample.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RowHash.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RowHash.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RowHash.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RowHash.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.mapreduce;
+
+import java.io.IOException;
+import java.util.Collections;
+
+import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
+import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.accumulo.core.util.Pair;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.MD5Hash;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * Map-only job that reads entries through {@link AccumuloInputFormat}, computes the Base64-encoded MD5 digest of each value and writes it back through
+ * {@link AccumuloOutputFormat} under column cf-HASHTYPE:cq-MD5BASE64 of the same row.
+ */
+public class RowHash extends Configured implements Tool {
+  /**
+   * Mapper that, for every input entry, emits a mutation on the entry's row storing the Base64-encoded MD5 digest of the entry's value (taken via
+   * {@code data.toString()}) in column cf-HASHTYPE:cq-MD5BASE64.
+   */
+  public static class HashDataMapper extends Mapper<Key,Value,Text,Mutation> {
+    public void map(Key row, Value data, Context context) throws IOException, InterruptedException {
+      Mutation m = new Mutation(row.getRow());
+      m.put(new Text("cf-HASHTYPE"), new Text("cq-MD5BASE64"), new Value(Base64.encodeBase64(MD5Hash.digest(data.toString()).getDigest())));
+      // null key: presumably routes the mutation to the default table given to setOutputInfo -- TODO confirm against AccumuloOutputFormat
+      context.write(null, m);
+      context.progress();
+    }
+    
+    @Override
+    public void setup(Context job) {}
+  }
+  
+  /**
+   * Configures and runs the job.
+   * 
+   * @param args
+   *          seven values: args[0] and args[1] are the instance name and zookeepers; args[2] and args[3] are passed to both setInputInfo and setOutputInfo
+   *          (presumably user and password -- TODO confirm); args[4] is passed to setInputInfo (presumably the input table); args[5] is the column to fetch,
+   *          written as "family" or "family:qualifier"; args[6] is the last argument of setOutputInfo (presumably the output table)
+   * @return 0 when the job succeeded, 1 otherwise
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+    Job job = new Job(getConf(), this.getClass().getName());
+    job.setJarByClass(this.getClass());
+    
+    job.setInputFormatClass(AccumuloInputFormat.class);
+    AccumuloInputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
+    AccumuloInputFormat.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), args[4], new Authorizations());
+    
+    // split "family[:qualifier]"; without a colon the whole string is the family and the qualifier stays null
+    String col = args[5];
+    int idx = col.indexOf(":");
+    Text cf = new Text(idx < 0 ? col : col.substring(0, idx));
+    Text cq = idx < 0 ? null : new Text(col.substring(idx + 1));
+    // an empty family means no column restriction is configured
+    if (cf.getLength() > 0)
+      AccumuloInputFormat.fetchColumns(job.getConfiguration(), Collections.singleton(new Pair<Text,Text>(cf, cq)));
+    
+    // AccumuloInputFormat.setLogLevel(job, Level.TRACE);
+    
+    job.setMapperClass(HashDataMapper.class);
+    job.setMapOutputKeyClass(Text.class);
+    job.setMapOutputValueClass(Mutation.class);
+    
+    // map-only job: no reducers
+    job.setNumReduceTasks(0);
+    
+    job.setOutputFormatClass(AccumuloOutputFormat.class);
+    AccumuloOutputFormat.setZooKeeperInstance(job, args[0], args[1]);
+    AccumuloOutputFormat.setOutputInfo(job, args[2], args[3].getBytes(), true, args[6]);
+    // AccumuloOutputFormat.setLogLevel(job, Level.TRACE);
+    
+    job.waitForCompletion(true);
+    return job.isSuccessful() ? 0 : 1;
+  }
+  
+  /**
+   * Runs this tool through {@link ToolRunner}; calls System.exit only when the result is non-zero.
+   */
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(CachedConfiguration.getInstance(), new RowHash(), args);
+    if (res != 0)
+      System.exit(res);
+  }
+}

Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/RowHash.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TableToFile.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TableToFile.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TableToFile.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TableToFile.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.mapreduce;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Map;
+
+import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.accumulo.core.util.Pair;
+import org.apache.accumulo.core.util.format.DefaultFormatter;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * Takes a table and outputs the specified column to a set of part files on hdfs accumulo accumulo.examples.mapreduce.TableToFile <username> <password>
+ * <tablename> <column> <hdfs-output-path>
+ */
+public class TableToFile extends Configured implements Tool {
+  /**
+   * The Mapper class that given a row number, will generate the appropriate output line.
+   */
+  public static class TTFMapper extends Mapper<Key,Value,NullWritable,Text> {
+    public void map(Key row, Value data, Context context) throws IOException, InterruptedException {
+      final Key r = row;
+      final Value v = data;
+      Map.Entry<Key,Value> entry = new Map.Entry<Key,Value>() {
+        @Override
+        public Key getKey() {
+          return r;
+        }
+        
+        @Override
+        public Value getValue() {
+          return v;
+        }
+        
+        @Override
+        public Value setValue(Value value) {
+          return null;
+        }
+      };
+      context.write(NullWritable.get(), new Text(DefaultFormatter.formatEntry(entry, false)));
+      context.setStatus("Outputed Value");
+    }
+  }
+  
+  @Override
+  public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
+    Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
+    job.setJarByClass(this.getClass());
+    
+    job.setInputFormatClass(AccumuloInputFormat.class);
+    AccumuloInputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
+    AccumuloInputFormat.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), args[4], new Authorizations());
+    
+    HashSet<Pair<Text,Text>> columnsToFetch = new HashSet<Pair<Text,Text>>();
+    for (String col : args[5].split(",")) {
+      int idx = col.indexOf(":");
+      Text cf = new Text(idx < 0 ? col : col.substring(0, idx));
+      Text cq = idx < 0 ? null : new Text(col.substring(idx + 1));
+      if (cf.getLength() > 0)
+        columnsToFetch.add(new Pair<Text,Text>(cf, cq));
+    }
+    if (!columnsToFetch.isEmpty())
+      AccumuloInputFormat.fetchColumns(job.getConfiguration(), columnsToFetch);
+    
+    job.setMapperClass(TTFMapper.class);
+    job.setMapOutputKeyClass(NullWritable.class);
+    job.setMapOutputValueClass(Text.class);
+    
+    job.setNumReduceTasks(0);
+    
+    job.setOutputFormatClass(TextOutputFormat.class);
+    TextOutputFormat.setOutputPath(job, new Path(args[6]));
+    
+    job.waitForCompletion(true);
+    return job.isSuccessful() ? 0 : 1;
+  }
+  
+  /**
+   * 
+   * @param args
+   *          instanceName zookeepers username password table columns outputpath
+   * @throws Exception
+   */
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(CachedConfiguration.getInstance(), new TableToFile(), args);
+    if (res != 0)
+      System.exit(res);
+  }
+}

Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TableToFile.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TeraSortIngest.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TeraSortIngest.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TeraSortIngest.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TeraSortIngest.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,377 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.examples.simple.mapreduce;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * Generate the *almost* official terasort input data set. (See below) The user specifies the number of rows and the output directory and this class runs a
+ * map/reduce program to generate the data. The format of the data is:
+ * <ul>
+ * <li>(10 bytes key) (10 bytes rowid) (78 bytes filler) \r \n
+ * <li>The keys are random characters from the set ' ' .. '~'.
+ * <li>The rowid is the right justified row id as an int.
+ * <li>The filler consists of 7 runs of 10 characters from 'A' to 'Z'.
+ * </ul>
+ * 
+ * This TeraSort is slightly modified to allow for variable length key sizes and value sizes. The row length isn't variable. To generate a terabyte of data in
+ * the same way TeraSort does use 10000000000 rows and 10/10 byte key length and 78/78 byte value length. Along with the 10 byte row id and \r\n this gives you
+ * 100 byte row * 10000000000 rows = 1tb. Min/Max ranges for key and value parameters are inclusive/inclusive respectively.
+ * 
+ * Params <numrows> <minkeylength> <maxkeylength> <minvaluelength> <maxvaluelength> <tablename> <instance> <zoohosts> <username> <password> [numsplits]
+ * numsplits allows you to specify how many splits, and therefore mappers, to use
+ * 
+ * 
+ */
+public class TeraSortIngest extends Configured implements Tool {
+  /**
+   * An input format that assigns ranges of longs to each mapper.
+   */
+  static class RangeInputFormat extends InputFormat<LongWritable,NullWritable> {
+    /**
+     * An input split consisting of a range of numbers: rowCount consecutive values starting at firstRow.
+     */
+    static class RangeInputSplit extends InputSplit implements Writable {
+      long firstRow;
+      long rowCount;
+      
+      /** No-argument constructor; the fields are filled in by {@link #readFields(DataInput)}. */
+      public RangeInputSplit() {}
+      
+      public RangeInputSplit(long offset, long length) {
+        firstRow = offset;
+        rowCount = length;
+      }
+      
+      @Override
+      public long getLength() throws IOException {
+        return 0;
+      }
+      
+      @Override
+      public String[] getLocations() throws IOException {
+        return new String[] {};
+      }
+      
+      @Override
+      public void readFields(DataInput in) throws IOException {
+        firstRow = WritableUtils.readVLong(in);
+        rowCount = WritableUtils.readVLong(in);
+      }
+      
+      @Override
+      public void write(DataOutput out) throws IOException {
+        WritableUtils.writeVLong(out, firstRow);
+        WritableUtils.writeVLong(out, rowCount);
+      }
+    }
+    
+    /**
+     * A record reader that will generate the numbers startRow .. startRow + totalRows - 1, one per record.
+     */
+    static class RangeRecordReader extends RecordReader<LongWritable,NullWritable> {
+      long startRow;
+      long finishedRows;
+      long totalRows;
+      
+      /** Key of the record most recently produced by nextKeyValue(); null before the first record. */
+      LongWritable currentKey;
+      
+      public RangeRecordReader(RangeInputSplit split) {
+        startRow = split.firstRow;
+        finishedRows = 0;
+        totalRows = split.rowCount;
+      }
+      
+      @Override
+      public void close() throws IOException {}
+      
+      @Override
+      public float getProgress() throws IOException {
+        // an empty split is complete; avoids reporting 0/0 = NaN
+        if (totalRows <= 0)
+          return 1.0f;
+        return finishedRows / (float) totalRows;
+      }
+      
+      @Override
+      public LongWritable getCurrentKey() throws IOException, InterruptedException {
+        return currentKey;
+      }
+      
+      @Override
+      public NullWritable getCurrentValue() throws IOException, InterruptedException {
+        return NullWritable.get();
+      }
+      
+      @Override
+      public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {}
+      
+      @Override
+      public boolean nextKeyValue() throws IOException, InterruptedException {
+        if (finishedRows < totalRows) {
+          // capture the key before advancing the counter so the first record is startRow itself, not startRow + 1
+          currentKey = new LongWritable(startRow + finishedRows);
+          ++finishedRows;
+          return true;
+        }
+        return false;
+      }
+    }
+    
+    @Override
+    public RecordReader<LongWritable,NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException {
+      // reporter.setStatus("Creating record reader");
+      return new RangeRecordReader((RangeInputSplit) split);
+    }
+    
+    /**
+     * Create the desired number of splits, dividing the number of rows between the mappers. The last split also receives the remainder of the division.
+     * 
+     * @throws IllegalArgumentException
+     *           if the configured number of splits is less than 1
+     */
+    @Override
+    public List<InputSplit> getSplits(JobContext job) {
+      long totalRows = job.getConfiguration().getLong(NUMROWS, 0);
+      int numSplits = job.getConfiguration().getInt(NUMSPLITS, 1);
+      // reject values that would otherwise fail below with a division by zero or a negative list capacity
+      if (numSplits < 1)
+        throw new IllegalArgumentException(NUMSPLITS + " must be at least 1 but was " + numSplits);
+      long rowsPerSplit = totalRows / numSplits;
+      System.out.println("Generating " + totalRows + " using " + numSplits + " maps with step of " + rowsPerSplit);
+      ArrayList<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
+      long currentRow = 0;
+      for (int split = 0; split < numSplits - 1; ++split) {
+        splits.add(new RangeInputSplit(currentRow, rowsPerSplit));
+        currentRow += rowsPerSplit;
+      }
+      splits.add(new RangeInputSplit(currentRow, totalRows - currentRow));
+      System.out.println("Done Generating.");
+      return splits;
+    }
+    
+  }
+  
+  /** Configuration key holding the number of input splits (and therefore mappers) to create. */
+  private static final String NUMSPLITS = "terasort.overridesplits";
+  /** Configuration key holding the total number of rows to generate. */
+  private static final String NUMROWS = "terasort.numrows";
+  
+  /**
+   * Linear congruential generator of 32 bit values. It can be started on an arbitrary iteration by jumping to a precomputed seed and stepping forward from
+   * there.
+   */
+  static class RandomGenerator {
+    private long seed = 0;
+    private static final long mask32 = (1L << 32) - 1;
+    /**
+     * The number of iterations separating the precomputed seeds.
+     */
+    private static final int seedSkip = 128 * 1024 * 1024;
+    /**
+     * The precomputed seed values after every seedSkip iterations. There should be enough values so that a 2**32 iterations are covered.
+     */
+    private static final long[] seeds = new long[] {0L, 4160749568L, 4026531840L, 3892314112L, 3758096384L, 3623878656L, 3489660928L, 3355443200L, 3221225472L,
+        3087007744L, 2952790016L, 2818572288L, 2684354560L, 2550136832L, 2415919104L, 2281701376L, 2147483648L, 2013265920L, 1879048192L, 1744830464L,
+        1610612736L, 1476395008L, 1342177280L, 1207959552L, 1073741824L, 939524096L, 805306368L, 671088640L, 536870912L, 402653184L, 268435456L, 134217728L,};
+    
+    /**
+     * Start the random number generator on the given iteration.
+     * 
+     * @param initalIteration
+     *          the iteration number to start on
+     */
+    RandomGenerator(long initalIteration) {
+      // pick the table entry for the low 32 bits of the iteration number
+      final long wrapped = initalIteration & mask32;
+      seed = seeds[(int) (wrapped / seedSkip)];
+      // then step forward the remaining distance
+      final long stepsPastEntry = initalIteration % seedSkip;
+      long done = 0;
+      while (done < stepsPastEntry) {
+        next();
+        done++;
+      }
+    }
+    
+    /** Start the random number generator on iteration 0. */
+    RandomGenerator() {
+      this(0);
+    }
+    
+    /**
+     * Advance the generator by one iteration.
+     * 
+     * @return the new seed, a value between 0 and 2**32 - 1
+     */
+    long next() {
+      final long advanced = seed * 3141592621L + 663896637;
+      seed = advanced & mask32;
+      return seed;
+    }
+  }
+  
+  /**
+   * The Mapper class that, given a row number, builds one Mutation (random key as the row, column family "c", padded row id as the column qualifier, filler
+   * as the value) and writes it to the configured table.
+   */
+  public static class SortGenMapper extends Mapper<LongWritable,NullWritable,Text,Mutation> {
+    // job parameters, loaded from the configuration in setup()
+    private Text table = null;
+    private int minkeylength = 0;
+    private int maxkeylength = 0;
+    private int minvaluelength = 0;
+    private int maxvaluelength = 0;
+    
+    // buffers reused across map() calls
+    private Text key = new Text();
+    private Text value = new Text();
+    // created by the first map() call, see map()
+    private RandomGenerator rand;
+    private byte[] keyBytes; // = new byte[12];
+    // ten spaces; source of the left padding in getRowIdString()
+    private byte[] spaces = "          ".getBytes();
+    // filler[i] is ten copies of the i-th upper case letter ('A' + i)
+    private byte[][] filler = new byte[26][];
+    {
+      for (int i = 0; i < 26; ++i) {
+        filler[i] = new byte[10];
+        for (int j = 0; j < 10; ++j) {
+          filler[i][j] = (byte) ('A' + i);
+        }
+      }
+    }
+    
+    /**
+     * Source of the random key length chosen in addKey(). Created without an explicit seed.
+     */
+    private Random random = new Random();
+    
+    /**
+     * Fill key with random characters. The length is chosen between minkeylength and maxkeylength (both inclusive); the characters come from rand, four per
+     * call to rand.next(), each in the range ' ' .. '~'.
+     */
+    private void addKey() {
+      int range = random.nextInt(maxkeylength - minkeylength + 1);
+      int keylen = range + minkeylength;
+      // round up to a multiple of 4 (a full extra group is added when keylen is already a multiple of 4)
+      int keyceil = keylen + (4 - (keylen % 4));
+      keyBytes = new byte[keyceil];
+      
+      long temp = 0;
+      for (int i = 0; i < keyceil / 4; i++) {
+        // NOTE(review): dividing by 52 presumably scales the 32 bit value down to roughly 95^4 - confirm
+        temp = rand.next() / 52;
+        // peel off four base-95 digits, least significant digit into the last byte of the group
+        keyBytes[3 + 4 * i] = (byte) (' ' + (temp % 95));
+        temp /= 95;
+        keyBytes[2 + 4 * i] = (byte) (' ' + (temp % 95));
+        temp /= 95;
+        keyBytes[1 + 4 * i] = (byte) (' ' + (temp % 95));
+        temp /= 95;
+        keyBytes[4 * i] = (byte) (' ' + (temp % 95));
+      }
+      // only the first keylen bytes are used; the rest of the last group is dropped
+      key.set(keyBytes, 0, keylen);
+    }
+    
+    /**
+     * Build the row id as text, left padded with spaces to a width of 10.
+     * 
+     * @param rowId
+     *          the current row number; NOTE(review): it is cast to int before formatting, so values outside the int range do not format as given
+     * @return a new Text holding the padded row id
+     */
+    private Text getRowIdString(long rowId) {
+      Text paddedRowIdString = new Text();
+      byte[] rowid = Integer.toString((int) rowId).getBytes();
+      int padSpace = 10 - rowid.length;
+      if (padSpace > 0) {
+        paddedRowIdString.append(spaces, 0, 10 - rowid.length);
+      }
+      // at most the first 10 bytes of the number are kept
+      paddedRowIdString.append(rowid, 0, Math.min(rowid.length, 10));
+      return paddedRowIdString;
+    }
+    
+    /**
+     * Append the filler bytes to value. The total length is chosen between minvaluelength and maxvaluelength (both inclusive) and is written as blocks of 10
+     * identical upper case letters followed by one shorter block for the remainder.
+     * 
+     * @param rowId
+     *          the current row number
+     */
+    private void addFiller(long rowId) {
+      int base = (int) ((rowId * 8) % 26);
+      
+      // Get Random var
+      // seeded from the current state of rand, so the length follows from the generator state
+      Random random = new Random(rand.seed);
+      
+      int range = random.nextInt(maxvaluelength - minvaluelength + 1);
+      int valuelen = range + minvaluelength;
+      
+      // the letter of each block depends on base and on the length still to be written
+      while (valuelen > 10) {
+        value.append(filler[(base + valuelen) % 26], 0, 10);
+        valuelen -= 10;
+      }
+      
+      if (valuelen > 0)
+        value.append(filler[(base + valuelen) % 26], 0, valuelen);
+    }
+    
+    /**
+     * Generate the key and value for one row and write them to the output as a Mutation, using the table name read in setup() as the output key.
+     * 
+     * @param row
+     *          the row number to generate
+     * @param ignored
+     *          unused
+     * @param context
+     *          receives the status updates and the Mutation
+     */
+    public void map(LongWritable row, NullWritable ignored, Context context) throws IOException, InterruptedException {
+      context.setStatus("Entering");
+      long rowId = row.get();
+      // the generator is positioned once, from the first row this mapper sees
+      if (rand == null) {
+        // we use 3 random numbers per a row
+        rand = new RandomGenerator(rowId * 3);
+      }
+      addKey();
+      value.clear();
+      // addRowId(rowId);
+      addFiller(rowId);
+      
+      // New
+      Mutation m = new Mutation(key);
+      m.put(new Text("c"), // column family
+          getRowIdString(rowId), // column qual
+          new Value(value.toString().getBytes())); // data
+      
+      context.setStatus("About to add to accumulo");
+      context.write(table, m);
+      context.setStatus("Added to accumulo " + key.toString());
+    }
+    
+    /**
+     * Read the key/value length limits and the table name from the job configuration. The lengths default to 0 when a setting is absent.
+     */
+    @Override
+    public void setup(Context job) {
+      minkeylength = job.getConfiguration().getInt("cloudgen.minkeylength", 0);
+      maxkeylength = job.getConfiguration().getInt("cloudgen.maxkeylength", 0);
+      minvaluelength = job.getConfiguration().getInt("cloudgen.minvaluelength", 0);
+      maxvaluelength = job.getConfiguration().getInt("cloudgen.maxvaluelength", 0);
+      table = new Text(job.getConfiguration().get("cloudgen.tablename"));
+    }
+  }
+  
+  /**
+   * Command line entry point: runs a new TeraSortIngest through ToolRunner and always terminates the JVM with the tool's return code.
+   * 
+   * @param args
+   *          command line arguments, passed through to ToolRunner
+   * @throws Exception
+   *           anything ToolRunner.run propagates
+   */
+  public static void main(String[] args) throws Exception {
+    final TeraSortIngest tool = new TeraSortIngest();
+    System.exit(ToolRunner.run(CachedConfiguration.getInstance(), tool, args));
+  }
+  
+  @Override
+  public int run(String[] args) throws Exception {
+    Job job = new Job(getConf(), "TeraSortCloud");
+    job.setJarByClass(this.getClass());
+    
+    job.setInputFormatClass(RangeInputFormat.class);
+    job.setMapperClass(SortGenMapper.class);
+    job.setMapOutputKeyClass(Text.class);
+    job.setMapOutputValueClass(Mutation.class);
+    
+    job.setNumReduceTasks(0);
+    
+    job.setOutputFormatClass(AccumuloOutputFormat.class);
+    AccumuloOutputFormat.setZooKeeperInstance(job, args[6], args[7]);
+    AccumuloOutputFormat.setOutputInfo(job, args[8], args[9].getBytes(), true, null);
+    AccumuloOutputFormat.setMaxMutationBufferSize(job, 10L * 1000 * 1000);
+    
+    Configuration conf = job.getConfiguration();
+    conf.setLong(NUMROWS, Long.parseLong(args[0]));
+    conf.setInt("cloudgen.minkeylength", Integer.parseInt(args[1]));
+    conf.setInt("cloudgen.maxkeylength", Integer.parseInt(args[2]));
+    conf.setInt("cloudgen.minvaluelength", Integer.parseInt(args[3]));
+    conf.setInt("cloudgen.maxvaluelength", Integer.parseInt(args[4]));
+    conf.set("cloudgen.tablename", args[5]);
+    
+    if (args.length > 10)
+      conf.setInt(NUMSPLITS, Integer.parseInt(args[10]));
+    
+    job.waitForCompletion(true);
+    return job.isSuccessful() ? 0 : 1;
+  }
+}

Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TeraSortIngest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/WordCount.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/WordCount.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/WordCount.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/WordCount.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.commons.cli.BasicParser;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.Parser;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * A simple map reduce job that inserts word counts into accumulo. See the README for instructions on how to run this.
+ * 
+ */
+public class WordCount extends Configured implements Tool {
+  private static final Options opts = new Options();
+  private static final Option passwordOpt = new Option("p", "password", true, "password");
+  private static final Option usernameOpt = new Option("u", "username", true, "username");
+  private static final String USAGE = "wordCount <instance name> <zoo keepers> <input dir> <output table>";
+  
+  static {
+    opts.addOption(usernameOpt);
+    opts.addOption(passwordOpt);
+  }
+  
+  /**
+   * Splits each input line on whitespace and writes one Mutation per word: row = the word, column family "count", column qualifier "20080906", value "1".
+   */
+  public static class MapClass extends Mapper<LongWritable,Text,Text,Mutation> {
+    @Override
+    public void map(LongWritable key, Text value, Context output) throws IOException {
+      String[] words = value.toString().split("\\s+");
+      
+      for (String word : words) {
+        // split() produces an empty token for an empty line and for a line that starts with whitespace; that is not a word
+        if (word.length() == 0)
+          continue;
+        
+        Mutation mutation = new Mutation(new Text(word));
+        mutation.put(new Text("count"), new Text("20080906"), new Value("1".getBytes()));
+        
+        try {
+          // null key: no table is named here; the output table is the one given to setOutputInfo in run()
+          output.write(null, mutation);
+        } catch (InterruptedException e) {
+          // do not carry on as if nothing happened: keep the interrupt status and fail the task
+          Thread.currentThread().interrupt();
+          throw new IOException("Interrupted while writing the mutation for word '" + word + "'", e);
+        }
+      }
+    }
+  }
+  
+  /**
+   * Parses the command line, then configures and runs the map-only word count job.
+   * 
+   * @param unprocessed_args
+   *          [-u username] [-p password] instanceName zookeepers inputDir outputTable
+   * @return 0 if the job succeeded, 1 if it failed or the wrong number of parameters was given
+   */
+  @Override
+  public int run(String[] unprocessed_args) throws Exception {
+    Parser p = new BasicParser();
+    
+    CommandLine cl = p.parse(opts, unprocessed_args);
+    String[] args = cl.getArgs();
+    
+    String username = cl.getOptionValue(usernameOpt.getOpt(), "root");
+    String password = cl.getOptionValue(passwordOpt.getOpt(), "secret");
+    
+    if (args.length != 4) {
+      System.out.println("ERROR: Wrong number of parameters: " + args.length + " instead of 4.");
+      return printUsage();
+    }
+    
+    Job job = new Job(getConf(), WordCount.class.getName());
+    job.setJarByClass(this.getClass());
+    
+    job.setInputFormatClass(TextInputFormat.class);
+    TextInputFormat.setInputPaths(job, new Path(args[2]));
+    
+    job.setMapperClass(MapClass.class);
+    
+    // map-only job: mutations go straight from the mapper to the output format
+    job.setNumReduceTasks(0);
+    
+    job.setOutputFormatClass(AccumuloOutputFormat.class);
+    job.setOutputKeyClass(Text.class);
+    job.setOutputValueClass(Mutation.class);
+    AccumuloOutputFormat.setOutputInfo(job, username, password.getBytes(), true, args[3]);
+    AccumuloOutputFormat.setZooKeeperInstance(job, args[0], args[1]);
+    job.waitForCompletion(true);
+    // report a failed job to the caller instead of always claiming success
+    return job.isSuccessful() ? 0 : 1;
+  }
+  
+  /**
+   * Prints the usage message.
+   * 
+   * @return 1, the return code for a rejected command line
+   */
+  private int printUsage() {
+    HelpFormatter hf = new HelpFormatter();
+    hf.printHelp(USAGE, opts);
+    return 1;
+  }
+  
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(CachedConfiguration.getInstance(), new WordCount(), args);
+    System.exit(res);
+  }
+}

Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/WordCount.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/BulkIngestExample.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/BulkIngestExample.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/BulkIngestExample.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/BulkIngestExample.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.mapreduce.bulk;
+
+import java.io.BufferedOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Collection;
+
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.Instance;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat;
+import org.apache.accumulo.core.client.mapreduce.lib.partition.RangePartitioner;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.accumulo.core.util.TextUtil;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * Example map reduce job that bulk ingest data into an accumulo table. The expected input is text files containing tab separated key value pairs on each line.
+ */
+public class BulkIngestExample extends Configured implements Tool {
+  public static class MapClass extends Mapper<LongWritable,Text,Text,Text> {
+    private Text outputKey = new Text();
+    private Text outputValue = new Text();
+    
+    @Override
+    public void map(LongWritable key, Text value, Context output) throws IOException, InterruptedException {
+      // split on tab
+      int index = -1;
+      for (int i = 0; i < value.getLength(); i++) {
+        if (value.getBytes()[i] == '\t') {
+          index = i;
+          break;
+        }
+      }
+      
+      if (index > 0) {
+        outputKey.set(value.getBytes(), 0, index);
+        outputValue.set(value.getBytes(), index + 1, value.getLength() - (index + 1));
+        output.write(outputKey, outputValue);
+      }
+    }
+  }
+  
+  public static class ReduceClass extends Reducer<Text,Text,Key,Value> {
+    public void reduce(Text key, Iterable<Text> values, Context output) throws IOException, InterruptedException {
+      // be careful with the timestamp... if you run on a cluster
+      // where the time is whacked you may not see your updates in
+      // accumulo if there is already an existing value with a later
+      // timestamp in accumulo... so make sure ntp is running on the
+      // cluster or consider using logical time... one options is
+      // to let accumulo set the time
+      long timestamp = System.currentTimeMillis();
+      
+      int index = 0;
+      for (Text value : values) {
+        Key outputKey = new Key(key, new Text("foo"), new Text("" + index), timestamp);
+        index++;
+        
+        Value outputValue = new Value(value.getBytes(), 0, value.getLength());
+        output.write(outputKey, outputValue);
+      }
+    }
+  }
+  
+  public int run(String[] args) {
+    if (args.length != 7) {
+      System.out.println("ERROR: Wrong number of parameters: " + args.length + " instead of 7.");
+      return printUsage();
+    }
+    
+    Configuration conf = getConf();
+    PrintStream out = null;
+    try {
+      Job job = new Job(conf, "bulk ingest example");
+      job.setJarByClass(this.getClass());
+      
+      job.setInputFormatClass(TextInputFormat.class);
+      
+      job.setMapperClass(MapClass.class);
+      job.setMapOutputKeyClass(Text.class);
+      job.setMapOutputValueClass(Text.class);
+      
+      job.setReducerClass(ReduceClass.class);
+      job.setOutputFormatClass(AccumuloFileOutputFormat.class);
+      
+      Instance instance = new ZooKeeperInstance(args[0], args[1]);
+      String user = args[2];
+      byte[] pass = args[3].getBytes();
+      String tableName = args[4];
+      String inputDir = args[5];
+      String workDir = args[6];
+      
+      Connector connector = instance.getConnector(user, pass);
+      
+      TextInputFormat.setInputPaths(job, new Path(inputDir));
+      AccumuloFileOutputFormat.setOutputPath(job, new Path(workDir + "/files"));
+      
+      FileSystem fs = FileSystem.get(conf);
+      out = new PrintStream(new BufferedOutputStream(fs.create(new Path(workDir + "/splits.txt"))));
+      
+      Collection<Text> splits = connector.tableOperations().getSplits(tableName, 100);
+      for (Text split : splits)
+        out.println(new String(Base64.encodeBase64(TextUtil.getBytes(split))));
+      
+      job.setNumReduceTasks(splits.size() + 1);
+      out.close();
+      
+      job.setPartitionerClass(RangePartitioner.class);
+      RangePartitioner.setSplitFile(job, workDir + "/splits.txt");
+      
+      job.waitForCompletion(true);
+      Path failures = new Path(workDir, "failures");
+      fs.delete(failures, true);
+      fs.mkdirs(new Path(workDir, "failures"));
+      connector.tableOperations().importDirectory(tableName, workDir + "/files", workDir + "/failures", false);
+      
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    } finally {
+      if (out != null)
+        out.close();
+    }
+    
+    return 0;
+  }
+  
+  private int printUsage() {
+    System.out.println("accumulo " + this.getClass().getName() + " <instanceName> <zooKeepers> <username> <password> <table> <input dir> <work dir> ");
+    return 0;
+  }
+  
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(CachedConfiguration.getInstance(), new BulkIngestExample(), args);
+    System.exit(res);
+  }
+}

Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/BulkIngestExample.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/GenerateTestData.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/GenerateTestData.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/GenerateTestData.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/GenerateTestData.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.mapreduce.bulk;
+
+import java.io.BufferedOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class GenerateTestData {
+  
+  public static void main(String[] args) throws IOException {
+    int startRow = Integer.parseInt(args[0]);
+    int numRows = Integer.parseInt(args[1]);
+    String outputFile = args[2];
+    
+    Configuration conf = CachedConfiguration.getInstance();
+    FileSystem fs = FileSystem.get(conf);
+    
+    PrintStream out = new PrintStream(new BufferedOutputStream(fs.create(new Path(outputFile))));
+    
+    for (int i = 0; i < numRows; i++) {
+      out.println(String.format("row_%08d\tvalue_%08d", i + startRow, i + startRow));
+    }
+    
+    out.close();
+  }
+  
+}

Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/GenerateTestData.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/SetupTable.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/SetupTable.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/SetupTable.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/SetupTable.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.mapreduce.bulk;
+
+import java.util.TreeSet;
+
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.TableExistsException;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.hadoop.io.Text;
+
+/**
+ * Creates a table, optionally adding an initial set of split points given on the command line.
+ */
+public class SetupTable {
+  
+  /**
+   * @param args
+   *          <code>&lt;instance&gt; &lt;zookeepers&gt; &lt;username&gt; &lt;password&gt; &lt;table name&gt; [&lt;split point&gt;{ &lt;split point&gt;}]</code>
+   * @throws TableExistsException
+   *           if the table to create already exists
+   */
+  public static void main(String[] args) throws AccumuloException, AccumuloSecurityException, TableExistsException {
+    // check the argument count before reading any argument, so a short command line prints the usage
+    // instead of failing with an ArrayIndexOutOfBoundsException
+    if (args.length < 5) {
+      System.err.println("Usage : SetupTable <instance> <zookeepers> <username> <password> <table name> [<split point>{ <split point>}]");
+      return;
+    }
+    
+    Connector conn = new ZooKeeperInstance(args[0], args[1]).getConnector(args[2], args[3].getBytes());
+    String tableName = args[4];
+    
+    conn.tableOperations().create(tableName);
+    
+    if (args.length > 5) {
+      // every argument after the table name is an initial split point
+      TreeSet<Text> initialPartitions = new TreeSet<Text>();
+      for (int i = 5; i < args.length; ++i)
+        initialPartitions.add(new Text(args[i]));
+      
+      try {
+        conn.tableOperations().addSplits(tableName, initialPartitions);
+      } catch (TableNotFoundException e) {
+        // unlikely, the table was created just above
+        throw new RuntimeException(e);
+      }
+    }
+  }
+}

Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/SetupTable.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/VerifyIngest.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/VerifyIngest.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/VerifyIngest.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/VerifyIngest.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.mapreduce.bulk;
+
+import java.util.Iterator;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.Constants;
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.Instance;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.hadoop.io.Text;
+import org.apache.log4j.Logger;
+
+public class VerifyIngest {
+  private static final Logger log = Logger.getLogger(VerifyIngest.class);
+  
+  public static void main(String[] args) throws AccumuloException, AccumuloSecurityException, TableNotFoundException {
+    if (args.length != 7) {
+      System.err.println("VerifyIngest <instance name> <zoo keepers> <username> <password> <table> <startRow> <numRows> ");
+      return;
+    }
+    
+    String instanceName = args[0];
+    String zooKeepers = args[1];
+    String user = args[2];
+    byte[] pass = args[3].getBytes();
+    String table = args[4];
+    
+    int startRow = Integer.parseInt(args[5]);
+    int numRows = Integer.parseInt(args[6]);
+    
+    Instance instance = new ZooKeeperInstance(instanceName, zooKeepers);
+    Connector connector = instance.getConnector(user, pass);
+    Scanner scanner = connector.createScanner(table, Constants.NO_AUTHS);
+    
+    scanner.setRange(new Range(new Text(String.format("row_%08d", startRow)), null));
+    
+    Iterator<Entry<Key,Value>> si = scanner.iterator();
+    
+    boolean ok = true;
+    
+    for (int i = startRow; i < numRows; i++) {
+      
+      if (si.hasNext()) {
+        Entry<Key,Value> entry = si.next();
+        
+        if (!entry.getKey().getRow().toString().equals(String.format("row_%08d", i))) {
+          log.error("unexpected row key " + entry.getKey().getRow().toString() + " expected " + String.format("row_%08d", i));
+          ok = false;
+        }
+        
+        if (!entry.getValue().toString().equals(String.format("value_%08d", i))) {
+          log.error("unexpected value " + entry.getValue().toString() + " expected " + String.format("value_%08d", i));
+          ok = false;
+        }
+        
+      } else {
+        log.error("no more rows, expected " + String.format("row_%08d", i));
+        ok = false;
+        break;
+      }
+      
+    }
+    
+    if (ok)
+      System.out.println("OK");
+    
+    System.exit(ok ? 0 : 1);
+  }
+  
+}

Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/bulk/VerifyIngest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/ContinuousQuery.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/ContinuousQuery.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/ContinuousQuery.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/ContinuousQuery.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.shard;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Map.Entry;
+import java.util.Random;
+
+import org.apache.accumulo.core.Constants;
+import org.apache.accumulo.core.client.BatchScanner;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.user.IntersectingIterator;
+import org.apache.hadoop.io.Text;
+
+/**
+ * Using the doc2word table created by Reverse.java, this program randomly selects N words per document. Then it continually queries a random set of words in
+ * the shard table (created by Index.java) using the intersecting iterator. For every query it prints the terms, the number of entries returned and the
+ * elapsed time in seconds.
+ */
+public class ContinuousQuery {
+  
+  /**
+   * @param args
+   *          <code>&lt;instance&gt; &lt;zoo keepers&gt; &lt;shard table&gt; &lt;doc2word table&gt; &lt;user&gt; &lt;pass&gt; &lt;num query terms&gt; [iterations]</code>;
+   *          when iterations is omitted the program queries until Long.MAX_VALUE iterations have run
+   */
+  public static void main(String[] args) throws Exception {
+    
+    if (args.length != 7 && args.length != 8) {
+      System.err.println("Usage : " + ContinuousQuery.class.getName()
+          + " <instance> <zoo keepers> <shard table> <doc2word table> <user> <pass> <num query terms> [iterations]");
+      System.exit(-1);
+    }
+    
+    String instance = args[0];
+    String zooKeepers = args[1];
+    String table = args[2];
+    String docTable = args[3];
+    String user = args[4];
+    String pass = args[5];
+    int numTerms = Integer.parseInt(args[6]);
+    long iterations = Long.MAX_VALUE;
+    if (args.length > 7)
+      iterations = Long.parseLong(args[7]);
+    
+    // a negative count would fail later with a NegativeArraySizeException, zero yields empty queries
+    if (numTerms < 1) {
+      System.err.println("<num query terms> must be at least 1, was " + numTerms);
+      System.exit(-1);
+    }
+    
+    ZooKeeperInstance zki = new ZooKeeperInstance(instance, zooKeepers);
+    Connector conn = zki.getConnector(user, pass.getBytes());
+    
+    ArrayList<Text[]> randTerms = findRandomTerms(conn.createScanner(docTable, Constants.NO_AUTHS), numTerms);
+    
+    // Random.nextInt(0) throws IllegalArgumentException, so stop with a clear message when there is nothing to pick from
+    if (randTerms.isEmpty()) {
+      System.err.println("No document in table " + docTable + " has at least " + numTerms + " terms, nothing to query");
+      System.exit(-1);
+    }
+    
+    Random rand = new Random();
+    
+    BatchScanner bs = conn.createBatchScanner(table, Constants.NO_AUTHS, 20);
+    try {
+      for (long i = 0; i < iterations; i += 1) {
+        Text[] columns = randTerms.get(rand.nextInt(randTerms.size()));
+        
+        // the scanner is reused, so drop the previous query's configuration first
+        bs.clearScanIterators();
+        bs.clearColumns();
+        
+        IteratorSetting ii = new IteratorSetting(20, "ii", IntersectingIterator.class);
+        IntersectingIterator.setColumnFamilies(ii, columns);
+        bs.addScanIterator(ii);
+        bs.setRanges(Collections.singleton(new Range()));
+        
+        long t1 = System.currentTimeMillis();
+        int count = 0;
+        for (@SuppressWarnings("unused")
+        Entry<Key,Value> entry : bs) {
+          count++;
+        }
+        long t2 = System.currentTimeMillis();
+        
+        System.out.printf("  %s %,d %6.3f\n", Arrays.asList(columns), count, (t2 - t1) / 1000.0);
+      }
+    } finally {
+      // close the scanner even when a query fails
+      bs.close();
+    }
+    
+  }
+  
+  /**
+   * Reads the scanner and, for every row that has at least numTerms entries, picks numTerms of that row's column families at random.
+   * 
+   * @param scanner
+   *          scanner whose entries are grouped by row; each entry's column family is treated as a word
+   * @param numTerms
+   *          number of words to select per row
+   * @return one array of numTerms words for each row that had enough words
+   */
+  private static ArrayList<Text[]> findRandomTerms(Scanner scanner, int numTerms) {
+    
+    Text currentRow = null;
+    
+    ArrayList<Text> words = new ArrayList<Text>();
+    ArrayList<Text[]> ret = new ArrayList<Text[]>();
+    
+    Random rand = new Random();
+    
+    for (Entry<Key,Value> entry : scanner) {
+      Key key = entry.getKey();
+      
+      if (currentRow == null)
+        currentRow = key.getRow();
+      
+      // row changed: finish the previous row before collecting words for the new one
+      if (!currentRow.equals(key.getRow())) {
+        selectRandomWords(words, ret, rand, numTerms);
+        words.clear();
+        currentRow = key.getRow();
+      }
+      
+      words.add(key.getColumnFamily());
+      
+    }
+    
+    // the last row has no following row to trigger its selection
+    selectRandomWords(words, ret, rand, numTerms);
+    
+    return ret;
+  }
+  
+  /**
+   * Shuffles words and appends its first numTerms elements to ret as a new array; does nothing when words has fewer than numTerms elements.
+   */
+  private static void selectRandomWords(ArrayList<Text> words, ArrayList<Text[]> ret, Random rand, int numTerms) {
+    if (words.size() >= numTerms) {
+      Collections.shuffle(words, rand);
+      Text docWords[] = new Text[numTerms];
+      for (int i = 0; i < docWords.length; i++) {
+        docWords[i] = words.get(i);
+      }
+      
+      ret.add(docWords);
+    }
+  }
+}

Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/ContinuousQuery.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Index.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Index.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Index.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Index.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.shard;
+
+import java.io.File;
+import java.io.FileReader;
+import java.util.HashSet;
+
+import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.hadoop.io.Text;
+
+/**
+ * This program indexes a set of documents given on the command line into a shard table.
+ * 
+ * What it writes to the table is:
+ * <ul>
+ * <li>row = partition id</li>
+ * <li>column family = term</li>
+ * <li>column qualifier = document id</li>
+ * </ul>
+ */
+public class Index {
+  
+  /**
+   * Formats the absolute value of a partition number as an eight digit, zero padded hexadecimal row id.
+   */
+  static Text genPartition(int partition) {
+    return new Text(String.format("%08x", Math.abs(partition)));
+  }
+  
+  /**
+   * Indexes one document: splits it into tokens, lower cases them and queues a single mutation holding one entry per distinct token.
+   * 
+   * @param numPartitions
+   *          number of partitions; the document's partition is derived from its hash code modulo this value, so it must not be zero
+   * @param docId
+   *          document id, written as the column qualifier
+   * @param doc
+   *          document text
+   * @param splitRegex
+   *          regular expression used to split the text into tokens
+   * @param bw
+   *          writer the mutation is added to
+   */
+  public static void index(int numPartitions, Text docId, String doc, String splitRegex, BatchWriter bw) throws Exception {
+    
+    String[] tokens = doc.split(splitRegex);
+    
+    Text partition = genPartition(doc.hashCode() % numPartitions);
+    
+    Mutation m = new Mutation(partition);
+    
+    HashSet<String> tokensSeen = new HashSet<String>();
+    
+    for (String token : tokens) {
+      // split() yields an empty first token when the text starts with a delimiter (and for empty text); that is not a term
+      if (token.length() == 0)
+        continue;
+      
+      token = token.toLowerCase();
+      
+      // add() returns false for a token already seen in this document
+      if (tokensSeen.add(token)) {
+        m.put(new Text(token), docId, new Value(new byte[0]));
+      }
+    }
+    
+    if (m.size() > 0)
+      bw.addMutation(m);
+  }
+  
+  /**
+   * Indexes a file using its absolute path as the document id, or recursively indexes everything below a directory.
+   */
+  private static void index(int numPartitions, File src, String splitRegex, BatchWriter bw) throws Exception {
+    
+    if (src.isDirectory()) {
+      File[] children = src.listFiles();
+      // listFiles() returns null when the directory cannot be read
+      if (children == null)
+        throw new java.io.IOException("Unable to list directory " + src.getAbsolutePath());
+      
+      for (File child : children) {
+        index(numPartitions, child, splitRegex, bw);
+      }
+    } else {
+      StringBuilder sb = new StringBuilder();
+      
+      FileReader fr = new FileReader(src);
+      try {
+        char data[] = new char[4096];
+        int len;
+        while ((len = fr.read(data)) != -1) {
+          sb.append(data, 0, len);
+        }
+      } finally {
+        // do not leak the file handle when reading fails
+        fr.close();
+      }
+      
+      index(numPartitions, new Text(src.getAbsolutePath()), sb.toString(), splitRegex, bw);
+    }
+    
+  }
+  
+  /**
+   * Connects to the given instance and creates a batch writer for the table.
+   */
+  private static BatchWriter setupBatchWriter(String instance, String zooKeepers, String table, String user, String pass) throws Exception {
+    ZooKeeperInstance zinstance = new ZooKeeperInstance(instance, zooKeepers);
+    Connector conn = zinstance.getConnector(user, pass.getBytes());
+    return conn.createBatchWriter(table, 50000000, 300000L, 4);
+  }
+  
+  /**
+   * @param args
+   *          <code>&lt;instance&gt; &lt;zoo keepers&gt; &lt;table&gt; &lt;user&gt; &lt;pass&gt; &lt;num partitions&gt; &lt;file&gt;{ &lt;file&gt;}</code>
+   */
+  public static void main(String[] args) throws Exception {
+    
+    if (args.length < 7) {
+      System.err.println("Usage : " + Index.class.getName() + " <instance> <zoo keepers> <table> <user> <pass> <num partitions> <file>{ <file>}");
+      System.exit(-1);
+    }
+    
+    String instance = args[0];
+    String zooKeepers = args[1];
+    String table = args[2];
+    String user = args[3];
+    String pass = args[4];
+    
+    int numPartitions = Integer.parseInt(args[5]);
+    
+    // zero would fail with an ArithmeticException on the first document
+    if (numPartitions < 1) {
+      System.err.println("<num partitions> must be at least 1, was " + numPartitions);
+      System.exit(-1);
+    }
+    
+    String splitRegex = "\\W+";
+    
+    BatchWriter bw = setupBatchWriter(instance, zooKeepers, table, user, pass);
+    try {
+      for (int i = 6; i < args.length; i++) {
+        index(numPartitions, new File(args[i]), splitRegex, bw);
+      }
+    } finally {
+      // close the writer even when indexing one of the files fails
+      bw.close();
+    }
+    
+  }
+  
+}

Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Index.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Query.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Query.java?rev=1230608&view=auto
==============================================================================
--- incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Query.java (added)
+++ incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Query.java Thu Jan 12 16:06:14 2012
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.shard;
+
+import java.util.Collections;
+import java.util.Map.Entry;
+
+import org.apache.accumulo.core.Constants;
+import org.apache.accumulo.core.client.BatchScanner;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.user.IntersectingIterator;
+import org.apache.hadoop.io.Text;
+
+/**
+ * This program queries a set of terms in the shard table (populated by Index.java) using the intersecting iterator. It prints the column qualifier of every
+ * entry returned.
+ */
+public class Query {
+  
+  /**
+   * @param args
+   *          <code>&lt;instance&gt; &lt;zoo keepers&gt; &lt;table&gt; &lt;user&gt; &lt;pass&gt; &lt;term&gt;{ &lt;term&gt;}</code>
+   */
+  public static void main(String[] args) throws Exception {
+    
+    if (args.length < 6) {
+      System.err.println("Usage : " + Query.class.getName() + " <instance> <zoo keepers> <table> <user> <pass> <term>{ <term>}");
+      System.exit(-1);
+    }
+    
+    String instance = args[0];
+    String zooKeepers = args[1];
+    String table = args[2];
+    String user = args[3];
+    String pass = args[4];
+    
+    ZooKeeperInstance zki = new ZooKeeperInstance(instance, zooKeepers);
+    Connector conn = zki.getConnector(user, pass.getBytes());
+    
+    // every argument after the password is a query term
+    Text columns[] = new Text[args.length - 5];
+    for (int i = 5; i < args.length; i++) {
+      columns[i - 5] = new Text(args[i]);
+    }
+    
+    BatchScanner bs = conn.createBatchScanner(table, Constants.NO_AUTHS, 20);
+    try {
+      IteratorSetting ii = new IteratorSetting(20, "ii", IntersectingIterator.class);
+      IntersectingIterator.setColumnFamilies(ii, columns);
+      bs.addScanIterator(ii);
+      bs.setRanges(Collections.singleton(new Range()));
+      for (Entry<Key,Value> entry : bs) {
+        System.out.println("  " + entry.getKey().getColumnQualifier());
+      }
+    } finally {
+      // close the scanner once the query is done, as ContinuousQuery does
+      bs.close();
+    }
+    
+  }
+  
+}

Propchange: incubator/accumulo/trunk/src/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/Query.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message