gora-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From lewi...@apache.org
Subject [2/4] git commit: GORA 73 Merge goraci testing suite with master branch
Date Mon, 01 Sep 2014 19:54:17 GMT
GORA 73 Merge goraci testing suite with master branch


Project: http://git-wip-us.apache.org/repos/asf/gora/repo
Commit: http://git-wip-us.apache.org/repos/asf/gora/commit/a60a3370
Tree: http://git-wip-us.apache.org/repos/asf/gora/tree/a60a3370
Diff: http://git-wip-us.apache.org/repos/asf/gora/diff/a60a3370

Branch: refs/heads/master
Commit: a60a33703ffa774a32f11bceb8f3bc2474874681
Parents: b167b3a
Author: Lewis John McGibbney <lewis.j.mcgibbney@jpl.nasa.gov>
Authored: Sun Aug 31 17:45:40 2014 -0700
Committer: Lewis John McGibbney <lewis.j.mcgibbney@jpl.nasa.gov>
Committed: Sun Aug 31 17:45:40 2014 -0700

----------------------------------------------------------------------
 .../gora/examples/generated/Employee.java       |  17 +-
 .../examples/generated/ImmutableFields.java     |  17 +-
 .../gora/examples/generated/Metadata.java       |  17 +-
 .../gora/examples/generated/TokenDatum.java     |  17 +-
 .../org/apache/gora/examples/generated/V2.java  |  17 +-
 .../apache/gora/examples/generated/WebPage.java |  17 +-
 gora-goraci/.gitignore                          |  34 ++
 gora-goraci/README                              | 256 +++++++++++
 gora-goraci/goraci.sh                           | 104 +++++
 gora-goraci/pom.xml                             | 275 ++++++++++++
 gora-goraci/src/main/avro/cinode.json           |  10 +
 gora-goraci/src/main/avro/flushed.json          |   8 +
 .../java/org/apache/gora/goraci/Delete.java     |  57 +++
 .../java/org/apache/gora/goraci/Generator.java  | 351 +++++++++++++++
 .../main/java/org/apache/gora/goraci/Loop.java  | 164 +++++++
 .../main/java/org/apache/gora/goraci/Print.java |  95 +++++
 .../java/org/apache/gora/goraci/Verify.java     | 294 +++++++++++++
 .../java/org/apache/gora/goraci/Walker.java     | 129 ++++++
 .../apache/gora/goraci/generated/CINode.java    | 424 +++++++++++++++++++
 .../apache/gora/goraci/generated/Flushed.java   | 259 +++++++++++
 .../main/resources/gora-accumulo-mapping.xml    |  22 +
 .../main/resources/gora-cassandra-mapping.xml   |  25 ++
 .../src/main/resources/gora-hbase-mapping.xml   |  22 +
 .../src/main/resources/gora-sql-mapping.xml     |  25 ++
 gora-goraci/src/main/resources/gora.properties  |  76 ++++
 pom.xml                                         |   1 +
 26 files changed, 2715 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/gora/blob/a60a3370/gora-core/src/examples/java/org/apache/gora/examples/generated/Employee.java
----------------------------------------------------------------------
diff --git a/gora-core/src/examples/java/org/apache/gora/examples/generated/Employee.java b/gora-core/src/examples/java/org/apache/gora/examples/generated/Employee.java
index fb76b30..dd6f3a9 100644
--- a/gora-core/src/examples/java/org/apache/gora/examples/generated/Employee.java
+++ b/gora-core/src/examples/java/org/apache/gora/examples/generated/Employee.java
@@ -1,7 +1,18 @@
 /**
- * Autogenerated by Avro
- * 
- * DO NOT EDIT DIRECTLY
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */
 package org.apache.gora.examples.generated;  
 @SuppressWarnings("all")

http://git-wip-us.apache.org/repos/asf/gora/blob/a60a3370/gora-core/src/examples/java/org/apache/gora/examples/generated/ImmutableFields.java
----------------------------------------------------------------------
diff --git a/gora-core/src/examples/java/org/apache/gora/examples/generated/ImmutableFields.java b/gora-core/src/examples/java/org/apache/gora/examples/generated/ImmutableFields.java
index 85294bf..10f48dc 100644
--- a/gora-core/src/examples/java/org/apache/gora/examples/generated/ImmutableFields.java
+++ b/gora-core/src/examples/java/org/apache/gora/examples/generated/ImmutableFields.java
@@ -1,7 +1,18 @@
 /**
- * Autogenerated by Avro
- * 
- * DO NOT EDIT DIRECTLY
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */
 package org.apache.gora.examples.generated;  
 @SuppressWarnings("all")

http://git-wip-us.apache.org/repos/asf/gora/blob/a60a3370/gora-core/src/examples/java/org/apache/gora/examples/generated/Metadata.java
----------------------------------------------------------------------
diff --git a/gora-core/src/examples/java/org/apache/gora/examples/generated/Metadata.java b/gora-core/src/examples/java/org/apache/gora/examples/generated/Metadata.java
index 3eb7b32..de6882b 100644
--- a/gora-core/src/examples/java/org/apache/gora/examples/generated/Metadata.java
+++ b/gora-core/src/examples/java/org/apache/gora/examples/generated/Metadata.java
@@ -1,7 +1,18 @@
 /**
- * Autogenerated by Avro
- * 
- * DO NOT EDIT DIRECTLY
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */
 package org.apache.gora.examples.generated;  
 @SuppressWarnings("all")

http://git-wip-us.apache.org/repos/asf/gora/blob/a60a3370/gora-core/src/examples/java/org/apache/gora/examples/generated/TokenDatum.java
----------------------------------------------------------------------
diff --git a/gora-core/src/examples/java/org/apache/gora/examples/generated/TokenDatum.java b/gora-core/src/examples/java/org/apache/gora/examples/generated/TokenDatum.java
index e38d640..f0ad9b8 100644
--- a/gora-core/src/examples/java/org/apache/gora/examples/generated/TokenDatum.java
+++ b/gora-core/src/examples/java/org/apache/gora/examples/generated/TokenDatum.java
@@ -1,7 +1,18 @@
 /**
- * Autogenerated by Avro
- * 
- * DO NOT EDIT DIRECTLY
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */
 package org.apache.gora.examples.generated;  
 @SuppressWarnings("all")

http://git-wip-us.apache.org/repos/asf/gora/blob/a60a3370/gora-core/src/examples/java/org/apache/gora/examples/generated/V2.java
----------------------------------------------------------------------
diff --git a/gora-core/src/examples/java/org/apache/gora/examples/generated/V2.java b/gora-core/src/examples/java/org/apache/gora/examples/generated/V2.java
index 1db2a0b..1dbb314 100644
--- a/gora-core/src/examples/java/org/apache/gora/examples/generated/V2.java
+++ b/gora-core/src/examples/java/org/apache/gora/examples/generated/V2.java
@@ -1,7 +1,18 @@
 /**
- * Autogenerated by Avro
- * 
- * DO NOT EDIT DIRECTLY
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */
 package org.apache.gora.examples.generated;  
 @SuppressWarnings("all")

http://git-wip-us.apache.org/repos/asf/gora/blob/a60a3370/gora-core/src/examples/java/org/apache/gora/examples/generated/WebPage.java
----------------------------------------------------------------------
diff --git a/gora-core/src/examples/java/org/apache/gora/examples/generated/WebPage.java b/gora-core/src/examples/java/org/apache/gora/examples/generated/WebPage.java
index 604b560..6f9b688 100644
--- a/gora-core/src/examples/java/org/apache/gora/examples/generated/WebPage.java
+++ b/gora-core/src/examples/java/org/apache/gora/examples/generated/WebPage.java
@@ -1,7 +1,18 @@
 /**
- * Autogenerated by Avro
- * 
- * DO NOT EDIT DIRECTLY
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */
 package org.apache.gora.examples.generated;  
 @SuppressWarnings("all")

http://git-wip-us.apache.org/repos/asf/gora/blob/a60a3370/gora-goraci/.gitignore
----------------------------------------------------------------------
diff --git a/gora-goraci/.gitignore b/gora-goraci/.gitignore
new file mode 100644
index 0000000..972a8df
--- /dev/null
+++ b/gora-goraci/.gitignore
@@ -0,0 +1,34 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+*~
+.idea
+*.iml
+*.iws
+*.ipr
+.classpath
+.externalToolBuilders
+.project
+.settings
+.git
+.svn
+build
+target
+dist
+lib
+**/lib/*.jar
+ivy/ivy*.jar
+/conf/*-site.xml
+**/conf/*-site.xml

http://git-wip-us.apache.org/repos/asf/gora/blob/a60a3370/gora-goraci/README
----------------------------------------------------------------------
diff --git a/gora-goraci/README b/gora-goraci/README
new file mode 100644
index 0000000..54c60ed
--- /dev/null
+++ b/gora-goraci/README
@@ -0,0 +1,256 @@
+=================================
+=				=
+= GORACI README			=
+= @author Keith Turner		=
+=================================
+
+BACKGROUND
+------------
+
+Apache Accumulo [0] has a simple test suite that verifies that data is not lost
+at scale.  This test suite is called continuous ingest [5].  This test runs
+many ingest clients that continually create linked lists containing 25 million
+nodes. At some point the clients are stopped and a map reduce job is run to
+ensure no linked list has a hole. A hole indicates data was lost.    
+
+The nodes in the linked list are random.  This causes each linked list to
+spread across the table.  Therefore if one part of a table loses data, then it
+will be detected by references in another part of the table.
+
+This project is a version of the test suite written using Apache Gora [1].
+Goraci has been tested against Accumulo and HBase.  
+
+THE ANATOMY OF GORACI TESTS
+----------------------------
+
+Below is a rough sketch of how data is written.  For specific details look at the
+Generator code (src/main/java/org/apache/gora/goraci/Generator.java)
+
+  1 Write out 1 million nodes 
+  2 Flush the client 
+  3 Write out 1 million that reference previous million 
+  4 If this is the 25th set of 1 million nodes, then update 1st set of million
+    to point to last 
+  5 goto 1
+
+The key is that nodes only reference flushed nodes.  Therefore a node should
+never reference a missing node, even if the ingest client is killed at any
+point in time.
+
+When running this test suite w/ Accumulo there is a script running in parallel
+called the Agitator that randomly and continuously kills server processes.  
+The outcome was that many data loss bugs were found in Accumulo by doing this. 
+This test suite can also help find bugs that impact uptime and stability when 
+run for days or weeks.  
+
+This test suite consists of the following 
+- a few Java programs 
+- a little helper script to run the java programs
+- a maven script to build it.  
+
+When generating data, it's best to have each map task generate a multiple of 25
+million.  The reason for this is that circular linked lists are generated every
+25M.  Not generating a multiple of 25M will result in some nodes in the linked
+list not having references.  The loss of an unreferenced node cannot be
+detected.
+
+BUILDING GORACI
+---------------
+
+This code currently depends on an unreleased version of Gora.  To build Gora
+0.2 run the following commands.
+
+  svn export http://svn.apache.org/repos/asf/gora/trunk gora
+  cd gora
+  mvn install -DskipTests
+
+After this you can build org.apache.gora.goraci.
+
+  git clone git://github.com/keith-turner/org.apache.gora.goraci.git
+  cd org.apache.gora.goraci
+  mvn compile
+
+The maven pom file has some profiles that attempt to make it easier to run
+org.apache.gora.goraci against different gora backends by copying the jars you need into lib.
+Before packaging it's important to edit gora.properties and set it correctly
+for your datastore.  To run against accumulo do the following.
+
+  vim src/main/resources/gora.properties (set Accumulo properties)
+  mvn package -Paccumulo-1.4
+
+To run against hbase, do the following.
+
+  vim src/main/resources/gora.properties (set HBase properties)
+  mvn package -Phbase-0.92
+
+To run against cassandra, do the following.
+
+  vim src/main/resources/gora.properties (set Cassandra properties)
+  mvn package -Pcassandra-1.1.2
+
+For other datastores mentioned in gora.properties, you will need to copy the
+appropriate deps into lib.  Feel free to update the pom with other profiles and
+send me a pull request.
+
+JAVA CLASS DESCRIPTION
+-----------------
+
+Below is a description of the Java programs
+
+  * org.apache.gora.goraci.Generator - A map only job that generates data.  As stated previously, 
+                       it's best to generate data in multiples of 25M.
+  * org.apache.gora.goraci.Verify    - A map reduce job that looks for holes.  Look at the
+                       counts after running.  REFERENCED and UNREFERENCED are 
+                       ok, any UNDEFINED counts are bad. Do not run at the 
+                       same time as the Generator.
+  * org.apache.gora.goraci.Walker    - A standalone program that starts following a linked list 
+                       and emits timing info.  
+  * org.apache.gora.goraci.Print     - A standalone program that prints nodes in the linked list
+  * org.apache.gora.goraci.Delete    - A standalone program that deletes a single node
+  * org.apache.gora.goraci.Loop      - Runs generation and verify in a loop
+
+org.apache.gora.goraci.sh is a helper script that you can use to run the above programs.  It
+assumes all needed jars are in the lib dir.  It does not need the package name.
+You can just run "./org.apache.gora.goraci.sh Generator", below is an example.
+
+  $ ./org.apache.gora.goraci.sh Generator
+  Usage : Generator <num mappers> <num nodes>
+
+For Gora to work, it needs a gora.properties file on the classpath and a
+mapping file on the classpath, the contents of both are datastore specific,
+more details can be found here [2]. You can edit the ones in src/main/resources
+and build the org.apache.gora.goraci-${version}-SNAPSHOT.jar with those. Alternatively remove
+those and put them on the classpath through some other means.
+
+GORA AND HADOOP
+-----------------
+
+Gora uses Avro which uses a Json library that Hadoop has an old version of.
+The two libraries  jackson-core and jackson-mapper need to be updated in
+<HADOOP_HOME>/lib and <HADOOP_HOME>/share/hadoop/lib/.  I updated these to
+jackson-core-asl-1.4.2.jar and jackson-mapper-asl-1.4.2.jar.  For details see
+HADOOP-6945 [3]. 
+
+GORACI AND HBASE
+-----------------
+
+To improve performance running read jobs such as the Verify step, enable
+scanner caching on the command line.  For example:
+
+    $ ./goraci.sh Verify -Dhbase.client.scanner.caching=1000 \
+         -Dmapred.map.tasks.speculative.execution=false verify_dir 1000
+
+Depending on how you have your hadoop and hbase deployed, you may need to
+change the goraci.sh script around some.  Here is one suggestion that may help
+in the case where your hadoop and hbase configuration are other than under the
+hadoop and hbase home directories.
+
+  diff --git a/org.apache.gora.goraci.sh b/org.apache.gora.goraci.sh
+  index db1562a..31c3c94 100755
+  --- a/org.apache.gora.goraci.sh
+  +++ b/org.apache.gora.goraci.sh
+  @@ -95,6 +95,4 @@ done
+   #run it
+   export HADOOP_CLASSPATH="$CLASSPATH"
+   LIBJARS=`echo $HADOOP_CLASSPATH | tr : ,`
+  -hadoop jar "$GORACI_HOME/lib/org.apache.gora.goraci-0.0.1-SNAPSHOT.jar" $CLASS -libjars "$LIBJARS" "$@"
+  -
+  -
+  +CLASSPATH="${HBASE_CONF_DIR}" hadoop --config "${HADOOP_CONF_DIR}" jar "$GORACI_HOME/lib/org.apache.gora.goraci-0.0.1-SNAPSHOT.jar" $CLASS -files "${HBASE_CONF_DIR}/hbase-site.xml" -libjars "$LIBJARS" "$@"
+
+You will need to define HBASE_CONF_DIR and HADOOP_CONF_DIR before you run your
+org.apache.gora.goraci jobs.  For example:
+
+  $ export HADOOP_CONF_DIR=/home/you/hadoop-conf
+  $ export HBASE_CONF_DIR=/home/you/hbase-conf
+  $ PATH=/home/you/hadoop-1.0.2/bin:$PATH ./org.apache.gora.goraci.sh Generator 1000 1000000
+
+CONCURRENCY
+------------
+
+It's possible to run verification at the same time as generation.  To do this
+supply the -c option to Generator and Verify.  This will cause Generator to
+create a secondary table which holds information about what verification can
+safely verify.  Running Verify with the -c option will make it run slower
+because more information must be brought back to the client side for filtering
+purposes.  The Loop program also supports the -c option, which will cause it to
+run verification concurrently with generation.
+
+If verification is run at the same time as generation without the -c option,
+then it will inevitably fail.  This is because verification mappers read
+different parts of the table at different times, giving an inconsistent view
+of the table.  So one mapper may read a part of a table before a node is
+written, when the node is later referenced it will appear to be missing.  The
+-c option basically filters out newer information using data written to the
+secondary table.
+
+CONCLUSIONS
+------------
+
+This test suite does not do everything that the Accumulo test suite does,
+mainly it does not collect statistics and generate reports.  The reports
+are useful for assessing performance.
+
+Below shows running a test of the test.  Ingest one linked list, delete a node
+in it, and ensure the verification map reduce job notices that the node is missing.
+Not all output is shown, just the important parts.
+
+  $ ./org.apache.gora.goraci.sh Generator  1 25000000
+  $ ./org.apache.gora.goraci.sh Print -s 2000000000000000 -l 1
+  2000001f65dbd238:30350f9ae6f6e8f7:000004265852:ef09f9dd-75b1-4c16-9f14-0fa84f3029b6
+  $ ./org.apache.gora.goraci.sh Print -s 30350f9ae6f6e8f7 -l 1
+  30350f9ae6f6e8f7:4867fe03de6ea6c8:000003265852:ef09f9dd-75b1-4c16-9f14-0fa84f3029b6
+  $ ./org.apache.gora.goraci.sh Delete 30350f9ae6f6e8f7
+  Delete returned true
+  $ ./org.apache.gora.goraci.sh Verify gci_verify_1 2 
+  11/12/20 17:12:31 INFO mapred.JobClient:   org.apache.gora.goraci.Verify$Counts
+  11/12/20 17:12:31 INFO mapred.JobClient:     UNDEFINED=1
+  11/12/20 17:12:31 INFO mapred.JobClient:     REFERENCED=24999998
+  11/12/20 17:12:31 INFO mapred.JobClient:     UNREFERENCED=1
+  $ hadoop fs -cat gci_verify_1/part\*
+  30350f9ae6f6e8f7	2000001f65dbd238
+
+The map reduce job found the one undefined node and gave the node that
+referenced it.
+
+Below are some timing statistics for running org.apache.gora.goraci on a 10 node cluster. 
+
+  Store           | Task                   | Time    | Undef  | Unref | Ref        
+  ----------------+------------------------+---------+--------+-------+------------
+  accumulo-1.4.0  | Generator 10 100000000 | 40m 16s |    N/A |   N/A |        N/A     
+  accumulo-1.4.0  | Verify /tmp/goraci1 40 |  6m  7s |      0 |     0 | 1000000000  
+  hbase-0.92.1    | Generator 10 100000000 |  2h 44m |    N/A |   N/A |        N/A     
+  hbase-0.92.1    | Verify /tmp/goraci2 40 |  6m 34s |      0 |     0 | 1000000000
+
+Hbase and Accumulo are configured differently out-of-the-box.  We used the Accumulo 
+3G, native configuration examples in the conf/examples directory.
+
+To provide a comparable memory footprint, we increased the HBase jvm to "-Xmx4000m", 
+and turned on compression for the ci table:
+
+create 'ci', {NAME=>'meta', COMPRESSION=>'GZ'}
+
+We also turned down the replication of write-ahead logs to be comparable to Accumulo:
+
+  <property>
+    <name>hbase.regionserver.hlog.replication</name>
+    <value>2</value>
+  </property>
+
+For the accumulo run, we set the split threshold to 512M:
+
+ shell> config -t ci -s table.split.threshold=512M
+
+This was done so that Accumulo would end up with 64 tablets, which is the
+number of regions hbase had.   The number of tablets/regions determines how
+much parallelism there is in the map phase of the verify step.
+
+Sometimes when this test suite is run against HBase data is lost.  This issue
+is being tracked under HBASE-5754 [4].
+
+[0] http://accumulo.apache.org
+[1] http://gora.apache.org
+[2] http://gora.apache.org/docs/current/gora-conf.html
+[3] https://issues.apache.org/jira/browse/HADOOP-6945
+[4] https://issues.apache.org/jira/browse/HBASE-5754
+[5] http://svn.apache.org/viewvc/accumulo/tags/1.4.0/test/system/continuous/ScaleTest.odp?view=co

http://git-wip-us.apache.org/repos/asf/gora/blob/a60a3370/gora-goraci/goraci.sh
----------------------------------------------------------------------
diff --git a/gora-goraci/goraci.sh b/gora-goraci/goraci.sh
new file mode 100755
index 0000000..085fe3c
--- /dev/null
+++ b/gora-goraci/goraci.sh
@@ -0,0 +1,104 @@
+#!/bin/sh
+#
+# The Goraci command script
+#
+# Environment Variables
+#
+#   GORACI_JAVA_HOME The java implementation to use.  Overrides JAVA_HOME.
+#
+#   GORACI_HEAPSIZE  The maximum amount of heap to use, in MB. 
+#                   Default is 1000.
+#
+#   GORACI_OPTS      Extra Java runtime options.
+#
+
+# resolve links - $0 may be a softlink
+THIS="$0"
+while [ -h "$THIS" ]; do
+  ls=`ls -ld "$THIS"`
+  link=`expr "$ls" : '.*-> \(.*\)$'`
+  if expr "$link" : '.*/.*' > /dev/null; then
+    THIS="$link"
+  else
+    THIS=`dirname "$THIS"`/"$link"
+  fi
+done
+
+# if no args specified, show usage
+if [ $# = 0 ]; then
+  echo "Usage: run COMMAND [COMMAND options]"
+  echo "where COMMAND is one of:"
+  echo "  Generator                  A map only job that generates data."
+  echo "  Verify                     A map reduce job that looks for holes.  
+                             Look at the counts after running.  
+  		             REFERENCED and UNREFERENCED are ok, 
+  		             any UNDEFINED counts are bad. Do not 
+  			     run at the same time as the Generator."
+  echo "  Walker                     A standalong program that starts 
+  			     following a linked list and emits 
+  			     timing info."
+  echo "  Print                      A standalone program that prints nodes 
+  			     in the linked list."
+  echo "  Delete         	     A standalone program that deletes a 
+  			     single node."
+  echo "  Loop         	             A program to Loop through Generator and
+                             Verify steps"
+  echo " or"
+  echo "  CLASSNAME                  run the class named CLASSNAME"
+  echo "Most commands print help when invoked w/o parameters."
+  exit 1
+fi
+
+# get arguments
+COMMAND=$1
+shift
+  
+# some directories
+THIS_DIR=`dirname "$THIS"`
+GORACI_HOME=`cd "$THIS_DIR" ; pwd`
+
+# catch when JAVA_HOME is not set
+if [ "$JAVA_HOME" = "" ]; then
+  echo "Error: JAVA_HOME is not set."
+  exit 1
+fi
+# so that filenames w/ spaces are handled correctly in loops below
+IFS=
+
+# restore ordinary behaviour
+unset IFS
+
+# figure out which class to run
+if [ "$COMMAND" = "Generator" ] ; then
+  CLASS=org.apache.gora.goraci.Generator
+elif [ "$COMMAND" = "Verify" ] ; then
+  CLASS=org.apache.gora.goraci.Verify
+elif [ "$COMMAND" = "Walker" ] ; then
+  CLASS=org.apache.gora.goraci.Walker
+elif [ "$COMMAND" = "Print" ] ; then
+  CLASS=org.apache.gora.goraci.Print
+elif [ "$COMMAND" = "Delete" ] ; then
+  CLASS=org.apache.gora.goraci.Delete
+elif [ "$COMMAND" = "Loop" ] ; then
+  CLASS=org.apache.gora.goraci.Loop
+else
+  CLASS=$1
+  shift
+fi
+
+# initial CLASSPATH 
+CLASSPATH=""
+
+# add libs to CLASSPATH
+SEP=""
+for f in $GORACI_HOME/lib/*.jar; do
+  CLASSPATH=${CLASSPATH}$SEP$f;
+  SEP=":"
+done
+
+#run it
+export HADOOP_CLASSPATH="$CLASSPATH"
+LIBJARS=`echo $HADOOP_CLASSPATH | tr : ,`
+hadoop jar "$GORACI_HOME/lib/org.apache.gora.goraci-0.0.1-SNAPSHOT.jar" $CLASS -libjars "$LIBJARS" "$@"
+
+

http://git-wip-us.apache.org/repos/asf/gora/blob/a60a3370/gora-goraci/pom.xml
----------------------------------------------------------------------
diff --git a/gora-goraci/pom.xml b/gora-goraci/pom.xml
new file mode 100644
index 0000000..273361a
--- /dev/null
+++ b/gora-goraci/pom.xml
@@ -0,0 +1,275 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
+	license agreements. See the NOTICE file distributed with this work for additional 
+	information regarding copyright ownership. The ASF licenses this file to 
+	You under the Apache License, Version 2.0 (the "License"); you may not use 
+	this file except in compliance with the License. You may obtain a copy of 
+	the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+	by applicable law or agreed to in writing, software distributed under the 
+	License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+	OF ANY KIND, either express or implied. See the License for the specific 
+	language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+
+	<modelVersion>4.0.0</modelVersion>
+	<parent>
+		<groupId>org.apache.gora</groupId>
+		<artifactId>gora</artifactId>
+		<version>0.5-SNAPSHOT</version>
+		<relativePath>../</relativePath>
+	</parent>
+	<artifactId>gora-goraci</artifactId>
+	
+	<name>Apache Gora :: GoraCI</name>
+	<url>http://gora.apache.org</url>
+	<description>The GoraCI test runs many ingest clients that continually create 
+	linked lists containing 25 million nodes. At some point the clients are stopped 
+	and a map reduce job is run to ensure no linked list has a hole. A hole indicates 
+	data was lost.</description>
+	<organization>
+		<name>The Apache Software Foundation</name>
+		<url>http://www.apache.org/</url>
+	</organization>
+	<issueManagement>
+		<system>JIRA</system>
+		<url>https://issues.apache.org/jira/browse/GORA</url>
+	</issueManagement>
+	<ciManagement>
+		<system>Jenkins</system>
+		<url>https://builds.apache.org/job/Gora-trunk/</url>
+	</ciManagement>
+
+	<build>
+		<directory>target</directory>
+		<outputDirectory>${basedir}/target/classes</outputDirectory>
+		<finalName>${project.artifactId}-${project.version}</finalName>
+		<sourceDirectory>${basedir}/src/main/java</sourceDirectory>
+
+		<plugins>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-dependency-plugin</artifactId>
+				<version>${maven-dependency-plugin.version}</version>
+				<executions>
+					<execution>
+						<id>copy-dependencies</id>
+						<phase>package</phase>
+						<goals>
+							<goal>copy-dependencies</goal>
+						</goals>
+						<configuration>
+							<outputDirectory>lib</outputDirectory>
+							<overWriteReleases>false</overWriteReleases>
+							<overWriteSnapshots>true</overWriteSnapshots>
+							<overWriteIfNewer>true</overWriteIfNewer>
+							<excludeTransitive>true</excludeTransitive>
+						</configuration>
+					</execution>
+				</executions>
+			</plugin>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-jar-plugin</artifactId>
+				<configuration>
+					<outputDirectory>lib</outputDirectory>
+				</configuration>
+			</plugin>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-clean-plugin</artifactId>
+				<configuration>
+					<filesets>
+						<fileset>
+							<directory>lib</directory>
+							<includes>
+								<include>**/*.jar</include>
+							</includes>
+							<followSymlinks>false</followSymlinks>
+						</fileset>
+					</filesets>
+				</configuration>
+			</plugin>
+		</plugins>
+
+		<pluginManagement>
+			<plugins>
+				<plugin>
+					<groupId>org.eclipse.m2e</groupId>
+					<artifactId>lifecycle-mapping</artifactId>
+					<version>1.0.0</version>
+					<configuration>
+						<lifecycleMappingMetadata>
+							<pluginExecutions>
+								<pluginExecution>
+									<pluginExecutionFilter>
+										<groupId>org.apache.maven.plugins</groupId>
+										<artifactId>maven-dependency-plugin</artifactId>
+										<versionRange>[2.0,)</versionRange>
+										<goals>
+											<goal>copy-dependencies</goal>
+										</goals>
+									</pluginExecutionFilter>
+									<action>
+										<ignore />
+									</action>
+								</pluginExecution>
+							</pluginExecutions>
+						</lifecycleMappingMetadata>
+					</configuration>
+				</plugin>
+			</plugins>
+		</pluginManagement>
+
+	</build>
+
+	<profiles>
+		<profile>
+			<!-- this profile contains the runtime deps for accumulo-1.4 -->
+			<id>accumulo-1.4</id>
+			<dependencies>
+				<dependency>
+					<groupId>org.apache.gora</groupId>
+					<artifactId>gora-accumulo</artifactId>
+					<scope>runtime</scope>
+				</dependency>
+				<dependency>
+					<groupId>org.apache.accumulo</groupId>
+					<artifactId>accumulo-core</artifactId>
+					<version>1.4.0</version>
+					<scope>runtime</scope>
+				</dependency>
+				<dependency>
+					<groupId>org.apache.accumulo</groupId>
+					<artifactId>cloudtrace</artifactId>
+					<version>1.4.0</version>
+					<scope>runtime</scope>
+				</dependency>
+				<dependency>
+					<groupId>org.apache.thrift</groupId>
+					<artifactId>libthrift</artifactId>
+					<version>0.6.1</version>
+					<scope>runtime</scope>
+				</dependency>
+				<dependency>
+					<groupId>org.apache.zookeeper</groupId>
+					<artifactId>zookeeper</artifactId>
+					<version>3.3.1</version>
+					<scope>runtime</scope>
+				</dependency>
+			</dependencies>
+		</profile>
+
+		<profile>
+			<id>hbase-0.92</id>
+			<!-- this profile contains the runtime deps for hbase-0.92 -->
+			<dependencies>
+				<dependency>
+					<groupId>org.apache.gora</groupId>
+					<artifactId>gora-hbase</artifactId>
+					<scope>runtime</scope>
+				</dependency>
+				<dependency>
+					<groupId>org.apache.hbase</groupId>
+					<artifactId>hbase</artifactId>
+					<!-- version>0.92.1</version -->
+					<scope>runtime</scope>
+				</dependency>
+				<dependency>
+					<groupId>org.jdom</groupId>
+					<artifactId>jdom</artifactId>
+					<scope>runtime</scope>
+				</dependency>
+				<dependency>
+					<groupId>org.apache.zookeeper</groupId>
+					<artifactId>zookeeper</artifactId>
+					<version>3.4.3</version>
+					<scope>runtime</scope>
+				</dependency>
+			</dependencies>
+		</profile>
+
+		<profile>
+			<id>cassandra-2.0.2</id>
+			<!-- this profile contains the runtime deps for Cassandra 2.0.2 -->
+			<dependencies>
+				<dependency>
+					<groupId>org.apache.gora</groupId>
+					<artifactId>gora-cassandra</artifactId>
+					<scope>runtime</scope>
+				</dependency>
+				<dependency>
+					<groupId>org.apache.cassandra</groupId>
+					<artifactId>cassandra-all</artifactId>
+					<scope>runtime</scope>
+				</dependency>
+				<dependency>
+					<groupId>org.apache.cassandra</groupId>
+					<artifactId>cassandra-thrift</artifactId>
+					<scope>runtime</scope>
+				</dependency>
+				<dependency>
+					<groupId>org.hectorclient</groupId>
+					<artifactId>hector-core</artifactId>
+					<scope>runtime</scope>
+					<exclusions>
+						<exclusion>
+							<groupId>org.apache.cassandra</groupId>
+							<artifactId>cassandra-all</artifactId>
+						</exclusion>
+					</exclusions>
+				</dependency>
+				<dependency>
+					<groupId>org.jdom</groupId>
+					<artifactId>jdom</artifactId>
+					<scope>runtime</scope>
+					<exclusions>
+						<exclusion>
+							<groupId>maven-plugins</groupId>
+							<artifactId>maven-cobertura-plugin</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>maven-plugins</groupId>
+							<artifactId>maven-findbugs-plugin</artifactId>
+						</exclusion>
+					</exclusions>
+				</dependency>
+				<dependency>
+					<groupId>com.google.guava</groupId>
+					<artifactId>guava</artifactId>
+					<scope>runtime</scope>
+				</dependency>
+				<!-- dependency> <groupId>org.apache.thrift</groupId> <artifactId>libthrift</artifactId> 
+					<version>0.8.0</version> </dependency -->
+			</dependencies>
+
+		</profile>
+		<!-- profile> <id>sql</id> <dependencies> <dependency> <groupId>org.apache.gora.goraci</groupId> 
+			<artifactId>gora-sql</artifactId> <version>${gora.version}</version> <scope>runtime</scope> 
+			</dependency> </dependencies> </profile -->
+	</profiles>
+
+	<dependencies>
+		<dependency>
+			<groupId>org.apache.gora</groupId>
+			<artifactId>gora-core</artifactId>
+		</dependency>
+
+		<dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-core</artifactId>
+		</dependency>
+
+		<dependency>
+			<groupId>org.apache.avro</groupId>
+			<artifactId>avro</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>commons-cli</groupId>
+			<artifactId>commons-cli</artifactId>
+		</dependency>
+
+	</dependencies>
+
+
+</project>

http://git-wip-us.apache.org/repos/asf/gora/blob/a60a3370/gora-goraci/src/main/avro/cinode.json
----------------------------------------------------------------------
diff --git a/gora-goraci/src/main/avro/cinode.json b/gora-goraci/src/main/avro/cinode.json
new file mode 100644
index 0000000..207bb18
--- /dev/null
+++ b/gora-goraci/src/main/avro/cinode.json
@@ -0,0 +1,10 @@
+{
+  "type": "record",
+  "name": "CINode",
+  "namespace": "org.apache.gora.goraci.generated",
+  "fields" : [
+    {"name": "prev", "type": "long"},
+    {"name": "client", "type": "string"},
+    {"name": "count", "type": "long"}
+  ]
+}

http://git-wip-us.apache.org/repos/asf/gora/blob/a60a3370/gora-goraci/src/main/avro/flushed.json
----------------------------------------------------------------------
diff --git a/gora-goraci/src/main/avro/flushed.json b/gora-goraci/src/main/avro/flushed.json
new file mode 100644
index 0000000..74872e6
--- /dev/null
+++ b/gora-goraci/src/main/avro/flushed.json
@@ -0,0 +1,8 @@
+{
+  "type": "record",
+  "name": "Flushed",
+  "namespace": "org.apache.gora.goraci.generated",
+  "fields" : [
+    {"name": "count", "type": "long"}
+  ]
+}

http://git-wip-us.apache.org/repos/asf/gora/blob/a60a3370/gora-goraci/src/main/java/org/apache/gora/goraci/Delete.java
----------------------------------------------------------------------
diff --git a/gora-goraci/src/main/java/org/apache/gora/goraci/Delete.java b/gora-goraci/src/main/java/org/apache/gora/goraci/Delete.java
new file mode 100644
index 0000000..9884f64
--- /dev/null
+++ b/gora-goraci/src/main/java/org/apache/gora/goraci/Delete.java
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gora.goraci;
+
+import org.apache.gora.goraci.generated.CINode;
+
+import java.math.BigInteger;
+
+import org.apache.gora.store.DataStore;
+import org.apache.gora.store.DataStoreFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * A standalone program that deletes a single node.
+ *
+ * <p>The node key is given on the command line as a hexadecimal number.
+ */
+public class Delete extends Configured implements Tool {
+
+  /**
+   * Deletes the node whose key is {@code args[0]}, parsed as a hexadecimal number.
+   *
+   * @param args exactly one argument: the key of the node to delete, in hexadecimal
+   * @return 0 when the store reports the delete as successful or when only the usage
+   *         message was printed, 1 when the store reports that nothing was deleted
+   * @throws Exception if the key cannot be parsed or the data store fails
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+    if (args.length != 1) {
+      System.out.println("Usage : " + Delete.class.getSimpleName() + " <node to delete>");
+      return 0;
+    }
+
+    // Parse the key before opening the store so a malformed key cannot leak an open store.
+    long key = new BigInteger(args[0], 16).longValue();
+
+    // Honor the configuration injected by ToolRunner (generic -D / -conf options);
+    // fall back to a fresh one when run() is invoked without setConf().
+    Configuration conf = getConf();
+    if (conf == null) {
+      conf = new Configuration();
+    }
+
+    DataStore<Long,CINode> store = DataStoreFactory.getDataStore(Long.class, CINode.class, conf);
+    try {
+      boolean ret = store.delete(key);
+      store.flush();
+
+      System.out.println("Delete returned " + ret);
+
+      return ret ? 0 : 1;
+    } finally {
+      // Always release the store, even when delete() or flush() throws.
+      store.close();
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    int ret = ToolRunner.run(new Delete(), args);
+    System.exit(ret);
+  }
+}

http://git-wip-us.apache.org/repos/asf/gora/blob/a60a3370/gora-goraci/src/main/java/org/apache/gora/goraci/Generator.java
----------------------------------------------------------------------
diff --git a/gora-goraci/src/main/java/org/apache/gora/goraci/Generator.java b/gora-goraci/src/main/java/org/apache/gora/goraci/Generator.java
new file mode 100644
index 0000000..53e58c6
--- /dev/null
+++ b/gora-goraci/src/main/java/org/apache/gora/goraci/Generator.java
@@ -0,0 +1,351 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gora.goraci;
+
+import org.apache.gora.goraci.generated.CINode;
+import org.apache.gora.goraci.generated.Flushed;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.UUID;
+
+import org.apache.avro.util.Utf8;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.gora.store.DataStore;
+import org.apache.gora.store.DataStoreFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * A map-only job that generates random linked lists and stores them using Gora.
+ */
+public class Generator extends Configured implements Tool {
+  
+  private static final Log LOG = LogFactory.getLog(Generator.class);
+  
+  static final int WIDTH = 1000000;
+  static final int WRAP = WIDTH * 25;
+
+  static class GeneratorInputFormat extends InputFormat<LongWritable,NullWritable> {
+    
+    static class GeneratorInputSplit extends InputSplit implements Writable {
+      
+      @Override
+      public long getLength() throws IOException, InterruptedException {
+        return 1;
+      }
+      
+      @Override
+      public String[] getLocations() throws IOException, InterruptedException {
+        return new String[0];
+      }
+      
+      @Override
+      public void readFields(DataInput arg0) throws IOException {
+        // TODO Auto-generated method stub
+        
+      }
+      
+      @Override
+      public void write(DataOutput arg0) throws IOException {
+        // TODO Auto-generated method stub
+        
+      }
+   }
+    
+    static class GeneratorRecordReader extends RecordReader<LongWritable,NullWritable> {
+      
+      private long numNodes;
+      private boolean hasNext = true;
+      
+      @Override
+      public void close() throws IOException {
+        
+      }
+      
+      @Override
+      public LongWritable getCurrentKey() throws IOException, InterruptedException {
+        return new LongWritable(numNodes);
+      }
+      
+      @Override
+      public NullWritable getCurrentValue() throws IOException, InterruptedException {
+        return NullWritable.get();
+      }
+      
+      @Override
+      public float getProgress() throws IOException, InterruptedException {
+        return 0;
+      }
+      
+      @Override
+      public void initialize(InputSplit arg0, TaskAttemptContext context) throws IOException, InterruptedException {
+        numNodes = context.getConfiguration().getLong("org.apache.gora.goraci.generator.nodes", 1000000);
+      }
+      
+      @Override
+      public boolean nextKeyValue() throws IOException, InterruptedException {
+        boolean hasnext = this.hasNext;
+        this.hasNext = false;
+        return hasnext;
+      }
+      
+    }
+    
+    @Override
+    public RecordReader<LongWritable,NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
+      GeneratorRecordReader rr = new GeneratorRecordReader();
+      rr.initialize(split, context);
+      return rr;
+    }
+    
+    @Override
+    public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
+      int numMappers = job.getConfiguration().getInt("org.apache.gora.goraci.generator.mappers", 1);
+      
+      ArrayList<InputSplit> splits = new ArrayList<InputSplit>(numMappers);
+      
+      for (int i = 0; i < numMappers; i++) {
+        splits.add(new GeneratorInputSplit());
+      }
+      
+      return splits;
+    }
+    
+  }
+
+  /**
+   * Some ASCII art time:
+   * [ . . . ] represents one batch of random longs of length WIDTH
+   *
+   *                _________________________
+   *               |                  ______ |
+   *               |                 |      ||
+   *             __+_________________+_____ ||
+   *             v v                 v     |||
+   * first   = [ . . . . . . . . . . . ]   |||
+   *             ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^     |||
+   *             | | | | | | | | | | |     |||
+   * prev    = [ . . . . . . . . . . . ]   |||
+   *             ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^     |||
+   *             | | | | | | | | | | |     |||
+   * current = [ . . . . . . . . . . . ]   |||
+   *                                       |||
+   * ...                                   |||
+   *                                       |||
+   * last    = [ . . . . . . . . . . . ]   |||
+   *             | | | | | | | | | | |-----|||
+   *             |                 |--------||
+   *             |___________________________|
+   */
+
+  static class GeneratorMapper extends Mapper<LongWritable,NullWritable,NullWritable,NullWritable> {
+    
+    private boolean concurrent;
+
+    @Override
+    protected void setup(Context context) throws IOException, InterruptedException {
+      super.setup(context);
+      concurrent = context.getConfiguration().getBoolean("org.apache.gora.goraci.generator.concurrent", false);
+    }
+    
+    /**
+     * Generates the number of nodes carried in {@code key}, one batch of {@code WIDTH}
+     * random keys at a time. Each batch is persisted with its nodes pointing at the
+     * previous batch; every {@code WRAP} nodes the batches written so far are stitched
+     * into one circular list by pointing the (left-shifted) first batch at the last one.
+     *
+     * @param key    number of nodes this mapper should generate
+     * @param value  unused
+     * @param output task context, used for status and progress reporting
+     * @throws IOException if a data store operation fails
+     */
+    @Override
+    protected void map(LongWritable key, NullWritable value, Context output) throws IOException {
+      long num = key.get();
+      System.out.println("num" + num);
+
+      Utf8 id = new Utf8(UUID.randomUUID().toString());
+
+      Configuration conf = new Configuration();
+      DataStore<Long,CINode> store = DataStoreFactory.getDataStore(Long.class, CINode.class, conf);
+      DataStore<Utf8,Flushed> flushedTable = null;
+
+      try {
+        if (concurrent) {
+          flushedTable = DataStoreFactory.getDataStore(Utf8.class, Flushed.class, conf);
+          flushedTable.createSchema();
+        }
+
+        store.createSchema();
+
+        Random rand = new Random();
+
+        long[] first = null;
+        long[] prev = null;
+        long[] current = new long[WIDTH];
+
+        long count = 0;
+        while (count < num) {
+          for (int i = 0; i < current.length; i++) {
+            // Clear the sign bit rather than calling Math.abs(): Math.abs(Long.MIN_VALUE)
+            // is still negative, and -1 is reserved by persist() as the "no prev" marker.
+            current[i] = rand.nextLong() & Long.MAX_VALUE;
+          }
+
+          persist(output, store, count, prev, current, id);
+
+          if (first == null) {
+            first = current;
+          }
+          prev = current;
+          current = new long[WIDTH];
+
+          count += current.length;
+          output.setStatus("Count " + count);
+
+          if (count % WRAP == 0) {
+            // This block turns the 1 million linked lists of length 25 into one giant
+            // circular linked list of 25 million nodes.
+
+            circularLeftShift(first);
+
+            updatePrev(store, first, prev);
+
+            if (concurrent) {
+              // Keep track of what is flushed in another table; Verify can use this
+              // information to run concurrently.
+              Flushed flushed = flushedTable.newPersistent();
+              flushed.setCount(count);
+              flushedTable.put(id, flushed);
+              flushedTable.flush();
+            }
+
+            first = null;
+            prev = null;
+          }
+        }
+      } finally {
+        // Release both stores even when generation fails part-way through.
+        try {
+          store.close();
+        } finally {
+          if (flushedTable != null) {
+            flushedTable.close();
+          }
+        }
+      }
+    }
+    
+    private static void circularLeftShift(long[] first) {
+      long ez = first[0];
+      for (int i = 0; i < first.length - 1; i++)
+        first[i] = first[i + 1];
+      first[first.length - 1] = ez;
+    }
+    
+    private static void persist(Context output, DataStore<Long,CINode> store, long count, long[] prev, long[] current, Utf8 id) throws IOException {
+      for (int i = 0; i < current.length; i++) {
+        CINode node = store.newPersistent();
+        node.setCount(count + i);
+        if (prev != null)
+          node.setPrev(prev[i]);
+        else
+          node.setPrev((long) -1);
+        node.setClient(id);
+        
+        store.put(current[i], node);
+        if (i % 1000 == 0) {
+          // Tickle progress every so often else maprunner will think us hung
+          output.progress();
+        }
+      }
+      
+      store.flush();
+    }
+    
+    /**
+     * For every index {@code i}, puts a node under key {@code first[i]} whose prev is
+     * {@code current[i]}, then flushes the store.
+     */
+    private static void updatePrev(DataStore<Long,CINode> store, long[] first, long[] current) throws IOException {
+      int idx = 0;
+      while (idx < current.length) {
+        CINode link = store.newPersistent();
+        link.setPrev(current[idx]);
+        store.put(first[idx], link);
+        idx++;
+      }
+
+      store.flush();
+    }
+  }
+  
+  
+  @Override
+  public int run(String[] args) throws Exception {
+    Options options = new Options();
+    options.addOption("c", "concurrent", false, "update secondary table with information that allows verification to run concurrently");
+    
+    GnuParser parser = new GnuParser();
+    CommandLine cmd = null;
+    try {
+      cmd = parser.parse(options, args);
+      if (cmd.getArgs().length != 2) {
+        throw new ParseException("Did not see expected # of arguments, saw " + cmd.getArgs().length);
+      }
+    } catch (ParseException e) {
+      System.err.println("Failed to parse command line " + e.getMessage());
+      System.err.println();
+      HelpFormatter formatter = new HelpFormatter();
+      formatter.printHelp(getClass().getSimpleName() + " <num mappers> <num nodes per map>", options);
+      System.exit(-1);
+    }
+
+    int numMappers = Integer.parseInt(cmd.getArgs()[0]);
+    long numNodes = Long.parseLong(cmd.getArgs()[1]);
+    return run(numMappers, numNodes, cmd.hasOption("c"));
+  }
+
+  public int run(int numMappers, long numNodes, boolean concurrent) throws Exception {
+    LOG.info("Running Generator with numMappers=" + numMappers +", numNodes=" + numNodes);
+    
+    Job job = new Job(getConf());
+    
+    job.setJobName("Link Generator");
+    job.setNumReduceTasks(0);
+    job.setJarByClass(getClass());
+    
+    job.setInputFormatClass(GeneratorInputFormat.class);
+    job.setOutputKeyClass(NullWritable.class);
+    job.setOutputValueClass(NullWritable.class);
+    
+    job.getConfiguration().setInt("org.apache.gora.goraci.generator.mappers", numMappers);
+    job.getConfiguration().setLong("org.apache.gora.goraci.generator.nodes", numNodes);
+    job.getConfiguration().setBoolean("org.apache.gora.goraci.generator.concurrent", concurrent);
+    
+    job.setMapperClass(GeneratorMapper.class);
+    
+    job.setOutputFormatClass(NullOutputFormat.class);
+
+    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
+
+    boolean success = job.waitForCompletion(true);
+    
+    return success ? 0 : 1;
+  }
+  
+  public static void main(String[] args) throws Exception {
+    int ret = ToolRunner.run(new Generator(), args);
+    System.exit(ret);
+  }
+}

http://git-wip-us.apache.org/repos/asf/gora/blob/a60a3370/gora-goraci/src/main/java/org/apache/gora/goraci/Loop.java
----------------------------------------------------------------------
diff --git a/gora-goraci/src/main/java/org/apache/gora/goraci/Loop.java b/gora-goraci/src/main/java/org/apache/gora/goraci/Loop.java
new file mode 100644
index 0000000..29c4812
--- /dev/null
+++ b/gora-goraci/src/main/java/org/apache/gora/goraci/Loop.java
@@ -0,0 +1,164 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gora.goraci;
+
+import java.util.Arrays;
+import java.util.UUID;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/** 
+ * Executes Generator and Verify in a loop. Data is not cleaned between runs, so each iteration
+ * adds more data.
+ */
+public class Loop extends Configured implements Tool {
+
+  private static final Log LOG = LogFactory.getLog(Loop.class); 
+  
+  /**
+   * Runs one {@link Generator} job with this tool's configuration.
+   *
+   * @param numMappers number of generator mappers
+   * @param numNodes   number of nodes each mapper generates
+   * @param concurrent whether the generator should record flush information for concurrent verification
+   * @throws RuntimeException if the generator returns a non-zero code
+   */
+  protected void runGenerator(int numMappers, long numNodes, boolean concurrent) throws Exception {
+    Generator generator = new Generator();
+    generator.setConf(getConf());
+    int retCode = generator.run(numMappers, numNodes, concurrent);
+
+    // Any non-zero code is a failure; a negative code must not be mistaken for success.
+    if (retCode != 0) {
+      throw new RuntimeException("Generator failed with return code: " + retCode);
+    }
+  }
+  
+  protected void runVerify(String outputDir, int numReducers, long expectedNumNodes) throws Exception {
+    Verify verify = startVerify(outputDir, numReducers, false);
+    verify.waitForCompletion();
+    checkSuccess(verify, expectedNumNodes);
+  }
+
+  /**
+   * Checks a finished verification run: the job itself must have succeeded and
+   * {@code verify.verify(expectedNumNodes)} must return true.
+   *
+   * @param verify           the completed verification run
+   * @param expectedNumNodes number of nodes passed to {@code verify.verify}
+   * @throws RuntimeException if either check fails
+   */
+  private void checkSuccess(Verify verify, long expectedNumNodes) throws Exception {
+    if (!verify.isSuccessful()) {
+      throw new RuntimeException("Verify.isSuccessful() returned false");
+    }
+
+    boolean verifySuccess = verify.verify(expectedNumNodes);
+    if (!verifySuccess) {
+      throw new RuntimeException("Verify.verify failed");
+    }
+
+    LOG.info("Verify finished with success. Total nodes=" + expectedNumNodes);
+  }
+
+  protected Verify startVerify(String outputDir, int numReducers, boolean concurrent) throws Exception {
+    Path outputPath = new Path(outputDir);
+    UUID uuid = UUID.randomUUID(); //create a random UUID.
+    Path iterationOutput = new Path(outputPath, uuid.toString());
+    
+    Verify verify = new Verify();
+    verify.setConf(getConf());
+    verify.start(iterationOutput, numReducers, concurrent);
+    return verify;
+  }
+
+  /**
+   * Parses the command line and alternates Generator and Verify runs.
+   *
+   * <p>Expected arguments: {@code <num iterations> <num mappers> <num nodes per mapper>
+   * <output dir> <num reducers>}, plus the optional {@code -c} flag. A negative iteration
+   * count runs {@code Integer.MAX_VALUE} iterations. Without {@code -c} every generation is
+   * followed by a blocking verification. With {@code -c} a verification is started in the
+   * background and only replaced once it has completed; a final blocking verification is
+   * run if the last background one did not cover all generated nodes.
+   *
+   * @param args command line arguments
+   * @return 0 on success; failures are reported as exceptions
+   * @throws RuntimeException if nodes-per-mapper is not a multiple of {@code Generator.WRAP},
+   *         or a generation or verification step fails
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+
+    Options options = new Options();
+    options.addOption("c", "concurrent", false, "run generation and verification concurrently");
+
+    GnuParser parser = new GnuParser();
+    CommandLine cmd = null;
+    try {
+      cmd = parser.parse(options, args);
+      if (cmd.getArgs().length != 5) {
+        throw new ParseException("Did not see expected # of arguments, saw " + cmd.getArgs().length);
+      }
+    } catch (ParseException e) {
+      System.err.println("Failed to parse command line " + e.getMessage());
+      System.err.println();
+      HelpFormatter formatter = new HelpFormatter();
+      formatter.printHelp(getClass().getSimpleName() + " <num iterations> <num mappers> <num nodes per mapper> <output dir> <num reducers>", options);
+      System.exit(-1);
+    }
+
+    String[] positional = cmd.getArgs();
+    LOG.info("Running Loop with args:" + Arrays.deepToString(positional));
+
+    boolean concurrent = cmd.hasOption("c");
+    int numIterations = Integer.parseInt(positional[0]);
+    int numMappers = Integer.parseInt(positional[1]);
+    long numNodes = Long.parseLong(positional[2]);
+    String outputDir = positional[3];
+    int numReducers = Integer.parseInt(positional[4]);
+
+    if (numNodes % Generator.WRAP != 0) {
+      throw new RuntimeException("Number of nodes per mapper is not a multiple of " + String.format("%,d", Generator.WRAP));
+    }
+
+    long expectedNumNodes = 0;
+
+    if (numIterations < 0) {
+      numIterations = Integer.MAX_VALUE; //run indefinitely (kind of)
+    }
+
+    // Background verification (concurrent mode only) and the node count it was started for.
+    Verify verify = null;
+    long verifyNodes = 0;
+
+    for (int i = 0; i < numIterations; i++) {
+      LOG.info("Starting iteration = " + i);
+      runGenerator(numMappers, numNodes, concurrent);
+      expectedNumNodes += numMappers * numNodes;
+
+      if (concurrent) {
+        if (verify != null) {
+          // Only replace the background verification once the previous one has finished;
+          // otherwise leave it running and generate another round.
+          if (verify.isComplete()) {
+            checkSuccess(verify, verifyNodes);
+            verify = startVerify(outputDir, numReducers, true);
+            verifyNodes = expectedNumNodes;
+          }
+        } else {
+          verify = startVerify(outputDir, numReducers, true);
+          verifyNodes = expectedNumNodes;
+        }
+      } else {
+        runVerify(outputDir, numReducers, expectedNumNodes);
+      }
+    }
+
+    if (verify != null) {
+      verify.waitForCompletion();
+      checkSuccess(verify, verifyNodes);
+
+      // The last background run may predate the final generations; verify everything once more.
+      if (verifyNodes != expectedNumNodes) {
+        runVerify(outputDir, numReducers, expectedNumNodes);
+      }
+    }
+
+    return 0;
+  }
+  
+  public static void main(String[] args) throws Exception {
+    int ret = ToolRunner.run(new Loop(), args);
+    System.exit(ret);
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/gora/blob/a60a3370/gora-goraci/src/main/java/org/apache/gora/goraci/Print.java
----------------------------------------------------------------------
diff --git a/gora-goraci/src/main/java/org/apache/gora/goraci/Print.java b/gora-goraci/src/main/java/org/apache/gora/goraci/Print.java
new file mode 100644
index 0000000..cc940e3
--- /dev/null
+++ b/gora-goraci/src/main/java/org/apache/gora/goraci/Print.java
@@ -0,0 +1,95 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gora.goraci;
+
+import org.apache.gora.goraci.generated.CINode;
+
+import java.math.BigInteger;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.gora.query.Query;
+import org.apache.gora.query.Result;
+import org.apache.gora.store.DataStore;
+import org.apache.gora.store.DataStoreFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * A stand alone program that prints out portions of a list created by {@link Generator}.
+ *
+ * <p>Each node is printed on its own line as <code>key:prev:count:client</code>, with the key and
+ * the previous-node pointer rendered as 16 digit hexadecimal numbers.
+ */
+public class Print extends Configured implements Tool {
+
+  /** Number of nodes printed when no <code>--limit</code> option is given. */
+  private static final int DEFAULT_LIMIT = 100;
+
+  /**
+   * Parses the command line, queries the {@link CINode} store and prints the matching nodes.
+   *
+   * @param args command line; accepts <code>-s</code> (start key, hex), <code>-e</code> (end key,
+   *          hex) and <code>-l</code> (maximum number of nodes to print) and no positional arguments
+   * @return 0 on success, -1 when the command line could not be parsed
+   * @throws Exception if the data store cannot be created or the query fails
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+    Options options = new Options();
+    options.addOption("s", "start", true, "start key");
+    options.addOption("e", "end", true, "end key");
+    options.addOption("l", "limit", true, "number to print");
+
+    CommandLine cmd;
+    try {
+      cmd = new GnuParser().parse(options, args);
+      if (cmd.getArgs().length != 0) {
+        throw new ParseException("Command takes no arguments");
+      }
+    } catch (ParseException e) {
+      System.err.println("Failed to parse command line " + e.getMessage());
+      System.err.println();
+      HelpFormatter formatter = new HelpFormatter();
+      formatter.printHelp(getClass().getSimpleName(), options);
+      // Report the failure through the return value; main() turns it into the process exit code.
+      return -1;
+    }
+
+    // Use the configuration handed over by ToolRunner so options parsed there are honoured;
+    // fall back to a fresh one when the tool is driven without ToolRunner.
+    Configuration conf = getConf();
+    if (conf == null) {
+      conf = new Configuration();
+    }
+
+    DataStore<Long,CINode> store = DataStoreFactory.getDataStore(Long.class, CINode.class, conf);
+    try {
+      Query<Long,CINode> query = store.newQuery();
+
+      if (cmd.hasOption("s")) {
+        query.setStartKey(parseHexKey(cmd.getOptionValue("s")));
+      }
+
+      if (cmd.hasOption("e")) {
+        query.setEndKey(parseHexKey(cmd.getOptionValue("e")));
+      }
+
+      query.setLimit(cmd.hasOption("l") ? Integer.parseInt(cmd.getOptionValue("l")) : DEFAULT_LIMIT);
+
+      Result<Long,CINode> rs = store.execute(query);
+      try {
+        while (rs.next()) {
+          CINode node = rs.get();
+          System.out.printf("%016x:%016x:%012d:%s\n", rs.getKey(), node.getPrev(), node.getCount(), node.getClient());
+        }
+      } finally {
+        rs.close();
+      }
+    } finally {
+      // Release the store even when the query or the printing fails.
+      store.close();
+    }
+
+    return 0;
+  }
+
+  /**
+   * Converts a hexadecimal key, as printed by this tool, back into a long. Going through
+   * {@link BigInteger} lets values above {@link Long#MAX_VALUE} (keys printed from negative longs)
+   * wrap around instead of being rejected.
+   */
+  private static long parseHexKey(String hex) {
+    return new BigInteger(hex, 16).longValue();
+  }
+
+  /**
+   * Command line entry point: runs the tool through {@link ToolRunner} and exits with its status.
+   */
+  public static void main(String[] args) throws Exception {
+    int ret = ToolRunner.run(new Print(), args);
+    System.exit(ret);
+  }
+}

http://git-wip-us.apache.org/repos/asf/gora/blob/a60a3370/gora-goraci/src/main/java/org/apache/gora/goraci/Verify.java
----------------------------------------------------------------------
diff --git a/gora-goraci/src/main/java/org/apache/gora/goraci/Verify.java b/gora-goraci/src/main/java/org/apache/gora/goraci/Verify.java
new file mode 100644
index 0000000..7a449c6
--- /dev/null
+++ b/gora-goraci/src/main/java/org/apache/gora/goraci/Verify.java
@@ -0,0 +1,294 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gora.goraci;
+
+import org.apache.gora.goraci.generated.CINode;
+import org.apache.gora.goraci.generated.Flushed;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.avro.util.Utf8;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.gora.mapreduce.GoraMapper;
+import org.apache.gora.query.Query;
+import org.apache.gora.query.Result;
+import org.apache.gora.store.DataStore;
+import org.apache.gora.store.DataStoreFactory;
+import org.apache.gora.util.GoraException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.VLongWritable;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.Counters;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * A Map Reduce job that verifies that the linked list generated by {@link org.apache.gora.goraci.Generator} do not have any holes.
+ */
+public class Verify extends Configured implements Tool {
+  
+  private static final Log LOG = LogFactory.getLog(Verify.class);
+  private static final VLongWritable DEF = new VLongWritable(-1);
+  
+  private Job job;
+  
+
+  public static class VerifyMapper extends GoraMapper<Long,CINode,LongWritable,VLongWritable> {
+    private LongWritable row = new LongWritable();
+    private LongWritable ref = new LongWritable();
+    private VLongWritable vrow = new VLongWritable();
+    private Map<Utf8,Long> flushed = null;
+    
+    @Override
+    protected void setup(Context context) throws IOException, InterruptedException {
+      super.setup(context);
+      
+      String[] entries = context.getConfiguration().getStrings("org.apache.gora.goraci.verify.flushed");
+      
+      if (entries != null && entries.length > 0) {
+        flushed = new HashMap<Utf8,Long>();
+        for (String entry : entries) {
+          String[] kv = entry.split(":");
+          flushed.put(new Utf8(kv[0]), Long.parseLong(kv[1]));
+        }
+      }
+    }
+    
+    @Override
+    protected void map(Long key, CINode node, Context context) throws IOException, InterruptedException {
+      if (flushed != null) {
+        Long count = flushed.get(node.getClient());
+        if (count == null || node.getCount() >= count) {
+          context.getCounter(Counts.IGNORED).increment(1);
+          return;
+        }
+      }
+
+      row.set(key);
+      context.write(row, DEF);
+      
+      if (node.getPrev() >= 0) {
+        ref.set(node.getPrev());
+        vrow.set(key);
+        context.write(ref, vrow);
+      }
+    }
+  }
+
+  public static enum Counts {
+    UNREFERENCED, UNDEFINED, REFERENCED, CORRUPT, IGNORED
+  }
+  
+  public static class VerifyReducer extends Reducer<LongWritable,VLongWritable,Text,Text> {
+    private ArrayList<Long> refs = new ArrayList<Long>();
+    
+    public void reduce(LongWritable key, Iterable<VLongWritable> values, Context context) throws IOException, InterruptedException {
+      
+      int defCount = 0;
+      
+      refs.clear();
+      for (VLongWritable type : values) {
+        if (type.get() == -1) {
+          defCount++;
+        } else {
+          refs.add(type.get());
+        }
+      }
+      
+      // TODO check for more than one def, should not happen
+
+      if (defCount == 0 && refs.size() > 0) {
+        // this is bad, found a node that is referenced but not defined. It must have been lost, emit some info about this node for debugging purposes.
+        
+        StringBuilder sb = new StringBuilder();
+        String comma = "";
+        for (Long ref : refs) {
+          sb.append(comma);
+          comma = ",";
+          sb.append(String.format("%016x", ref));
+        }
+        
+        context.write(new Text(String.format("%016x", key.get())), new Text(sb.toString()));
+        context.getCounter(Counts.UNDEFINED).increment(1);
+        
+      } else if (defCount > 0 && refs.size() == 0) {
+        // node is defined but not referenced
+        context.getCounter(Counts.UNREFERENCED).increment(1);
+      } else {
+        // node is defined and referenced
+        context.getCounter(Counts.REFERENCED).increment(1);
+      }
+      
+    }
+  }
+ 
+  @Override
+  public int run(String[] args) throws Exception {
+    
+    Options options = new Options();
+    options.addOption("c", "concurrent", false, "run concurrently with generation");
+    
+    GnuParser parser = new GnuParser();
+    CommandLine cmd = null;
+    try {
+      cmd = parser.parse(options, args);
+      if (cmd.getArgs().length != 2) {
+        throw new ParseException("Did not see expected # of arguments, saw " + cmd.getArgs().length);
+      }
+    } catch (ParseException e) {
+      System.err.println("Failed to parse command line " + e.getMessage());
+      System.err.println();
+      HelpFormatter formatter = new HelpFormatter();
+      formatter.printHelp(getClass().getSimpleName() + " <output dir> <num reducers>", options);
+      System.exit(-1);
+    }
+
+    String outputDir = cmd.getArgs()[0];
+    int numReducers = Integer.parseInt(cmd.getArgs()[1]);
+
+    return run(outputDir, numReducers, cmd.hasOption("c"));
+  }
+
+  public int run(String outputDir, int numReducers, boolean concurrent) throws Exception {
+    return run(new Path(outputDir), numReducers, concurrent);
+  }
+  
+  public int run(Path outputDir, int numReducers, boolean concurrent) throws Exception {
+    start(outputDir, numReducers, concurrent);
+    
+    boolean success = job.waitForCompletion(true);
+    
+    return success ? 0 : 1;
+  }
+  
+  public void start(Path outputDir, int numReducers, boolean concurrent) throws GoraException, IOException, Exception {
+    LOG.info("Running Verify with outputDir=" + outputDir +", numReducers=" + numReducers);
+    
+    DataStore<Long,CINode> store = DataStoreFactory.getDataStore(Long.class, CINode.class, new Configuration());
+
+    job = new Job(getConf());
+    
+    if (!job.getConfiguration().get("io.serializations").contains("org.apache.hadoop.io.serializer.JavaSerialization")) {
+      job.getConfiguration().set("io.serializations", job.getConfiguration().get("io.serializations") + ",org.apache.hadoop.io.serializer.JavaSerialization");
+    }
+
+    job.setJobName("Link Verifier");
+    job.setNumReduceTasks(numReducers);
+    job.setJarByClass(getClass());
+    
+    Query<Long,CINode> query = store.newQuery();
+    if (!concurrent) {
+      // no concurrency filtering, only need prev field
+      query.setFields("prev");
+    } else {
+      readFlushed(job.getConfiguration());
+    }
+
+    GoraMapper.initMapperJob(job, query, store, LongWritable.class, VLongWritable.class, VerifyMapper.class, true);
+
+    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
+    
+    job.setReducerClass(VerifyReducer.class);
+    job.setOutputFormatClass(TextOutputFormat.class);
+    TextOutputFormat.setOutputPath(job, outputDir);
+
+    store.close();
+    
+    job.submit();
+  }
+  
+  public boolean isComplete() throws IOException {
+    return job.isComplete();
+  }
+
+  public boolean isSuccessful() throws IOException {
+    return job.isSuccessful();
+  }
+  
+  public boolean waitForCompletion() throws IOException, InterruptedException, ClassNotFoundException {
+    return job.waitForCompletion(true);
+  }
+
+  private void readFlushed(Configuration conf) throws Exception {
+    DataStore<Utf8,Flushed> flushedTable = DataStoreFactory.getDataStore(Utf8.class, Flushed.class, conf);
+    
+    Query<Utf8,Flushed> query = flushedTable.newQuery();
+    Result<Utf8,Flushed> result = flushedTable.execute(query);
+    
+    ArrayList<String> flushedEntries = new ArrayList<String>();
+    while (result.next()) {
+      flushedEntries.add(result.getKey() + ":" + result.get().getCount());
+    }
+    
+    conf.setStrings("org.apache.gora.goraci.verify.flushed", flushedEntries.toArray(new String[] {}));
+    
+    flushedTable.close();
+  }
+
+  public boolean verify(long expectedReferenced) throws Exception {
+    if (job == null) {
+      throw new IllegalStateException("You should call run() first");
+    }
+    
+    Counters counters = job.getCounters();
+    
+    Counter referenced = counters.findCounter(Counts.REFERENCED);
+    Counter unreferenced = counters.findCounter(Counts.UNREFERENCED);
+    Counter undefined = counters.findCounter(Counts.UNDEFINED);
+    
+    boolean success = true;
+    //assert
+    if (expectedReferenced != referenced.getValue()) {
+      LOG.error("Expected referenced count does not match with actual referenced count. " +
+      		"expected referenced=" + expectedReferenced + " ,actual=" + referenced.getValue());
+      success = false;
+    }
+
+    if (unreferenced.getValue() > 0) { 
+      LOG.error("Unreferenced nodes were not expected. Unreferenced count=" + unreferenced.getValue());
+      success = false;
+    }
+    
+    if (undefined.getValue() > 0) { 
+      LOG.error("Found an undefined node. Undefined count=" + undefined.getValue());
+      success = false;
+    }
+    
+    return success;
+  }
+  
+  public static void main(String[] args) throws Exception {
+    int ret = ToolRunner.run(new Verify(), args);
+    System.exit(ret);
+  }
+}

http://git-wip-us.apache.org/repos/asf/gora/blob/a60a3370/gora-goraci/src/main/java/org/apache/gora/goraci/Walker.java
----------------------------------------------------------------------
diff --git a/gora-goraci/src/main/java/org/apache/gora/goraci/Walker.java b/gora-goraci/src/main/java/org/apache/gora/goraci/Walker.java
new file mode 100644
index 0000000..d0e0165
--- /dev/null
+++ b/gora-goraci/src/main/java/org/apache/gora/goraci/Walker.java
@@ -0,0 +1,129 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gora.goraci;
+
+import org.apache.gora.goraci.generated.CINode;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.gora.query.Query;
+import org.apache.gora.query.Result;
+import org.apache.gora.store.DataStore;
+import org.apache.gora.store.DataStoreFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * A stand alone program that follows a linked list created by {@link Generator} and prints timing info.
+ *
+ * <p>Output lines are tagged <code>FSR</code> (search for a start node), <code>CQ</code> and
+ * <code>HQ</code> (first and second lookup of the same node); each carries the elapsed
+ * milliseconds and, where a node is involved, its key as a 16 digit hexadecimal number.
+ */
+public class Walker extends Configured implements Tool {
+
+  private static final String[] PREV_FIELD = new String[] {"prev"};
+
+  /**
+   * Repeatedly picks a random start node and walks its <code>prev</code> chain until the
+   * requested number of queries has been issued.
+   *
+   * @param args command line; accepts <code>-n</code> (number of queries, unlimited in practice
+   *          when absent) and no positional arguments
+   * @return 0 on success, -1 when the command line could not be parsed
+   * @throws IOException if the data store cannot be created or a lookup fails
+   */
+  @Override
+  public int run(String[] args) throws IOException {
+    Options options = new Options();
+    options.addOption("n", "num", true, "number of queries");
+
+    CommandLine cmd;
+    try {
+      cmd = new GnuParser().parse(options, args);
+      if (cmd.getArgs().length != 0) {
+        throw new ParseException("Command takes no arguments");
+      }
+    } catch (ParseException e) {
+      System.err.println("Failed to parse command line " + e.getMessage());
+      System.err.println();
+      HelpFormatter formatter = new HelpFormatter();
+      formatter.printHelp(getClass().getSimpleName(), options);
+      // Report the failure through the return value; main() turns it into the process exit code.
+      return -1;
+    }
+
+    long maxQueries = Long.MAX_VALUE;
+    if (cmd.hasOption('n')) {
+      maxQueries = Long.parseLong(cmd.getOptionValue("n"));
+    }
+
+    // Use the configuration handed over by ToolRunner so options parsed there are honoured;
+    // fall back to a fresh one when the tool is driven without ToolRunner.
+    Configuration conf = getConf();
+    if (conf == null) {
+      conf = new Configuration();
+    }
+
+    DataStore<Long,CINode> store = DataStoreFactory.getDataStore(Long.class, CINode.class, conf);
+    try {
+      Random rand = new Random();
+
+      long numQueries = 0;
+
+      while (numQueries < maxQueries) {
+        CINode node = findStartNode(rand, store);
+        numQueries++;
+        while (node != null && node.getPrev() >= 0 && numQueries < maxQueries) {
+          long prev = node.getPrev();
+
+          // The same key is fetched twice back to back and both latencies are reported,
+          // presumably to compare a first read against an immediate repeat read.
+          long t1 = System.currentTimeMillis();
+          node = store.get(prev, PREV_FIELD);
+          long t2 = System.currentTimeMillis();
+          System.out.printf("CQ %d %016x \n", t2 - t1, prev);
+          numQueries++;
+
+          t1 = System.currentTimeMillis();
+          node = store.get(prev, PREV_FIELD);
+          t2 = System.currentTimeMillis();
+          System.out.printf("HQ %d %016x \n", t2 - t1, prev);
+          numQueries++;
+
+        }
+      }
+    } finally {
+      // Release the store even when a lookup fails part way through the walk.
+      store.close();
+    }
+
+    return 0;
+  }
+
+  /**
+   * Queries for the first node at or after a random key and reports how long the query took.
+   *
+   * @return the node found, or null when the query returned nothing or reading the result failed
+   */
+  private static CINode findStartNode(Random rand, DataStore<Long,CINode> store) throws IOException {
+    Query<Long,CINode> query = store.newQuery();
+    query.setStartKey(rand.nextLong());
+    query.setLimit(1);
+    query.setFields(PREV_FIELD);
+
+    long t1 = System.currentTimeMillis();
+    Result<Long,CINode> rs = store.execute(query);
+    long t2 = System.currentTimeMillis();
+
+    try {
+      try {
+        if (rs.next()) {
+          System.out.printf("FSR %d %016x\n", t2 - t1, rs.getKey());
+          return rs.get();
+        }
+      } catch (Exception e) {
+        // best effort: a failed read is reported and treated like "no start node found"
+        e.printStackTrace();
+      }
+
+      System.out.println("FSR " + (t2 - t1));
+
+      return null;
+    } finally {
+      // This method runs once per walk, so an unclosed result would pile up over a long run.
+      rs.close();
+    }
+  }
+
+  /**
+   * Command line entry point: runs the tool through {@link ToolRunner} and exits with its status.
+   */
+  public static void main(String[] args) throws Exception {
+    int ret = ToolRunner.run(new Walker(), args);
+    System.exit(ret);
+  }
+
+}


Mime
View raw message