accumulo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cjno...@apache.org
Subject [2/2] git commit: ACCUMULO-1746 Adding documentation for MultiTableInputFormat
Date Wed, 16 Oct 2013 04:11:19 GMT
ACCUMULO-1746 Adding documentation for MultiTableInputFormat


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/01e5296d
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/01e5296d
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/01e5296d

Branch: refs/heads/master
Commit: 01e5296d5b12d20ea98425e652a472ddbfa954c6
Parents: fe23924
Author: Corey J. Nolet <cjnolet@gmail.com>
Authored: Wed Oct 16 00:07:07 2013 -0400
Committer: Corey J. Nolet <cjnolet@gmail.com>
Committed: Wed Oct 16 00:07:07 2013 -0400

----------------------------------------------------------------------
 .../core/client/mapreduce/BatchScanConfig.java  |  7 +-
 .../accumulo_user_manual/chapters/analytics.tex | 80 ++++++++++++++++++++
 2 files changed, 84 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/01e5296d/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java
index e4b89ca..985df55 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/BatchScanConfig.java
@@ -20,6 +20,7 @@ import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
@@ -37,7 +38,7 @@ public class BatchScanConfig implements Writable {
 
   private List<IteratorSetting> iterators;
   private List<Range> ranges;
-  private Set<Pair<Text,Text>> columns;
+  private Collection<Pair<Text,Text>> columns;
 
   private boolean autoAdjustRanges = true;
   private boolean useLocalIterators = false;
@@ -83,7 +84,7 @@ public class BatchScanConfig implements Writable {
    *          selected. An empty set is the default and is equivalent to scanning all columns.
    * @since 1.6.0
    */
-  public BatchScanConfig fetchColumns(Set<Pair<Text,Text>> columns) {
+  public BatchScanConfig fetchColumns(Collection<Pair<Text,Text>> columns) {
     this.columns = columns;
     return this;
   }
@@ -91,7 +92,7 @@ public class BatchScanConfig implements Writable {
   /**
    * Returns the columns to be fetched for this configuration
    */
-  public Set<Pair<Text,Text>> getFetchedColumns() {
+  public Collection<Pair<Text,Text>> getFetchedColumns() {
     return columns != null ? columns : new HashSet<Pair<Text,Text>>();
   }
 

http://git-wip-us.apache.org/repos/asf/accumulo/blob/01e5296d/docs/src/main/latex/accumulo_user_manual/chapters/analytics.tex
----------------------------------------------------------------------
diff --git a/docs/src/main/latex/accumulo_user_manual/chapters/analytics.tex b/docs/src/main/latex/accumulo_user_manual/chapters/analytics.tex
index d10baa8..7bbd177 100644
--- a/docs/src/main/latex/accumulo_user_manual/chapters/analytics.tex
+++ b/docs/src/main/latex/accumulo_user_manual/chapters/analytics.tex
@@ -125,6 +125,86 @@ AccumuloInputFormat.addIterator(job, is);
 \end{verbatim}
 \normalsize
 
+\subsection{AccumuloMultiTableInputFormat options}
+
+The AccumuloMultiTableInputFormat allows scanning over multiple tables 
+in a single MapReduce job. Separate ranges, columns, and iterators can be 
+used for each table. 
+
+\small
+\begin{verbatim}
+BatchScanConfig tableOneConfig = new BatchScanConfig();
+BatchScanConfig tableTwoConfig = new BatchScanConfig();
+\end{verbatim}
+\normalsize
+
+To set the configuration objects on the job:
+
+\small
+\begin{verbatim}
+Map<String, BatchScanConfig> configs = new HashMap<String,BatchScanConfig>();
+configs.put("table1", tableOneConfig);
+configs.put("table2", tableTwoConfig);
+AccumuloMultiTableInputFormat.setBatchScanConfigs(job, configs);
+\end{verbatim}
+\normalsize
+
+\Large
+\textbf{Optional settings:}
+\normalsize
+
+To restrict to a set of ranges:
+
+\small
+\begin{verbatim}
+ArrayList<Range> tableOneRanges = new ArrayList<Range>();
+ArrayList<Range> tableTwoRanges = new ArrayList<Range>();
+// populate array lists of row ranges for tables...
+tableOneConfig.setRanges(tableOneRanges);
+tableTwoConfig.setRanges(tableTwoRanges);
+\end{verbatim}
+\normalsize
+
+To restrict Accumulo to a list of columns:
+
+\small
+\begin{verbatim}
+ArrayList<Pair<Text,Text>> tableOneColumns = new ArrayList<Pair<Text,Text>>();
+ArrayList<Pair<Text,Text>> tableTwoColumns = new ArrayList<Pair<Text,Text>>();
+// populate lists of columns for each of the tables ...
+tableOneConfig.fetchColumns(tableOneColumns);
+tableTwoConfig.fetchColumns(tableTwoColumns);
+\end{verbatim}
+\normalsize
+
+To set scan iterators:
+
+\small
+\begin{verbatim}
+List<IteratorSetting> tableOneIterators = new ArrayList<IteratorSetting>();
+List<IteratorSetting> tableTwoIterators = new ArrayList<IteratorSetting>();
+// populate the lists of iterator settings for each of the tables ...
+tableOneConfig.setIterators(tableOneIterators);
+tableTwoConfig.setIterators(tableTwoIterators);
+\end{verbatim}
+\normalsize
+
+
+The name of the table can be retrieved from the input split:
+
+\small
+\begin{verbatim}
+class MyMapper extends Mapper<Key,Value,WritableComparable,Writable> {
+    public void map(Key k, Value v, Context c) {
+        RangeInputSplit split = (RangeInputSplit)c.getInputSplit();
+        String tableName = split.getTableName();
+        // do something with table name 
+    }
+}
+\end{verbatim}
+\normalsize
+
+
 \subsection{AccumuloOutputFormat options}
 
 \small


Mime
View raw message