accumulo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From els...@apache.org
Subject [2/2] accumulo git commit: ACCUMULO-3602 Added support for grouping ranges per tablet when using AccumuloInputFormat
Date Fri, 17 Apr 2015 21:13:44 GMT
ACCUMULO-3602 Added support for grouping ranges per tablet when using AccumuloInputFormat

Introduces a new option to AccumuloInputFormat: batchScan. Comes with
a BatchInputSplit implementation which holds many Ranges against
the same Tablet to reduce the amount of overhead in InputSplits
generated by the InputFormat.


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/956a50ec
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/956a50ec
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/956a50ec

Branch: refs/heads/master
Commit: 956a50ecb358ab2df3a705c05f3fa8c4ae8846a8
Parents: 315530f
Author: Eugene Cheipesh <echeipesh@gmail.com>
Authored: Fri Apr 17 15:42:43 2015 -0400
Committer: Josh Elser <elserj@apache.org>
Committed: Fri Apr 17 16:26:55 2015 -0400

----------------------------------------------------------------------
 core/src/main/findbugs/exclude-filter.xml       |   1 +
 .../core/client/mapred/AbstractInputFormat.java | 263 +++++++----
 .../core/client/mapred/AccumuloInputFormat.java |   8 +-
 .../core/client/mapred/InputFormatBase.java     |  60 ++-
 .../client/mapred/impl/BatchInputSplit.java     |  42 ++
 .../client/mapreduce/AbstractInputFormat.java   | 249 +++++++----
 .../client/mapreduce/AccumuloInputFormat.java   |  12 +-
 .../AccumuloMultiTableInputFormat.java          |  12 +-
 .../core/client/mapreduce/InputFormatBase.java  |  65 ++-
 .../core/client/mapreduce/RangeInputSplit.java  | 424 ++----------------
 .../mapreduce/impl/AccumuloInputSplit.java      | 445 +++++++++++++++++++
 .../client/mapreduce/impl/BatchInputSplit.java  | 152 +++++++
 .../mapreduce/lib/impl/InputConfigurator.java   |  36 +-
 .../mapreduce/AccumuloInputFormatTest.java      |  30 +-
 .../mapreduce/impl/BatchInputSplitTest.java     | 122 +++++
 .../test/functional/AccumuloInputFormatIT.java  |  49 +-
 16 files changed, 1348 insertions(+), 622 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/956a50ec/core/src/main/findbugs/exclude-filter.xml
----------------------------------------------------------------------
diff --git a/core/src/main/findbugs/exclude-filter.xml b/core/src/main/findbugs/exclude-filter.xml
index 0f66e06..3cc5e30 100644
--- a/core/src/main/findbugs/exclude-filter.xml
+++ b/core/src/main/findbugs/exclude-filter.xml
@@ -49,6 +49,7 @@
       <Package name="org.apache.accumulo.core.iterators" />
       <Package name="org.apache.accumulo.core.trace" />
       <Class name="org.apache.accumulo.core.client.mapred.RangeInputSplit" />
+      <Class name="org.apache.accumulo.core.client.mapred.impl.BatchInputSplit" />
       <Class name="org.apache.accumulo.core.util.AddressUtil" />
       <Class name="org.apache.accumulo.core.zookeeper.ZooUtil" />
     </Or>

http://git-wip-us.apache.org/repos/asf/accumulo/blob/956a50ec/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
index 1071683..df317e3 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
@@ -34,18 +34,22 @@ import org.apache.accumulo.core.client.ClientSideIteratorScanner;
 import org.apache.accumulo.core.client.Connector;
 import org.apache.accumulo.core.client.Instance;
 import org.apache.accumulo.core.client.IsolatedScanner;
-import org.apache.accumulo.core.client.IteratorSetting;
 import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.BatchScanner;
+import org.apache.accumulo.core.client.ScannerBase;
 import org.apache.accumulo.core.client.TableDeletedException;
 import org.apache.accumulo.core.client.TableNotFoundException;
 import org.apache.accumulo.core.client.TableOfflineException;
+import org.apache.accumulo.core.client.IteratorSetting;
 import org.apache.accumulo.core.client.admin.DelegationTokenConfig;
 import org.apache.accumulo.core.client.impl.ClientContext;
 import org.apache.accumulo.core.client.impl.OfflineScanner;
 import org.apache.accumulo.core.client.impl.ScannerImpl;
 import org.apache.accumulo.core.client.impl.Tables;
 import org.apache.accumulo.core.client.impl.TabletLocator;
+import org.apache.accumulo.core.client.mapred.impl.BatchInputSplit;
 import org.apache.accumulo.core.client.mapreduce.InputTableConfig;
+import org.apache.accumulo.core.client.mapreduce.impl.AccumuloInputSplit;
 import org.apache.accumulo.core.client.mapreduce.lib.impl.ConfiguratorBase;
 import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
 import org.apache.accumulo.core.client.mock.MockInstance;
@@ -384,7 +388,48 @@ public abstract class AbstractInputFormat<K,V> implements InputFormat<K,V> {
   protected abstract static class AbstractRecordReader<K,V> implements RecordReader<K,V> {
     protected long numKeysRead;
     protected Iterator<Map.Entry<Key,Value>> scannerIterator;
-    protected RangeInputSplit split;
+    protected org.apache.accumulo.core.client.mapreduce.impl.AccumuloInputSplit split;
+    protected ScannerBase scannerBase;
+
+
+    /**
+     * Extracts Iterators settings from the context to be used by RecordReader.
+     *
+     * @param job
+     *          the Hadoop job configuration
+     * @param tableName
+     *          the table name for which the scanner is configured
+     * @return List of iterator settings for given table
+     * @since 1.7.0
+     */
+    protected abstract List<IteratorSetting> jobIterators(JobConf job, String tableName);
+
+    /**
+     * Configures the iterators on a scanner for the given table name.
+     *
+     * @param job
+     *          the Hadoop job configuration
+     * @param scanner
+     *          the scanner for which to configure the iterators
+     * @param tableName
+     *          the table name for which the scanner is configured
+     * @since 1.7.0
+     */
+    private void setupIterators(JobConf job, ScannerBase scanner, String tableName, AccumuloInputSplit split) {
+      List<IteratorSetting> iterators = null;
+
+      if (null == split) {
+        iterators = jobIterators(job, tableName);
+      } else {
+        iterators = split.getIterators();
+        if (null == iterators) {
+          iterators = jobIterators(job, tableName);
+        }
+      }
+
+      for (IteratorSetting iterator : iterators)
+        scanner.addScanIterator(iterator);
+    }
 
     /**
      * Configures the iterators on a scanner for the given table name.
@@ -396,16 +441,19 @@ public abstract class AbstractInputFormat<K,V> implements InputFormat<K,V> {
      * @param tableName
      *          the table name for which the scanner is configured
      * @since 1.6.0
+     * @deprecated since 1.7.0; Use {@link #jobIterators} instead.
      */
-    protected abstract void setupIterators(JobConf job, Scanner scanner, String tableName, RangeInputSplit split);
+    @Deprecated
+    protected void setupIterators(JobConf job, Scanner scanner, String tableName, RangeInputSplit split) {
+      setupIterators(job, (ScannerBase) scanner, tableName, (AccumuloInputSplit) split);
+    }
 
     /**
      * Initialize a scanner over the given input split using this task attempt configuration.
      */
     public void initialize(InputSplit inSplit, JobConf job) throws IOException {
-      Scanner scanner;
-      split = (RangeInputSplit) inSplit;
-      log.debug("Initializing input split: " + split.getRange());
+      split = (AccumuloInputSplit) inSplit;
+      log.debug("Initializing input split: " + split.toString());
 
       Instance instance = split.getInstance(getClientConfiguration(job));
       if (null == instance) {
@@ -433,24 +481,74 @@ public abstract class AbstractInputFormat<K,V> implements InputFormat<K,V> {
       // but the scanner will use the table id resolved at job setup time
       InputTableConfig tableConfig = getInputTableConfig(job, split.getTableName());
 
-      Boolean isOffline = split.isOffline();
-      if (null == isOffline) {
-        isOffline = tableConfig.isOfflineScan();
-      }
+      log.debug("Creating connector with user: " + principal);
+      log.debug("Creating scanner for table: " + table);
+      log.debug("Authorizations are: " + authorizations);
 
-      Boolean isIsolated = split.isIsolatedScan();
-      if (null == isIsolated) {
-        isIsolated = tableConfig.shouldUseIsolatedScanners();
-      }
+      if (split instanceof org.apache.accumulo.core.client.mapreduce.RangeInputSplit) {
+        org.apache.accumulo.core.client.mapreduce.RangeInputSplit rangeSplit = (org.apache.accumulo.core.client.mapreduce.RangeInputSplit) split;
 
-      Boolean usesLocalIterators = split.usesLocalIterators();
-      if (null == usesLocalIterators) {
-        usesLocalIterators = tableConfig.shouldUseLocalIterators();
-      }
+        Boolean isOffline = rangeSplit.isOffline();
+        if (null == isOffline) {
+          isOffline = tableConfig.isOfflineScan();
+        }
+
+        Boolean isIsolated = rangeSplit.isIsolatedScan();
+        if (null == isIsolated) {
+          isIsolated = tableConfig.shouldUseIsolatedScanners();
+        }
 
-      List<IteratorSetting> iterators = split.getIterators();
-      if (null == iterators) {
-        iterators = tableConfig.getIterators();
+        Boolean usesLocalIterators = rangeSplit.usesLocalIterators();
+        if (null == usesLocalIterators) {
+          usesLocalIterators = tableConfig.shouldUseLocalIterators();
+        }
+
+        Scanner scanner;
+
+        try {
+          if (isOffline) {
+            scanner = new OfflineScanner(instance, new Credentials(principal, token), split.getTableId(), authorizations);
+          } else if (instance instanceof MockInstance) {
+            scanner = instance.getConnector(principal, token).createScanner(split.getTableName(), authorizations);
+          } else {
+            ClientConfiguration clientConf = getClientConfiguration(job);
+            ClientContext context = new ClientContext(instance, new Credentials(principal, token), clientConf);
+            scanner = new ScannerImpl(context, split.getTableId(), authorizations);
+          }
+          if (isIsolated) {
+            log.info("Creating isolated scanner");
+            scanner = new IsolatedScanner(scanner);
+          }
+          if (usesLocalIterators) {
+            log.info("Using local iterators");
+            scanner = new ClientSideIteratorScanner(scanner);
+          }
+          setupIterators(job, scanner, split.getTableName(), split);
+        } catch (Exception e) {
+          throw new IOException(e);
+        }
+
+        scanner.setRange(rangeSplit.getRange());
+        scannerBase = scanner;
+
+      } else if (split instanceof BatchInputSplit) {
+        BatchScanner scanner;
+        BatchInputSplit multiRangeSplit = (BatchInputSplit) split;
+
+        try{
+          // Note: BatchScanner will use at most one thread per tablet, currently BatchInputSplit will not span tablets
+          int scanThreads = 1;
+          scanner = instance.getConnector(principal, token).createBatchScanner(split.getTableName(), authorizations, scanThreads);
+          setupIterators(job, scanner, split.getTableName(), split);
+        } catch (Exception e) {
+          throw new IOException(e);
+        }
+
+        scanner.setRanges(multiRangeSplit.getRanges());
+        scannerBase = scanner;
+
+      } else {
+        throw new IllegalArgumentException("Can not initialize from " + split.getClass().toString());
       }
 
       Collection<Pair<Text,Text>> columns = split.getFetchedColumns();
@@ -458,53 +556,27 @@ public abstract class AbstractInputFormat<K,V> implements InputFormat<K,V> {
         columns = tableConfig.getFetchedColumns();
       }
 
-      try {
-        log.debug("Creating connector with user: " + principal);
-        log.debug("Creating scanner for table: " + table);
-        log.debug("Authorizations are: " + authorizations);
-        if (isOffline) {
-          scanner = new OfflineScanner(instance, new Credentials(principal, token), split.getTableId(), authorizations);
-        } else if (instance instanceof MockInstance) {
-          scanner = instance.getConnector(principal, token).createScanner(split.getTableName(), authorizations);
-        } else {
-          ClientConfiguration clientConf = getClientConfiguration(job);
-          ClientContext context = new ClientContext(instance, new Credentials(principal, token), clientConf);
-          scanner = new ScannerImpl(context, split.getTableId(), authorizations);
-        }
-        if (isIsolated) {
-          log.info("Creating isolated scanner");
-          scanner = new IsolatedScanner(scanner);
-        }
-        if (usesLocalIterators) {
-          log.info("Using local iterators");
-          scanner = new ClientSideIteratorScanner(scanner);
-        }
-        setupIterators(job, scanner, split.getTableName(), split);
-      } catch (Exception e) {
-        throw new IOException(e);
-      }
-
       // setup a scanner within the bounds of this split
       for (Pair<Text,Text> c : columns) {
         if (c.getSecond() != null) {
           log.debug("Fetching column " + c.getFirst() + ":" + c.getSecond());
-          scanner.fetchColumn(c.getFirst(), c.getSecond());
+          scannerBase.fetchColumn(c.getFirst(), c.getSecond());
         } else {
           log.debug("Fetching column family " + c.getFirst());
-          scanner.fetchColumnFamily(c.getFirst());
+          scannerBase.fetchColumnFamily(c.getFirst());
         }
       }
 
-      scanner.setRange(split.getRange());
-
+      scannerIterator = scannerBase.iterator();
       numKeysRead = 0;
-
-      // do this last after setting all scanner options
-      scannerIterator = scanner.iterator();
     }
 
     @Override
-    public void close() {}
+    public void close() {
+      if (null != scannerBase) {
+        scannerBase.close();
+      }
+    }
 
     @Override
     public long getPos() throws IOException {
@@ -531,6 +603,7 @@ public abstract class AbstractInputFormat<K,V> implements InputFormat<K,V> {
     return InputConfigurator.binOffline(tableId, ranges, instance, conn);
   }
 
+
   /**
    * Gets the splits of the tables that have been set on the job by reading the metadata table for the specified ranges.
    *
@@ -552,26 +625,32 @@ public abstract class AbstractInputFormat<K,V> implements InputFormat<K,V> {
       InputTableConfig tableConfig = tableConfigEntry.getValue();
 
       Instance instance = getInstance(job);
-      boolean mockInstance;
       String tableId;
       // resolve table name to id once, and use id from this point forward
       if (instance instanceof MockInstance) {
         tableId = "";
-        mockInstance = true;
       } else {
         try {
           tableId = Tables.getTableId(instance, tableName);
         } catch (TableNotFoundException e) {
           throw new IOException(e);
         }
-        mockInstance = false;
       }
 
       Authorizations auths = getScanAuthorizations(job);
       String principal = getPrincipal(job);
       AuthenticationToken token = getAuthenticationToken(job);
 
+      boolean batchScan =  InputConfigurator.isBatchScan(CLASS, job);
+      boolean supportBatchScan =
+        !(tableConfig.isOfflineScan() || tableConfig.shouldUseIsolatedScanners() || tableConfig.shouldUseLocalIterators());
+      if (batchScan && !supportBatchScan)
+        throw new IllegalArgumentException("BatchScanner optimization not available for offline scan, isolated, or local iterators");
+
       boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
+      if (batchScan && !autoAdjust)
+        throw new IllegalArgumentException("AutoAdjustRanges must be enabled when using BatchScanner optimization");
+
       List<Range> ranges = autoAdjust ? Range.mergeOverlapping(tableConfig.getRanges()) : tableConfig.getRanges();
       if (ranges.isEmpty()) {
         ranges = new ArrayList<Range>(1);
@@ -629,32 +708,36 @@ public abstract class AbstractInputFormat<K,V> implements InputFormat<K,V> {
         }
         for (Map.Entry<KeyExtent,List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
           Range ke = extentRanges.getKey().toDataRange();
-          for (Range r : extentRanges.getValue()) {
-            if (autoAdjust) {
-              // divide ranges into smaller ranges, based on the tablets
-              RangeInputSplit split = new RangeInputSplit(tableName, tableId, ke.clip(r), new String[] {location});
-
-              split.setOffline(tableConfig.isOfflineScan());
-              split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
-              split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
-              split.setMockInstance(mockInstance);
-              split.setFetchedColumns(tableConfig.getFetchedColumns());
-              split.setPrincipal(principal);
-              split.setToken(token);
-              split.setInstanceName(instance.getInstanceName());
-              split.setZooKeepers(instance.getZooKeepers());
-              split.setAuths(auths);
-              split.setIterators(tableConfig.getIterators());
-              split.setLogLevel(logLevel);
-
-              splits.add(split);
-            } else {
-              // don't divide ranges
-              ArrayList<String> locations = splitsToAdd.get(r);
-              if (locations == null)
-                locations = new ArrayList<String>(1);
-              locations.add(location);
-              splitsToAdd.put(r, locations);
+          if (batchScan) {
+            // group ranges by tablet to be read by a BatchScanner
+            ArrayList<Range> clippedRanges = new ArrayList<Range>();
+            for(Range r: extentRanges.getValue())
+              clippedRanges.add(ke.clip(r));
+
+            BatchInputSplit split = new BatchInputSplit(tableName, tableId, clippedRanges, new String[] {location});
+            AccumuloInputSplit.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);
+
+            splits.add(split);
+          } else {
+            // not grouping by tablet
+            for (Range r : extentRanges.getValue()) {
+              if (autoAdjust) {
+                // divide ranges into smaller ranges, based on the tablets
+                RangeInputSplit split = new RangeInputSplit(tableName, tableId, ke.clip(r), new String[] {location});
+                AccumuloInputSplit.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);
+                split.setOffline(tableConfig.isOfflineScan());
+                split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
+                split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
+
+                splits.add(split);
+              } else {
+                // don't divide ranges
+                ArrayList<String> locations = splitsToAdd.get(r);
+                if (locations == null)
+                  locations = new ArrayList<String>(1);
+                locations.add(location);
+                splitsToAdd.put(r, locations);
+              }
             }
           }
         }
@@ -663,19 +746,10 @@ public abstract class AbstractInputFormat<K,V> implements InputFormat<K,V> {
       if (!autoAdjust)
         for (Map.Entry<Range,ArrayList<String>> entry : splitsToAdd.entrySet()) {
           RangeInputSplit split = new RangeInputSplit(tableName, tableId, entry.getKey(), entry.getValue().toArray(new String[0]));
-
+          AccumuloInputSplit.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);
           split.setOffline(tableConfig.isOfflineScan());
           split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
           split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
-          split.setMockInstance(mockInstance);
-          split.setFetchedColumns(tableConfig.getFetchedColumns());
-          split.setPrincipal(principal);
-          split.setToken(token);
-          split.setInstanceName(instance.getInstanceName());
-          split.setZooKeepers(instance.getZooKeepers());
-          split.setAuths(auths);
-          split.setIterators(tableConfig.getIterators());
-          split.setLogLevel(logLevel);
 
           splits.add(split);
         }
@@ -683,5 +757,4 @@ public abstract class AbstractInputFormat<K,V> implements InputFormat<K,V> {
 
     return splits.toArray(new InputSplit[splits.size()]);
   }
-
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/956a50ec/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormat.java
index 2cdc236..2c23552 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloInputFormat.java
@@ -54,12 +54,14 @@ public class AccumuloInputFormat extends InputFormatBase<Key,Value> {
     log.setLevel(getLogLevel(job));
 
     // Override the log level from the configuration as if the RangeInputSplit has one it's the more correct one to use.
-    if (split instanceof org.apache.accumulo.core.client.mapreduce.RangeInputSplit) {
-      org.apache.accumulo.core.client.mapreduce.RangeInputSplit risplit = (org.apache.accumulo.core.client.mapreduce.RangeInputSplit) split;
-      Level level = risplit.getLogLevel();
+    if (split instanceof org.apache.accumulo.core.client.mapreduce.impl.AccumuloInputSplit) {
+      org.apache.accumulo.core.client.mapreduce.impl.AccumuloInputSplit accSplit = (org.apache.accumulo.core.client.mapreduce.impl.AccumuloInputSplit) split;
+      Level level = accSplit.getLogLevel();
       if (null != level) {
         log.setLevel(level);
       }
+    } else {
+      throw new IllegalArgumentException("No RecordReader for " + split.getClass().toString());
     }
 
     RecordReaderBase<Key,Value> recordReader = new RecordReaderBase<Key,Value>() {

http://git-wip-us.apache.org/repos/asf/accumulo/blob/956a50ec/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
index 5a8f592..517cd19 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
@@ -178,6 +178,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
 
   /**
    * Determines whether a configuration has auto-adjust ranges enabled.
+   * Must be enabled when {@link #setBatchScan(JobConf, boolean)} is true.
    *
    * @param job
    *          the Hadoop context for the configured job
@@ -297,6 +298,48 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   }
 
   /**
+   * Controls the use of the {@link org.apache.accumulo.core.client.BatchScanner} in this job.
+   * Using this feature will group Ranges by their source tablet, producing an InputSplit per tablet
+   * rather than per Range. This batching helps to reduce overhead when querying a large number of small ranges.
+   * (ex: when doing quad-tree decomposition for spatial queries)
+   * <p>
+   * In order to achieve good locality of InputSplits this option always clips the input Ranges to tablet boundaries.
+   * This may result in one input Range contributing to several InputSplits.
+   * <p>
+   * Note: {@link #setAutoAdjustRanges(JobConf, boolean)} must be enabled when the BatchScan option is enabled; an IllegalArgumentException is thrown otherwise.
+   * <p>
+   * This configuration is incompatible with:
+   * <ul>
+   *   <li>{@link #setOfflineTableScan(JobConf, boolean)}</li>
+   *   <li>{@link #setLocalIterators(JobConf, boolean)}</li>
+   *   <li>{@link #setScanIsolation(JobConf, boolean)}</li>
+   * </ul>
+   * <p>
+   * By default, this feature is <b>disabled</b>.
+   *
+   * @param job
+   *          the Hadoop job instance to be configured
+   * @param enableFeature
+   *          the feature is enabled if true, disabled otherwise
+   * @since 1.7.0
+   */
+  public static void setBatchScan(JobConf job, boolean enableFeature) {
+    InputConfigurator.setBatchScan(CLASS, job, enableFeature);
+  }
+
+  /**
+   * Determines whether a configuration has the {@link org.apache.accumulo.core.client.BatchScanner} feature enabled.
+   *
+   * @param job
+   *          the Hadoop context for the configured job
+   * @since 1.7.0
+   * @see #setBatchScan(JobConf, boolean)
+   */
+  public static boolean isBatchScan(JobConf job) {
+    return InputConfigurator.isBatchScan(CLASS, job);
+  }
+
+  /**
    * Initializes an Accumulo {@link org.apache.accumulo.core.client.impl.TabletLocator} based on the configuration.
    *
    * @param job
@@ -315,19 +358,8 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   protected abstract static class RecordReaderBase<K,V> extends AbstractRecordReader<K,V> {
 
     @Override
-    protected void setupIterators(JobConf job, Scanner scanner, String tableName, org.apache.accumulo.core.client.mapred.RangeInputSplit split) {
-      List<IteratorSetting> iterators = null;
-
-      if (null == split) {
-        iterators = getIterators(job);
-      } else {
-        iterators = split.getIterators();
-        if (null == iterators) {
-          iterators = getIterators(job);
-        }
-      }
-
-      setupIterators(iterators, scanner);
+    protected List<IteratorSetting> jobIterators(JobConf job, String tableName) {
+      return getIterators(job);
     }
 
     /**
@@ -337,7 +369,9 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
      *          the iterators to set
      * @param scanner
      *          the scanner to configure
+     * @deprecated since 1.7.0; Use {@link #jobIterators} instead.
      */
+    @Deprecated
     protected void setupIterators(List<IteratorSetting> iterators, Scanner scanner) {
       for (IteratorSetting iterator : iterators) {
         scanner.addScanIterator(iterator);

http://git-wip-us.apache.org/repos/asf/accumulo/blob/956a50ec/core/src/main/java/org/apache/accumulo/core/client/mapred/impl/BatchInputSplit.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/impl/BatchInputSplit.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/impl/BatchInputSplit.java
new file mode 100644
index 0000000..619f9cd
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapred/impl/BatchInputSplit.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapred.impl;
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.accumulo.core.data.Range;
+import org.apache.hadoop.mapred.InputSplit;
+
+/**
+ * The Class BatchInputSplit. Encapsulates Accumulo ranges for use in Map Reduce jobs.
+ * Can contain several Ranges per InputSplit.
+ */
+public class BatchInputSplit extends org.apache.accumulo.core.client.mapreduce.impl.BatchInputSplit implements InputSplit {
+
+  public BatchInputSplit() {
+    super();
+  }
+
+  public BatchInputSplit(BatchInputSplit split) throws IOException {
+    super(split);
+  }
+
+  public BatchInputSplit(String table, String tableId, Collection<Range> ranges, String[] location) {
+    super(table, tableId, ranges, location);
+  }
+}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/956a50ec/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
index 300e92b..e3c3c42 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
@@ -35,6 +35,8 @@ import org.apache.accumulo.core.client.Connector;
 import org.apache.accumulo.core.client.Instance;
 import org.apache.accumulo.core.client.IsolatedScanner;
 import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.BatchScanner;
+import org.apache.accumulo.core.client.ScannerBase;
 import org.apache.accumulo.core.client.TableDeletedException;
 import org.apache.accumulo.core.client.TableNotFoundException;
 import org.apache.accumulo.core.client.TableOfflineException;
@@ -44,6 +46,8 @@ import org.apache.accumulo.core.client.impl.OfflineScanner;
 import org.apache.accumulo.core.client.impl.ScannerImpl;
 import org.apache.accumulo.core.client.impl.Tables;
 import org.apache.accumulo.core.client.impl.TabletLocator;
+import org.apache.accumulo.core.client.mapreduce.impl.AccumuloInputSplit;
+import org.apache.accumulo.core.client.mapreduce.impl.BatchInputSplit;
 import org.apache.accumulo.core.client.mapreduce.lib.impl.ConfiguratorBase;
 import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
 import org.apache.accumulo.core.client.mock.MockInstance;
@@ -61,6 +65,7 @@ import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.security.Credentials;
 import org.apache.accumulo.core.util.Pair;
 import org.apache.accumulo.core.util.UtilWaitThread;
+import org.apache.accumulo.core.client.IteratorSetting;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.InputFormat;
@@ -413,7 +418,50 @@ public abstract class AbstractInputFormat<K,V> extends InputFormat<K,V> {
   protected abstract static class AbstractRecordReader<K,V> extends RecordReader<K,V> {
     protected long numKeysRead;
     protected Iterator<Map.Entry<Key,Value>> scannerIterator;
-    protected RangeInputSplit split;
+    protected ScannerBase scannerBase;
+    protected AccumuloInputSplit split;
+
+    /**
+     * Extracts Iterators settings from the context to be used by RecordReader.
+     *
+     * @param  context
+     *           the Hadoop context for the configured job
+     * @param  tableName
+     *           the table name for which the scanner is configured
+     * @return List of iterator settings for given table
+     * @since 1.7.0
+     */
+    protected abstract List<IteratorSetting> contextIterators(TaskAttemptContext context, String tableName);
+
+    /**
+     * Configures the iterators on a scanner for the given table name.
+     * Will attempt to use configuration from the InputSplit, on failure will try to extract them from TaskAttemptContext.
+     *
+     * @param context
+     *          the Hadoop context for the configured job
+     * @param tableName
+     *          the table name for which the scanner is configured
+     * @param scanner
+     *          the scanner for which to configure the iterators
+     * @param split
+     *          InputSplit containing configurations
+     * @since 1.7.0
+     */
+    private void setupIterators(TaskAttemptContext context, ScannerBase scanner, String tableName, AccumuloInputSplit split) {
+      List<IteratorSetting> iterators = null;
+
+      if (null == split) {
+        iterators = contextIterators(context, tableName);
+      } else {
+        iterators = split.getIterators();
+        if (null == iterators) {
+          iterators = contextIterators(context, tableName);
+        }
+      }
+
+      for (IteratorSetting iterator : iterators)
+        scanner.addScanIterator(iterator);
+    }
 
     /**
      * Configures the iterators on a scanner for the given table name.
@@ -425,15 +473,18 @@ public abstract class AbstractInputFormat<K,V> extends InputFormat<K,V> {
      * @param tableName
      *          the table name for which the scanner is configured
      * @since 1.6.0
+     * @deprecated since 1.7.0; Use {@link #contextIterators} instead.
      */
-    protected abstract void setupIterators(TaskAttemptContext context, Scanner scanner, String tableName, RangeInputSplit split);
+    @Deprecated
+    protected void setupIterators(TaskAttemptContext context, Scanner scanner, String tableName, RangeInputSplit split) {
+      setupIterators(context, (ScannerBase) scanner, tableName, (AccumuloInputSplit) split);
+    }
 
     @Override
     public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IOException {
 
-      Scanner scanner;
-      split = (RangeInputSplit) inSplit;
-      log.debug("Initializing input split: " + split.getRange());
+      split = (AccumuloInputSplit) inSplit;
+      log.debug("Initializing input split: " + split.toString());
 
       Instance instance = split.getInstance(getClientConfiguration(attempt));
       if (null == instance) {
@@ -461,46 +512,71 @@ public abstract class AbstractInputFormat<K,V> extends InputFormat<K,V> {
       // but the scanner will use the table id resolved at job setup time
       InputTableConfig tableConfig = getInputTableConfig(attempt, split.getTableName());
 
-      Boolean isOffline = split.isOffline();
-      if (null == isOffline) {
-        isOffline = tableConfig.isOfflineScan();
-      }
+      log.debug("Creating connector with user: " + principal);
+      log.debug("Creating scanner for table: " + table);
+      log.debug("Authorizations are: " + authorizations);
 
-      Boolean isIsolated = split.isIsolatedScan();
-      if (null == isIsolated) {
-        isIsolated = tableConfig.shouldUseIsolatedScanners();
-      }
+      if (split instanceof RangeInputSplit) {
+        RangeInputSplit rangeSplit = (RangeInputSplit) split;
+        Scanner scanner;
 
-      Boolean usesLocalIterators = split.usesLocalIterators();
-      if (null == usesLocalIterators) {
-        usesLocalIterators = tableConfig.shouldUseLocalIterators();
-      }
+        Boolean isOffline = rangeSplit.isOffline();
+        if (null == isOffline) {
+          isOffline = tableConfig.isOfflineScan();
+        }
 
-      try {
-        log.debug("Creating connector with user: " + principal);
-        log.debug("Creating scanner for table: " + table);
-        log.debug("Authorizations are: " + authorizations);
-        if (isOffline) {
-          scanner = new OfflineScanner(instance, new Credentials(principal, token), split.getTableId(), authorizations);
-        } else if (instance instanceof MockInstance) {
-          scanner = instance.getConnector(principal, token).createScanner(split.getTableName(), authorizations);
-        } else {
-          ClientConfiguration clientConf = getClientConfiguration(attempt);
-          ClientContext context = new ClientContext(instance, new Credentials(principal, token), clientConf);
-          scanner = new ScannerImpl(context, split.getTableId(), authorizations);
+        Boolean isIsolated = rangeSplit.isIsolatedScan();
+        if (null == isIsolated) {
+          isIsolated = tableConfig.shouldUseIsolatedScanners();
         }
-        if (isIsolated) {
-          log.info("Creating isolated scanner");
-          scanner = new IsolatedScanner(scanner);
+
+        Boolean usesLocalIterators = rangeSplit.usesLocalIterators();
+        if (null == usesLocalIterators) {
+          usesLocalIterators = tableConfig.shouldUseLocalIterators();
         }
-        if (usesLocalIterators) {
-          log.info("Using local iterators");
-          scanner = new ClientSideIteratorScanner(scanner);
+
+        try {
+          if (isOffline) {
+            scanner = new OfflineScanner(instance, new Credentials(principal, token), split.getTableId(), authorizations);
+          } else if (instance instanceof MockInstance) {
+            scanner = instance.getConnector(principal, token).createScanner(split.getTableName(), authorizations);
+          } else {
+            ClientConfiguration clientConf = getClientConfiguration(attempt);
+            ClientContext context = new ClientContext(instance, new Credentials(principal, token), clientConf);
+            scanner = new ScannerImpl(context, split.getTableId(), authorizations);
+          }
+          if (isIsolated) {
+            log.info("Creating isolated scanner");
+            scanner = new IsolatedScanner(scanner);
+          }
+          if (usesLocalIterators) {
+            log.info("Using local iterators");
+            scanner = new ClientSideIteratorScanner(scanner);
+          }
+
+          setupIterators(attempt, scanner, split.getTableName(), split);
+        } catch (Exception e) {
+          throw new IOException(e);
         }
 
-        setupIterators(attempt, scanner, split.getTableName(), split);
-      } catch (Exception e) {
-        throw new IOException(e);
+        scanner.setRange(rangeSplit.getRange());
+        scannerBase = scanner;
+
+      } else  if (split instanceof BatchInputSplit) {
+        BatchInputSplit batchSplit = (BatchInputSplit) split;
+
+        BatchScanner scanner;
+        try{
+          // Note: BatchScanner will use at most one thread per tablet, currently BatchInputSplit will not span tablets
+          int scanThreads = 1;
+          scanner = instance.getConnector(principal, token).createBatchScanner(split.getTableName(), authorizations, scanThreads);
+          setupIterators(attempt, scanner, split.getTableName(), split);
+        } catch (Exception e) {
+          throw new IOException(e);
+        }
+
+        scanner.setRanges(batchSplit.getRanges());
+        scannerBase = scanner;
       }
 
       Collection<Pair<Text,Text>> columns = split.getFetchedColumns();
@@ -512,22 +588,23 @@ public abstract class AbstractInputFormat<K,V> extends InputFormat<K,V> {
       for (Pair<Text,Text> c : columns) {
         if (c.getSecond() != null) {
           log.debug("Fetching column " + c.getFirst() + ":" + c.getSecond());
-          scanner.fetchColumn(c.getFirst(), c.getSecond());
+          scannerBase.fetchColumn(c.getFirst(), c.getSecond());
         } else {
           log.debug("Fetching column family " + c.getFirst());
-          scanner.fetchColumnFamily(c.getFirst());
+          scannerBase.fetchColumnFamily(c.getFirst());
         }
       }
 
-      scanner.setRange(split.getRange());
+      scannerIterator = scannerBase.iterator();
       numKeysRead = 0;
-
-      // do this last after setting all scanner options
-      scannerIterator = scanner.iterator();
     }
 
     @Override
-    public void close() {}
+    public void close() {
+      if (null != scannerBase) {
+        scannerBase.close();
+      }
+    }
 
     @Override
     public float getProgress() throws IOException {
@@ -592,26 +669,32 @@ public abstract class AbstractInputFormat<K,V> extends InputFormat<K,V> {
       InputTableConfig tableConfig = tableConfigEntry.getValue();
 
       Instance instance = getInstance(context);
-      boolean mockInstance;
       String tableId;
       // resolve table name to id once, and use id from this point forward
       if (instance instanceof MockInstance) {
         tableId = "";
-        mockInstance = true;
       } else {
         try {
           tableId = Tables.getTableId(instance, tableName);
         } catch (TableNotFoundException e) {
           throw new IOException(e);
         }
-        mockInstance = false;
       }
 
       Authorizations auths = getScanAuthorizations(context);
       String principal = getPrincipal(context);
       AuthenticationToken token = getAuthenticationToken(context);
 
+      boolean batchScan =  InputConfigurator.isBatchScan(CLASS, context.getConfiguration());
+      boolean supportBatchScan =
+        !(tableConfig.isOfflineScan() || tableConfig.shouldUseIsolatedScanners() || tableConfig.shouldUseLocalIterators());
+      if (batchScan && !supportBatchScan)
+        throw new IllegalArgumentException("BatchScanner optimization not available for offline scan, isolated, or local iterators");
+
       boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
+      if (batchScan && !autoAdjust)
+        throw new IllegalArgumentException("AutoAdjustRanges must be enabled when using BatchScanner optimization");
+
       List<Range> ranges = autoAdjust ? Range.mergeOverlapping(tableConfig.getRanges()) : tableConfig.getRanges();
       if (ranges.isEmpty()) {
         ranges = new ArrayList<Range>(1);
@@ -654,6 +737,8 @@ public abstract class AbstractInputFormat<K,V> extends InputFormat<K,V> {
         throw new IOException(e);
       }
 
+      // The code below either adds one split per range (listing every location of that range) or clips the ranges to tablet boundaries and adds one split per range-location pair.
+      // Map from Range to its list of locations; only used when ranges are not split (autoAdjust is false).
       HashMap<Range,ArrayList<String>> splitsToAdd = null;
 
       if (!autoAdjust)
@@ -670,32 +755,35 @@ public abstract class AbstractInputFormat<K,V> extends InputFormat<K,V> {
         }
         for (Map.Entry<KeyExtent,List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
           Range ke = extentRanges.getKey().toDataRange();
-          for (Range r : extentRanges.getValue()) {
-            if (autoAdjust) {
-              // divide ranges into smaller ranges, based on the tablets
-              RangeInputSplit split = new RangeInputSplit(tableName, tableId, ke.clip(r), new String[] {location});
-
-              split.setOffline(tableConfig.isOfflineScan());
-              split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
-              split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
-              split.setMockInstance(mockInstance);
-              split.setFetchedColumns(tableConfig.getFetchedColumns());
-              split.setPrincipal(principal);
-              split.setToken(token);
-              split.setInstanceName(instance.getInstanceName());
-              split.setZooKeepers(instance.getZooKeepers());
-              split.setAuths(auths);
-              split.setIterators(tableConfig.getIterators());
-              split.setLogLevel(logLevel);
-
-              splits.add(split);
-            } else {
-              // don't divide ranges
-              ArrayList<String> locations = splitsToAdd.get(r);
-              if (locations == null)
-                locations = new ArrayList<String>(1);
-              locations.add(location);
-              splitsToAdd.put(r, locations);
+          if (batchScan) {
+            // group ranges by tablet to be read by a BatchScanner
+            ArrayList<Range> clippedRanges = new ArrayList<Range>();
+            for(Range r: extentRanges.getValue())
+              clippedRanges.add(ke.clip(r));
+            BatchInputSplit split = new BatchInputSplit(tableName, tableId, clippedRanges, new String[] {location});
+            AccumuloInputSplit.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);
+
+            splits.add(split);
+          } else {
+            // not grouping by tablet
+            for (Range r : extentRanges.getValue()) {
+              if (autoAdjust) {
+                // divide ranges into smaller ranges, based on the tablets
+                RangeInputSplit split = new RangeInputSplit(tableName, tableId, ke.clip(r), new String[] {location});
+                AccumuloInputSplit.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);
+                split.setOffline(tableConfig.isOfflineScan());
+                split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
+                split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
+
+                splits.add(split);
+              } else {
+                // don't divide ranges
+                ArrayList<String> locations = splitsToAdd.get(r);
+                if (locations == null)
+                  locations = new ArrayList<String>(1);
+                locations.add(location);
+                splitsToAdd.put(r, locations);
+              }
             }
           }
         }
@@ -704,23 +792,14 @@ public abstract class AbstractInputFormat<K,V> extends InputFormat<K,V> {
       if (!autoAdjust)
         for (Map.Entry<Range,ArrayList<String>> entry : splitsToAdd.entrySet()) {
           RangeInputSplit split = new RangeInputSplit(tableName, tableId, entry.getKey(), entry.getValue().toArray(new String[0]));
-
+          AccumuloInputSplit.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);
           split.setOffline(tableConfig.isOfflineScan());
           split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
           split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
-          split.setMockInstance(mockInstance);
-          split.setFetchedColumns(tableConfig.getFetchedColumns());
-          split.setPrincipal(principal);
-          split.setToken(token);
-          split.setInstanceName(instance.getInstanceName());
-          split.setZooKeepers(instance.getZooKeepers());
-          split.setAuths(auths);
-          split.setIterators(tableConfig.getIterators());
-          split.setLogLevel(logLevel);
 
           splits.add(split);
         }
     }
     return splits;
   }
-}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/accumulo/blob/956a50ec/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java
index b98cb77..7af5e66 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloInputFormat.java
@@ -21,6 +21,7 @@ import java.util.Map.Entry;
 
 import org.apache.accumulo.core.client.ClientConfiguration;
 import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
+import org.apache.accumulo.core.client.mapreduce.impl.AccumuloInputSplit;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.security.Authorizations;
@@ -52,17 +53,18 @@ public class AccumuloInputFormat extends InputFormatBase<Key,Value> {
   public RecordReader<Key,Value> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
     log.setLevel(getLogLevel(context));
 
-    // Override the log level from the configuration as if the RangeInputSplit has one it's the more correct one to use.
-    if (split instanceof org.apache.accumulo.core.client.mapreduce.RangeInputSplit) {
-      org.apache.accumulo.core.client.mapreduce.RangeInputSplit risplit = (org.apache.accumulo.core.client.mapreduce.RangeInputSplit) split;
-      Level level = risplit.getLogLevel();
+    // Override the log level from the configuration: if the InputSplit has one, it's the more correct one to use.
+    if (split instanceof AccumuloInputSplit) {
+      AccumuloInputSplit accSplit = (AccumuloInputSplit) split;
+      Level level = accSplit.getLogLevel();
       if (null != level) {
         log.setLevel(level);
       }
+    } else {
+        throw new IllegalArgumentException("No RecordReader for " + split.getClass().toString());
     }
 
     return new RecordReaderBase<Key,Value>() {
-
       @Override
       public boolean nextKeyValue() throws IOException, InterruptedException {
         if (scannerIterator.hasNext()) {

http://git-wip-us.apache.org/repos/asf/accumulo/blob/956a50ec/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormat.java
index bed0def..679256b 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloMultiTableInputFormat.java
@@ -24,7 +24,6 @@ import java.util.Map;
 
 import org.apache.accumulo.core.client.ClientConfiguration;
 import org.apache.accumulo.core.client.IteratorSetting;
-import org.apache.accumulo.core.client.Scanner;
 import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
 import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
 import org.apache.accumulo.core.data.Key;
@@ -86,15 +85,8 @@ public class AccumuloMultiTableInputFormat extends AbstractInputFormat<Key,Value
       }
 
       @Override
-      protected void setupIterators(TaskAttemptContext context, Scanner scanner, String tableName, RangeInputSplit split) {
-        List<IteratorSetting> iterators = split.getIterators();
-        if (null == iterators) {
-          iterators = getInputTableConfig(context, tableName).getIterators();
-        }
-
-        for (IteratorSetting setting : iterators) {
-          scanner.addScanIterator(setting);
-        }
+      protected List<IteratorSetting> contextIterators(TaskAttemptContext context, String tableName) {
+        return getInputTableConfig(context, tableName).getIterators();
       }
     };
   }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/956a50ec/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
index 76ca401..dcc4fd5 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
@@ -177,6 +177,7 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
 
   /**
    * Determines whether a configuration has auto-adjust ranges enabled.
+   * Must be enabled when {@link #setBatchScan(Job, boolean)} is true.
    *
    * @param context
    *          the Hadoop context for the configured job
@@ -296,6 +297,48 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   }
 
   /**
+   * Controls the use of the {@link org.apache.accumulo.core.client.BatchScanner} in this job.
+   * Using this feature will group Ranges by their source tablet, producing an InputSplit per tablet
+   * rather than per Range. This batching helps to reduce overhead when querying a large number of small ranges.
+   * (ex: when doing quad-tree decomposition for spatial queries)
+   * <p>
+   * In order to achieve good locality of InputSplits this option always clips the input Ranges to tablet boundaries.
+   * This may result in one input Range contributing to several InputSplits.
+   * <p>
+   * Note: {@link #setAutoAdjustRanges(Job, boolean)} must be enabled when the BatchScan option is enabled; otherwise an IllegalArgumentException is thrown when the input splits are computed.
+   * <p>
+   * This configuration is incompatible with:
+   * <ul>
+   *   <li>{@link #setOfflineTableScan(org.apache.hadoop.mapreduce.Job, boolean)}</li>
+   *   <li>{@link #setLocalIterators(org.apache.hadoop.mapreduce.Job, boolean)}</li>
+   *   <li>{@link #setScanIsolation(org.apache.hadoop.mapreduce.Job, boolean)}</li>
+   * </ul>
+   * <p>
+   * By default, this feature is <b>disabled</b>.
+   *
+   * @param job
+   *          the Hadoop job instance to be configured
+   * @param enableFeature
+   *          the feature is enabled if true, disabled otherwise
+   * @since 1.7.0
+   */
+  public static void setBatchScan(Job job, boolean enableFeature) {
+    InputConfigurator.setBatchScan(CLASS, job.getConfiguration(), enableFeature);
+  }
+
+  /**
+   * Determines whether a configuration has the {@link org.apache.accumulo.core.client.BatchScanner} feature enabled.
+   *
+   * @param context
+   *          the Hadoop context for the configured job
+   * @since 1.7.0
+   * @see #setBatchScan(Job, boolean)
+   */
+  public static boolean isBatchScan(JobContext context) {
+    return InputConfigurator.isBatchScan(CLASS, context.getConfiguration());
+  }
+
+  /**
    * Initializes an Accumulo {@link org.apache.accumulo.core.client.impl.TabletLocator} based on the configuration.
    *
    * @param context
@@ -314,8 +357,8 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   protected abstract static class RecordReaderBase<K,V> extends AbstractRecordReader<K,V> {
 
     @Override
-    protected void setupIterators(TaskAttemptContext context, Scanner scanner, String tableName, org.apache.accumulo.core.client.mapreduce.RangeInputSplit split) {
-      setupIterators(context, scanner, split);
+    protected List<IteratorSetting> contextIterators(TaskAttemptContext context, String tableName) {
+      return getIterators(context);
     }
 
     /**
@@ -325,27 +368,21 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
      *          the Hadoop context for the configured job
      * @param scanner
      *          the scanner to configure
+     * @deprecated since 1.7.0; Use {@link #contextIterators} instead.
      */
     @Deprecated
     protected void setupIterators(TaskAttemptContext context, Scanner scanner) {
-      setupIterators(context, scanner, null);
+      // tableName is given as null as it will be ignored in eventual call to #contextIterators
+      setupIterators(context, scanner, null, null);
     }
 
     /**
      * Initialize a scanner over the given input split using this task attempt configuration.
+     * @deprecated since 1.7.0; Use {@link #contextIterators} instead.
      */
+    @Deprecated
     protected void setupIterators(TaskAttemptContext context, Scanner scanner, org.apache.accumulo.core.client.mapreduce.RangeInputSplit split) {
-      List<IteratorSetting> iterators = null;
-      if (null == split) {
-        iterators = getIterators(context);
-      } else {
-        iterators = split.getIterators();
-        if (null == iterators) {
-          iterators = getIterators(context);
-        }
-      }
-      for (IteratorSetting iterator : iterators)
-        scanner.addScanIterator(iterator);
+      setupIterators(context, scanner, null, split);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/accumulo/blob/956a50ec/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
index 872ee7b..6c870a0 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
@@ -16,114 +16,53 @@
  */
 package org.apache.accumulo.core.client.mapreduce;
 
-import static java.nio.charset.StandardCharsets.UTF_8;
-
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
-import java.math.BigInteger;
-import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
 
-import org.apache.accumulo.core.client.ClientConfiguration;
-import org.apache.accumulo.core.client.Instance;
-import org.apache.accumulo.core.client.IteratorSetting;
-import org.apache.accumulo.core.client.ZooKeeperInstance;
-import org.apache.accumulo.core.client.mapreduce.lib.impl.ConfiguratorBase.TokenSource;
-import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
-import org.apache.accumulo.core.client.security.tokens.AuthenticationToken.AuthenticationTokenSerializer;
-import org.apache.accumulo.core.data.ByteSequence;
+import org.apache.accumulo.core.client.mapreduce.impl.AccumuloInputSplit;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.PartialKey;
 import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.core.util.Base64;
-import org.apache.accumulo.core.util.Pair;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.log4j.Level;
 
 /**
  * The Class RangeInputSplit. Encapsulates an Accumulo range for use in Map Reduce jobs.
  */
-public class RangeInputSplit extends InputSplit implements Writable {
+public class RangeInputSplit extends AccumuloInputSplit {
   private Range range;
-  private String[] locations;
-  private String tableId, tableName, instanceName, zooKeepers, principal;
-  private TokenSource tokenSource;
-  private String tokenFile;
-  private AuthenticationToken token;
-  private Boolean offline, mockInstance, isolatedScan, localIterators;
-  private Authorizations auths;
-  private Set<Pair<Text,Text>> fetchedColumns;
-  private List<IteratorSetting> iterators;
-  private Level level;
+  private Boolean offline, isolatedScan, localIterators;
 
   public RangeInputSplit() {
     range = new Range();
-    locations = new String[0];
-    tableName = "";
-    tableId = "";
   }
 
   public RangeInputSplit(RangeInputSplit split) throws IOException {
+    super(split);
     this.setRange(split.getRange());
-    this.setLocations(split.getLocations());
-    this.setTableName(split.getTableName());
-    this.setTableId(split.getTableId());
   }
 
   protected RangeInputSplit(String table, String tableId, Range range, String[] locations) {
+    super(table, tableId, locations);
     this.range = range;
-    setLocations(locations);
-    this.tableName = table;
-    this.tableId = tableId;
-  }
-
-  public Range getRange() {
-    return range;
-  }
-
-  private static byte[] extractBytes(ByteSequence seq, int numBytes) {
-    byte[] bytes = new byte[numBytes + 1];
-    bytes[0] = 0;
-    for (int i = 0; i < numBytes; i++) {
-      if (i >= seq.length())
-        bytes[i + 1] = 0;
-      else
-        bytes[i + 1] = seq.byteAt(i);
-    }
-    return bytes;
-  }
-
-  public static float getProgress(ByteSequence start, ByteSequence end, ByteSequence position) {
-    int maxDepth = Math.min(Math.max(end.length(), start.length()), position.length());
-    BigInteger startBI = new BigInteger(extractBytes(start, maxDepth));
-    BigInteger endBI = new BigInteger(extractBytes(end, maxDepth));
-    BigInteger positionBI = new BigInteger(extractBytes(position, maxDepth));
-    return (float) (positionBI.subtract(startBI).doubleValue() / endBI.subtract(startBI).doubleValue());
   }
 
   public float getProgress(Key currentKey) {
     if (currentKey == null)
       return 0f;
-    if (range.getStartKey() != null && range.getEndKey() != null) {
-      if (range.getStartKey().compareTo(range.getEndKey(), PartialKey.ROW) != 0) {
-        // just look at the row progress
-        return getProgress(range.getStartKey().getRowData(), range.getEndKey().getRowData(), currentKey.getRowData());
-      } else if (range.getStartKey().compareTo(range.getEndKey(), PartialKey.ROW_COLFAM) != 0) {
-        // just look at the column family progress
-        return getProgress(range.getStartKey().getColumnFamilyData(), range.getEndKey().getColumnFamilyData(), currentKey.getColumnFamilyData());
-      } else if (range.getStartKey().compareTo(range.getEndKey(), PartialKey.ROW_COLFAM_COLQUAL) != 0) {
-        // just look at the column qualifier progress
-        return getProgress(range.getStartKey().getColumnQualifierData(), range.getEndKey().getColumnQualifierData(), currentKey.getColumnQualifierData());
+    if (range.contains(currentKey)) {
+      // the key lies within this split's single range, so report progress relative to that range
+      if (range.getStartKey() != null && range.getEndKey() != null) {
+        if (range.getStartKey().compareTo(range.getEndKey(), PartialKey.ROW) != 0) {
+          // just look at the row progress
+          return getProgress(range.getStartKey().getRowData(), range.getEndKey().getRowData(), currentKey.getRowData());
+        } else if (range.getStartKey().compareTo(range.getEndKey(), PartialKey.ROW_COLFAM) != 0) {
+          // just look at the column family progress
+          return getProgress(range.getStartKey().getColumnFamilyData(), range.getEndKey().getColumnFamilyData(), currentKey.getColumnFamilyData());
+        } else if (range.getStartKey().compareTo(range.getEndKey(), PartialKey.ROW_COLFAM_COLQUAL) != 0) {
+          // just look at the column qualifier progress
+          return getProgress(range.getStartKey().getColumnQualifierData(), range.getEndKey().getColumnQualifierData(), currentKey.getColumnQualifierData());
+        }
       }
     }
     // if we can't figure it out, then claim no progress
@@ -135,38 +74,15 @@ public class RangeInputSplit extends InputSplit implements Writable {
    */
   @Override
   public long getLength() throws IOException {
-    Text startRow = range.isInfiniteStartKey() ? new Text(new byte[] {Byte.MIN_VALUE}) : range.getStartKey().getRow();
-    Text stopRow = range.isInfiniteStopKey() ? new Text(new byte[] {Byte.MAX_VALUE}) : range.getEndKey().getRow();
-    int maxCommon = Math.min(7, Math.min(startRow.getLength(), stopRow.getLength()));
-    long diff = 0;
-
-    byte[] start = startRow.getBytes();
-    byte[] stop = stopRow.getBytes();
-    for (int i = 0; i < maxCommon; ++i) {
-      diff |= 0xff & (start[i] ^ stop[i]);
-      diff <<= Byte.SIZE;
-    }
-
-    if (startRow.getLength() != stopRow.getLength())
-      diff |= 0xff;
-
-    return diff + 1;
+    return getRangeLength(range);
   }
 
-  @Override
-  public String[] getLocations() throws IOException {
-    return Arrays.copyOf(locations, locations.length);
-  }
 
   @Override
   public void readFields(DataInput in) throws IOException {
+    super.readFields(in);
+
     range.readFields(in);
-    tableName = in.readUTF();
-    tableId = in.readUTF();
-    int numLocs = in.readInt();
-    locations = new String[numLocs];
-    for (int i = 0; i < numLocs; ++i)
-      locations[i] = in.readUTF();
 
     if (in.readBoolean()) {
       isolatedScan = in.readBoolean();
@@ -179,81 +95,13 @@ public class RangeInputSplit extends InputSplit implements Writable {
     if (in.readBoolean()) {
       localIterators = in.readBoolean();
     }
-
-    if (in.readBoolean()) {
-      mockInstance = in.readBoolean();
-    }
-
-    if (in.readBoolean()) {
-      int numColumns = in.readInt();
-      List<String> columns = new ArrayList<String>(numColumns);
-      for (int i = 0; i < numColumns; i++) {
-        columns.add(in.readUTF());
-      }
-
-      fetchedColumns = InputConfigurator.deserializeFetchedColumns(columns);
-    }
-
-    if (in.readBoolean()) {
-      String strAuths = in.readUTF();
-      auths = new Authorizations(strAuths.getBytes(UTF_8));
-    }
-
-    if (in.readBoolean()) {
-      principal = in.readUTF();
-    }
-
-    if (in.readBoolean()) {
-      int ordinal = in.readInt();
-      this.tokenSource = TokenSource.values()[ordinal];
-
-      switch (this.tokenSource) {
-        case INLINE:
-          String tokenClass = in.readUTF();
-          byte[] base64TokenBytes = in.readUTF().getBytes(UTF_8);
-          byte[] tokenBytes = Base64.decodeBase64(base64TokenBytes);
-
-          this.token = AuthenticationTokenSerializer.deserialize(tokenClass, tokenBytes);
-          break;
-
-        case FILE:
-          this.tokenFile = in.readUTF();
-
-          break;
-        default:
-          throw new IOException("Cannot parse unknown TokenSource ordinal");
-      }
-    }
-
-    if (in.readBoolean()) {
-      instanceName = in.readUTF();
-    }
-
-    if (in.readBoolean()) {
-      zooKeepers = in.readUTF();
-    }
-
-    if (in.readBoolean()) {
-      int numIterators = in.readInt();
-      iterators = new ArrayList<IteratorSetting>(numIterators);
-      for (int i = 0; i < numIterators; i++) {
-        iterators.add(new IteratorSetting(in));
-      }
-    }
-
-    if (in.readBoolean()) {
-      level = Level.toLevel(in.readInt());
-    }
   }
 
   @Override
   public void write(DataOutput out) throws IOException {
+    super.write(out);
+
     range.write(out);
-    out.writeUTF(tableName);
-    out.writeUTF(tableId);
-    out.writeInt(locations.length);
-    for (int i = 0; i < locations.length; ++i)
-      out.writeUTF(locations[i]);
 
     out.writeBoolean(null != isolatedScan);
     if (null != isolatedScan) {
@@ -269,73 +117,13 @@ public class RangeInputSplit extends InputSplit implements Writable {
     if (null != localIterators) {
       out.writeBoolean(localIterators);
     }
-
-    out.writeBoolean(null != mockInstance);
-    if (null != mockInstance) {
-      out.writeBoolean(mockInstance);
-    }
-
-    out.writeBoolean(null != fetchedColumns);
-    if (null != fetchedColumns) {
-      String[] cols = InputConfigurator.serializeColumns(fetchedColumns);
-      out.writeInt(cols.length);
-      for (String col : cols) {
-        out.writeUTF(col);
-      }
-    }
-
-    out.writeBoolean(null != auths);
-    if (null != auths) {
-      out.writeUTF(auths.serialize());
-    }
-
-    out.writeBoolean(null != principal);
-    if (null != principal) {
-      out.writeUTF(principal);
-    }
-
-    out.writeBoolean(null != tokenSource);
-    if (null != tokenSource) {
-      out.writeInt(tokenSource.ordinal());
-
-      if (null != token && null != tokenFile) {
-        throw new IOException("Cannot use both inline AuthenticationToken and file-based AuthenticationToken");
-      } else if (null != token) {
-        out.writeUTF(token.getClass().getCanonicalName());
-        out.writeUTF(Base64.encodeBase64String(AuthenticationTokenSerializer.serialize(token)));
-      } else {
-        out.writeUTF(tokenFile);
-      }
-    }
-
-    out.writeBoolean(null != instanceName);
-    if (null != instanceName) {
-      out.writeUTF(instanceName);
-    }
-
-    out.writeBoolean(null != zooKeepers);
-    if (null != zooKeepers) {
-      out.writeUTF(zooKeepers);
-    }
-
-    out.writeBoolean(null != iterators);
-    if (null != iterators) {
-      out.writeInt(iterators.size());
-      for (IteratorSetting iterator : iterators) {
-        iterator.write(out);
-      }
-    }
-
-    out.writeBoolean(null != level);
-    if (null != level) {
-      out.writeInt(level.toInt());
-    }
   }
 
   @Override
   public String toString() {
     StringBuilder sb = new StringBuilder(256);
-    sb.append("Range: ").append(range);
+    sb.append("RangeInputSplit:");
+    sb.append(" Range: ").append(range);
     sb.append(" Locations: ").append(Arrays.asList(locations));
     sb.append(" Table: ").append(tableName);
     sb.append(" TableID: ").append(tableId);
@@ -356,123 +144,12 @@ public class RangeInputSplit extends InputSplit implements Writable {
     return sb.toString();
   }
 
-  /**
-   * Use {@link #getTableName}
-   *
-   * @deprecated since 1.6.1, use getTableName() instead.
-   */
-  @Deprecated
-  public String getTable() {
-    return getTableName();
-  }
-
-  public String getTableName() {
-    return tableName;
-  }
-
-  /**
-   * Use {@link #setTableName}
-   *
-   * @deprecated since 1.6.1, use setTableName() instead.
-   */
-  @Deprecated
-  public void setTable(String table) {
-    setTableName(table);
-  }
-
-  public void setTableName(String table) {
-    this.tableName = table;
-  }
-
-  public void setTableId(String tableId) {
-    this.tableId = tableId;
-  }
-
-  public String getTableId() {
-    return tableId;
-  }
-
-  /**
-   * @see #getInstance(ClientConfiguration)
-   * @deprecated since 1.7.0, use getInstance(ClientConfiguration) instead.
-   */
-  @Deprecated
-  public Instance getInstance() {
-    return getInstance(ClientConfiguration.loadDefault());
-  }
-
-  public Instance getInstance(ClientConfiguration base) {
-    if (null == instanceName) {
-      return null;
-    }
-
-    if (isMockInstance()) {
-      return new MockInstance(getInstanceName());
-    }
-
-    if (null == zooKeepers) {
-      return null;
-    }
-
-    return new ZooKeeperInstance(base.withInstance(getInstanceName()).withZkHosts(getZooKeepers()));
-  }
-
-  public String getInstanceName() {
-    return instanceName;
-  }
-
-  public void setInstanceName(String instanceName) {
-    this.instanceName = instanceName;
-  }
-
-  public String getZooKeepers() {
-    return zooKeepers;
-  }
-
-  public void setZooKeepers(String zooKeepers) {
-    this.zooKeepers = zooKeepers;
-  }
-
-  public String getPrincipal() {
-    return principal;
-  }
-
-  public void setPrincipal(String principal) {
-    this.principal = principal;
-  }
-
-  public AuthenticationToken getToken() {
-    return token;
-  }
-
-  public void setToken(AuthenticationToken token) {
-    this.tokenSource = TokenSource.INLINE;
-    this.token = token;
-  }
-
-  public void setToken(String tokenFile) {
-    this.tokenSource = TokenSource.FILE;
-    this.tokenFile = tokenFile;
-  }
-
-  public Boolean isOffline() {
-    return offline;
-  }
-
-  public void setOffline(Boolean offline) {
-    this.offline = offline;
-  }
-
-  public void setLocations(String[] locations) {
-    this.locations = Arrays.copyOf(locations, locations.length);
-  }
-
-  public Boolean isMockInstance() {
-    return mockInstance;
+  public Range getRange() {
+    return range;
   }
 
-  public void setMockInstance(Boolean mockInstance) {
-    this.mockInstance = mockInstance;
+  public void setRange(Range range) {
+    this.range = range;
   }
 
   public Boolean isIsolatedScan() {
@@ -483,16 +160,12 @@ public class RangeInputSplit extends InputSplit implements Writable {
     this.isolatedScan = isolatedScan;
   }
 
-  public Authorizations getAuths() {
-    return auths;
-  }
-
-  public void setAuths(Authorizations auths) {
-    this.auths = auths;
+  public Boolean isOffline() {
+    return offline;
   }
 
-  public void setRange(Range range) {
-    this.range = range;
+  public void setOffline(Boolean offline) {
+    this.offline = offline;
   }
 
   public Boolean usesLocalIterators() {
@@ -502,35 +175,4 @@ public class RangeInputSplit extends InputSplit implements Writable {
   public void setUsesLocalIterators(Boolean localIterators) {
     this.localIterators = localIterators;
   }
-
-  public Set<Pair<Text,Text>> getFetchedColumns() {
-    return fetchedColumns;
-  }
-
-  public void setFetchedColumns(Collection<Pair<Text,Text>> fetchedColumns) {
-    this.fetchedColumns = new HashSet<Pair<Text,Text>>();
-    for (Pair<Text,Text> columns : fetchedColumns) {
-      this.fetchedColumns.add(columns);
-    }
-  }
-
-  public void setFetchedColumns(Set<Pair<Text,Text>> fetchedColumns) {
-    this.fetchedColumns = fetchedColumns;
-  }
-
-  public List<IteratorSetting> getIterators() {
-    return iterators;
-  }
-
-  public void setIterators(List<IteratorSetting> iterators) {
-    this.iterators = iterators;
-  }
-
-  public Level getLogLevel() {
-    return level;
-  }
-
-  public void setLogLevel(Level level) {
-    this.level = level;
-  }
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/956a50ec/core/src/main/java/org/apache/accumulo/core/client/mapreduce/impl/AccumuloInputSplit.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/impl/AccumuloInputSplit.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/impl/AccumuloInputSplit.java
new file mode 100644
index 0000000..94d0026
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/impl/AccumuloInputSplit.java
@@ -0,0 +1,445 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.core.client.mapreduce.impl;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.math.BigInteger;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.accumulo.core.client.ClientConfiguration;
+import org.apache.accumulo.core.client.Instance;
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.client.mapreduce.InputTableConfig;
+import org.apache.accumulo.core.client.mapreduce.lib.impl.ConfiguratorBase.TokenSource;
+import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
+import org.apache.accumulo.core.client.security.tokens.AuthenticationToken.AuthenticationTokenSerializer;
+import org.apache.accumulo.core.data.ByteSequence;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.util.Base64;
+import org.apache.accumulo.core.util.Pair;
+import org.apache.accumulo.core.data.Key;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.log4j.Level;
+
+/**
+ * Abstracts over configurations common to all InputSplits. Specifically it leaves out methods
+ * related to number of ranges and locations per InputSplit as those vary by implementation.
+ *
+ * @see org.apache.accumulo.core.client.mapreduce.RangeInputSplit
+ * @see org.apache.accumulo.core.client.mapreduce.impl.BatchInputSplit
+ */
+public abstract class AccumuloInputSplit extends InputSplit implements Writable {
+  protected String[] locations;
+  protected String tableId, tableName, instanceName, zooKeepers, principal;
+  protected TokenSource tokenSource;
+  protected String tokenFile;
+  protected AuthenticationToken token;
+  protected Boolean mockInstance;
+  protected Authorizations auths;
+  protected Set<Pair<Text,Text>> fetchedColumns;
+  protected List<IteratorSetting> iterators;
+  protected Level level;
+
+  public abstract float getProgress(Key currentKey);
+
+  public AccumuloInputSplit() {
+    locations = new String[0];
+    tableName = "";
+    tableId = "";
+  }
+
+  public AccumuloInputSplit(AccumuloInputSplit split) throws IOException {
+    this.setLocations(split.getLocations());
+    this.setTableName(split.getTableName());
+    this.setTableId(split.getTableId());
+  }
+
+  protected AccumuloInputSplit(String table, String tableId, String[] locations) {
+    setLocations(locations);
+    this.tableName = table;
+    this.tableId = tableId;
+  }
+
+  /**
+   * Central place to set common split configuration not handled by split constructors.
+   * The intention is to make it harder to miss optional setters in a future refactoring.
+   */
+  public static void updateSplit(AccumuloInputSplit split,  Instance instance, InputTableConfig tableConfig,
+                                  String principal, AuthenticationToken token, Authorizations auths, Level logLevel) {
+    split.setInstanceName(instance.getInstanceName());
+    split.setZooKeepers(instance.getZooKeepers());
+    split.setMockInstance(instance instanceof MockInstance);
+
+    split.setPrincipal(principal);
+    split.setToken(token);
+    split.setAuths(auths);
+
+    split.setFetchedColumns(tableConfig.getFetchedColumns());
+    split.setIterators(tableConfig.getIterators());
+    split.setLogLevel(logLevel);
+  }
+
+  private static byte[] extractBytes(ByteSequence seq, int numBytes) {
+    byte[] bytes = new byte[numBytes + 1];
+    bytes[0] = 0;
+    for (int i = 0; i < numBytes; i++) {
+      if (i >= seq.length())
+        bytes[i + 1] = 0;
+      else
+        bytes[i + 1] = seq.byteAt(i);
+    }
+    return bytes;
+  }
+
+  public static float getProgress(ByteSequence start, ByteSequence end, ByteSequence position) {
+    int maxDepth = Math.min(Math.max(end.length(), start.length()), position.length());
+    BigInteger startBI = new BigInteger(extractBytes(start, maxDepth));
+    BigInteger endBI = new BigInteger(extractBytes(end, maxDepth));
+    BigInteger positionBI = new BigInteger(extractBytes(position, maxDepth));
+    return (float) (positionBI.subtract(startBI).doubleValue() / endBI.subtract(startBI).doubleValue());
+  }
+
+  public long getRangeLength(Range range) throws IOException {
+    Text startRow = range.isInfiniteStartKey() ? new Text(new byte[] {Byte.MIN_VALUE}) : range.getStartKey().getRow();
+    Text stopRow = range.isInfiniteStopKey() ? new Text(new byte[] {Byte.MAX_VALUE}) : range.getEndKey().getRow();
+    int maxCommon = Math.min(7, Math.min(startRow.getLength(), stopRow.getLength()));
+    long diff = 0;
+
+    byte[] start = startRow.getBytes();
+    byte[] stop = stopRow.getBytes();
+    for (int i = 0; i < maxCommon; ++i) {
+      diff |= 0xff & (start[i] ^ stop[i]);
+      diff <<= Byte.SIZE;
+    }
+
+    if (startRow.getLength() != stopRow.getLength())
+      diff |= 0xff;
+
+    return diff + 1;
+  }
+
+  @Override
+  public String[] getLocations() throws IOException {
+    return Arrays.copyOf(locations, locations.length);
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    tableName = in.readUTF();
+    tableId = in.readUTF();
+    int numLocs = in.readInt();
+    locations = new String[numLocs];
+    for (int i = 0; i < numLocs; ++i)
+      locations[i] = in.readUTF();
+
+    if (in.readBoolean()) {
+      mockInstance = in.readBoolean();
+    }
+
+    if (in.readBoolean()) {
+      int numColumns = in.readInt();
+      List<String> columns = new ArrayList<String>(numColumns);
+      for (int i = 0; i < numColumns; i++) {
+        columns.add(in.readUTF());
+      }
+
+      fetchedColumns = InputConfigurator.deserializeFetchedColumns(columns);
+    }
+
+    if (in.readBoolean()) {
+      String strAuths = in.readUTF();
+      auths = new Authorizations(strAuths.getBytes(UTF_8));
+    }
+
+    if (in.readBoolean()) {
+      principal = in.readUTF();
+    }
+
+    if (in.readBoolean()) {
+      int ordinal = in.readInt();
+      this.tokenSource = TokenSource.values()[ordinal];
+
+      switch (this.tokenSource) {
+        case INLINE:
+          String tokenClass = in.readUTF();
+          byte[] base64TokenBytes = in.readUTF().getBytes(UTF_8);
+          byte[] tokenBytes = Base64.decodeBase64(base64TokenBytes);
+
+          this.token = AuthenticationTokenSerializer.deserialize(tokenClass, tokenBytes);
+          break;
+
+        case FILE:
+          this.tokenFile = in.readUTF();
+
+          break;
+        default:
+          throw new IOException("Cannot parse unknown TokenSource ordinal");
+      }
+    }
+
+    if (in.readBoolean()) {
+      instanceName = in.readUTF();
+    }
+
+    if (in.readBoolean()) {
+      zooKeepers = in.readUTF();
+    }
+
+    if (in.readBoolean()) {
+      int numIterators = in.readInt();
+      iterators = new ArrayList<IteratorSetting>(numIterators);
+      for (int i = 0; i < numIterators; i++) {
+        iterators.add(new IteratorSetting(in));
+      }
+    }
+
+    if (in.readBoolean()) {
+      level = Level.toLevel(in.readInt());
+    }
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    out.writeUTF(tableName);
+    out.writeUTF(tableId);
+    out.writeInt(locations.length);
+    for (int i = 0; i < locations.length; ++i)
+      out.writeUTF(locations[i]);
+
+    out.writeBoolean(null != mockInstance);
+    if (null != mockInstance) {
+      out.writeBoolean(mockInstance);
+    }
+
+    out.writeBoolean(null != fetchedColumns);
+    if (null != fetchedColumns) {
+      String[] cols = InputConfigurator.serializeColumns(fetchedColumns);
+      out.writeInt(cols.length);
+      for (String col : cols) {
+        out.writeUTF(col);
+      }
+    }
+
+    out.writeBoolean(null != auths);
+    if (null != auths) {
+      out.writeUTF(auths.serialize());
+    }
+
+    out.writeBoolean(null != principal);
+    if (null != principal) {
+      out.writeUTF(principal);
+    }
+
+    out.writeBoolean(null != tokenSource);
+    if (null != tokenSource) {
+      out.writeInt(tokenSource.ordinal());
+
+      if (null != token && null != tokenFile) {
+        throw new IOException("Cannot use both inline AuthenticationToken and file-based AuthenticationToken");
+      } else if (null != token) {
+        out.writeUTF(token.getClass().getCanonicalName());
+        out.writeUTF(Base64.encodeBase64String(AuthenticationTokenSerializer.serialize(token)));
+      } else {
+        out.writeUTF(tokenFile);
+      }
+    }
+
+    out.writeBoolean(null != instanceName);
+    if (null != instanceName) {
+      out.writeUTF(instanceName);
+    }
+
+    out.writeBoolean(null != zooKeepers);
+    if (null != zooKeepers) {
+      out.writeUTF(zooKeepers);
+    }
+
+    out.writeBoolean(null != iterators);
+    if (null != iterators) {
+      out.writeInt(iterators.size());
+      for (IteratorSetting iterator : iterators) {
+        iterator.write(out);
+      }
+    }
+
+    out.writeBoolean(null != level);
+    if (null != level) {
+      out.writeInt(level.toInt());
+    }
+  }
+
+  /**
+   * @deprecated since 1.6.1, use {@link #getTableName()} instead.
+   */
+  @Deprecated
+  public String getTable() {
+    return getTableName();
+  }
+
+  public String getTableName() {
+    return tableName;
+  }
+
+  /**
+   * @deprecated since 1.6.1, use {@link #setTableName(String)} instead.
+   */
+  @Deprecated
+  public void setTable(String table) {
+    setTableName(table);
+  }
+
+  public void setTableName(String table) {
+    this.tableName = table;
+  }
+
+  public void setTableId(String tableId) {
+    this.tableId = tableId;
+  }
+
+  public String getTableId() {
+    return tableId;
+  }
+
+  /**
+   * @deprecated since 1.7.0, use {@link #getInstance(ClientConfiguration)} instead.
+   */
+  @Deprecated
+  public Instance getInstance() {
+    return getInstance(ClientConfiguration.loadDefault());
+  }
+
+  public Instance getInstance(ClientConfiguration base) {
+    if (null == instanceName) {
+      return null;
+    }
+
+    if (isMockInstance()) {
+      return new MockInstance(getInstanceName());
+    }
+
+    if (null == zooKeepers) {
+      return null;
+    }
+
+    return new ZooKeeperInstance(base.withInstance(getInstanceName()).withZkHosts(getZooKeepers()));
+  }
+
+  public String getInstanceName() {
+    return instanceName;
+  }
+
+  public void setInstanceName(String instanceName) {
+    this.instanceName = instanceName;
+  }
+
+  public String getZooKeepers() {
+    return zooKeepers;
+  }
+
+  public void setZooKeepers(String zooKeepers) {
+    this.zooKeepers = zooKeepers;
+  }
+
+  public String getPrincipal() {
+    return principal;
+  }
+
+  public void setPrincipal(String principal) {
+    this.principal = principal;
+  }
+
+  public AuthenticationToken getToken() {
+    return token;
+  }
+
+  public void setToken(AuthenticationToken token) {
+    this.tokenSource = TokenSource.INLINE;
+    this.token = token;
+  }
+
+  public void setToken(String tokenFile) {
+    this.tokenSource = TokenSource.FILE;
+    this.tokenFile = tokenFile;
+  }
+
+  public void setLocations(String[] locations) {
+    this.locations = Arrays.copyOf(locations, locations.length);
+  }
+
+  public Boolean isMockInstance() {
+    return mockInstance;
+  }
+
+  public void setMockInstance(Boolean mockInstance) {
+    this.mockInstance = mockInstance;
+  }
+
+  public Authorizations getAuths() {
+    return auths;
+  }
+
+  public void setAuths(Authorizations auths) {
+    this.auths = auths;
+  }
+
+
+  public Set<Pair<Text,Text>> getFetchedColumns() {
+    return fetchedColumns;
+  }
+
+  public void setFetchedColumns(Collection<Pair<Text,Text>> fetchedColumns) {
+    this.fetchedColumns = new HashSet<Pair<Text,Text>>();
+    for (Pair<Text,Text> columns : fetchedColumns) {
+      this.fetchedColumns.add(columns);
+    }
+  }
+
+  public void setFetchedColumns(Set<Pair<Text,Text>> fetchedColumns) {
+    this.fetchedColumns = fetchedColumns;
+  }
+
+  public List<IteratorSetting> getIterators() {
+    return iterators;
+  }
+
+  public void setIterators(List<IteratorSetting> iterators) {
+    this.iterators = iterators;
+  }
+
+  public Level getLogLevel() {
+    return level;
+  }
+
+  public void setLogLevel(Level level) {
+    this.level = level;
+  }
+}


Mime
View raw message