accumulo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ktur...@apache.org
Subject [7/7] accumulo git commit: ACCUMULO-3913 Added per table sampling
Date Mon, 21 Sep 2015 13:51:32 GMT
ACCUMULO-3913 Added per table sampling


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/45f18c17
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/45f18c17
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/45f18c17

Branch: refs/heads/master
Commit: 45f18c174612d1a41eed1d2eec9e77d3b3e71a82
Parents: fdcc169
Author: Keith Turner <kturner@apache.org>
Authored: Mon Sep 21 09:44:47 2015 -0400
Committer: Keith Turner <kturner@apache.org>
Committed: Mon Sep 21 09:46:39 2015 -0400

----------------------------------------------------------------------
 .../core/client/ClientSideIteratorScanner.java  |  161 +-
 .../core/client/SampleNotPresentException.java  |   42 +
 .../accumulo/core/client/ScannerBase.java       |   46 +
 .../client/admin/NewTableConfiguration.java     |   31 +
 .../core/client/admin/SamplerConfiguration.java |   91 +
 .../core/client/admin/TableOperations.java      |   29 +
 .../client/impl/BaseIteratorEnvironment.java    |   83 +
 .../core/client/impl/OfflineIterator.java       |   52 +-
 .../core/client/impl/ScannerIterator.java       |    6 +-
 .../core/client/impl/ScannerOptions.java        |   29 +-
 .../core/client/impl/TableOperationsImpl.java   |   39 +
 .../impl/TabletServerBatchReaderIterator.java   |   17 +-
 .../core/client/impl/ThriftScanner.java         |   25 +-
 .../core/client/mapred/AbstractInputFormat.java |   10 +
 .../client/mapred/AccumuloFileOutputFormat.java |   15 +
 .../core/client/mapred/InputFormatBase.java     |   19 +
 .../client/mapreduce/AbstractInputFormat.java   |   10 +
 .../mapreduce/AccumuloFileOutputFormat.java     |   15 +
 .../core/client/mapreduce/InputFormatBase.java  |   19 +
 .../core/client/mapreduce/InputTableConfig.java |   26 +
 .../core/client/mapreduce/RangeInputSplit.java  |   21 +
 .../core/client/mapreduce/impl/SplitUtils.java  |    2 +
 .../lib/impl/FileOutputConfigurator.java        |   29 +-
 .../mapreduce/lib/impl/InputConfigurator.java   |   53 +-
 .../core/client/mock/MockScannerBase.java       |   16 +
 .../core/client/mock/MockTableOperations.java   |   17 +
 .../core/compaction/CompactionSettings.java     |    1 +
 .../accumulo/core/compaction/NullType.java      |   29 +
 .../org/apache/accumulo/core/conf/Property.java |   12 +-
 .../accumulo/core/file/BloomFilterLayer.java    |    6 +
 .../accumulo/core/file/FileSKVIterator.java     |    5 +-
 .../core/file/map/MapFileOperations.java        |    6 +
 .../core/file/rfile/MultiIndexIterator.java     |    6 +
 .../core/file/rfile/MultiLevelIndex.java        |    4 +-
 .../accumulo/core/file/rfile/PrintInfo.java     |   28 +-
 .../apache/accumulo/core/file/rfile/RFile.java  |  511 +++--
 .../core/file/rfile/RFileOperations.java        |   12 +-
 .../core/iterators/IteratorEnvironment.java     |   50 +
 .../core/iterators/SortedMapIterator.java       |    4 +
 .../core/iterators/WrappingIterator.java        |    8 -
 .../core/iterators/system/EmptyIterator.java    |   72 +
 .../core/iterators/system/MapFileIterator.java  |    6 +
 .../core/iterators/system/SampleIterator.java   |   46 +
 .../iterators/system/SequenceFileIterator.java  |    6 +
 .../core/sample/AbstractHashSampler.java        |  106 ++
 .../accumulo/core/sample/RowColumnSampler.java  |  124 ++
 .../apache/accumulo/core/sample/RowSampler.java |   49 +
 .../apache/accumulo/core/sample/Sampler.java    |   57 +
 .../sample/impl/SamplerConfigurationImpl.java   |  184 ++
 .../core/sample/impl/SamplerFactory.java        |   48 +
 .../thrift/TSampleNotPresentException.java      |  409 ++++
 .../thrift/TSamplerConfiguration.java           |  556 ++++++
 .../thrift/TabletClientService.java             | 1762 ++++++++++++------
 .../accumulo/core/util/LocalityGroupUtil.java   |    4 +-
 core/src/main/thrift/tabletserver.thrift        |   19 +-
 .../client/impl/TableOperationsHelperTest.java  |   17 +
 .../mapred/AccumuloFileOutputFormatTest.java    |   14 +-
 .../mapreduce/AccumuloFileOutputFormatTest.java |   13 +
 .../core/file/rfile/MultiLevelIndexTest.java    |    3 +-
 .../accumulo/core/file/rfile/RFileTest.java     |  333 +++-
 .../iterators/DefaultIteratorEnvironment.java   |   25 +-
 .../iterators/FirstEntryInRowIteratorTest.java  |   37 +-
 .../core/iterators/SortedMapIteratorTest.java   |   46 +
 .../iterators/user/RowDeletingIteratorTest.java |   30 +-
 .../iterators/user/RowEncodingIteratorTest.java |   52 +-
 .../user/TransformingIteratorTest.java          |   41 +-
 .../apache/accumulo/core/file/rfile/ver_7.rf    |  Bin 0 -> 14557 bytes
 .../main/asciidoc/accumulo_user_manual.asciidoc |    2 +
 docs/src/main/asciidoc/chapters/sampling.txt    |   86 +
 docs/src/main/resources/examples/README         |    2 +
 docs/src/main/resources/examples/README.sample  |  192 ++
 .../examples/simple/sample/SampleExample.java   |  150 ++
 .../shard/CutoffIntersectingIterator.java       |  123 ++
 .../accumulo/examples/simple/shard/Query.java   |   31 +-
 .../server/util/VerifyTabletAssignments.java    |    2 +-
 .../iterators/MetadataBulkLoadFilterTest.java   |   25 +-
 .../server/replication/StatusCombinerTest.java  |   39 +-
 .../monitor/servlets/trace/NullScanner.java     |   11 +
 .../apache/accumulo/tserver/FileManager.java    |   30 +-
 .../apache/accumulo/tserver/InMemoryMap.java    |  244 ++-
 .../tserver/MemKeyConversionIterator.java       |    6 +-
 .../org/apache/accumulo/tserver/MemValue.java   |   63 +-
 .../org/apache/accumulo/tserver/NativeMap.java  |    4 +
 .../tserver/TabletIteratorEnvironment.java      |   61 +-
 .../apache/accumulo/tserver/TabletServer.java   |   38 +-
 .../ConfigurableCompactionStrategy.java         |   22 +
 .../accumulo/tserver/scan/LookupTask.java       |    5 +-
 .../accumulo/tserver/scan/NextBatchTask.java    |    5 +-
 .../tserver/session/MultiScanSession.java       |    5 +-
 .../accumulo/tserver/tablet/ScanDataSource.java |   24 +-
 .../accumulo/tserver/tablet/ScanOptions.java    |   16 +-
 .../apache/accumulo/tserver/tablet/Tablet.java  |   12 +-
 .../accumulo/tserver/tablet/TabletMemory.java   |    7 +-
 .../accumulo/tserver/InMemoryMapTest.java       |  383 +++-
 .../DefaultCompactionStrategyTest.java          |    6 +
 .../accumulo/shell/commands/CompactCommand.java |    6 +-
 .../accumulo/shell/commands/GrepCommand.java    |    2 +
 .../accumulo/shell/commands/ScanCommand.java    |   23 +
 start/.gitignore                                |    1 +
 .../test/InMemoryMapMemoryUsageTest.java        |    8 +-
 .../java/org/apache/accumulo/test/SampleIT.java |  497 +++++
 .../org/apache/accumulo/test/ShellServerIT.java |   72 +-
 .../accumulo/test/functional/ExamplesIT.java    |    4 +-
 .../accumulo/test/functional/ReadWriteIT.java   |    4 +-
 .../test/mapred/AccumuloFileOutputFormatIT.java |   18 +
 .../test/mapred/AccumuloInputFormatIT.java      |   57 +-
 .../mapreduce/AccumuloFileOutputFormatIT.java   |   18 +
 .../test/mapreduce/AccumuloInputFormatIT.java   |   49 +-
 .../test/performance/thrift/NullTserver.java    |    6 +-
 109 files changed, 6864 insertions(+), 1139 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/ClientSideIteratorScanner.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/ClientSideIteratorScanner.java b/core/src/main/java/org/apache/accumulo/core/client/ClientSideIteratorScanner.java
index eb3c923..5dc6d59 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/ClientSideIteratorScanner.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/ClientSideIteratorScanner.java
@@ -27,6 +27,7 @@ import java.util.TreeSet;
 import java.util.concurrent.TimeUnit;
 
 import org.apache.accumulo.core.Constants;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.impl.ScannerOptions;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.data.ArrayByteSequence;
@@ -44,6 +45,8 @@ import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.hadoop.io.Text;
 
+import com.google.common.base.Preconditions;
+
 /**
  * A scanner that instantiates iterators on the client side instead of on the tablet server. This can be useful for testing iterators or in cases where you
  * don't want iterators affecting the performance of tablet servers.<br>
@@ -60,6 +63,7 @@ public class ClientSideIteratorScanner extends ScannerOptions implements Scanner
   private Range range;
   private boolean isolated = false;
   private long readaheadThreshold = Constants.SCANNER_DEFAULT_READAHEAD_THRESHOLD;
+  private SamplerConfiguration iteratorSamplerConfig;
 
   /**
    * @deprecated since 1.7.0 was never intended for public use. However this could have been used by anything extending this class.
@@ -67,7 +71,7 @@ public class ClientSideIteratorScanner extends ScannerOptions implements Scanner
   @Deprecated
   public class ScannerTranslator extends ScannerTranslatorImpl {
     public ScannerTranslator(Scanner scanner) {
-      super(scanner);
+      super(scanner, scanner.getSamplerConfiguration());
     }
 
     @Override
@@ -76,6 +80,62 @@ public class ClientSideIteratorScanner extends ScannerOptions implements Scanner
     }
   }
 
+  private class ClientSideIteratorEnvironment implements IteratorEnvironment {
+
+    private SamplerConfiguration samplerConfig;
+    private boolean sampleEnabled;
+
+    ClientSideIteratorEnvironment(boolean sampleEnabled, SamplerConfiguration samplerConfig) {
+      this.sampleEnabled = sampleEnabled;
+      this.samplerConfig = samplerConfig;
+    }
+
+    @Override
+    public SortedKeyValueIterator<Key,Value> reserveMapFileReader(String mapFileName) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public AccumuloConfiguration getConfig() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public IteratorScope getIteratorScope() {
+      return IteratorScope.scan;
+    }
+
+    @Override
+    public boolean isFullMajorCompaction() {
+      return false;
+    }
+
+    @Override
+    public void registerSideChannel(SortedKeyValueIterator<Key,Value> iter) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public Authorizations getAuthorizations() {
+      return ClientSideIteratorScanner.this.getAuthorizations();
+    }
+
+    @Override
+    public IteratorEnvironment cloneWithSamplingEnabled() {
+      return new ClientSideIteratorEnvironment(true, samplerConfig);
+    }
+
+    @Override
+    public boolean isSamplingEnabled() {
+      return sampleEnabled;
+    }
+
+    @Override
+    public SamplerConfiguration getSamplerConfiguration() {
+      return samplerConfig;
+    }
+  }
+
   /**
    * A class that wraps a Scanner in a SortedKeyValueIterator so that other accumulo iterators can use it as a source.
    */
@@ -83,6 +143,7 @@ public class ClientSideIteratorScanner extends ScannerOptions implements Scanner
     protected Scanner scanner;
     Iterator<Entry<Key,Value>> iter;
     Entry<Key,Value> top = null;
+    private SamplerConfiguration samplerConfig;
 
     /**
      * Constructs an accumulo iterator from a scanner.
@@ -90,8 +151,9 @@ public class ClientSideIteratorScanner extends ScannerOptions implements Scanner
      * @param scanner
      *          the scanner to iterate over
      */
-    public ScannerTranslatorImpl(final Scanner scanner) {
+    public ScannerTranslatorImpl(final Scanner scanner, SamplerConfiguration samplerConfig) {
       this.scanner = scanner;
+      this.samplerConfig = samplerConfig;
     }
 
     @Override
@@ -122,6 +184,13 @@ public class ClientSideIteratorScanner extends ScannerOptions implements Scanner
       for (ByteSequence colf : columnFamilies) {
         scanner.fetchColumnFamily(new Text(colf.toArray()));
       }
+
+      if (samplerConfig == null) {
+        scanner.clearSamplerConfiguration();
+      } else {
+        scanner.setSamplerConfiguration(samplerConfig);
+      }
+
       iter = scanner.iterator();
       next();
     }
@@ -138,7 +207,7 @@ public class ClientSideIteratorScanner extends ScannerOptions implements Scanner
 
     @Override
     public SortedKeyValueIterator<Key,Value> deepCopy(final IteratorEnvironment env) {
-      return new ScannerTranslatorImpl(scanner);
+      return new ScannerTranslatorImpl(scanner, env.isSamplingEnabled() ? env.getSamplerConfiguration() : null);
     }
   }
 
@@ -151,19 +220,22 @@ public class ClientSideIteratorScanner extends ScannerOptions implements Scanner
    *          the source scanner
    */
   public ClientSideIteratorScanner(final Scanner scanner) {
-    smi = new ScannerTranslatorImpl(scanner);
+    smi = new ScannerTranslatorImpl(scanner, scanner.getSamplerConfiguration());
     this.range = scanner.getRange();
     this.size = scanner.getBatchSize();
     this.timeOut = scanner.getTimeout(TimeUnit.MILLISECONDS);
     this.batchTimeOut = scanner.getTimeout(TimeUnit.MILLISECONDS);
     this.readaheadThreshold = scanner.getReadaheadThreshold();
+    SamplerConfiguration samplerConfig = scanner.getSamplerConfiguration();
+    if (samplerConfig != null)
+      setSamplerConfiguration(samplerConfig);
   }
 
   /**
    * Sets the source Scanner.
    */
   public void setSource(final Scanner scanner) {
-    smi = new ScannerTranslatorImpl(scanner);
+    smi = new ScannerTranslatorImpl(scanner, scanner.getSamplerConfiguration());
   }
 
   @Override
@@ -177,6 +249,8 @@ public class ClientSideIteratorScanner extends ScannerOptions implements Scanner
     else
       smi.scanner.disableIsolation();
 
+    smi.samplerConfig = getSamplerConfiguration();
+
     final TreeMap<Integer,IterInfo> tm = new TreeMap<Integer,IterInfo>();
 
     for (IterInfo iterInfo : serverSideIteratorList) {
@@ -185,35 +259,8 @@ public class ClientSideIteratorScanner extends ScannerOptions implements Scanner
 
     SortedKeyValueIterator<Key,Value> skvi;
     try {
-      skvi = IteratorUtil.loadIterators(smi, tm.values(), serverSideIteratorOptions, new IteratorEnvironment() {
-        @Override
-        public SortedKeyValueIterator<Key,Value> reserveMapFileReader(final String mapFileName) throws IOException {
-          return null;
-        }
-
-        @Override
-        public AccumuloConfiguration getConfig() {
-          return null;
-        }
-
-        @Override
-        public IteratorScope getIteratorScope() {
-          return null;
-        }
-
-        @Override
-        public boolean isFullMajorCompaction() {
-          return false;
-        }
-
-        @Override
-        public void registerSideChannel(final SortedKeyValueIterator<Key,Value> iter) {}
-
-        @Override
-        public Authorizations getAuthorizations() {
-          return smi.scanner.getAuthorizations();
-        }
-      }, false, null);
+      skvi = IteratorUtil.loadIterators(smi, tm.values(), serverSideIteratorOptions, new ClientSideIteratorEnvironment(getSamplerConfiguration() != null,
+          getIteratorSamplerConfigurationInternal()), false, null);
     } catch (IOException e) {
       throw new RuntimeException(e);
     }
@@ -297,4 +344,50 @@ public class ClientSideIteratorScanner extends ScannerOptions implements Scanner
     }
     this.readaheadThreshold = batches;
   }
+
+  private SamplerConfiguration getIteratorSamplerConfigurationInternal() {
+    SamplerConfiguration scannerSamplerConfig = getSamplerConfiguration();
+    if (scannerSamplerConfig != null) {
+      if (iteratorSamplerConfig != null && !iteratorSamplerConfig.equals(scannerSamplerConfig)) {
+        throw new IllegalStateException("Scanner and iterator sampler configuration differ");
+      }
+
+      return scannerSamplerConfig;
+    }
+
+    return iteratorSamplerConfig;
+  }
+
+  /**
+   * This is provided for the case where no sampler configuration is set on the scanner, but there is a need to create iterator deep copies that have sampling
+   * enabled. If sampler configuration is set on the scanner, then this method does not need to be called in order to create deep copies with sampling.
+   *
+   * <p>
+   * Setting this differently than the scanner's sampler configuration may cause exceptions.
+   *
+   * @since 1.8.0
+   */
+  public void setIteratorSamplerConfiguration(SamplerConfiguration sc) {
+    Preconditions.checkNotNull(sc);
+    this.iteratorSamplerConfig = sc;
+  }
+
+  /**
+   * Clear any iterator sampler configuration.
+   *
+   * @since 1.8.0
+   */
+  public void clearIteratorSamplerConfiguration() {
+    this.iteratorSamplerConfig = null;
+  }
+
+  /**
+   * @return currently set iterator sampler configuration.
+   *
+   * @since 1.8.0
+   */
+
+  public SamplerConfiguration getIteratorSamplerConfiguration() {
+    return iteratorSamplerConfig;
+  }
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/SampleNotPresentException.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/SampleNotPresentException.java b/core/src/main/java/org/apache/accumulo/core/client/SampleNotPresentException.java
new file mode 100644
index 0000000..c70a898
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/client/SampleNotPresentException.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.core.client;
+
+/**
+ * Exception thrown when a table does not have sampling configured or when sampling is configured but it differs from what was requested.
+ *
+ * @since 1.8.0
+ */
+
+public class SampleNotPresentException extends RuntimeException {
+
+  public SampleNotPresentException(String message, Exception cause) {
+    super(message, cause);
+  }
+
+  public SampleNotPresentException(String message) {
+    super(message);
+  }
+
+  public SampleNotPresentException() {
+    super();
+  }
+
+  private static final long serialVersionUID = 1L;
+
+}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/ScannerBase.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/ScannerBase.java b/core/src/main/java/org/apache/accumulo/core/client/ScannerBase.java
index e9d288b..5642785 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/ScannerBase.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/ScannerBase.java
@@ -21,6 +21,7 @@ import java.util.Map.Entry;
 import java.util.concurrent.TimeUnit;
 
 import org.apache.accumulo.core.client.IteratorSetting.Column;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.security.Authorizations;
@@ -176,6 +177,51 @@ public interface ScannerBase extends Iterable<Entry<Key,Value>> {
   Authorizations getAuthorizations();
 
   /**
+   * Setting this will cause the scanner to read sample data, as long as that sample data was generated with the given configuration. By default this is not set
+   * and all data is read.
+   *
+   * <p>
+   * One way to use this method is as follows, where the sampler configuration is obtained from the table configuration. Sample data can be generated in many
+   * different ways, so it's important to verify the sample data configuration meets expectations.
+   *
+   * <p>
+   *
+   * <pre>
+   * <code>
+   *   // could cache this if creating many scanners to avoid RPCs.
+   *   SamplerConfiguration samplerConfig = connector.tableOperations().getSamplerConfiguration(table);
+   *   // verify table's sample data is generated in an expected way before using
+   *   userCode.verifySamplerConfig(samplerConfig);
+   *   scanner.setSamplerConfiguration(samplerConfig);
+   * </code>
+   * </pre>
+   *
+   * <p>
+   * Of course this is not the only way to obtain a {@link SamplerConfiguration}, it could be a constant, configuration, etc.
+   *
+   * <p>
+   * If sample data is not present or sample data was generated with a different configuration, then the scanner iterator will throw a
+   * {@link SampleNotPresentException}. Also if a table's sampler configuration is changed while a scanner is iterating over a table, a
+   * {@link SampleNotPresentException} may be thrown.
+   *
+   * @since 1.8.0
+   */
+  void setSamplerConfiguration(SamplerConfiguration samplerConfig);
+
+  /**
+   * @return currently set sampler configuration. Returns null if no sampler configuration is set.
+   * @since 1.8.0
+   */
+  SamplerConfiguration getSamplerConfiguration();
+
+  /**
+   * Clears sampler configuration making a scanner read all data. After calling this, {@link #getSamplerConfiguration()} should return null.
+   *
+   * @since 1.8.0
+   */
+  void clearSamplerConfiguration();
+
+  /**
    * This setting determines how long a scanner will wait to fill the returned batch. By default, a scanner wait until the batch is full.
    *
    * <p>

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/admin/NewTableConfiguration.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/admin/NewTableConfiguration.java b/core/src/main/java/org/apache/accumulo/core/client/admin/NewTableConfiguration.java
index 4db1d89..2107dc8 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/admin/NewTableConfiguration.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/admin/NewTableConfiguration.java
@@ -24,6 +24,9 @@ import java.util.Map;
 
 import org.apache.accumulo.core.iterators.IteratorUtil;
 import org.apache.accumulo.core.iterators.user.VersioningIterator;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
+
+import com.google.common.base.Preconditions;
 
 /**
  * This object stores table creation parameters. Currently includes: {@link TimeType}, whether to include default iterators, and user-specified initial
@@ -39,6 +42,7 @@ public class NewTableConfiguration {
   private boolean limitVersion = true;
 
   private Map<String,String> properties = new HashMap<String,String>();
+  private SamplerConfiguration samplerConfiguration;
 
   /**
    * Configure logical or millisecond time for tables created with this configuration.
@@ -84,6 +88,7 @@ public class NewTableConfiguration {
    */
   public NewTableConfiguration setProperties(Map<String,String> prop) {
     checkArgument(prop != null, "properties is null");
+    checkDisjoint(prop, samplerConfiguration);
 
     this.properties = new HashMap<String,String>(prop);
     return this;
@@ -101,7 +106,33 @@ public class NewTableConfiguration {
       propertyMap.putAll(IteratorUtil.generateInitialTableProperties(limitVersion));
     }
 
+    if (samplerConfiguration != null) {
+      propertyMap.putAll(new SamplerConfigurationImpl(samplerConfiguration).toTablePropertiesMap());
+    }
+
     propertyMap.putAll(properties);
     return Collections.unmodifiableMap(propertyMap);
   }
+
+  private void checkDisjoint(Map<String,String> props, SamplerConfiguration samplerConfiguration) {
+    if (props.isEmpty() || samplerConfiguration == null) {
+      return;
+    }
+
+    Map<String,String> sampleProps = new SamplerConfigurationImpl(samplerConfiguration).toTablePropertiesMap();
+
+    checkArgument(Collections.disjoint(props.keySet(), sampleProps.keySet()), "Properties and derived sampler properties are not disjoint");
+  }
+
+  /**
+   * Enable building a sample data set on the new table using the given sampler configuration.
+   *
+   * @since 1.8.0
+   */
+  public NewTableConfiguration enableSampling(SamplerConfiguration samplerConfiguration) {
+    Preconditions.checkNotNull(samplerConfiguration);
+    checkDisjoint(properties, samplerConfiguration);
+    this.samplerConfiguration = samplerConfiguration;
+    return this;
+  }
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/admin/SamplerConfiguration.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/admin/SamplerConfiguration.java b/core/src/main/java/org/apache/accumulo/core/client/admin/SamplerConfiguration.java
new file mode 100644
index 0000000..079d324
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/client/admin/SamplerConfiguration.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.core.client.admin;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * This class encapsulates configuration and options needed to set up and use sampling.
+ *
+ * @since 1.8.0
+ */
+
+public class SamplerConfiguration {
+
+  private String className;
+  private Map<String,String> options = new HashMap<>();
+
+  public SamplerConfiguration(String samplerClassName) {
+    Preconditions.checkNotNull(samplerClassName);
+    this.className = samplerClassName;
+  }
+
+  public SamplerConfiguration setOptions(Map<String,String> options) {
+    Preconditions.checkNotNull(options);
+    this.options = new HashMap<>(options.size());
+
+    for (Entry<String,String> entry : options.entrySet()) {
+      addOption(entry.getKey(), entry.getValue());
+    }
+
+    return this;
+  }
+
+  public SamplerConfiguration addOption(String option, String value) {
+    checkArgument(option != null, "option is null");
+    checkArgument(value != null, "value is null");
+    this.options.put(option, value);
+    return this;
+  }
+
+  public Map<String,String> getOptions() {
+    return Collections.unmodifiableMap(options);
+  }
+
+  public String getSamplerClassName() {
+    return className;
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (o instanceof SamplerConfiguration) {
+      SamplerConfiguration osc = (SamplerConfiguration) o;
+
+      return className.equals(osc.className) && options.equals(osc.options);
+    }
+
+    return false;
+  }
+
+  @Override
+  public int hashCode() {
+    return className.hashCode() + 31 * options.hashCode();
+  }
+
+  @Override
+  public String toString() {
+    return className + " " + options;
+  }
+}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/admin/TableOperations.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/admin/TableOperations.java b/core/src/main/java/org/apache/accumulo/core/client/admin/TableOperations.java
index b7c70e9..fa6fef4 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/admin/TableOperations.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/admin/TableOperations.java
@@ -756,4 +756,33 @@ public interface TableOperations {
    */
   boolean testClassLoad(String tableName, final String className, final String asTypeName) throws AccumuloException, AccumuloSecurityException,
       TableNotFoundException;
+
+  /**
+   * Set or update the sampler configuration for a table. If the table has existing sampler configuration, those properties will be cleared before setting the
+   * new table properties.
+   *
+   * @param tableName
+   *          the name of the table
+   * @since 1.8.0
+   */
+  void setSamplerConfiguration(String tableName, SamplerConfiguration samplerConfiguration) throws TableNotFoundException, AccumuloException,
+      AccumuloSecurityException;
+
+  /**
+   * Clear all sampling configuration properties on the table.
+   *
+   * @param tableName
+   *          the name of the table
+   * @since 1.8.0
+   */
+  void clearSamplerConfiguration(String tableName) throws TableNotFoundException, AccumuloException, AccumuloSecurityException;
+
+  /**
+   * Reads the sampling configuration properties for a table.
+   *
+   * @param tableName
+   *          the name of the table
+   * @since 1.8.0
+   */
+  SamplerConfiguration getSamplerConfiguration(String tableName) throws TableNotFoundException, AccumuloException, AccumuloSecurityException;
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/impl/BaseIteratorEnvironment.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/impl/BaseIteratorEnvironment.java b/core/src/main/java/org/apache/accumulo/core/client/impl/BaseIteratorEnvironment.java
new file mode 100644
index 0000000..dc138ce
--- /dev/null
+++ b/core/src/main/java/org/apache/accumulo/core/client/impl/BaseIteratorEnvironment.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.accumulo.core.client.impl;
+
+import java.io.IOException;
+
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
+import org.apache.accumulo.core.conf.AccumuloConfiguration;
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.IteratorEnvironment;
+import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
+import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
+import org.apache.accumulo.core.security.Authorizations;
+
+/**
+ * An implementation of {@link IteratorEnvironment} that throws {@link UnsupportedOperationException} for each operation. This is useful for situations that
+ * need an {@link IteratorEnvironment} but only support a subset of its methods: subclass this class and override just the methods that are needed.
+ */
+
+public class BaseIteratorEnvironment implements IteratorEnvironment {
+
+  @Override
+  public SortedKeyValueIterator<Key,Value> reserveMapFileReader(String mapFileName) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public AccumuloConfiguration getConfig() {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public IteratorScope getIteratorScope() {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public boolean isFullMajorCompaction() {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void registerSideChannel(SortedKeyValueIterator<Key,Value> iter) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public Authorizations getAuthorizations() {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public boolean isSamplingEnabled() {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public SamplerConfiguration getSamplerConfiguration() {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public IteratorEnvironment cloneWithSamplingEnabled() {
+    throw new UnsupportedOperationException();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/impl/OfflineIterator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/impl/OfflineIterator.java b/core/src/main/java/org/apache/accumulo/core/client/impl/OfflineIterator.java
index 793b044..9cce089 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/impl/OfflineIterator.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/impl/OfflineIterator.java
@@ -16,6 +16,8 @@
  */
 package org.apache.accumulo.core.client.impl;
 
+import static com.google.common.util.concurrent.Uninterruptibles.sleepUninterruptibly;
+
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -30,8 +32,10 @@ import org.apache.accumulo.core.client.AccumuloException;
 import org.apache.accumulo.core.client.Connector;
 import org.apache.accumulo.core.client.Instance;
 import org.apache.accumulo.core.client.RowIterator;
+import org.apache.accumulo.core.client.SampleNotPresentException;
 import org.apache.accumulo.core.client.Scanner;
 import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.conf.ConfigurationCopy;
 import org.apache.accumulo.core.conf.Property;
@@ -57,6 +61,7 @@ import org.apache.accumulo.core.master.state.tables.TableState;
 import org.apache.accumulo.core.metadata.MetadataTable;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.DataFileColumnFamily;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.security.ColumnVisibility;
 import org.apache.accumulo.core.util.CachedConfiguration;
@@ -68,16 +73,20 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.io.Text;
 
-import static com.google.common.util.concurrent.Uninterruptibles.sleepUninterruptibly;
-
 class OfflineIterator implements Iterator<Entry<Key,Value>> {
 
   static class OfflineIteratorEnvironment implements IteratorEnvironment {
 
     private final Authorizations authorizations;
+    private AccumuloConfiguration conf;
+    private boolean useSample;
+    private SamplerConfiguration sampleConf;
 
-    public OfflineIteratorEnvironment(Authorizations auths) {
+    public OfflineIteratorEnvironment(Authorizations auths, AccumuloConfiguration acuTableConf, boolean useSample, SamplerConfiguration samplerConf) {
       this.authorizations = auths;
+      this.conf = acuTableConf;
+      this.useSample = useSample;
+      this.sampleConf = samplerConf;
     }
 
     @Override
@@ -87,7 +96,7 @@ class OfflineIterator implements Iterator<Entry<Key,Value>> {
 
     @Override
     public AccumuloConfiguration getConfig() {
-      return AccumuloConfiguration.getDefaultConfiguration();
+      return conf;
     }
 
     @Override
@@ -119,6 +128,23 @@ class OfflineIterator implements Iterator<Entry<Key,Value>> {
       allIters.add(iter);
       return new MultiIterator(allIters, false);
     }
+
+    @Override
+    public boolean isSamplingEnabled() {
+      return useSample;
+    }
+
+    @Override
+    public SamplerConfiguration getSamplerConfiguration() {
+      return sampleConf;
+    }
+
+    @Override
+    public IteratorEnvironment cloneWithSamplingEnabled() {
+      if (sampleConf == null)
+        throw new SampleNotPresentException();
+      return new OfflineIteratorEnvironment(authorizations, conf, true, sampleConf);
+    }
   }
 
   private SortedKeyValueIterator<Key,Value> iter;
@@ -154,6 +180,8 @@ class OfflineIterator implements Iterator<Entry<Key,Value>> {
         nextTablet();
 
     } catch (Exception e) {
+      if (e instanceof RuntimeException)
+        throw (RuntimeException) e;
       throw new RuntimeException(e);
     }
   }
@@ -306,16 +334,30 @@ class OfflineIterator implements Iterator<Entry<Key,Value>> {
 
     readers.clear();
 
+    SamplerConfiguration scannerSamplerConfig = options.getSamplerConfiguration();
+    SamplerConfigurationImpl scannerSamplerConfigImpl = scannerSamplerConfig == null ? null : new SamplerConfigurationImpl(scannerSamplerConfig);
+    SamplerConfigurationImpl samplerConfImpl = SamplerConfigurationImpl.newSamplerConfig(acuTableConf);
+
+    if (scannerSamplerConfigImpl != null && ((samplerConfImpl != null && !scannerSamplerConfigImpl.equals(samplerConfImpl)) || samplerConfImpl == null)) {
+      throw new SampleNotPresentException();
+    }
+
     // TODO need to close files - ACCUMULO-1303
     for (String file : absFiles) {
       FileSystem fs = VolumeConfiguration.getVolume(file, conf, config).getFileSystem();
       FileSKVIterator reader = FileOperations.getInstance().openReader(file, false, fs, conf, acuTableConf, null, null);
+      if (scannerSamplerConfigImpl != null) {
+        reader = reader.getSample(scannerSamplerConfigImpl);
+        if (reader == null)
+          throw new SampleNotPresentException();
+      }
       readers.add(reader);
     }
 
     MultiIterator multiIter = new MultiIterator(readers, extent);
 
-    OfflineIteratorEnvironment iterEnv = new OfflineIteratorEnvironment(authorizations);
+    OfflineIteratorEnvironment iterEnv = new OfflineIteratorEnvironment(authorizations, acuTableConf, false, samplerConfImpl == null ? null
+        : samplerConfImpl.toSamplerConfiguration());
 
     DeletingIterator delIter = new DeletingIterator(multiIter, false);
 

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerIterator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerIterator.java b/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerIterator.java
index 764db21..55b0a85 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerIterator.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerIterator.java
@@ -28,6 +28,7 @@ import java.util.concurrent.TimeUnit;
 
 import org.apache.accumulo.core.client.AccumuloException;
 import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.SampleNotPresentException;
 import org.apache.accumulo.core.client.TableDeletedException;
 import org.apache.accumulo.core.client.TableNotFoundException;
 import org.apache.accumulo.core.client.TableOfflineException;
@@ -90,7 +91,8 @@ public class ScannerIterator implements Iterator<Entry<Key,Value>> {
           synchQ.add(currentBatch);
           return;
         }
-      } catch (IsolationException | ScanTimedOutException | AccumuloException | AccumuloSecurityException | TableDeletedException | TableOfflineException e) {
+      } catch (IsolationException | ScanTimedOutException | AccumuloException | AccumuloSecurityException | TableDeletedException | TableOfflineException
+          | SampleNotPresentException e) {
         log.trace("{}", e.getMessage(), e);
         synchQ.add(e);
       } catch (TableNotFoundException e) {
@@ -119,7 +121,7 @@ public class ScannerIterator implements Iterator<Entry<Key,Value>> {
     }
 
     scanState = new ScanState(context, tableId, authorizations, new Range(range), options.fetchedColumns, size, options.serverSideIteratorList,
-        options.serverSideIteratorOptions, isolated, readaheadThreshold, options.batchTimeOut);
+        options.serverSideIteratorOptions, isolated, readaheadThreshold, options.getSamplerConfiguration(), options.batchTimeOut);
 
     // If we want to start readahead immediately, don't wait for hasNext to be called
     if (0l == readaheadThreshold) {

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerOptions.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerOptions.java b/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerOptions.java
index cc337dd..8d96464 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerOptions.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerOptions.java
@@ -32,6 +32,7 @@ import java.util.concurrent.TimeUnit;
 
 import org.apache.accumulo.core.client.IteratorSetting;
 import org.apache.accumulo.core.client.ScannerBase;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.data.Column;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Value;
@@ -40,6 +41,8 @@ import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.util.TextUtil;
 import org.apache.hadoop.io.Text;
 
+import com.google.common.base.Preconditions;
+
 public class ScannerOptions implements ScannerBase {
 
   protected List<IterInfo> serverSideIteratorList = Collections.emptyList();
@@ -53,6 +56,8 @@ public class ScannerOptions implements ScannerBase {
 
   private String regexIterName = null;
 
+  private SamplerConfiguration samplerConfig = null;
+
   protected ScannerOptions() {}
 
   public ScannerOptions(ScannerOptions so) {
@@ -168,6 +173,8 @@ public class ScannerOptions implements ScannerBase {
         Set<Entry<String,Map<String,String>>> es = src.serverSideIteratorOptions.entrySet();
         for (Entry<String,Map<String,String>> entry : es)
           dst.serverSideIteratorOptions.put(entry.getKey(), new HashMap<String,String>(entry.getValue()));
+
+        dst.samplerConfig = src.samplerConfig;
         dst.batchTimeOut = src.batchTimeOut;
       }
     }
@@ -179,7 +186,7 @@ public class ScannerOptions implements ScannerBase {
   }
 
   @Override
-  public void setTimeout(long timeout, TimeUnit timeUnit) {
+  public synchronized void setTimeout(long timeout, TimeUnit timeUnit) {
     if (timeOut < 0) {
       throw new IllegalArgumentException("TimeOut must be positive : " + timeOut);
     }
@@ -191,7 +198,7 @@ public class ScannerOptions implements ScannerBase {
   }
 
   @Override
-  public long getTimeout(TimeUnit timeunit) {
+  public synchronized long getTimeout(TimeUnit timeunit) {
     return timeunit.convert(timeOut, TimeUnit.MILLISECONDS);
   }
 
@@ -201,11 +208,27 @@ public class ScannerOptions implements ScannerBase {
   }
 
   @Override
-  public Authorizations getAuthorizations() {
+  public synchronized Authorizations getAuthorizations() {
     throw new UnsupportedOperationException("No authorizations to return");
   }
 
   @Override
+  public synchronized void setSamplerConfiguration(SamplerConfiguration samplerConfig) {
+    Preconditions.checkNotNull(samplerConfig);
+    this.samplerConfig = samplerConfig;
+  }
+
+  @Override
+  public synchronized SamplerConfiguration getSamplerConfiguration() {
+    return samplerConfig;
+  }
+
+  @Override
+  public synchronized void clearSamplerConfiguration() {
+    this.samplerConfig = null;
+  }
+
+  @Override
   public void setBatchTimeout(long timeout, TimeUnit timeUnit) {
     if (timeOut < 0) {
       throw new IllegalArgumentException("Batch timeout must be positive : " + timeOut);

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/impl/TableOperationsImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/impl/TableOperationsImpl.java b/core/src/main/java/org/apache/accumulo/core/client/impl/TableOperationsImpl.java
index d65bcec..8434f2f 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/impl/TableOperationsImpl.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/impl/TableOperationsImpl.java
@@ -67,6 +67,7 @@ import org.apache.accumulo.core.client.admin.CompactionConfig;
 import org.apache.accumulo.core.client.admin.DiskUsage;
 import org.apache.accumulo.core.client.admin.FindMax;
 import org.apache.accumulo.core.client.admin.NewTableConfiguration;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.admin.TableOperations;
 import org.apache.accumulo.core.client.admin.TimeType;
 import org.apache.accumulo.core.client.impl.TabletLocator.TabletLocation;
@@ -95,6 +96,7 @@ import org.apache.accumulo.core.metadata.MetadataTable;
 import org.apache.accumulo.core.metadata.RootTable;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection;
 import org.apache.accumulo.core.rpc.ThriftUtil;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.tabletserver.thrift.NotServingTabletException;
 import org.apache.accumulo.core.tabletserver.thrift.TabletClientService;
@@ -1474,4 +1476,41 @@ public class TableOperationsImpl extends TableOperationsHelper {
     }
   }
 
+  private void clearSamplerOptions(String tableName) throws AccumuloException, TableNotFoundException, AccumuloSecurityException {
+    String prefix = Property.TABLE_SAMPLER_OPTS.getKey();
+    for (Entry<String,String> entry : getProperties(tableName)) {
+      String property = entry.getKey();
+      if (property.startsWith(prefix)) {
+        removeProperty(tableName, property);
+      }
+    }
+  }
+
+  @Override
+  public void setSamplerConfiguration(String tableName, SamplerConfiguration samplerConfiguration) throws AccumuloException, TableNotFoundException,
+      AccumuloSecurityException {
+    clearSamplerOptions(tableName);
+
+    List<Pair<String,String>> props = new SamplerConfigurationImpl(samplerConfiguration).toTableProperties();
+    for (Pair<String,String> pair : props) {
+      setProperty(tableName, pair.getFirst(), pair.getSecond());
+    }
+  }
+
+  @Override
+  public void clearSamplerConfiguration(String tableName) throws AccumuloException, TableNotFoundException, AccumuloSecurityException {
+    removeProperty(tableName, Property.TABLE_SAMPLER.getKey());
+    clearSamplerOptions(tableName);
+  }
+
+  @Override
+  public SamplerConfiguration getSamplerConfiguration(String tableName) throws TableNotFoundException, AccumuloException {
+    AccumuloConfiguration conf = new ConfigurationCopy(this.getProperties(tableName));
+    SamplerConfigurationImpl sci = SamplerConfigurationImpl.newSamplerConfig(conf);
+    if (sci == null) {
+      return null;
+    }
+    return sci.toSamplerConfiguration();
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/impl/TabletServerBatchReaderIterator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/impl/TabletServerBatchReaderIterator.java b/core/src/main/java/org/apache/accumulo/core/client/impl/TabletServerBatchReaderIterator.java
index 0b09808..1ff56b9 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/impl/TabletServerBatchReaderIterator.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/impl/TabletServerBatchReaderIterator.java
@@ -39,6 +39,7 @@ import java.util.concurrent.atomic.AtomicLong;
 import org.apache.accumulo.core.client.AccumuloException;
 import org.apache.accumulo.core.client.AccumuloSecurityException;
 import org.apache.accumulo.core.client.Instance;
+import org.apache.accumulo.core.client.SampleNotPresentException;
 import org.apache.accumulo.core.client.TableDeletedException;
 import org.apache.accumulo.core.client.TableNotFoundException;
 import org.apache.accumulo.core.client.TableOfflineException;
@@ -56,8 +57,10 @@ import org.apache.accumulo.core.data.thrift.TKeyValue;
 import org.apache.accumulo.core.data.thrift.TRange;
 import org.apache.accumulo.core.master.state.tables.TableState;
 import org.apache.accumulo.core.rpc.ThriftUtil;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.tabletserver.thrift.NoSuchScanIDException;
+import org.apache.accumulo.core.tabletserver.thrift.TSampleNotPresentException;
 import org.apache.accumulo.core.tabletserver.thrift.TabletClientService;
 import org.apache.accumulo.core.trace.Tracer;
 import org.apache.accumulo.core.util.ByteBufferUtil;
@@ -375,6 +378,8 @@ public class TabletServerBatchReaderIterator implements Iterator<Entry<Key,Value
           fatalException = new TableDeletedException(table);
         else
           fatalException = e;
+      } catch (SampleNotPresentException e) {
+        fatalException = e;
       } catch (Throwable t) {
         if (queryThreadPool.isShutdown())
           log.debug("Caught exception, but queryThreadPool is shutdown", t);
@@ -643,7 +648,8 @@ public class TabletServerBatchReaderIterator implements Iterator<Entry<Key,Value
             Translators.RT));
         InitialMultiScan imsr = client.startMultiScan(Tracer.traceInfo(), context.rpcCreds(), thriftTabletRanges,
             Translator.translate(columns, Translators.CT), options.serverSideIteratorList, options.serverSideIteratorOptions,
-            ByteBufferUtil.toByteBuffers(authorizations.getAuthorizations()), waitForWrites, options.batchTimeOut);
+            ByteBufferUtil.toByteBuffers(authorizations.getAuthorizations()), waitForWrites,
+            SamplerConfigurationImpl.toThrift(options.getSamplerConfiguration()), options.batchTimeOut);
         if (waitForWrites)
           ThriftScanner.serversWaitedForWrites.get(ttype).add(server.toString());
 
@@ -719,6 +725,15 @@ public class TabletServerBatchReaderIterator implements Iterator<Entry<Key,Value
     } catch (NoSuchScanIDException e) {
       log.debug("Server : {} msg : {}", server, e.getMessage(), e);
       throw new IOException(e);
+    } catch (TSampleNotPresentException e) {
+      log.debug("Server : " + server + " msg : " + e.getMessage(), e);
+      String tableInfo = "?";
+      if (e.getExtent() != null) {
+        String tableId = new KeyExtent(e.getExtent()).getTableId().toString();
+        tableInfo = Tables.getPrintableTableInfoFromId(context.getInstance(), tableId);
+      }
+      String message = "Table " + tableInfo + " does not have sampling configured or built";
+      throw new SampleNotPresentException(message, e);
     } catch (TException e) {
       log.debug("Server : {} msg : {}", server, e.getMessage(), e);
       timeoutTracker.errorOccured(e);

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/impl/ThriftScanner.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/impl/ThriftScanner.java b/core/src/main/java/org/apache/accumulo/core/client/impl/ThriftScanner.java
index c2cc1e3..52f3330 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/impl/ThriftScanner.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/impl/ThriftScanner.java
@@ -32,9 +32,11 @@ import org.apache.accumulo.core.Constants;
 import org.apache.accumulo.core.client.AccumuloException;
 import org.apache.accumulo.core.client.AccumuloSecurityException;
 import org.apache.accumulo.core.client.Instance;
+import org.apache.accumulo.core.client.SampleNotPresentException;
 import org.apache.accumulo.core.client.TableDeletedException;
 import org.apache.accumulo.core.client.TableNotFoundException;
 import org.apache.accumulo.core.client.TableOfflineException;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.impl.TabletLocator.TabletLocation;
 import org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException;
 import org.apache.accumulo.core.data.Column;
@@ -50,9 +52,11 @@ import org.apache.accumulo.core.data.thrift.ScanResult;
 import org.apache.accumulo.core.data.thrift.TKeyValue;
 import org.apache.accumulo.core.master.state.tables.TableState;
 import org.apache.accumulo.core.rpc.ThriftUtil;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.tabletserver.thrift.NoSuchScanIDException;
 import org.apache.accumulo.core.tabletserver.thrift.NotServingTabletException;
+import org.apache.accumulo.core.tabletserver.thrift.TSampleNotPresentException;
 import org.apache.accumulo.core.tabletserver.thrift.TabletClientService;
 import org.apache.accumulo.core.tabletserver.thrift.TooManyFilesException;
 import org.apache.accumulo.core.trace.Span;
@@ -92,13 +96,13 @@ public class ThriftScanner {
       try {
         // not reading whole rows (or stopping on row boundries) so there is no need to enable isolation below
         ScanState scanState = new ScanState(context, extent.getTableId(), authorizations, range, fetchedColumns, size, serverSideIteratorList,
-            serverSideIteratorOptions, false, Constants.SCANNER_DEFAULT_READAHEAD_THRESHOLD, batchTimeOut);
+            serverSideIteratorOptions, false, Constants.SCANNER_DEFAULT_READAHEAD_THRESHOLD, null, batchTimeOut);
 
         TabletType ttype = TabletType.type(extent);
         boolean waitForWrites = !serversWaitedForWrites.get(ttype).contains(server);
         InitialScan isr = client.startScan(tinfo, scanState.context.rpcCreds(), extent.toThrift(), scanState.range.toThrift(),
             Translator.translate(scanState.columns, Translators.CT), scanState.size, scanState.serverSideIteratorList, scanState.serverSideIteratorOptions,
-            scanState.authorizations.getAuthorizationsBB(), waitForWrites, scanState.isolated, scanState.readaheadThreshold, scanState.batchTimeOut);
+            scanState.authorizations.getAuthorizationsBB(), waitForWrites, scanState.isolated, scanState.readaheadThreshold, null, scanState.batchTimeOut);
         if (waitForWrites)
           serversWaitedForWrites.get(ttype).add(server);
 
@@ -153,9 +157,11 @@ public class ThriftScanner {
 
     Map<String,Map<String,String>> serverSideIteratorOptions;
 
+    SamplerConfiguration samplerConfig;
+
     public ScanState(ClientContext context, Text tableId, Authorizations authorizations, Range range, SortedSet<Column> fetchedColumns, int size,
         List<IterInfo> serverSideIteratorList, Map<String,Map<String,String>> serverSideIteratorOptions, boolean isolated, long readaheadThreshold,
-        long batchTimeOut) {
+        SamplerConfiguration samplerConfig, long batchTimeOut) {
       this.context = context;
 
       this.authorizations = authorizations;
@@ -183,6 +189,9 @@ public class ThriftScanner {
 
       this.isolated = isolated;
       this.readaheadThreshold = readaheadThreshold;
+
+      this.samplerConfig = samplerConfig;
+
       this.batchTimeOut = batchTimeOut;
     }
   }
@@ -288,6 +297,10 @@ public class ThriftScanner {
           throw e;
         } catch (TApplicationException tae) {
           throw new AccumuloServerException(loc.tablet_location, tae);
+        } catch (TSampleNotPresentException tsnpe) {
+          String message = "Table " + Tables.getPrintableTableInfoFromId(instance, scanState.tableId.toString())
+              + " does not have sampling configured or built";
+          throw new SampleNotPresentException(message, tsnpe);
         } catch (NotServingTabletException e) {
           error = "Scan failed, not serving tablet " + loc;
           if (!error.equals(lastError))
@@ -377,7 +390,7 @@ public class ThriftScanner {
   }
 
   private static List<KeyValue> scan(TabletLocation loc, ScanState scanState, ClientContext context) throws AccumuloSecurityException,
-      NotServingTabletException, TException, NoSuchScanIDException, TooManyFilesException {
+      NotServingTabletException, TException, NoSuchScanIDException, TooManyFilesException, TSampleNotPresentException {
     if (scanState.finished)
       return null;
 
@@ -408,9 +421,11 @@ public class ThriftScanner {
 
         TabletType ttype = TabletType.type(loc.tablet_extent);
         boolean waitForWrites = !serversWaitedForWrites.get(ttype).contains(loc.tablet_location);
+
         InitialScan is = client.startScan(tinfo, scanState.context.rpcCreds(), loc.tablet_extent.toThrift(), scanState.range.toThrift(),
             Translator.translate(scanState.columns, Translators.CT), scanState.size, scanState.serverSideIteratorList, scanState.serverSideIteratorOptions,
-            scanState.authorizations.getAuthorizationsBB(), waitForWrites, scanState.isolated, scanState.readaheadThreshold, scanState.batchTimeOut);
+            scanState.authorizations.getAuthorizationsBB(), waitForWrites, scanState.isolated, scanState.readaheadThreshold,
+            SamplerConfigurationImpl.toThrift(scanState.samplerConfig), scanState.batchTimeOut);
         if (waitForWrites)
           serversWaitedForWrites.get(ttype).add(loc.tablet_location);
 

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
index d11639e..b581deb 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java
@@ -43,6 +43,7 @@ import org.apache.accumulo.core.client.TableDeletedException;
 import org.apache.accumulo.core.client.TableNotFoundException;
 import org.apache.accumulo.core.client.TableOfflineException;
 import org.apache.accumulo.core.client.admin.DelegationTokenConfig;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.admin.SecurityOperations;
 import org.apache.accumulo.core.client.impl.AuthenticationTokenIdentifier;
 import org.apache.accumulo.core.client.impl.ClientContext;
@@ -574,6 +575,15 @@ public abstract class AbstractInputFormat<K,V> implements InputFormat<K,V> {
         }
       }
 
+      SamplerConfiguration samplerConfig = baseSplit.getSamplerConfiguration();
+      if (null == samplerConfig) {
+        samplerConfig = tableConfig.getSamplerConfiguration();
+      }
+
+      if (samplerConfig != null) {
+        scannerBase.setSamplerConfiguration(samplerConfig);
+      }
+
       scannerIterator = scannerBase.iterator();
       numKeysRead = 0;
     }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormat.java
index 0eb304f..b383f3e 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormat.java
@@ -19,6 +19,7 @@ package org.apache.accumulo.core.client.mapred;
 import java.io.IOException;
 import java.util.Arrays;
 
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.mapreduce.lib.impl.ConfiguratorBase;
 import org.apache.accumulo.core.client.mapreduce.lib.impl.FileOutputConfigurator;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
@@ -140,6 +141,20 @@ public class AccumuloFileOutputFormat extends FileOutputFormat<Key,Value> {
     FileOutputConfigurator.setReplication(CLASS, job, replication);
   }
 
+  /**
+   * Specify a sampler to be used when writing out data. This will result in the output file having sample data.
+   *
+   * @param job
+   *          The Hadoop job instance to be configured
+   * @param samplerConfig
+   *          The configuration for creating sample data in the output file.
+   * @since 1.8.0
+   */
+
+  public static void setSampler(JobConf job, SamplerConfiguration samplerConfig) {
+    FileOutputConfigurator.setSampler(CLASS, job, samplerConfig);
+  }
+
   @Override
   public RecordWriter<Key,Value> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException {
     // get the path of the temporary output file

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
index ffb02a9..a9403a5 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java
@@ -25,7 +25,9 @@ import org.apache.accumulo.core.client.ClientSideIteratorScanner;
 import org.apache.accumulo.core.client.IsolatedScanner;
 import org.apache.accumulo.core.client.IteratorSetting;
 import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.ScannerBase;
 import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.impl.TabletLocator;
 import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
 import org.apache.accumulo.core.data.Key;
@@ -338,6 +340,23 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   }
 
   /**
+   * Causes input format to read sample data. If sample data was created using a different configuration or a table's sampler configuration changes while reading
+   * data, then the input format will throw an error.
+   *
+   *
+   * @param job
+   *          the Hadoop job instance to be configured
+   * @param samplerConfig
+   *          The sampler configuration that the sample must have been created with in order for reading sample data to succeed.
+   *
+   * @since 1.8.0
+   * @see ScannerBase#setSamplerConfiguration(SamplerConfiguration)
+   */
+  public static void setSamplerConfiguration(JobConf job, SamplerConfiguration samplerConfig) {
+    InputConfigurator.setSamplerConfiguration(CLASS, job, samplerConfig);
+  }
+
+  /**
    * Initializes an Accumulo {@link org.apache.accumulo.core.client.impl.TabletLocator} based on the configuration.
    *
    * @param job

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
index 7db67c7..0e51f03 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java
@@ -43,6 +43,7 @@ import org.apache.accumulo.core.client.TableDeletedException;
 import org.apache.accumulo.core.client.TableNotFoundException;
 import org.apache.accumulo.core.client.TableOfflineException;
 import org.apache.accumulo.core.client.admin.DelegationTokenConfig;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.admin.SecurityOperations;
 import org.apache.accumulo.core.client.impl.AuthenticationTokenIdentifier;
 import org.apache.accumulo.core.client.impl.ClientContext;
@@ -604,6 +605,15 @@ public abstract class AbstractInputFormat<K,V> extends InputFormat<K,V> {
         }
       }
 
+      SamplerConfiguration samplerConfig = split.getSamplerConfiguration();
+      if (null == samplerConfig) {
+        samplerConfig = tableConfig.getSamplerConfiguration();
+      }
+
+      if (samplerConfig != null) {
+        scannerBase.setSamplerConfiguration(samplerConfig);
+      }
+
       scannerIterator = scannerBase.iterator();
       numKeysRead = 0;
     }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormat.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormat.java
index abd96b6..7d4c0e2 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormat.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormat.java
@@ -19,6 +19,7 @@ package org.apache.accumulo.core.client.mapreduce;
 import java.io.IOException;
 import java.util.Arrays;
 
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.mapreduce.lib.impl.FileOutputConfigurator;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.conf.Property;
@@ -138,6 +139,20 @@ public class AccumuloFileOutputFormat extends FileOutputFormat<Key,Value> {
     FileOutputConfigurator.setReplication(CLASS, job.getConfiguration(), replication);
   }
 
+  /**
+   * Specify a sampler to be used when writing out data. This will result in the output file having sample data.
+   *
+   * @param job
+   *          The Hadoop job instance to be configured
+   * @param samplerConfig
+   *          The configuration for creating sample data in the output file.
+   * @since 1.8.0
+   */
+
+  public static void setSampler(Job job, SamplerConfiguration samplerConfig) {
+    FileOutputConfigurator.setSampler(CLASS, job.getConfiguration(), samplerConfig);
+  }
+
   @Override
   public RecordWriter<Key,Value> getRecordWriter(TaskAttemptContext context) throws IOException {
     // get the path of the temporary output file

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
index 6ab8a19..e5a0b90 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java
@@ -25,7 +25,9 @@ import org.apache.accumulo.core.client.ClientSideIteratorScanner;
 import org.apache.accumulo.core.client.IsolatedScanner;
 import org.apache.accumulo.core.client.IteratorSetting;
 import org.apache.accumulo.core.client.Scanner;
+import org.apache.accumulo.core.client.ScannerBase;
 import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.impl.TabletLocator;
 import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
 import org.apache.accumulo.core.data.Key;
@@ -337,6 +339,23 @@ public abstract class InputFormatBase<K,V> extends AbstractInputFormat<K,V> {
   }
 
   /**
+   * Causes input format to read sample data. If sample data was created using a different configuration or a table's sampler configuration changes while reading
+   * data, then the input format will throw an error.
+   *
+   *
+   * @param job
+   *          the Hadoop job instance to be configured
+   * @param samplerConfig
+   *          The sampler configuration that the sample must have been created with in order for reading sample data to succeed.
+   *
+   * @since 1.8.0
+   * @see ScannerBase#setSamplerConfiguration(SamplerConfiguration)
+   */
+  public static void setSamplerConfiguration(Job job, SamplerConfiguration samplerConfig) {
+    InputConfigurator.setSamplerConfiguration(CLASS, job.getConfiguration(), samplerConfig);
+  }
+
+  /**
    * Initializes an Accumulo {@link org.apache.accumulo.core.client.impl.TabletLocator} based on the configuration.
    *
    * @param context

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputTableConfig.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputTableConfig.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputTableConfig.java
index 257f6c9..51ad7eb 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputTableConfig.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputTableConfig.java
@@ -25,6 +25,8 @@ import java.util.HashSet;
 import java.util.List;
 
 import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.ScannerBase;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.util.Pair;
 import org.apache.hadoop.io.Text;
@@ -43,6 +45,7 @@ public class InputTableConfig implements Writable {
   private boolean useLocalIterators = false;
   private boolean useIsolatedScanners = false;
   private boolean offlineScan = false;
+  private SamplerConfiguration samplerConfig = null;
 
   public InputTableConfig() {}
 
@@ -241,6 +244,26 @@ public class InputTableConfig implements Writable {
     return useIsolatedScanners;
   }
 
+  /**
+   * Set the sampler configuration to use when reading from the data.
+   *
+   * @see ScannerBase#setSamplerConfiguration(SamplerConfiguration)
+   * @see InputFormatBase#setSamplerConfiguration(org.apache.hadoop.mapreduce.Job, SamplerConfiguration)
+   *
+   * @since 1.8.0
+   */
+  public void setSamplerConfiguration(SamplerConfiguration samplerConfiguration) {
+    this.samplerConfig = samplerConfiguration;
+  }
+
+  /**
+   *
+   * @since 1.8.0
+   */
+  public SamplerConfiguration getSamplerConfiguration() {
+    return samplerConfig;
+  }
+
   @Override
   public void write(DataOutput dataOutput) throws IOException {
     if (iterators != null) {
@@ -340,6 +363,8 @@ public class InputTableConfig implements Writable {
       return false;
     if (ranges != null ? !ranges.equals(that.ranges) : that.ranges != null)
       return false;
+    if (samplerConfig != null ? !samplerConfig.equals(that.samplerConfig) : that.samplerConfig != null)
+      return false;
     return true;
   }
 
@@ -352,6 +377,7 @@ public class InputTableConfig implements Writable {
     result = 31 * result + (useLocalIterators ? 1 : 0);
     result = 31 * result + (useIsolatedScanners ? 1 : 0);
     result = 31 * result + (offlineScan ? 1 : 0);
+    result = 31 * result + (samplerConfig == null ? 0 : samplerConfig.hashCode());
     return result;
   }
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
index f3e17c6..b4f9dca 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java
@@ -32,6 +32,7 @@ import org.apache.accumulo.core.client.ClientConfiguration;
 import org.apache.accumulo.core.client.Instance;
 import org.apache.accumulo.core.client.IteratorSetting;
 import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.mapreduce.impl.SplitUtils;
 import org.apache.accumulo.core.client.mapreduce.lib.impl.ConfiguratorBase.TokenSource;
 import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
@@ -41,6 +42,7 @@ import org.apache.accumulo.core.data.ByteSequence;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.PartialKey;
 import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.util.Base64;
 import org.apache.accumulo.core.util.DeprecationUtil;
@@ -64,6 +66,7 @@ public class RangeInputSplit extends InputSplit implements Writable {
   private Authorizations auths;
   private Set<Pair<Text,Text>> fetchedColumns;
   private List<IteratorSetting> iterators;
+  private SamplerConfiguration samplerConfig;
   private Level level;
 
   public RangeInputSplit() {
@@ -215,6 +218,10 @@ public class RangeInputSplit extends InputSplit implements Writable {
     if (in.readBoolean()) {
       level = Level.toLevel(in.readInt());
     }
+
+    if (in.readBoolean()) {
+      samplerConfig = new SamplerConfigurationImpl(in).toSamplerConfiguration();
+    }
   }
 
   @Override
@@ -301,6 +308,11 @@ public class RangeInputSplit extends InputSplit implements Writable {
     if (null != level) {
       out.writeInt(level.toInt());
     }
+
+    out.writeBoolean(null != samplerConfig);
+    if (null != samplerConfig) {
+      new SamplerConfigurationImpl(samplerConfig).write(out);
+    }
   }
 
   /**
@@ -510,6 +522,15 @@ public class RangeInputSplit extends InputSplit implements Writable {
     sb.append(" fetchColumns: ").append(fetchedColumns);
     sb.append(" iterators: ").append(iterators);
     sb.append(" logLevel: ").append(level);
+    sb.append(" samplerConfig: ").append(samplerConfig);
     return sb.toString();
   }
+
+  public void setSamplerConfiguration(SamplerConfiguration samplerConfiguration) {
+    this.samplerConfig = samplerConfiguration;
+  }
+
+  public SamplerConfiguration getSamplerConfiguration() {
+    return samplerConfig;
+  }
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mapreduce/impl/SplitUtils.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/impl/SplitUtils.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/impl/SplitUtils.java
index 68268fc..b81b064 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/impl/SplitUtils.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/impl/SplitUtils.java
@@ -50,6 +50,8 @@ public class SplitUtils {
     split.setFetchedColumns(tableConfig.getFetchedColumns());
     split.setIterators(tableConfig.getIterators());
     split.setLogLevel(logLevel);
+
+    split.setSamplerConfiguration(tableConfig.getSamplerConfiguration());
   }
 
   public static float getProgress(ByteSequence start, ByteSequence end, ByteSequence position) {

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/FileOutputConfigurator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/FileOutputConfigurator.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/FileOutputConfigurator.java
index 882c6d3..65248c5 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/FileOutputConfigurator.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/FileOutputConfigurator.java
@@ -17,11 +17,15 @@
 package org.apache.accumulo.core.client.mapreduce.lib.impl;
 
 import java.util.Arrays;
+import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Set;
 
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.conf.ConfigurationCopy;
 import org.apache.accumulo.core.conf.Property;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.hadoop.conf.Configuration;
 
 /**
@@ -97,8 +101,17 @@ public class FileOutputConfigurator extends ConfiguratorBase {
     String prefix = enumToConfKey(implementingClass, Opts.ACCUMULO_PROPERTIES) + ".";
     ConfigurationCopy acuConf = new ConfigurationCopy(AccumuloConfiguration.getDefaultConfiguration());
     for (Entry<String,String> entry : conf)
-      if (entry.getKey().startsWith(prefix))
-        acuConf.set(Property.getPropertyByKey(entry.getKey().substring(prefix.length())), entry.getValue());
+      if (entry.getKey().startsWith(prefix)) {
+        String propString = entry.getKey().substring(prefix.length());
+        Property prop = Property.getPropertyByKey(propString);
+        if (prop != null) {
+          acuConf.set(prop, entry.getValue());
+        } else if (Property.isValidTablePropertyKey(propString)) {
+          acuConf.set(propString, entry.getValue());
+        } else {
+          throw new IllegalArgumentException("Unknown accumulo file property " + propString);
+        }
+      }
     return acuConf;
   }
 
@@ -184,4 +197,16 @@ public class FileOutputConfigurator extends ConfiguratorBase {
     setAccumuloProperty(implementingClass, conf, Property.TABLE_FILE_REPLICATION, replication);
   }
 
+  /**
+   * @since 1.8.0
+   */
+  public static void setSampler(Class<?> implementingClass, Configuration conf, SamplerConfiguration samplerConfig) {
+    Map<String,String> props = new SamplerConfigurationImpl(samplerConfig).toTablePropertiesMap();
+
+    Set<Entry<String,String>> es = props.entrySet();
+    for (Entry<String,String> entry : es) {
+      conf.set(enumToConfKey(implementingClass, Opts.ACCUMULO_PROPERTIES) + "." + entry.getKey(), entry.getValue());
+    }
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/InputConfigurator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/InputConfigurator.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/InputConfigurator.java
index efda7d9..6ba34af 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/InputConfigurator.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/InputConfigurator.java
@@ -46,6 +46,7 @@ import org.apache.accumulo.core.client.IteratorSetting;
 import org.apache.accumulo.core.client.RowIterator;
 import org.apache.accumulo.core.client.Scanner;
 import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.impl.ClientContext;
 import org.apache.accumulo.core.client.impl.Credentials;
 import org.apache.accumulo.core.client.impl.DelegationTokenImpl;
@@ -62,6 +63,7 @@ import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
 import org.apache.accumulo.core.master.state.tables.TableState;
 import org.apache.accumulo.core.metadata.MetadataTable;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema;
+import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.security.TablePermission;
 import org.apache.accumulo.core.util.Base64;
@@ -74,6 +76,7 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.util.StringUtils;
 
+import com.google.common.base.Preconditions;
 import com.google.common.collect.Maps;
 
 /**
@@ -87,7 +90,7 @@ public class InputConfigurator extends ConfiguratorBase {
    * @since 1.6.0
    */
   public static enum ScanOpts {
-    TABLE_NAME, AUTHORIZATIONS, RANGES, COLUMNS, ITERATORS, TABLE_CONFIGS
+    TABLE_NAME, AUTHORIZATIONS, RANGES, COLUMNS, ITERATORS, TABLE_CONFIGS, SAMPLER_CONFIG
   }
 
   /**
@@ -805,6 +808,11 @@ public class InputConfigurator extends ConfiguratorBase {
       if (ranges != null)
         queryConfig.setRanges(ranges);
 
+      SamplerConfiguration samplerConfig = getSamplerConfiguration(implementingClass, conf);
+      if (samplerConfig != null) {
+        queryConfig.setSamplerConfiguration(samplerConfig);
+      }
+
       queryConfig.setAutoAdjustRanges(getAutoAdjustRanges(implementingClass, conf)).setUseIsolatedScanners(isIsolated(implementingClass, conf))
           .setUseLocalIterators(usesLocalIterators(implementingClass, conf)).setOfflineScan(isOfflineScan(implementingClass, conf));
       return Maps.immutableEntry(tableName, queryConfig);
@@ -901,4 +909,47 @@ public class InputConfigurator extends ConfiguratorBase {
     }
     return binnedRanges;
   }
+
+  private static String toBase64(Writable writable) {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    DataOutputStream dos = new DataOutputStream(baos);
+    try {
+      writable.write(dos);
+      dos.close();
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+
+    return Base64.encodeBase64String(baos.toByteArray());
+  }
+
+  private static <T extends Writable> T fromBase64(T writable, String enc) {
+    ByteArrayInputStream bais = new ByteArrayInputStream(Base64.decodeBase64(enc));
+    DataInputStream dis = new DataInputStream(bais);
+    try {
+      writable.readFields(dis);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+    return writable;
+  }
+
+  public static void setSamplerConfiguration(Class<?> implementingClass, Configuration conf, SamplerConfiguration samplerConfig) {
+    Preconditions.checkNotNull(samplerConfig);
+
+    String key = enumToConfKey(implementingClass, ScanOpts.SAMPLER_CONFIG);
+    String val = toBase64(new SamplerConfigurationImpl(samplerConfig));
+
+    conf.set(key, val);
+  }
+
+  public static SamplerConfiguration getSamplerConfiguration(Class<?> implementingClass, Configuration conf) {
+    String key = enumToConfKey(implementingClass, ScanOpts.SAMPLER_CONFIG);
+
+    String encodedSC = conf.get(key);
+    if (encodedSC == null)
+      return null;
+
+    return fromBase64(new SamplerConfigurationImpl(), encodedSC).toSamplerConfiguration();
+  }
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mock/MockScannerBase.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mock/MockScannerBase.java b/core/src/main/java/org/apache/accumulo/core/client/mock/MockScannerBase.java
index f81e9dd..45b65e9 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mock/MockScannerBase.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mock/MockScannerBase.java
@@ -24,6 +24,7 @@ import java.util.Iterator;
 import java.util.Map.Entry;
 
 import org.apache.accumulo.core.client.ScannerBase;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.impl.ScannerOptions;
 import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.data.ArrayByteSequence;
@@ -112,6 +113,21 @@ public class MockScannerBase extends ScannerOptions implements ScannerBase {
       allIters.add(iter);
       return new MultiIterator(allIters, false);
     }
+
+    @Override
+    public boolean isSamplingEnabled() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public SamplerConfiguration getSamplerConfiguration() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public IteratorEnvironment cloneWithSamplingEnabled() {
+      throw new UnsupportedOperationException();
+    }
   }
 
   public SortedKeyValueIterator<Key,Value> createFilter(SortedKeyValueIterator<Key,Value> inner) throws IOException {

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/client/mock/MockTableOperations.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/client/mock/MockTableOperations.java b/core/src/main/java/org/apache/accumulo/core/client/mock/MockTableOperations.java
index 0712f22..7ca5766 100644
--- a/core/src/main/java/org/apache/accumulo/core/client/mock/MockTableOperations.java
+++ b/core/src/main/java/org/apache/accumulo/core/client/mock/MockTableOperations.java
@@ -40,6 +40,7 @@ import org.apache.accumulo.core.client.admin.CompactionConfig;
 import org.apache.accumulo.core.client.admin.DiskUsage;
 import org.apache.accumulo.core.client.admin.FindMax;
 import org.apache.accumulo.core.client.admin.NewTableConfiguration;
+import org.apache.accumulo.core.client.admin.SamplerConfiguration;
 import org.apache.accumulo.core.client.admin.TimeType;
 import org.apache.accumulo.core.client.impl.TableOperationsHelper;
 import org.apache.accumulo.core.client.impl.Tables;
@@ -480,4 +481,20 @@ class MockTableOperations extends TableOperationsHelper {
     }
     return true;
   }
+
+  @Override
+  public void setSamplerConfiguration(String tableName, SamplerConfiguration samplerConfiguration) throws TableNotFoundException, AccumuloException,
+      AccumuloSecurityException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void clearSamplerConfiguration(String tableName) throws TableNotFoundException, AccumuloException, AccumuloSecurityException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public SamplerConfiguration getSamplerConfiguration(String tableName) throws TableNotFoundException, AccumuloException, AccumuloSecurityException {
+    throw new UnsupportedOperationException();
+  }
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/45f18c17/core/src/main/java/org/apache/accumulo/core/compaction/CompactionSettings.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/accumulo/core/compaction/CompactionSettings.java b/core/src/main/java/org/apache/accumulo/core/compaction/CompactionSettings.java
index 43f8c0f..1c5369e 100644
--- a/core/src/main/java/org/apache/accumulo/core/compaction/CompactionSettings.java
+++ b/core/src/main/java/org/apache/accumulo/core/compaction/CompactionSettings.java
@@ -21,6 +21,7 @@ import java.util.Map;
 
 public enum CompactionSettings {
 
+  SF_NO_SAMPLE(new NullType()),
   SF_GT_ESIZE_OPT(new SizeType()),
   SF_LT_ESIZE_OPT(new SizeType()),
   SF_NAME_RE_OPT(new PatternType()),


Mime
View raw message