Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id B1B07200AE2 for ; Fri, 27 May 2016 20:26:14 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id B0240160A10; Fri, 27 May 2016 18:26:14 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 6CB0F160A12 for ; Fri, 27 May 2016 20:26:12 +0200 (CEST) Received: (qmail 37136 invoked by uid 500); 27 May 2016 18:26:11 -0000 Mailing-List: contact commits-help@accumulo.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@accumulo.apache.org Delivered-To: mailing list commits@accumulo.apache.org Received: (qmail 37120 invoked by uid 99); 27 May 2016 18:26:11 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 27 May 2016 18:26:11 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 23BC7DFBD6; Fri, 27 May 2016 18:26:11 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: kturner@apache.org To: commits@accumulo.apache.org Date: Fri, 27 May 2016 18:26:12 -0000 Message-Id: In-Reply-To: <9f0b10acbd8d4ef4801f62ab3820436c@git.apache.org> References: <9f0b10acbd8d4ef4801f62ab3820436c@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [2/2] accumulo git commit: ACCUMULO-3913 moved sampling code meant for user in API pkg archived-at: Fri, 27 May 2016 18:26:14 -0000 ACCUMULO-3913 moved sampling code meant for user in API pkg Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/c8bd1ddd Tree: 
http://git-wip-us.apache.org/repos/asf/accumulo/tree/c8bd1ddd Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/c8bd1ddd Branch: refs/heads/1.8 Commit: c8bd1ddd096b3b625d39d1ecf341fd8a58e46b17 Parents: 1ac7b4a Author: Keith Turner Authored: Fri May 27 14:18:22 2016 -0400 Committer: Keith Turner Committed: Fri May 27 14:25:53 2016 -0400 ---------------------------------------------------------------------- .../core/client/ClientSideIteratorScanner.java | 2 +- .../accumulo/core/client/ScannerBase.java | 2 +- .../client/admin/NewTableConfiguration.java | 1 + .../core/client/admin/SamplerConfiguration.java | 90 -------------- .../core/client/admin/TableOperations.java | 1 + .../client/impl/BaseIteratorEnvironment.java | 2 +- .../core/client/impl/OfflineIterator.java | 2 +- .../core/client/impl/ScannerOptions.java | 2 +- .../core/client/impl/TableOperationsImpl.java | 2 +- .../core/client/impl/ThriftScanner.java | 2 +- .../core/client/mapred/AbstractInputFormat.java | 2 +- .../client/mapred/AccumuloFileOutputFormat.java | 2 +- .../core/client/mapred/InputFormatBase.java | 2 +- .../client/mapreduce/AbstractInputFormat.java | 2 +- .../mapreduce/AccumuloFileOutputFormat.java | 2 +- .../core/client/mapreduce/InputFormatBase.java | 2 +- .../core/client/mapreduce/InputTableConfig.java | 2 +- .../core/client/mapreduce/RangeInputSplit.java | 2 +- .../lib/impl/FileOutputConfigurator.java | 2 +- .../mapreduce/lib/impl/InputConfigurator.java | 2 +- .../core/client/mock/MockScannerBase.java | 2 +- .../core/client/mock/MockTableOperations.java | 2 +- .../core/client/sample/AbstractHashSampler.java | 119 ++++++++++++++++++ .../core/client/sample/RowColumnSampler.java | 118 ++++++++++++++++++ .../accumulo/core/client/sample/RowSampler.java | 49 ++++++++ .../accumulo/core/client/sample/Sampler.java | 56 +++++++++ .../client/sample/SamplerConfiguration.java | 94 ++++++++++++++ .../apache/accumulo/core/file/rfile/RFile.java | 4 +- .../core/file/rfile/RFileOperations.java | 2 +- 
.../core/iterators/IteratorEnvironment.java | 2 +- .../core/iterators/system/SampleIterator.java | 4 +- .../core/sample/AbstractHashSampler.java | 108 ---------------- .../accumulo/core/sample/RowColumnSampler.java | 124 ------------------- .../apache/accumulo/core/sample/RowSampler.java | 49 -------- .../apache/accumulo/core/sample/Sampler.java | 57 --------- .../core/sample/impl/DataoutputHasher.java | 108 ++++++++++++++++ .../sample/impl/SamplerConfigurationImpl.java | 2 +- .../core/sample/impl/SamplerFactory.java | 2 +- .../client/impl/TableOperationsHelperTest.java | 2 +- .../mapred/AccumuloFileOutputFormatTest.java | 4 +- .../mapreduce/AccumuloFileOutputFormatTest.java | 4 +- .../accumulo/core/file/rfile/RFileTest.java | 6 +- .../core/iterators/SortedMapIteratorTest.java | 4 +- docs/src/main/resources/examples/README.sample | 6 +- .../examples/simple/sample/SampleExample.java | 4 +- .../shard/CutoffIntersectingIterator.java | 4 +- .../accumulo/examples/simple/shard/Query.java | 2 +- .../environments/SimpleIteratorEnvironment.java | 2 +- .../monitor/servlets/trace/NullScanner.java | 2 +- .../apache/accumulo/tserver/InMemoryMap.java | 2 +- .../tserver/TabletIteratorEnvironment.java | 2 +- .../tserver/session/MultiScanSession.java | 2 +- .../accumulo/tserver/tablet/ScanDataSource.java | 2 +- .../accumulo/tserver/tablet/ScanOptions.java | 2 +- .../apache/accumulo/tserver/tablet/Tablet.java | 2 +- .../accumulo/tserver/InMemoryMapTest.java | 6 +- .../accumulo/shell/commands/ScanCommand.java | 2 +- .../java/org/apache/accumulo/test/SampleIT.java | 5 +- .../test/mapred/AccumuloFileOutputFormatIT.java | 4 +- .../test/mapred/AccumuloInputFormatIT.java | 4 +- .../mapreduce/AccumuloFileOutputFormatIT.java | 4 +- .../test/mapreduce/AccumuloInputFormatIT.java | 4 +- 62 files changed, 614 insertions(+), 495 deletions(-) ---------------------------------------------------------------------- 
http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/ClientSideIteratorScanner.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/ClientSideIteratorScanner.java b/core/src/main/java/org/apache/accumulo/core/client/ClientSideIteratorScanner.java index 6783148..9f9449a 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/ClientSideIteratorScanner.java +++ b/core/src/main/java/org/apache/accumulo/core/client/ClientSideIteratorScanner.java @@ -29,8 +29,8 @@ import java.util.TreeSet; import java.util.concurrent.TimeUnit; import org.apache.accumulo.core.Constants; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; import org.apache.accumulo.core.client.impl.ScannerOptions; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.data.ArrayByteSequence; import org.apache.accumulo.core.data.ByteSequence; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/ScannerBase.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/ScannerBase.java b/core/src/main/java/org/apache/accumulo/core/client/ScannerBase.java index 2f66445..354f6f4 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/ScannerBase.java +++ b/core/src/main/java/org/apache/accumulo/core/client/ScannerBase.java @@ -21,7 +21,7 @@ import java.util.Map.Entry; import java.util.concurrent.TimeUnit; import org.apache.accumulo.core.client.IteratorSetting.Column; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Value; import 
org.apache.accumulo.core.security.Authorizations; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/admin/NewTableConfiguration.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/admin/NewTableConfiguration.java b/core/src/main/java/org/apache/accumulo/core/client/admin/NewTableConfiguration.java index d2d400e..994b653 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/admin/NewTableConfiguration.java +++ b/core/src/main/java/org/apache/accumulo/core/client/admin/NewTableConfiguration.java @@ -23,6 +23,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.Map; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.iterators.IteratorUtil; import org.apache.accumulo.core.iterators.user.VersioningIterator; import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/admin/SamplerConfiguration.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/admin/SamplerConfiguration.java b/core/src/main/java/org/apache/accumulo/core/client/admin/SamplerConfiguration.java deleted file mode 100644 index a2bd5cc..0000000 --- a/core/src/main/java/org/apache/accumulo/core/client/admin/SamplerConfiguration.java +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.accumulo.core.client.admin; - -import static com.google.common.base.Preconditions.checkArgument; -import static java.util.Objects.requireNonNull; - -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.Map.Entry; - -/** - * This class encapsultes configuration and options needed to setup and use sampling. - * - * @since 1.8.0 - */ - -public class SamplerConfiguration { - - private String className; - private Map options = new HashMap<>(); - - public SamplerConfiguration(String samplerClassName) { - requireNonNull(samplerClassName); - this.className = samplerClassName; - } - - public SamplerConfiguration setOptions(Map options) { - requireNonNull(options); - this.options = new HashMap<>(options.size()); - - for (Entry entry : options.entrySet()) { - addOption(entry.getKey(), entry.getValue()); - } - - return this; - } - - public SamplerConfiguration addOption(String option, String value) { - checkArgument(option != null, "option is null"); - checkArgument(value != null, "value is null"); - this.options.put(option, value); - return this; - } - - public Map getOptions() { - return Collections.unmodifiableMap(options); - } - - public String getSamplerClassName() { - return className; - } - - @Override - public boolean equals(Object o) { - if (o instanceof SamplerConfiguration) { - SamplerConfiguration osc = (SamplerConfiguration) o; - - return className.equals(osc.className) && options.equals(osc.options); - } - - return false; - } - - @Override - public int hashCode() { - return 
className.hashCode() + 31 * options.hashCode(); - } - - @Override - public String toString() { - return className + " " + options; - } -} http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/admin/TableOperations.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/admin/TableOperations.java b/core/src/main/java/org/apache/accumulo/core/client/admin/TableOperations.java index 59ac4ef..f292902 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/admin/TableOperations.java +++ b/core/src/main/java/org/apache/accumulo/core/client/admin/TableOperations.java @@ -31,6 +31,7 @@ import org.apache.accumulo.core.client.IteratorSetting; import org.apache.accumulo.core.client.TableExistsException; import org.apache.accumulo.core.client.TableNotFoundException; import org.apache.accumulo.core.client.TableOfflineException; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope; import org.apache.accumulo.core.security.Authorizations; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/impl/BaseIteratorEnvironment.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/impl/BaseIteratorEnvironment.java b/core/src/main/java/org/apache/accumulo/core/client/impl/BaseIteratorEnvironment.java index dc138ce..7b1c441 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/impl/BaseIteratorEnvironment.java +++ b/core/src/main/java/org/apache/accumulo/core/client/impl/BaseIteratorEnvironment.java @@ -19,7 +19,7 @@ package org.apache.accumulo.core.client.impl; import java.io.IOException; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; 
+import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Value; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/impl/OfflineIterator.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/impl/OfflineIterator.java b/core/src/main/java/org/apache/accumulo/core/client/impl/OfflineIterator.java index 87abd0b..c5017c3 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/impl/OfflineIterator.java +++ b/core/src/main/java/org/apache/accumulo/core/client/impl/OfflineIterator.java @@ -35,7 +35,7 @@ import org.apache.accumulo.core.client.RowIterator; import org.apache.accumulo.core.client.SampleNotPresentException; import org.apache.accumulo.core.client.Scanner; import org.apache.accumulo.core.client.TableNotFoundException; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.conf.ConfigurationCopy; import org.apache.accumulo.core.conf.Property; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerOptions.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerOptions.java b/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerOptions.java index 54fed15..c3a1a63 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerOptions.java +++ b/core/src/main/java/org/apache/accumulo/core/client/impl/ScannerOptions.java @@ -33,7 +33,7 @@ import java.util.concurrent.TimeUnit; import 
org.apache.accumulo.core.client.IteratorSetting; import org.apache.accumulo.core.client.ScannerBase; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.data.Column; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Value; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/impl/TableOperationsImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/impl/TableOperationsImpl.java b/core/src/main/java/org/apache/accumulo/core/client/impl/TableOperationsImpl.java index 5a685d8..1940a26 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/impl/TableOperationsImpl.java +++ b/core/src/main/java/org/apache/accumulo/core/client/impl/TableOperationsImpl.java @@ -69,12 +69,12 @@ import org.apache.accumulo.core.client.admin.DiskUsage; import org.apache.accumulo.core.client.admin.FindMax; import org.apache.accumulo.core.client.admin.Locations; import org.apache.accumulo.core.client.admin.NewTableConfiguration; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; import org.apache.accumulo.core.client.admin.TableOperations; import org.apache.accumulo.core.client.admin.TimeType; import org.apache.accumulo.core.client.impl.TabletLocator.TabletLocation; import org.apache.accumulo.core.client.impl.thrift.ClientService; import org.apache.accumulo.core.client.impl.thrift.ClientService.Client; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.client.impl.thrift.TDiskUsage; import org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException; import org.apache.accumulo.core.client.impl.thrift.ThriftTableOperationException; 
http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/impl/ThriftScanner.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/impl/ThriftScanner.java b/core/src/main/java/org/apache/accumulo/core/client/impl/ThriftScanner.java index ed8e95a..9a7f782 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/impl/ThriftScanner.java +++ b/core/src/main/java/org/apache/accumulo/core/client/impl/ThriftScanner.java @@ -36,9 +36,9 @@ import org.apache.accumulo.core.client.SampleNotPresentException; import org.apache.accumulo.core.client.TableDeletedException; import org.apache.accumulo.core.client.TableNotFoundException; import org.apache.accumulo.core.client.TableOfflineException; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; import org.apache.accumulo.core.client.impl.TabletLocator.TabletLocation; import org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.data.Column; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.KeyValue; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java index 794500e..dd92ae3 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java +++ b/core/src/main/java/org/apache/accumulo/core/client/mapred/AbstractInputFormat.java @@ -43,7 +43,6 @@ import org.apache.accumulo.core.client.TableDeletedException; import 
org.apache.accumulo.core.client.TableNotFoundException; import org.apache.accumulo.core.client.TableOfflineException; import org.apache.accumulo.core.client.admin.DelegationTokenConfig; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; import org.apache.accumulo.core.client.admin.SecurityOperations; import org.apache.accumulo.core.client.impl.AuthenticationTokenIdentifier; import org.apache.accumulo.core.client.impl.ClientContext; @@ -58,6 +57,7 @@ import org.apache.accumulo.core.client.mapreduce.InputTableConfig; import org.apache.accumulo.core.client.mapreduce.impl.SplitUtils; import org.apache.accumulo.core.client.mapreduce.lib.impl.ConfiguratorBase; import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.client.security.tokens.AuthenticationToken; import org.apache.accumulo.core.client.security.tokens.DelegationToken; import org.apache.accumulo.core.client.security.tokens.KerberosToken; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormat.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormat.java index 45796cb..1e90e27 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormat.java +++ b/core/src/main/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormat.java @@ -19,9 +19,9 @@ package org.apache.accumulo.core.client.mapred; import java.io.IOException; import java.util.Arrays; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; import org.apache.accumulo.core.client.mapreduce.lib.impl.ConfiguratorBase; import 
org.apache.accumulo.core.client.mapreduce.lib.impl.FileOutputConfigurator; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.data.ArrayByteSequence; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java b/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java index ed8accd..0cf57d2 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java +++ b/core/src/main/java/org/apache/accumulo/core/client/mapred/InputFormatBase.java @@ -27,9 +27,9 @@ import org.apache.accumulo.core.client.IteratorSetting; import org.apache.accumulo.core.client.Scanner; import org.apache.accumulo.core.client.ScannerBase; import org.apache.accumulo.core.client.TableNotFoundException; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; import org.apache.accumulo.core.client.impl.TabletLocator; import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.Value; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java index cf168cd..e7cb8ec 100644 --- 
a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java +++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AbstractInputFormat.java @@ -43,7 +43,6 @@ import org.apache.accumulo.core.client.TableDeletedException; import org.apache.accumulo.core.client.TableNotFoundException; import org.apache.accumulo.core.client.TableOfflineException; import org.apache.accumulo.core.client.admin.DelegationTokenConfig; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; import org.apache.accumulo.core.client.admin.SecurityOperations; import org.apache.accumulo.core.client.impl.AuthenticationTokenIdentifier; import org.apache.accumulo.core.client.impl.ClientContext; @@ -57,6 +56,7 @@ import org.apache.accumulo.core.client.mapreduce.impl.BatchInputSplit; import org.apache.accumulo.core.client.mapreduce.impl.SplitUtils; import org.apache.accumulo.core.client.mapreduce.lib.impl.ConfiguratorBase; import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.client.security.tokens.AuthenticationToken; import org.apache.accumulo.core.client.security.tokens.DelegationToken; import org.apache.accumulo.core.client.security.tokens.KerberosToken; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormat.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormat.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormat.java index bf0474e..b337f56 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormat.java +++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormat.java @@ -19,8 +19,8 @@ package 
org.apache.accumulo.core.client.mapreduce; import java.io.IOException; import java.util.Arrays; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; import org.apache.accumulo.core.client.mapreduce.lib.impl.FileOutputConfigurator; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.data.ArrayByteSequence; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java index 2f77928..324d5c7 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java +++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputFormatBase.java @@ -27,9 +27,9 @@ import org.apache.accumulo.core.client.IteratorSetting; import org.apache.accumulo.core.client.Scanner; import org.apache.accumulo.core.client.ScannerBase; import org.apache.accumulo.core.client.TableNotFoundException; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; import org.apache.accumulo.core.client.impl.TabletLocator; import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.Value; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputTableConfig.java ---------------------------------------------------------------------- diff --git 
a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputTableConfig.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputTableConfig.java index 305e7e2..df0aa65 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputTableConfig.java +++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/InputTableConfig.java @@ -26,7 +26,7 @@ import java.util.List; import org.apache.accumulo.core.client.IteratorSetting; import org.apache.accumulo.core.client.ScannerBase; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.util.Pair; import org.apache.hadoop.io.Text; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java index b4f9dca..1786eb3 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java +++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/RangeInputSplit.java @@ -32,9 +32,9 @@ import org.apache.accumulo.core.client.ClientConfiguration; import org.apache.accumulo.core.client.Instance; import org.apache.accumulo.core.client.IteratorSetting; import org.apache.accumulo.core.client.ZooKeeperInstance; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; import org.apache.accumulo.core.client.mapreduce.impl.SplitUtils; import org.apache.accumulo.core.client.mapreduce.lib.impl.ConfiguratorBase.TokenSource; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator; 
import org.apache.accumulo.core.client.security.tokens.AuthenticationToken; import org.apache.accumulo.core.client.security.tokens.AuthenticationToken.AuthenticationTokenSerializer; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/FileOutputConfigurator.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/FileOutputConfigurator.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/FileOutputConfigurator.java index c629c28..049395f 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/FileOutputConfigurator.java +++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/FileOutputConfigurator.java @@ -21,7 +21,7 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Set; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.conf.ConfigurationCopy; import org.apache.accumulo.core.conf.Property; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/InputConfigurator.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/InputConfigurator.java b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/InputConfigurator.java index f96c8f4..448b45e 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/InputConfigurator.java +++ b/core/src/main/java/org/apache/accumulo/core/client/mapreduce/lib/impl/InputConfigurator.java @@ -47,13 +47,13 @@ import org.apache.accumulo.core.client.IteratorSetting; import 
org.apache.accumulo.core.client.RowIterator; import org.apache.accumulo.core.client.Scanner; import org.apache.accumulo.core.client.TableNotFoundException; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; import org.apache.accumulo.core.client.impl.ClientContext; import org.apache.accumulo.core.client.impl.Credentials; import org.apache.accumulo.core.client.impl.DelegationTokenImpl; import org.apache.accumulo.core.client.impl.Tables; import org.apache.accumulo.core.client.impl.TabletLocator; import org.apache.accumulo.core.client.mapreduce.InputTableConfig; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.client.security.tokens.AuthenticationToken; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.PartialKey; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/mock/MockScannerBase.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/mock/MockScannerBase.java b/core/src/main/java/org/apache/accumulo/core/client/mock/MockScannerBase.java index 9302fc9..a79255c 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/mock/MockScannerBase.java +++ b/core/src/main/java/org/apache/accumulo/core/client/mock/MockScannerBase.java @@ -24,8 +24,8 @@ import java.util.Iterator; import java.util.Map.Entry; import org.apache.accumulo.core.client.ScannerBase; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; import org.apache.accumulo.core.client.impl.ScannerOptions; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.data.ArrayByteSequence; import org.apache.accumulo.core.data.ByteSequence; 
http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/mock/MockTableOperations.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/mock/MockTableOperations.java b/core/src/main/java/org/apache/accumulo/core/client/mock/MockTableOperations.java index fd1128c..2072e00 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/mock/MockTableOperations.java +++ b/core/src/main/java/org/apache/accumulo/core/client/mock/MockTableOperations.java @@ -41,10 +41,10 @@ import org.apache.accumulo.core.client.admin.DiskUsage; import org.apache.accumulo.core.client.admin.FindMax; import org.apache.accumulo.core.client.admin.Locations; import org.apache.accumulo.core.client.admin.NewTableConfiguration; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; import org.apache.accumulo.core.client.admin.TimeType; import org.apache.accumulo.core.client.impl.TableOperationsHelper; import org.apache.accumulo.core.client.impl.Tables; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Mutation; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/sample/AbstractHashSampler.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/sample/AbstractHashSampler.java b/core/src/main/java/org/apache/accumulo/core/client/sample/AbstractHashSampler.java new file mode 100644 index 0000000..5c8176a --- /dev/null +++ b/core/src/main/java/org/apache/accumulo/core/client/sample/AbstractHashSampler.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.accumulo.core.client.sample; + +import static com.google.common.base.Preconditions.checkArgument; +import static java.util.Objects.requireNonNull; + +import java.io.DataOutput; +import java.io.IOException; +import java.util.Set; + +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.sample.impl.DataoutputHasher; + +import com.google.common.collect.ImmutableSet; +import com.google.common.hash.HashFunction; +import com.google.common.hash.Hasher; +import com.google.common.hash.Hashing; + +/** + * A base class that can be used to create Samplers based on hashing. This class offers consistent options for configuring the hash function. The subclass + * decides which parts of the key to hash. + * + *

+ * This class supports two options passed into {@link #init(SamplerConfiguration)}. One option is {@code hasher} which specifies a hashing algorithm. Valid + * values for this option are {@code md5}, {@code sha1}, and {@code murmur3_32}. If you are not sure, then choose {@code murmur3_32}. + * + *

+ * The second option is {@code modulus} which can have any positive integer as a value. + * + *

+ * Any data where {@code hash(data) % modulus == 0} will be selected for the sample. + * + * @since 1.8.0 + */ + +public abstract class AbstractHashSampler implements Sampler { + + private HashFunction hashFunction; + private int modulus; + + private static final Set VALID_OPTIONS = ImmutableSet.of("hasher", "modulus"); + + /** + * Subclasses with options should override this method and return true if the option is valid for the subclass or if {@code super.isValidOption(opt)} returns + * true. + */ + + protected boolean isValidOption(String option) { + return VALID_OPTIONS.contains(option); + } + + /** + * Subclasses with options should override this method and call {@code super.init(config)}. + */ + + @Override + public void init(SamplerConfiguration config) { + String hasherOpt = config.getOptions().get("hasher"); + String modulusOpt = config.getOptions().get("modulus"); + + requireNonNull(hasherOpt, "Hasher not specified"); + requireNonNull(modulusOpt, "Modulus not specified"); + + for (String option : config.getOptions().keySet()) { + checkArgument(isValidOption(option), "Unknown option : %s", option); + } + + switch (hasherOpt) { + case "murmur3_32": + hashFunction = Hashing.murmur3_32(); + break; + case "md5": + hashFunction = Hashing.md5(); + break; + case "sha1": + hashFunction = Hashing.sha1(); + break; + default: + throw new IllegalArgumentException("Uknown hahser " + hasherOpt); + } + + modulus = Integer.parseInt(modulusOpt); + } + + /** + * Subclass must override this method and hash some portion of the key. + * + * @param hasher + * Data written to this will be used to compute the hash for the key. 
+ */ + protected abstract void hash(DataOutput hasher, Key k) throws IOException; + + @Override + public boolean accept(Key k) { + Hasher hasher = hashFunction.newHasher(); + try { + hash(new DataoutputHasher(hasher), k); + } catch (IOException e) { + throw new RuntimeException(e); + } + return hasher.hash().asInt() % modulus == 0; + } +} http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/sample/RowColumnSampler.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/sample/RowColumnSampler.java b/core/src/main/java/org/apache/accumulo/core/client/sample/RowColumnSampler.java new file mode 100644 index 0000000..a0482d9 --- /dev/null +++ b/core/src/main/java/org/apache/accumulo/core/client/sample/RowColumnSampler.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.accumulo.core.client.sample; + +import java.io.DataOutput; +import java.io.IOException; +import java.util.Set; + +import org.apache.accumulo.core.data.ByteSequence; +import org.apache.accumulo.core.data.Key; + +import com.google.common.collect.ImmutableSet; + +/** + * This sampler can hash any subset of a Key's fields. The fields that hashed for the sample are determined by the configuration options passed in + * {@link #init(SamplerConfiguration)}. The following key values are valid options. + * + *

    + *
  • row=true|false + *
  • family=true|false + *
  • qualifier=true|false + *
  • visibility=true|false + *
+ * + *

+ * If not specified in the options, fields default to false. + * + *

+ * To determine what options are valid for hashing see {@link AbstractHashSampler} + * + *

+ * To configure Accumulo to generate sample data on one thousandth of the column qualifiers, the following SamplerConfiguration could be created and used to + * configure a table. + * + *

+ * {@code new SamplerConfiguration(RowColumnSampler.class.getName()).setOptions(ImmutableMap.of("hasher","murmur3_32","modulus","1009","qualifier","true"))} + * + *

+ * With this configuration, if a column qualifier is selected then all key values contains that column qualifier will end up in the sample data. + * + * @since 1.8.0 + */ + +public class RowColumnSampler extends AbstractHashSampler { + + private boolean row = true; + private boolean family = true; + private boolean qualifier = true; + private boolean visibility = true; + + private static final Set VALID_OPTIONS = ImmutableSet.of("row", "family", "qualifier", "visibility"); + + private boolean hashField(SamplerConfiguration config, String field) { + String optValue = config.getOptions().get(field); + if (optValue != null) { + return Boolean.parseBoolean(optValue); + } + + return false; + } + + @Override + protected boolean isValidOption(String option) { + return super.isValidOption(option) || VALID_OPTIONS.contains(option); + } + + @Override + public void init(SamplerConfiguration config) { + super.init(config); + + row = hashField(config, "row"); + family = hashField(config, "family"); + qualifier = hashField(config, "qualifier"); + visibility = hashField(config, "visibility"); + + if (!row && !family && !qualifier && !visibility) { + throw new IllegalStateException("Must hash at least one key field"); + } + } + + private void putByteSquence(ByteSequence data, DataOutput hasher) throws IOException { + hasher.write(data.getBackingArray(), data.offset(), data.length()); + } + + @Override + protected void hash(DataOutput hasher, Key k) throws IOException { + if (row) { + putByteSquence(k.getRowData(), hasher); + } + + if (family) { + putByteSquence(k.getColumnFamilyData(), hasher); + } + + if (qualifier) { + putByteSquence(k.getColumnQualifierData(), hasher); + } + + if (visibility) { + putByteSquence(k.getColumnVisibilityData(), hasher); + } + } +} http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/sample/RowSampler.java ---------------------------------------------------------------------- diff --git 
a/core/src/main/java/org/apache/accumulo/core/client/sample/RowSampler.java b/core/src/main/java/org/apache/accumulo/core/client/sample/RowSampler.java new file mode 100644 index 0000000..107ba49 --- /dev/null +++ b/core/src/main/java/org/apache/accumulo/core/client/sample/RowSampler.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.accumulo.core.client.sample; + +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.accumulo.core.data.ByteSequence; +import org.apache.accumulo.core.data.Key; + +/** + * Builds a sample based on entire rows. If a row is selected for the sample, then all of its columns will be included. + * + *

+ * To determine what options are valid for hashing see {@link AbstractHashSampler}. This class offers no additional options; it always hashes on the row. + * + *

+ * To configure Accumulo to generate sample data on one thousandth of the rows, the following SamplerConfiguration could be created and used to configure a + * table. + * + *

+ * {@code new SamplerConfiguration(RowSampler.class.getName()).setOptions(ImmutableMap.of("hasher","murmur3_32","modulus","1009"))} + * + * @since 1.8.0 + */ + +public class RowSampler extends AbstractHashSampler { + + @Override + protected void hash(DataOutput hasher, Key k) throws IOException { + ByteSequence row = k.getRowData(); + hasher.write(row.getBackingArray(), row.offset(), row.length()); + } +} http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/sample/Sampler.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/sample/Sampler.java b/core/src/main/java/org/apache/accumulo/core/client/sample/Sampler.java new file mode 100644 index 0000000..03bd9d7 --- /dev/null +++ b/core/src/main/java/org/apache/accumulo/core/client/sample/Sampler.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.accumulo.core.client.sample; + +import org.apache.accumulo.core.data.Key; + +/** + * A function that decides which key values are stored in a tables sample. 
As Accumulo compacts data and creates rfiles it uses a Sampler to decide what to + * store in the rfile's sample section. The class name of the Sampler and the Sampler's configuration are stored in each rfile. A scan of a table's sample will + * only succeed if all rfiles were created with the same sampler and sampler configuration. + * + *

+ * Since the decisions that a Sampler makes are persisted, the behavior of a Sampler for a given configuration should always be the same. One way to offer a new + * behavior is to offer new options, while still supporting old behavior with a Sampler's existing options. + * + *

+ * Ideally a sampler that selects a Key k1 would also select updates for k1. For example if a Sampler selects : + * {@code row='000989' family='name' qualifier='last' visibility='ADMIN' time=9 value='Doe'}, it would be nice if it also selected : + * {@code row='000989' family='name' qualifier='last' visibility='ADMIN' time=20 value='Dough'}. Using hash and modulo on the key fields is a good way to + * accomplish this and {@link AbstractHashSampler} provides a good basis for implementation. + * + * @since 1.8.0 + */ + +public interface Sampler { + + /** + * An implementation of Sampler must have a noarg constructor. After construction this method is called once to initialize a sampler before it is used. + * + * @param config + * Configuration options for a sampler. + */ + void init(SamplerConfiguration config); + + /** + * @param k + * A key that was written to a rfile. + * @return True if the key (and its associtated value) should be stored in the rfile's sample. Return false if it should not be included. + */ + boolean accept(Key k); +} http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/client/sample/SamplerConfiguration.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/sample/SamplerConfiguration.java b/core/src/main/java/org/apache/accumulo/core/client/sample/SamplerConfiguration.java new file mode 100644 index 0000000..e774ec5 --- /dev/null +++ b/core/src/main/java/org/apache/accumulo/core/client/sample/SamplerConfiguration.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.accumulo.core.client.sample; + +import static com.google.common.base.Preconditions.checkArgument; +import static java.util.Objects.requireNonNull; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; + +/** + * This class encapsultes configuration and options needed to setup and use sampling. + * + * @since 1.8.0 + */ + +public class SamplerConfiguration { + + private String className; + private Map options = new HashMap<>(); + + public SamplerConfiguration(Class samplerClass) { + this(samplerClass.getName()); + } + + public SamplerConfiguration(String samplerClassName) { + requireNonNull(samplerClassName); + this.className = samplerClassName; + } + + public SamplerConfiguration setOptions(Map options) { + requireNonNull(options); + this.options = new HashMap<>(options.size()); + + for (Entry entry : options.entrySet()) { + addOption(entry.getKey(), entry.getValue()); + } + + return this; + } + + public SamplerConfiguration addOption(String option, String value) { + checkArgument(option != null, "option is null"); + checkArgument(value != null, "value is null"); + this.options.put(option, value); + return this; + } + + public Map getOptions() { + return Collections.unmodifiableMap(options); + } + + public String getSamplerClassName() { + return className; + } + + @Override + public boolean equals(Object 
o) { + if (o instanceof SamplerConfiguration) { + SamplerConfiguration osc = (SamplerConfiguration) o; + + return className.equals(osc.className) && options.equals(osc.options); + } + + return false; + } + + @Override + public int hashCode() { + return className.hashCode() + 31 * options.hashCode(); + } + + @Override + public String toString() { + return className + " " + options; + } +} http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java index 6032c7f..981a2e6 100644 --- a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java +++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java @@ -39,7 +39,8 @@ import java.util.TreeMap; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.accumulo.core.client.SampleNotPresentException; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; +import org.apache.accumulo.core.client.sample.Sampler; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.data.ArrayByteSequence; @@ -66,7 +67,6 @@ import org.apache.accumulo.core.iterators.system.HeapIterator; import org.apache.accumulo.core.iterators.system.InterruptibleIterator; import org.apache.accumulo.core.iterators.system.LocalityGroupIterator; import org.apache.accumulo.core.iterators.system.LocalityGroupIterator.LocalityGroup; -import org.apache.accumulo.core.sample.Sampler; import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl; import org.apache.accumulo.core.util.MutableByteSequence; import org.apache.commons.lang.mutable.MutableLong; 
http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/file/rfile/RFileOperations.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFileOperations.java b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFileOperations.java index cc6aaa2..5d15973 100644 --- a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFileOperations.java +++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFileOperations.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.Collection; import java.util.Collections; +import org.apache.accumulo.core.client.sample.Sampler; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.data.ByteSequence; @@ -30,7 +31,6 @@ import org.apache.accumulo.core.file.FileSKVIterator; import org.apache.accumulo.core.file.FileSKVWriter; import org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile; import org.apache.accumulo.core.file.streams.RateLimitedOutputStream; -import org.apache.accumulo.core.sample.Sampler; import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl; import org.apache.accumulo.core.sample.impl.SamplerFactory; import org.apache.hadoop.conf.Configuration; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/iterators/IteratorEnvironment.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/iterators/IteratorEnvironment.java b/core/src/main/java/org/apache/accumulo/core/iterators/IteratorEnvironment.java index 5c265e2..7ef27e5 100644 --- a/core/src/main/java/org/apache/accumulo/core/iterators/IteratorEnvironment.java +++ b/core/src/main/java/org/apache/accumulo/core/iterators/IteratorEnvironment.java @@ -19,7 +19,7 @@ package 
org.apache.accumulo.core.iterators; import java.io.IOException; import org.apache.accumulo.core.client.SampleNotPresentException; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Value; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/iterators/system/SampleIterator.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/iterators/system/SampleIterator.java b/core/src/main/java/org/apache/accumulo/core/iterators/system/SampleIterator.java index aedcdba..8b488c8 100644 --- a/core/src/main/java/org/apache/accumulo/core/iterators/system/SampleIterator.java +++ b/core/src/main/java/org/apache/accumulo/core/iterators/system/SampleIterator.java @@ -17,13 +17,13 @@ package org.apache.accumulo.core.iterators.system; +import org.apache.accumulo.core.client.sample.RowSampler; +import org.apache.accumulo.core.client.sample.Sampler; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.iterators.Filter; import org.apache.accumulo.core.iterators.IteratorEnvironment; import org.apache.accumulo.core.iterators.SortedKeyValueIterator; -import org.apache.accumulo.core.sample.RowSampler; -import org.apache.accumulo.core.sample.Sampler; public class SampleIterator extends Filter { http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/sample/AbstractHashSampler.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/sample/AbstractHashSampler.java b/core/src/main/java/org/apache/accumulo/core/sample/AbstractHashSampler.java deleted file mode 
100644 index cbb0f31..0000000 --- a/core/src/main/java/org/apache/accumulo/core/sample/AbstractHashSampler.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.accumulo.core.sample; - -import static com.google.common.base.Preconditions.checkArgument; -import static java.util.Objects.requireNonNull; - -import java.util.Set; - -import org.apache.accumulo.core.client.admin.SamplerConfiguration; -import org.apache.accumulo.core.data.Key; - -import com.google.common.collect.ImmutableSet; -import com.google.common.hash.HashCode; -import com.google.common.hash.HashFunction; -import com.google.common.hash.Hashing; - -/** - * A base class that can be used to create Samplers based on hashing. This class offers consistent options for configuring the hash function. The subclass - * decides which parts of the key to hash. - * - *

- * This class support two options passed into {@link #init(SamplerConfiguration)}. One option is {@code hasher} which specifies a hashing algorithm. Valid - * values for this option are {@code md5}, {@code sha1}, and {@code murmur3_32}. If you are not sure, then choose {@code murmur3_32}. - * - *

- * The second option is {@code modulus} which can have any positive integer as a value. - * - *

- * Any data where {@code hash(data) % modulus == 0} will be selected for the sample. - * - * @since 1.8.0 - */ - -public abstract class AbstractHashSampler implements Sampler { - - private HashFunction hashFunction; - private int modulus; - - private static final Set VALID_OPTIONS = ImmutableSet.of("hasher", "modulus"); - - /** - * Subclasses with options should override this method and return true if the option is valid for the subclass or if {@code super.isValidOption(opt)} returns - * true. - */ - - protected boolean isValidOption(String option) { - return VALID_OPTIONS.contains(option); - } - - /** - * Subclasses with options should override this method and call {@code super.init(config)}. - */ - - @Override - public void init(SamplerConfiguration config) { - String hasherOpt = config.getOptions().get("hasher"); - String modulusOpt = config.getOptions().get("modulus"); - - requireNonNull(hasherOpt, "Hasher not specified"); - requireNonNull(modulusOpt, "Modulus not specified"); - - for (String option : config.getOptions().keySet()) { - checkArgument(isValidOption(option), "Unknown option : %s", option); - } - - switch (hasherOpt) { - case "murmur3_32": - hashFunction = Hashing.murmur3_32(); - break; - case "md5": - hashFunction = Hashing.md5(); - break; - case "sha1": - hashFunction = Hashing.sha1(); - break; - default: - throw new IllegalArgumentException("Uknown hahser " + hasherOpt); - } - - modulus = Integer.parseInt(modulusOpt); - } - - /** - * Subclass must override this method and hash some portion of the key. 
- */ - protected abstract HashCode hash(HashFunction hashFunction, Key k); - - @Override - public boolean accept(Key k) { - return hash(hashFunction, k).asInt() % modulus == 0; - } -} http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/sample/RowColumnSampler.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/sample/RowColumnSampler.java b/core/src/main/java/org/apache/accumulo/core/sample/RowColumnSampler.java deleted file mode 100644 index c3464ab..0000000 --- a/core/src/main/java/org/apache/accumulo/core/sample/RowColumnSampler.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.accumulo.core.sample; - -import java.util.Set; - -import org.apache.accumulo.core.client.admin.SamplerConfiguration; -import org.apache.accumulo.core.data.ByteSequence; -import org.apache.accumulo.core.data.Key; - -import com.google.common.collect.ImmutableSet; -import com.google.common.hash.HashCode; -import com.google.common.hash.HashFunction; -import com.google.common.hash.Hasher; - -/** - * This sampler can hash any subset of a Key's fields. 
The fields that are hashed for the sample are determined by the configuration options passed in - * {@link #init(SamplerConfiguration)}. The following key values are valid options. - * - * <UL> - * <li>row=true|false - * <li>family=true|false - * <li>qualifier=true|false - * <li>visibility=true|false - * </UL>
- * - *

- * If not specified in the options, fields default to false. - * - *

- * To determine what options are valid for hashing see {@link AbstractHashSampler} - * - *

- * To configure Accumulo to generate sample data on one thousandth of the column qualifiers, the following SamplerConfiguration could be created and used to - * configure a table. - * - *

- * {@code new SamplerConfiguration(RowColumnSampler.class.getName()).setOptions(ImmutableMap.of("hasher","murmur3_32","modulus","1009","qualifier","true"))} - * - *

- * With this configuration, if a column qualifier is selected then all key values containing that column qualifier will end up in the sample data. - * - * @since 1.8.0 - */ - -public class RowColumnSampler extends AbstractHashSampler { - - private boolean row = true; - private boolean family = true; - private boolean qualifier = true; - private boolean visibility = true; - - private static final Set VALID_OPTIONS = ImmutableSet.of("row", "family", "qualifier", "visibility"); - - private boolean hashField(SamplerConfiguration config, String field) { - String optValue = config.getOptions().get(field); - if (optValue != null) { - return Boolean.parseBoolean(optValue); - } - - return false; - } - - @Override - protected boolean isValidOption(String option) { - return super.isValidOption(option) || VALID_OPTIONS.contains(option); - } - - @Override - public void init(SamplerConfiguration config) { - super.init(config); - - row = hashField(config, "row"); - family = hashField(config, "family"); - qualifier = hashField(config, "qualifier"); - visibility = hashField(config, "visibility"); - - if (!row && !family && !qualifier && !visibility) { - throw new IllegalStateException("Must hash at least one key field"); - } - } - - private void putByteSquence(ByteSequence data, Hasher hasher) { - hasher.putBytes(data.getBackingArray(), data.offset(), data.length()); - } - - @Override - protected HashCode hash(HashFunction hashFunction, Key k) { - Hasher hasher = hashFunction.newHasher(); - - if (row) { - putByteSquence(k.getRowData(), hasher); - } - - if (family) { - putByteSquence(k.getColumnFamilyData(), hasher); - } - - if (qualifier) { - putByteSquence(k.getColumnQualifierData(), hasher); - } - - if (visibility) { - putByteSquence(k.getColumnVisibilityData(), hasher); - } - - return hasher.hash(); - } -} http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/sample/RowSampler.java 
---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/sample/RowSampler.java b/core/src/main/java/org/apache/accumulo/core/sample/RowSampler.java deleted file mode 100644 index 8690a1c..0000000 --- a/core/src/main/java/org/apache/accumulo/core/sample/RowSampler.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.accumulo.core.sample; - -import org.apache.accumulo.core.data.ByteSequence; -import org.apache.accumulo.core.data.Key; - -import com.google.common.hash.HashCode; -import com.google.common.hash.HashFunction; - -/** - * Builds a sample based on entire rows. If a row is selected for the sample, then all of its columns will be included. - * - *

- * To determine what options are valid for hashing see {@link AbstractHashSampler}. This class offers no additional options; it always hashes on the row. - * - *

- * To configure Accumulo to generate sample data on one thousandth of the rows, the following SamplerConfiguration could be created and used to configure a - * table. - * - *

- * {@code new SamplerConfiguration(RowSampler.class.getName()).setOptions(ImmutableMap.of("hasher","murmur3_32","modulus","1009"))} - * - * @since 1.8.0 - */ - -public class RowSampler extends AbstractHashSampler { - - @Override - protected HashCode hash(HashFunction hashFunction, Key k) { - ByteSequence row = k.getRowData(); - return hashFunction.hashBytes(row.getBackingArray(), row.offset(), row.length()); - } -} http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/sample/Sampler.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/sample/Sampler.java b/core/src/main/java/org/apache/accumulo/core/sample/Sampler.java deleted file mode 100644 index 64adeec..0000000 --- a/core/src/main/java/org/apache/accumulo/core/sample/Sampler.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.accumulo.core.sample; - -import org.apache.accumulo.core.client.admin.SamplerConfiguration; -import org.apache.accumulo.core.data.Key; - -/** - * A function that decides which key values are stored in a tables sample. 
As Accumulo compacts data and creates rfiles it uses a Sampler to decide what to - * store in the rfile's sample section. The class name of the Sampler and the Sampler's configuration are stored in each rfile. A scan of a table's sample will - * only succeed if all rfiles were created with the same sampler and sampler configuration. - * - *

- * Since the decisions that Sampler makes are persisted, the behavior of a Sampler for a given configuration should always be the same. One way to offer a new - * behavior is to offer new options, while still supporting old behavior with a Sampler's existing options. - * - *

- * Ideally a sampler that selects a Key k1 would also select updates for k1. For example if a Sampler selects : - * {@code row='000989' family='name' qualifier='last' visibility='ADMIN' time=9 value='Doe'}, it would be nice if it also selected : - * {@code row='000989' family='name' qualifier='last' visibility='ADMIN' time=20 value='Dough'}. Using hash and modulo on the key fields is a good way to - * accomplish this and {@link AbstractHashSampler} provides a good basis for implementation. - * - * @since 1.8.0 - */ - -public interface Sampler { - - /** - * An implementation of Sampler must have a no-arg constructor. After construction this method is called once to initialize a sampler before it is used. - * - * @param config - * Configuration options for a sampler. - */ - void init(SamplerConfiguration config); - - /** - * @param k - * A key that was written to a rfile. - * @return True if the key (and its associated value) should be stored in the rfile's sample. Return false if it should not be included. - */ - boolean accept(Key k); -} http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/sample/impl/DataoutputHasher.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/sample/impl/DataoutputHasher.java b/core/src/main/java/org/apache/accumulo/core/sample/impl/DataoutputHasher.java new file mode 100644 index 0000000..d243dfe --- /dev/null +++ b/core/src/main/java/org/apache/accumulo/core/sample/impl/DataoutputHasher.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.accumulo.core.sample.impl; + +import java.io.DataOutput; +import java.io.IOException; +import java.nio.charset.StandardCharsets; + +import com.google.common.hash.Hasher; + +public class DataoutputHasher implements DataOutput { + + private Hasher hasher; + + public DataoutputHasher(Hasher hasher) { + this.hasher = hasher; + } + + @Override + public void write(int b) throws IOException { + hasher.putByte((byte) (0xff & b)); + } + + @Override + public void write(byte[] b) throws IOException { + hasher.putBytes(b); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + hasher.putBytes(b, off, len); + } + + @Override + public void writeBoolean(boolean v) throws IOException { + hasher.putBoolean(v); + } + + @Override + public void writeByte(int v) throws IOException { + hasher.putByte((byte) (0xff & v)); + + } + + @Override + public void writeShort(int v) throws IOException { + hasher.putShort((short) (0xffff & v)); + } + + @Override + public void writeChar(int v) throws IOException { + hasher.putChar((char) v); + } + + @Override + public void writeInt(int v) throws IOException { + hasher.putInt(v); + } + + @Override + public void writeLong(long v) throws IOException { + hasher.putLong(v); + } + + @Override + public void writeFloat(float v) throws IOException { + hasher.putDouble(v); + } + + @Override + public void writeDouble(double v) throws IOException { + hasher.putDouble(v); + } + + @Override + public void writeBytes(String s) throws IOException { + for (int i = 0; i < s.length(); i++) { + 
hasher.putByte((byte) (0xff & s.charAt(i))); + } + } + + @Override + public void writeChars(String s) throws IOException { + hasher.putString(s); + + } + + @Override + public void writeUTF(String s) throws IOException { + hasher.putInt(s.length()); + hasher.putBytes(s.getBytes(StandardCharsets.UTF_8)); + } +} http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/sample/impl/SamplerConfigurationImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/sample/impl/SamplerConfigurationImpl.java b/core/src/main/java/org/apache/accumulo/core/sample/impl/SamplerConfigurationImpl.java index 348def4..f0bd528 100644 --- a/core/src/main/java/org/apache/accumulo/core/sample/impl/SamplerConfigurationImpl.java +++ b/core/src/main/java/org/apache/accumulo/core/sample/impl/SamplerConfigurationImpl.java @@ -28,7 +28,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.tabletserver.thrift.TSamplerConfiguration; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/main/java/org/apache/accumulo/core/sample/impl/SamplerFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/sample/impl/SamplerFactory.java b/core/src/main/java/org/apache/accumulo/core/sample/impl/SamplerFactory.java index 3f11fbe..d70f3af 100644 --- a/core/src/main/java/org/apache/accumulo/core/sample/impl/SamplerFactory.java +++ b/core/src/main/java/org/apache/accumulo/core/sample/impl/SamplerFactory.java @@ -19,9 +19,9 @@ package org.apache.accumulo.core.sample.impl; import 
java.io.IOException; +import org.apache.accumulo.core.client.sample.Sampler; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.conf.Property; -import org.apache.accumulo.core.sample.Sampler; import org.apache.accumulo.start.classloader.vfs.AccumuloVFSClassLoader; public class SamplerFactory { http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/test/java/org/apache/accumulo/core/client/impl/TableOperationsHelperTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/accumulo/core/client/impl/TableOperationsHelperTest.java b/core/src/test/java/org/apache/accumulo/core/client/impl/TableOperationsHelperTest.java index 86857fa..c55b62f 100644 --- a/core/src/test/java/org/apache/accumulo/core/client/impl/TableOperationsHelperTest.java +++ b/core/src/test/java/org/apache/accumulo/core/client/impl/TableOperationsHelperTest.java @@ -37,8 +37,8 @@ import org.apache.accumulo.core.client.admin.CompactionConfig; import org.apache.accumulo.core.client.admin.DiskUsage; import org.apache.accumulo.core.client.admin.Locations; import org.apache.accumulo.core.client.admin.NewTableConfiguration; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; import org.apache.accumulo.core.client.admin.TimeType; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope; import org.apache.accumulo.core.security.Authorizations; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormatTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormatTest.java 
b/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormatTest.java index d88453e..d85db92 100644 --- a/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormatTest.java +++ b/core/src/test/java/org/apache/accumulo/core/client/mapred/AccumuloFileOutputFormatTest.java @@ -20,11 +20,11 @@ import static org.junit.Assert.assertEquals; import java.io.IOException; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; import org.apache.accumulo.core.client.mapreduce.lib.impl.FileOutputConfigurator; +import org.apache.accumulo.core.client.sample.RowSampler; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.conf.Property; -import org.apache.accumulo.core.sample.RowSampler; import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl; import org.apache.hadoop.mapred.JobConf; import org.junit.Test; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormatTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormatTest.java b/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormatTest.java index cf0c8d6..39d226b 100644 --- a/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormatTest.java +++ b/core/src/test/java/org/apache/accumulo/core/client/mapreduce/AccumuloFileOutputFormatTest.java @@ -20,11 +20,11 @@ import static org.junit.Assert.assertEquals; import java.io.IOException; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; import org.apache.accumulo.core.client.mapreduce.lib.impl.FileOutputConfigurator; +import org.apache.accumulo.core.client.sample.RowSampler; +import 
org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.conf.Property; -import org.apache.accumulo.core.sample.RowSampler; import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl; import org.apache.hadoop.mapreduce.Job; import org.junit.Test; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/test/java/org/apache/accumulo/core/file/rfile/RFileTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/accumulo/core/file/rfile/RFileTest.java b/core/src/test/java/org/apache/accumulo/core/file/rfile/RFileTest.java index d97a4db..8db1b21 100644 --- a/core/src/test/java/org/apache/accumulo/core/file/rfile/RFileTest.java +++ b/core/src/test/java/org/apache/accumulo/core/file/rfile/RFileTest.java @@ -42,8 +42,10 @@ import java.util.Random; import java.util.Set; import org.apache.accumulo.core.Constants; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; import org.apache.accumulo.core.client.impl.BaseIteratorEnvironment; +import org.apache.accumulo.core.client.sample.RowSampler; +import org.apache.accumulo.core.client.sample.Sampler; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.conf.ConfigurationCopy; import org.apache.accumulo.core.conf.Property; @@ -64,8 +66,6 @@ import org.apache.accumulo.core.iterators.system.ColumnFamilySkippingIterator; import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.schema.MetadataSchema; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection; -import org.apache.accumulo.core.sample.RowSampler; -import org.apache.accumulo.core.sample.Sampler; import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl; import 
org.apache.accumulo.core.sample.impl.SamplerFactory; import org.apache.accumulo.core.security.crypto.CryptoTest; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/core/src/test/java/org/apache/accumulo/core/iterators/SortedMapIteratorTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/accumulo/core/iterators/SortedMapIteratorTest.java b/core/src/test/java/org/apache/accumulo/core/iterators/SortedMapIteratorTest.java index 7557b9a..d4080e1 100644 --- a/core/src/test/java/org/apache/accumulo/core/iterators/SortedMapIteratorTest.java +++ b/core/src/test/java/org/apache/accumulo/core/iterators/SortedMapIteratorTest.java @@ -19,11 +19,11 @@ package org.apache.accumulo.core.iterators; import java.util.TreeMap; import org.apache.accumulo.core.client.SampleNotPresentException; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; import org.apache.accumulo.core.client.impl.BaseIteratorEnvironment; +import org.apache.accumulo.core.client.sample.RowSampler; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Value; -import org.apache.accumulo.core.sample.RowSampler; import org.junit.Test; public class SortedMapIteratorTest { http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/docs/src/main/resources/examples/README.sample ---------------------------------------------------------------------- diff --git a/docs/src/main/resources/examples/README.sample b/docs/src/main/resources/examples/README.sample index 15288aa..3642cc6 100644 --- a/docs/src/main/resources/examples/README.sample +++ b/docs/src/main/resources/examples/README.sample @@ -40,7 +40,7 @@ tables sample data. 
root@instance sampex> config -t sampex -s table.sampler.opt.hasher=murmur3_32 root@instance sampex> config -t sampex -s table.sampler.opt.modulus=3 - root@instance sampex> config -t sampex -s table.sampler=org.apache.accumulo.core.sample.RowSampler + root@instance sampex> config -t sampex -s table.sampler=org.apache.accumulo.core.client.sample.RowSampler Below, attempting to scan the sample returns an error. This is because data was inserted before the sample set was configured. @@ -123,7 +123,7 @@ configuration for sample scan to work. Column families : [doc] Sample Configuration : - Sampler class : org.apache.accumulo.core.sample.RowSampler + Sampler class : org.apache.accumulo.core.client.sample.RowSampler Sampler options : {hasher=murmur3_32, modulus=2} Sample Locality group : @@ -159,7 +159,7 @@ shard table based on the column qualifier. root@instance shard> config -t shard -s table.sampler.opt.hasher=murmur3_32 root@instance shard> config -t shard -s table.sampler.opt.modulus=101 root@instance shard> config -t shard -s table.sampler.opt.qualifier=true - root@instance shard> config -t shard -s table.sampler=org.apache.accumulo.core.sample.RowColumnSampler + root@instance shard> config -t shard -s table.sampler=org.apache.accumulo.core.client.sample.RowColumnSampler root@instance shard> compact -t shard --sf-no-sample -w 2015-07-23 15:00:09,280 [shell.Shell] INFO : Compacting table ... 
2015-07-23 15:00:10,134 [shell.Shell] INFO : Compaction of table shard completed for given range http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/examples/simple/src/main/java/org/apache/accumulo/examples/simple/sample/SampleExample.java ---------------------------------------------------------------------- diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/sample/SampleExample.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/sample/SampleExample.java index 57d77b1..262e63d 100644 --- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/sample/SampleExample.java +++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/sample/SampleExample.java @@ -28,11 +28,11 @@ import org.apache.accumulo.core.client.SampleNotPresentException; import org.apache.accumulo.core.client.Scanner; import org.apache.accumulo.core.client.admin.CompactionConfig; import org.apache.accumulo.core.client.admin.CompactionStrategyConfig; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; +import org.apache.accumulo.core.client.sample.RowSampler; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Mutation; import org.apache.accumulo.core.data.Value; -import org.apache.accumulo.core.sample.RowSampler; import org.apache.accumulo.core.security.Authorizations; import org.apache.accumulo.examples.simple.client.RandomBatchWriter; import org.apache.accumulo.examples.simple.shard.CutoffIntersectingIterator; http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8bd1ddd/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/CutoffIntersectingIterator.java ---------------------------------------------------------------------- diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/CutoffIntersectingIterator.java 
b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/CutoffIntersectingIterator.java index 9f13dd4..f5dce1d 100644 --- a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/CutoffIntersectingIterator.java +++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/shard/CutoffIntersectingIterator.java @@ -25,7 +25,8 @@ import java.util.Collection; import java.util.Map; import org.apache.accumulo.core.client.IteratorSetting; -import org.apache.accumulo.core.client.admin.SamplerConfiguration; +import org.apache.accumulo.core.client.sample.RowColumnSampler; +import org.apache.accumulo.core.client.sample.SamplerConfiguration; import org.apache.accumulo.core.data.ByteSequence; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Range; @@ -33,7 +34,6 @@ import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.iterators.IteratorEnvironment; import org.apache.accumulo.core.iterators.SortedKeyValueIterator; import org.apache.accumulo.core.iterators.user.IntersectingIterator; -import org.apache.accumulo.core.sample.RowColumnSampler; /** * This iterator uses a sample built from the Column Qualifier to quickly avoid intersecting iterator queries that may return too many documents.