Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id B049B200D3C for ; Tue, 14 Nov 2017 22:59:39 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id AE78F160BF4; Tue, 14 Nov 2017 21:59:39 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 09AE9160BD7 for ; Tue, 14 Nov 2017 22:59:37 +0100 (CET) Received: (qmail 58089 invoked by uid 500); 14 Nov 2017 21:59:37 -0000 Mailing-List: contact commits-help@accumulo.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@accumulo.apache.org Delivered-To: mailing list commits@accumulo.apache.org Received: (qmail 58080 invoked by uid 99); 14 Nov 2017 21:59:37 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 14 Nov 2017 21:59:37 +0000 Received: by gitbox.apache.org (ASF Mail Server at gitbox.apache.org, from userid 33) id 5AE5581B10; Tue, 14 Nov 2017 21:59:36 +0000 (UTC) Date: Tue, 14 Nov 2017 21:59:36 +0000 To: "commits@accumulo.apache.org" Subject: [accumulo] branch master updated: ACCUMULO-4730 Added EntryLengthSummarizer MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Message-ID: <151069677630.28793.7737982032362043008@gitbox.apache.org> From: kturner@apache.org X-Git-Host: gitbox.apache.org X-Git-Repo: accumulo X-Git-Refname: refs/heads/master X-Git-Reftype: branch X-Git-Oldrev: 0a8c931d7e6e835f6b480cd2b3aa5a13ee877965 X-Git-Newrev: 9cd4be0432c7b0297d86b19ddeac64ed0feaea87 X-Git-Rev: 9cd4be0432c7b0297d86b19ddeac64ed0feaea87 X-Git-NotificationType: ref_changed_plus_diff X-Git-Multimail-Version: 1.5.dev Auto-Submitted: auto-generated archived-at: Tue, 14 Nov 2017 21:59:39 -0000 This is an automated email from the ASF dual-hosted git repository. kturner pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/accumulo.git The following commit(s) were added to refs/heads/master by this push: new 9cd4be0 ACCUMULO-4730 Added EntryLengthSummarizer 9cd4be0 is described below commit 9cd4be0432c7b0297d86b19ddeac64ed0feaea87 Author: jkrdev AuthorDate: Wed Nov 1 18:31:12 2017 +0000 ACCUMULO-4730 Added EntryLengthSummarizer --- .../summary/summarizers/EntryLengthSummarizer.java | 147 +++ .../summarizers/EntryLengthSummarizersTest.java | 1149 ++++++++++++++++++++ 2 files changed, 1296 insertions(+) diff --git a/core/src/main/java/org/apache/accumulo/core/client/summary/summarizers/EntryLengthSummarizer.java b/core/src/main/java/org/apache/accumulo/core/client/summary/summarizers/EntryLengthSummarizer.java new file mode 100644 index 0000000..10fb9d9 --- /dev/null +++ b/core/src/main/java/org/apache/accumulo/core/client/summary/summarizers/EntryLengthSummarizer.java @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License");you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.accumulo.core.client.summary.summarizers; + +import java.math.RoundingMode; +import java.util.Map; +import java.util.function.BiFunction; + +import org.apache.accumulo.core.client.summary.Summarizer; +import org.apache.accumulo.core.client.summary.SummarizerConfiguration; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Value; + +import com.google.common.math.IntMath; + +/** + * Summarizer that computes summary information about field lengths. Specifically key length, row length, family length, qualifier length, visibility length, + * and value length. Incrementally computes minimum, maximum, count, sum, and log2 histogram of the lengths. + * + * @since 2.0.0 + */ +public class EntryLengthSummarizer implements Summarizer { + + /* Helper function that calculates the various statistics that is used for the Collector methods. */ + private static class LengthStats { + private long min = Long.MAX_VALUE; + private long max = Long.MIN_VALUE; + private long sum = 0; + private long[] counts = new long[32]; + + private void accept(int length) { + int idx; + + if (length < min) { + min = length; + } + + if (length > max) { + max = length; + } + + sum += length; + + if (length == 0) { + idx = 0; + } else { + idx = IntMath.log2(length, RoundingMode.HALF_UP); + } + + counts[idx]++; + } + + void summarize(String prefix, StatisticConsumer sc) { + sc.accept(prefix + ".min", (min != Long.MAX_VALUE ? min : 0)); + sc.accept(prefix + ".max", (max != Long.MIN_VALUE ? max : 0)); + sc.accept(prefix + ".sum", sum); + + for (int i = 0; i < counts.length; i++) { + if (counts[i] > 0) { + sc.accept(prefix + ".logHist." + i, counts[i]); + } + } + } + + } + + /* Helper functions for merging that is used by the Combiner. */ + private static void merge(String key, BiFunction mergeFunc, Map stats1, Map stats2) { + Long mergeVal = stats2.get(key); + + if (mergeVal != null) { + stats1.merge(key, mergeVal, mergeFunc); + } + } + + private static void merge(String prefix, Map stats1, Map stats2) { + merge(prefix+".min", Long::min, stats1, stats2); + merge(prefix+".max", Long::max, stats1, stats2); + merge(prefix+".sum", Long::sum, stats1, stats2); + for (int i = 0; i < 32; i++) { + merge(prefix+".logHist."+i, Long::sum, stats1, stats2); + } + } + + @Override + public Collector collector(SummarizerConfiguration sc) { + return new Collector() { + + private LengthStats keyStats = new LengthStats(); + private LengthStats rowStats = new LengthStats(); + private LengthStats familyStats = new LengthStats(); + private LengthStats qualifierStats = new LengthStats(); + private LengthStats visibilityStats = new LengthStats(); + private LengthStats valueStats = new LengthStats(); + private long total = 0; + + @Override + public void accept(Key k, Value v) { + keyStats.accept(k.getLength()); + rowStats.accept(k.getRowData().length()); + familyStats.accept(k.getColumnFamilyData().length()); + qualifierStats.accept(k.getColumnQualifierData().length()); + visibilityStats.accept(k.getColumnVisibilityData().length()); + valueStats.accept(v.getSize()); + total++; + } + + @Override + public void summarize(StatisticConsumer sc) { + keyStats.summarize("key", sc); + rowStats.summarize("row", sc); + familyStats.summarize("family", sc); + qualifierStats.summarize("qualifier", sc); + visibilityStats.summarize("visibility", sc); + valueStats.summarize("value", sc); + sc.accept("total", total); + } + }; + } + + @Override + public Combiner combiner(SummarizerConfiguration sc) { + return (stats1, stats2) -> { + merge("key", stats1, stats2); + merge("row", stats1, stats2); + merge("family", stats1, stats2); + merge("qualifier", stats1, stats2); + merge("visibility", stats1, stats2); + merge("value", stats1, stats2); + stats1.merge("total", stats2.get("total"), Long::sum); + }; + } +} diff --git a/core/src/test/java/org/apache/accumulo/core/client/summary/summarizers/EntryLengthSummarizersTest.java b/core/src/test/java/org/apache/accumulo/core/client/summary/summarizers/EntryLengthSummarizersTest.java new file mode 100644 index 0000000..ff50d87 --- /dev/null +++ b/core/src/test/java/org/apache/accumulo/core/client/summary/summarizers/EntryLengthSummarizersTest.java @@ -0,0 +1,1149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License");you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.accumulo.core.client.summary.summarizers; + +import java.util.HashMap; + +import org.apache.accumulo.core.client.summary.Summarizer.Collector; +import org.apache.accumulo.core.client.summary.Summarizer.Combiner; +import org.apache.accumulo.core.client.summary.SummarizerConfiguration; +import org.apache.accumulo.core.client.summary.summarizers.EntryLengthSummarizer; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Value; +import org.junit.Assert; +import org.junit.Test; + +public class EntryLengthSummarizersTest { + + /* COLLECTOR TEST */ + /* Basic Test: Each test adds to the next, all are simple lengths. */ + + @Test + public void testEmpty() { + SummarizerConfiguration sc = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build(); + EntryLengthSummarizer entrySum = new EntryLengthSummarizer(); + + Collector collector = entrySum.collector(sc); + + HashMap stats = new HashMap<>(); + collector.summarize(stats::put); + + HashMap expected = new HashMap<>(); + expected.put("key.min", 0L); + expected.put("key.max", 0L); + expected.put("key.sum", 0L); + + expected.put("row.min", 0L); + expected.put("row.max", 0L); + expected.put("row.sum", 0L); + + expected.put("family.min", 0L); + expected.put("family.max", 0L); + expected.put("family.sum", 0L); + + expected.put("qualifier.min", 0L); + expected.put("qualifier.max", 0L); + expected.put("qualifier.sum", 0L); + + expected.put("visibility.min", 0L); + expected.put("visibility.max", 0L); + expected.put("visibility.sum", 0L); + + expected.put("value.min", 0L); + expected.put("value.max", 0L); + expected.put("value.sum", 0L); + + expected.put("total", 0L); + + Assert.assertEquals(expected, stats); + } + + @Test + public void testBasicRow() { + SummarizerConfiguration sc = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build(); + EntryLengthSummarizer entrySum = new EntryLengthSummarizer(); + + Key k1 = new Key("r1"); + Key k2 = new Key("r2"); + Key k3 = new Key("r3"); + + Collector collector = entrySum.collector(sc); + collector.accept(k1, new Value("")); + collector.accept(k2, new Value("")); + collector.accept(k3, new Value("")); + + HashMap stats = new HashMap<>(); + collector.summarize(stats::put); + + HashMap expected = new HashMap<>(); + expected.put("key.min", 2L); + expected.put("key.max", 2L); + expected.put("key.sum", 6L); + + // Log2 Histogram + expected.put("key.logHist.1", 3L); + + expected.put("row.min", 2L); + expected.put("row.max", 2L); + expected.put("row.sum", 6L); + + // Log2 Histogram + expected.put("row.logHist.1", 3L); + + expected.put("family.min", 0L); + expected.put("family.max", 0L); + expected.put("family.sum", 0L); + + // Log2 Histogram + expected.put("family.logHist.0", 3L); + + expected.put("qualifier.min", 0L); + expected.put("qualifier.max", 0L); + expected.put("qualifier.sum", 0L); + + // Log2 Histogram + expected.put("qualifier.logHist.0", 3L); + + expected.put("visibility.min", 0L); + expected.put("visibility.max", 0L); + expected.put("visibility.sum", 0L); + + // Log2 Histogram + expected.put("visibility.logHist.0", 3L); + + expected.put("value.min", 0L); + expected.put("value.max", 0L); + expected.put("value.sum", 0L); + + // Log2 Histogram + expected.put("value.logHist.0", 3L); + + expected.put("total", 3L); + + Assert.assertEquals(expected, stats); + } + + @Test + public void testBasicFamily() { + SummarizerConfiguration sc = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build(); + EntryLengthSummarizer entrySum = new EntryLengthSummarizer(); + + Key k1 = new Key("r1", "f1"); + Key k2 = new Key("r2", "f2"); + Key k3 = new Key("r3", "f3"); + + Collector collector = entrySum.collector(sc); + collector.accept(k1, new Value("")); + collector.accept(k2, new Value("")); + collector.accept(k3, new Value("")); + + HashMap stats = new HashMap<>(); + collector.summarize(stats::put); + + HashMap expected = new HashMap<>(); + expected.put("key.min", 4L); + expected.put("key.max", 4L); + expected.put("key.sum", 12L); + + // Log2 Histogram + expected.put("key.logHist.2", 3L); + + expected.put("row.min", 2L); + expected.put("row.max", 2L); + expected.put("row.sum", 6L); + + // Log2 Histogram + expected.put("row.logHist.1", 3L); + + expected.put("family.min", 2L); + expected.put("family.max", 2L); + expected.put("family.sum", 6L); + + // Log2 Histogram + expected.put("family.logHist.1", 3L); + + expected.put("qualifier.min", 0L); + expected.put("qualifier.max", 0L); + expected.put("qualifier.sum", 0L); + + // Log2 Histogram + expected.put("qualifier.logHist.0", 3L); + + expected.put("visibility.min", 0L); + expected.put("visibility.max", 0L); + expected.put("visibility.sum", 0L); + + // Log2 Histogram + expected.put("visibility.logHist.0", 3L); + + expected.put("value.min", 0L); + expected.put("value.max", 0L); + expected.put("value.sum", 0L); + + // Log2 Histogram + expected.put("value.logHist.0", 3L); + + expected.put("total", 3L); + + Assert.assertEquals(expected, stats); + } + + @Test + public void testBasicQualifier() { + SummarizerConfiguration sc = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build(); + EntryLengthSummarizer entrySum = new EntryLengthSummarizer(); + + Key k1 = new Key("r1", "f1", "q1"); + Key k2 = new Key("r2", "f2", "q2"); + Key k3 = new Key("r3", "f3", "q3"); + + Collector collector = entrySum.collector(sc); + collector.accept(k1, new Value("")); + collector.accept(k2, new Value("")); + collector.accept(k3, new Value("")); + + HashMap stats = new HashMap<>(); + collector.summarize(stats::put); + + HashMap expected = new HashMap<>(); + expected.put("key.min", 6L); + expected.put("key.max", 6L); + expected.put("key.sum", 18L); + + // Log2 Histogram + expected.put("key.logHist.3", 3L); + + expected.put("row.min", 2L); + expected.put("row.max", 2L); + expected.put("row.sum", 6L); + + // Log2 Histogram + expected.put("row.logHist.1", 3L); + + expected.put("family.min", 2L); + expected.put("family.max", 2L); + expected.put("family.sum", 6L); + + // Log2 Histogram + expected.put("family.logHist.1", 3L); + + expected.put("qualifier.min", 2L); + expected.put("qualifier.max", 2L); + expected.put("qualifier.sum", 6L); + + // Log2 Histogram + expected.put("qualifier.logHist.1", 3L); + + expected.put("visibility.min", 0L); + expected.put("visibility.max", 0L); + expected.put("visibility.sum", 0L); + + // Log2 Histogram + expected.put("visibility.logHist.0", 3L); + + expected.put("value.min", 0L); + expected.put("value.max", 0L); + expected.put("value.sum", 0L); + + // Log2 Histogram + expected.put("value.logHist.0", 3L); + + expected.put("total", 3L); + + Assert.assertEquals(expected, stats); + } + + @Test + public void testBasicVisibility() { + SummarizerConfiguration sc = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build(); + EntryLengthSummarizer entrySum = new EntryLengthSummarizer(); + + Key k1 = new Key("r1", "f1", "q1", "v1"); + Key k2 = new Key("r2", "f2", "q2", "v2"); + Key k3 = new Key("r3", "f3", "q3", "v3"); + + Collector collector = entrySum.collector(sc); + collector.accept(k1, new Value("")); + collector.accept(k2, new Value("")); + collector.accept(k3, new Value("")); + + HashMap stats = new HashMap<>(); + collector.summarize(stats::put); + + HashMap expected = new HashMap<>(); + expected.put("key.min", 8L); + expected.put("key.max", 8L); + expected.put("key.sum", 24L); + + // Log2 Histogram + expected.put("key.logHist.3", 3L); + + expected.put("row.min", 2L); + expected.put("row.max", 2L); + expected.put("row.sum", 6L); + + // Log2 Histogram + expected.put("row.logHist.1", 3L); + + expected.put("family.min", 2L); + expected.put("family.max", 2L); + expected.put("family.sum", 6L); + + // Log2 Histogram + expected.put("family.logHist.1", 3L); + + expected.put("qualifier.min", 2L); + expected.put("qualifier.max", 2L); + expected.put("qualifier.sum", 6L); + + // Log2 Histogram + expected.put("qualifier.logHist.1", 3L); + + expected.put("visibility.min", 2L); + expected.put("visibility.max", 2L); + expected.put("visibility.sum", 6L); + + // Log2 Histogram + expected.put("visibility.logHist.1", 3L); + + expected.put("value.min", 0L); + expected.put("value.max", 0L); + expected.put("value.sum", 0L); + + // Log2 Histogram + expected.put("value.logHist.0", 3L); + + expected.put("total", 3L); + + Assert.assertEquals(expected, stats); + } + + @Test + public void testBasicValue() { + SummarizerConfiguration sc = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build(); + EntryLengthSummarizer entrySum = new EntryLengthSummarizer(); + + Key k1 = new Key("r1", "f1", "q1", "v1"); + Key k2 = new Key("r2", "f2", "q2", "v2"); + Key k3 = new Key("r3", "f3", "q3", "v3"); + + Collector collector = entrySum.collector(sc); + collector.accept(k1, new Value("v1")); + collector.accept(k2, new Value("v2")); + collector.accept(k3, new Value("v3")); + + HashMap stats = new HashMap<>(); + collector.summarize(stats::put); + + HashMap expected = new HashMap<>(); + expected.put("key.min", 8L); + expected.put("key.max", 8L); + expected.put("key.sum", 24L); + + // Log2 Histogram + expected.put("key.logHist.3", 3L); + + expected.put("row.min", 2L); + expected.put("row.max", 2L); + expected.put("row.sum", 6L); + + // Log2 Histogram + expected.put("row.logHist.1", 3L); + + expected.put("family.min", 2L); + expected.put("family.max", 2L); + expected.put("family.sum", 6L); + + // Log2 Histogram + expected.put("family.logHist.1", 3L); + + expected.put("qualifier.min", 2L); + expected.put("qualifier.max", 2L); + expected.put("qualifier.sum", 6L); + + // Log2 Histogram + expected.put("qualifier.logHist.1", 3L); + + expected.put("visibility.min", 2L); + expected.put("visibility.max", 2L); + expected.put("visibility.sum", 6L); + + // Log2 Histogram + expected.put("visibility.logHist.1", 3L); + + expected.put("value.min", 2L); + expected.put("value.max", 2L); + expected.put("value.sum", 6L); + + // Log2 Histogram + expected.put("value.logHist.1", 3L); + + expected.put("total", 3L); + + Assert.assertEquals(expected, stats); + } + + /* Complex Test: Each test adds to the next, all are mixed lengths. */ + + @Test + public void testComplexRow() { + SummarizerConfiguration sc = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build(); + EntryLengthSummarizer entrySum = new EntryLengthSummarizer(); + + Key k1 = new Key("r1"); + Key k2 = new Key("row2"); + Key k3 = new Key("columnRow3"); + + Collector collector = entrySum.collector(sc); + collector.accept(k1, new Value("")); + collector.accept(k2, new Value("")); + collector.accept(k3, new Value("")); + + HashMap stats = new HashMap<>(); + collector.summarize(stats::put); + + HashMap expected = new HashMap<>(); + expected.put("key.min", 2L); + expected.put("key.max", 10L); + expected.put("key.sum", 16L); + + // Log2 Histogram + expected.put("key.logHist.1", 1L); + expected.put("key.logHist.2", 1L); + expected.put("key.logHist.3", 1L); + + expected.put("row.min", 2L); + expected.put("row.max", 10L); + expected.put("row.sum", 16L); + + // Log2 Histogram + expected.put("row.logHist.1", 1L); + expected.put("row.logHist.2", 1L); + expected.put("row.logHist.3", 1L); + + expected.put("family.min", 0L); + expected.put("family.max", 0L); + expected.put("family.sum", 0L); + + // Log2 Histogram + expected.put("family.logHist.0", 3L); + + expected.put("qualifier.min", 0L); + expected.put("qualifier.max", 0L); + expected.put("qualifier.sum", 0L); + + // Log2 Histogram + expected.put("qualifier.logHist.0", 3L); + + expected.put("visibility.min", 0L); + expected.put("visibility.max", 0L); + expected.put("visibility.sum", 0L); + + // Log2 Histogram + expected.put("visibility.logHist.0", 3L); + + expected.put("value.min", 0L); + expected.put("value.max", 0L); + expected.put("value.sum", 0L); + + // Log2 Histogram + expected.put("value.logHist.0", 3L); + + expected.put("total", 3L); + + Assert.assertEquals(expected, stats); + } + + @Test + public void testComplexFamily() { + SummarizerConfiguration sc = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build(); + EntryLengthSummarizer entrySum = new EntryLengthSummarizer(); + + Key k1 = new Key("r1", "family1"); + Key k2 = new Key("row2", "columnFamily2"); + Key k3 = new Key("columnRow3", "f3"); + + Collector collector = entrySum.collector(sc); + collector.accept(k1, new Value("")); + collector.accept(k2, new Value("")); + collector.accept(k3, new Value("")); + + HashMap stats = new HashMap<>(); + collector.summarize(stats::put); + + HashMap expected = new HashMap<>(); + expected.put("key.min", 9L); + expected.put("key.max", 17L); + expected.put("key.sum", 38L); + + // Log2 Histogram + expected.put("key.logHist.3", 1L); + expected.put("key.logHist.4", 2L); + + expected.put("row.min", 2L); + expected.put("row.max", 10L); + expected.put("row.sum", 16L); + + // Log2 Histogram + expected.put("row.logHist.1", 1L); + expected.put("row.logHist.2", 1L); + expected.put("row.logHist.3", 1L); + + expected.put("family.min", 2L); + expected.put("family.max", 13L); + expected.put("family.sum", 22L); + + // Log2 Histogram + expected.put("family.logHist.1", 1L); + expected.put("family.logHist.3", 1L); + expected.put("family.logHist.4", 1L); + + expected.put("qualifier.min", 0L); + expected.put("qualifier.max", 0L); + expected.put("qualifier.sum", 0L); + + // Log2 Histogram + expected.put("qualifier.logHist.0", 3L); + + expected.put("visibility.min", 0L); + expected.put("visibility.max", 0L); + expected.put("visibility.sum", 0L); + + // Log2 Histogram + expected.put("visibility.logHist.0", 3L); + + expected.put("value.min", 0L); + expected.put("value.max", 0L); + expected.put("value.sum", 0L); + + // Log2 Histogram + expected.put("value.logHist.0", 3L); + + expected.put("total", 3L); + + Assert.assertEquals(expected, stats); + } + + @Test + public void testComplexQualifier() { + SummarizerConfiguration sc = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build(); + EntryLengthSummarizer entrySum = new EntryLengthSummarizer(); + + Key k1 = new Key("r1", "family1", "columnQualifier1"); + Key k2 = new Key("row2", "columnFamily2", "q2"); + Key k3 = new Key("columnRow3", "f3", "qualifier3"); + + Collector collector = entrySum.collector(sc); + collector.accept(k1, new Value("")); + collector.accept(k2, new Value("")); + collector.accept(k3, new Value("")); + + HashMap stats = new HashMap<>(); + collector.summarize(stats::put); + + HashMap expected = new HashMap<>(); + expected.put("key.min", 19L); + expected.put("key.max", 25L); + expected.put("key.sum", 66L); + + // Log2 Histogram + expected.put("key.logHist.4", 2L); + expected.put("key.logHist.5", 1L); + + expected.put("row.min", 2L); + expected.put("row.max", 10L); + expected.put("row.sum", 16L); + + // Log2 Histogram + expected.put("row.logHist.1", 1L); + expected.put("row.logHist.2", 1L); + expected.put("row.logHist.3", 1L); + + expected.put("family.min", 2L); + expected.put("family.max", 13L); + expected.put("family.sum", 22L); + + // Log2 Histogram + expected.put("family.logHist.1", 1L); + expected.put("family.logHist.3", 1L); + expected.put("family.logHist.4", 1L); + + expected.put("qualifier.min", 2L); + expected.put("qualifier.max", 16L); + expected.put("qualifier.sum", 28L); + + // Log2 Histogram + expected.put("qualifier.logHist.1", 1L); + expected.put("qualifier.logHist.3", 1L); + expected.put("qualifier.logHist.4", 1L); + + expected.put("visibility.min", 0L); + expected.put("visibility.max", 0L); + expected.put("visibility.sum", 0L); + + // Log2 Histogram + expected.put("visibility.logHist.0", 3L); + + expected.put("value.min", 0L); + expected.put("value.max", 0L); + expected.put("value.sum", 0L); + + // Log2 Histogram + expected.put("value.logHist.0", 3L); + + expected.put("total", 3L); + + Assert.assertEquals(expected, stats); + } + + @Test + public void testComplexVisibility() { + SummarizerConfiguration sc = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build(); + EntryLengthSummarizer entrySum = new EntryLengthSummarizer(); + + Key k1 = new Key("r1", "family1", "columnQualifier1", "v1"); + Key k2 = new Key("row2", "columnFamily2", "q2", "visibility2"); + Key k3 = new Key("columnRow3", "f3", "qualifier3", "columnVisibility3"); + + Collector collector = entrySum.collector(sc); + collector.accept(k1, new Value("")); + collector.accept(k2, new Value("")); + collector.accept(k3, new Value("")); + + HashMap stats = new HashMap<>(); + collector.summarize(stats::put); + + HashMap expected = new HashMap<>(); + expected.put("key.min", 27L); + expected.put("key.max", 39L); + expected.put("key.sum", 96L); + + // Log2 Histogram + expected.put("key.logHist.5", 3L); + + expected.put("row.min", 2L); + expected.put("row.max", 10L); + expected.put("row.sum", 16L); + + // Log2 Histogram + expected.put("row.logHist.1", 1L); + expected.put("row.logHist.2", 1L); + expected.put("row.logHist.3", 1L); + + expected.put("family.min", 2L); + expected.put("family.max", 13L); + expected.put("family.sum", 22L); + + // Log2 Histogram + expected.put("family.logHist.1", 1L); + expected.put("family.logHist.3", 1L); + expected.put("family.logHist.4", 1L); + + expected.put("qualifier.min", 2L); + expected.put("qualifier.max", 16L); + expected.put("qualifier.sum", 28L); + + // Log2 Histogram + expected.put("qualifier.logHist.1", 1L); + expected.put("qualifier.logHist.3", 1L); + expected.put("qualifier.logHist.4", 1L); + + expected.put("visibility.min", 2L); + expected.put("visibility.max", 17L); + expected.put("visibility.sum", 30L); + + // Log2 Histogram + expected.put("visibility.logHist.1", 1L); + expected.put("visibility.logHist.3", 1L); + expected.put("visibility.logHist.4", 1L); + + expected.put("value.min", 0L); + expected.put("value.max", 0L); + expected.put("value.sum", 0L); + + // Log2 Histogram + expected.put("value.logHist.0", 3L); + + expected.put("total", 3L); + + Assert.assertEquals(expected, stats); + } + + @Test + public void testComplexValue() { + SummarizerConfiguration sc = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build(); + EntryLengthSummarizer entrySum = new EntryLengthSummarizer(); + + Key k1 = new Key("r1", "family1", "columnQualifier1", "v1"); + Key k2 = new Key("row2", "columnFamily2", "q2", "visibility2"); + Key k3 = new Key("columnRow3", "f3", "qualifier3", "columnVisibility3"); + + Collector collector = entrySum.collector(sc); + collector.accept(k1, new Value("v1")); + collector.accept(k2, new Value("value2")); + collector.accept(k3, new Value("keyValue3")); + + HashMap stats = new HashMap<>(); + collector.summarize(stats::put); + + HashMap expected = new HashMap<>(); + expected.put("key.min", 27L); + expected.put("key.max", 39L); + expected.put("key.sum", 96L); + + // Log2 Histogram + expected.put("key.logHist.5", 3L); + + expected.put("row.min", 2L); + expected.put("row.max", 10L); + expected.put("row.sum", 16L); + + // Log2 Histogram + expected.put("row.logHist.1", 1L); + expected.put("row.logHist.2", 1L); + expected.put("row.logHist.3", 1L); + + expected.put("family.min", 2L); + expected.put("family.max", 13L); + expected.put("family.sum", 22L); + + // Log2 Histogram + expected.put("family.logHist.1", 1L); + expected.put("family.logHist.3", 1L); + expected.put("family.logHist.4", 1L); + + expected.put("qualifier.min", 2L); + expected.put("qualifier.max", 16L); + expected.put("qualifier.sum", 28L); + + // Log2 Histogram + expected.put("qualifier.logHist.1", 1L); + expected.put("qualifier.logHist.3", 1L); + expected.put("qualifier.logHist.4", 1L); + + expected.put("visibility.min", 2L); + expected.put("visibility.max", 17L); + expected.put("visibility.sum", 30L); + + // Log2 Histogram + expected.put("visibility.logHist.1", 1L); + expected.put("visibility.logHist.3", 1L); + expected.put("visibility.logHist.4", 1L); + + expected.put("value.min", 2L); + expected.put("value.max", 9L); + expected.put("value.sum", 17L); + + // Log2 Histogram + expected.put("value.logHist.1", 1L); + expected.put("value.logHist.3", 2L); + + expected.put("total", 3L); + + Assert.assertEquals(expected, stats); + } + + /* Miscellaneous Test */ + + @Test + public void testAll() { + SummarizerConfiguration sc = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build(); + EntryLengthSummarizer entrySum = new EntryLengthSummarizer(); + + Key k1 = new Key("maximumnoqualifier","f1", "q", "vis1"); + Key k2 = new Key("minKey","fam2", "q2", "visibility2"); + Key k3 = new Key("row3","f3", "qualifier3", "v3"); + Key k4 = new Key("r4", "family4", "qual4", "vis4"); + Key k5 = new Key("fifthrow", "thirdfamily", "q5", "v5"); + Key k6 = new Key("r6", "sixthfamily", "qual6", "visibi6"); + + Collector collector = entrySum.collector(sc); + collector.accept(k1, new Value("v1")); + collector.accept(k2, new Value("value2")); + collector.accept(k3, new Value("val3")); + collector.accept(k4, new Value("fourthvalue")); + collector.accept(k5, new Value("")); + collector.accept(k6, new Value("value6")); + + HashMap stats = new HashMap<>(); + collector.summarize(stats::put); + + HashMap expected = new HashMap<>(); + expected.put("key.min", 18L); + expected.put("key.max", 25L); + expected.put("key.sum", 132L); + + // Log2 Histogram + expected.put("key.logHist.4", 2L); + expected.put("key.logHist.5", 4L); + + expected.put("row.min", 2L); + expected.put("row.max", 18L); + expected.put("row.sum", 40L); + + // Log2 Histogram + expected.put("row.logHist.1", 2L); + expected.put("row.logHist.2", 1L); + expected.put("row.logHist.3", 2L); + expected.put("row.logHist.4", 1L); + + expected.put("family.min", 2L); + expected.put("family.max", 11L); + expected.put("family.sum", 37L); + + // Log2 Histogram + expected.put("family.logHist.1", 2L); + expected.put("family.logHist.2", 1L); + expected.put("family.logHist.3", 3L); + + expected.put("qualifier.min", 1L); + expected.put("qualifier.max", 10L); + expected.put("qualifier.sum", 25L); + + // Log2 Histogram + expected.put("qualifier.logHist.0", 1L); + expected.put("qualifier.logHist.1", 2L); + expected.put("qualifier.logHist.2", 2L); + expected.put("qualifier.logHist.3", 1L); + + expected.put("visibility.min", 2L); + expected.put("visibility.max", 11L); + expected.put("visibility.sum", 30L); + + // Log2 Histogram + expected.put("visibility.logHist.1", 2L); + expected.put("visibility.logHist.2", 2L); + expected.put("visibility.logHist.3", 2L); + + expected.put("value.min", 0L); + expected.put("value.max", 11L); + expected.put("value.sum", 29L); + + // Log2 Histogram + expected.put("value.logHist.0", 1L); + expected.put("value.logHist.1", 1L); + expected.put("value.logHist.2", 1L); + expected.put("value.logHist.3", 3L); + + expected.put("total", 6L); + + Assert.assertEquals(expected, stats); + } + + @Test + public void testLog2Histogram() { + SummarizerConfiguration sc = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build(); + EntryLengthSummarizer entrySum = new EntryLengthSummarizer(); + + Key k1 = new Key("row1"); + Key k2 = new Key("row2"); + Key k3 = new Key("row3"); + + Collector collector = entrySum.collector(sc); + collector.accept(k1, new Value("01")); + collector.accept(k2, new Value("012345678")); + collector.accept(k3, new Value("012345679")); + + HashMap stats = new HashMap<>(); + collector.summarize(stats::put); + + HashMap expected = new HashMap<>(); + expected.put("key.min", 4L); + expected.put("key.max", 4L); + expected.put("key.sum", 12L); + + // Log2 Histogram for Key + expected.put("key.logHist.2", 3L); + + expected.put("row.min", 4L); + expected.put("row.max", 4L); + expected.put("row.sum", 12L); + + // Log2 Histogram for Row + expected.put("row.logHist.2", 3L); + + expected.put("family.min", 0L); + expected.put("family.max", 0L); + expected.put("family.sum", 0L); + + // Log2 Histogram for Family + expected.put("family.logHist.0", 3L); + + expected.put("qualifier.min", 0L); + expected.put("qualifier.max", 0L); + expected.put("qualifier.sum", 0L); + + // Log2 Histogram for Qualifier + expected.put("qualifier.logHist.0", 3L); + + expected.put("visibility.min", 0L); + expected.put("visibility.max", 0L); + expected.put("visibility.sum", 0L); + + // Log2 Histogram for Visibility + expected.put("visibility.logHist.0", 3L); + + expected.put("value.min", 2L); + expected.put("value.max", 9L); + expected.put("value.sum", 20L); + + // Log2 Histogram for Value + expected.put("value.logHist.1", 1L); + expected.put("value.logHist.3", 2L); + + expected.put("total", 3L); + + Assert.assertEquals(expected, stats); + } + + /* COMBINER TEST */ + + @Test + public void testCombine() { + SummarizerConfiguration sc = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build(); + EntryLengthSummarizer entrySum = new EntryLengthSummarizer(); + + Collector collector1 = entrySum.collector(sc); + collector1.accept(new Key("1","f1","q1"), new Value("v1")); + collector1.accept(new Key("1234","f1","q1"), new Value("v111")); + collector1.accept(new Key("12345678","f1","q1"), new Value("v111111")); + + HashMap stats1 = new HashMap<>(); + collector1.summarize(stats1::put); + + Collector collector2 = entrySum.collector(sc); + collector2.accept(new Key("5432","f11","q12"), new Value("2")); + collector2.accept(new Key("12","f11","q1234"), new Value("12")); + collector2.accept(new Key("12","f11","q11234567"), new Value("4444")); + + HashMap stats2 = new HashMap<>(); + collector2.summarize(stats2::put); + + Combiner combiner = entrySum.combiner(sc); + combiner.merge(stats1, stats2); + + HashMap expected = new HashMap<>(); + expected.put("key.min", 5L); + expected.put("key.max", 14L); + expected.put("key.sum", 59L); + + // Log2 Histogram for Key + expected.put("key.logHist.2", 1L); + expected.put("key.logHist.3", 3L); + expected.put("key.logHist.4", 2L); + + expected.put("row.min", 1L); + expected.put("row.max", 8L); + expected.put("row.sum", 21L); + + // Log2 Histogram for Row + expected.put("row.logHist.0", 1L); + expected.put("row.logHist.1", 2L); + expected.put("row.logHist.2", 2L); + expected.put("row.logHist.3", 1L); + + expected.put("family.min", 2L); + expected.put("family.max", 3L); + expected.put("family.sum", 15L); + + // Log2 Histogram for Family + expected.put("family.logHist.1", 3L); + expected.put("family.logHist.2", 3L); + + expected.put("qualifier.min", 2L); + expected.put("qualifier.max", 9L); + expected.put("qualifier.sum", 23L); + + // Log2 Histogram for Qualifier + expected.put("qualifier.logHist.1", 3L); + expected.put("qualifier.logHist.2", 2L); + expected.put("qualifier.logHist.3", 1L); + + expected.put("visibility.min", 0L); + expected.put("visibility.max", 0L); + expected.put("visibility.sum", 0L); + + // Log2 Histogram for Visibility + expected.put("visibility.logHist.0", 6L); + + expected.put("value.min", 1L); + expected.put("value.max", 7L); + expected.put("value.sum", 20L); + + // Log2 Histogram for Value + expected.put("value.logHist.0", 1L); + expected.put("value.logHist.1", 2L); + expected.put("value.logHist.2", 2L); + expected.put("value.logHist.3", 1L); + + expected.put("total", 6L); + + Assert.assertEquals(expected, stats1); + } + + @Test + public void testCombine2() { + SummarizerConfiguration sc = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build(); + EntryLengthSummarizer entrySum = new EntryLengthSummarizer(); + + Collector collector1 = entrySum.collector(sc); + collector1.accept(new Key("12345678901234567890","f12345","q123456"), new Value("value1234567890")); + + HashMap stats1 = new HashMap<>(); + collector1.summarize(stats1::put); + + Collector collector2 = entrySum.collector(sc); + collector2.accept(new Key("5432","f11","q12"), new Value("2")); + collector2.accept(new Key("12","f11","q1234"), new Value("12")); + collector2.accept(new Key("12","f11","q11234567"), new Value("4444")); + + HashMap stats2 = new HashMap<>(); + collector2.summarize(stats2::put); + + Combiner combiner = entrySum.combiner(sc); + combiner.merge(stats1, stats2); + + HashMap expected = new HashMap<>(); + expected.put("key.min", 10L); + expected.put("key.max", 33L); + expected.put("key.sum", 67L); + + // Log2 Histogram for Key + expected.put("key.logHist.3", 2L); + expected.put("key.logHist.4", 1L); + expected.put("key.logHist.5", 1L); + + expected.put("row.min", 2L); + expected.put("row.max", 20L); + expected.put("row.sum", 28L); + + // Log2 Histogram for Row + expected.put("row.logHist.1", 2L); + expected.put("row.logHist.2", 1L); + expected.put("row.logHist.4", 1L); + + expected.put("family.min", 3L); + expected.put("family.max", 6L); + expected.put("family.sum", 15L); + + // Log2 Histogram for Family + expected.put("family.logHist.2", 3L); + expected.put("family.logHist.3", 1L); + + expected.put("qualifier.min", 3L); + expected.put("qualifier.max", 9L); + expected.put("qualifier.sum", 24L); + + // Log2 Histogram for Qualifier + expected.put("qualifier.logHist.2", 2L); + expected.put("qualifier.logHist.3", 2L); + + expected.put("visibility.min", 0L); + expected.put("visibility.max", 0L); + expected.put("visibility.sum", 0L); + + // Log2 Histogram for Visibility + expected.put("visibility.logHist.0", 4L); + + expected.put("value.min", 1L); + expected.put("value.max", 15L); + expected.put("value.sum", 22L); + + // Log2 Histogram for Value + expected.put("value.logHist.0", 1L); + expected.put("value.logHist.1", 1L); + expected.put("value.logHist.2", 1L); + expected.put("value.logHist.4", 1L); + + expected.put("total", 4L); + + Assert.assertEquals(expected, stats1); + } + + @Test + public void testCombine3() { + SummarizerConfiguration sc = SummarizerConfiguration.builder(EntryLengthSummarizer.class).build(); + EntryLengthSummarizer entrySum = new EntryLengthSummarizer(); + + Collector collector1 = entrySum.collector(sc); + collector1.accept(new Key("r1","f1"), new Value("v1")); + + HashMap stats1 = new HashMap<>(); + collector1.summarize(stats1::put); + + Collector collector2 = entrySum.collector(sc); + collector2.accept(new Key("row1","family1","q1"), new Value("")); + + HashMap stats2 = new HashMap<>(); + collector2.summarize(stats2::put); + + Combiner combiner = entrySum.combiner(sc); + combiner.merge(stats1, stats2); + + HashMap expected = new HashMap<>(); + expected.put("key.min", 4L); + expected.put("key.max", 13L); + expected.put("key.sum", 17L); + + // Log2 Histogram for Key + expected.put("key.logHist.2", 1L); + expected.put("key.logHist.4", 1L); + + expected.put("row.min", 2L); + expected.put("row.max", 4L); + expected.put("row.sum", 6L); + + // Log2 Histogram for Row + expected.put("row.logHist.1", 1L); + expected.put("row.logHist.2", 1L); + + expected.put("family.min", 2L); + expected.put("family.max", 7L); + expected.put("family.sum", 9L); + + // Log2 Histogram for Family + expected.put("family.logHist.1", 1L); + expected.put("family.logHist.3", 1L); + + expected.put("qualifier.min", 0L); + expected.put("qualifier.max", 2L); + expected.put("qualifier.sum", 2L); + + // Log2 Histogram for Qualifier + expected.put("qualifier.logHist.0", 1L); + expected.put("qualifier.logHist.1", 1L); + + expected.put("visibility.min", 0L); + expected.put("visibility.max", 0L); + expected.put("visibility.sum", 0L); + + // Log2 Histogram for Visibility + expected.put("visibility.logHist.0", 2L); + + expected.put("value.min", 0L); + expected.put("value.max", 2L); + expected.put("value.sum", 2L); + + // Log2 Histogram for Value + expected.put("value.logHist.0", 1L); + expected.put("value.logHist.1", 1L); + + expected.put("total", 2L); + + Assert.assertEquals(expected, stats1); + } +} -- To stop receiving notification emails like this one, please contact ['"commits@accumulo.apache.org" '].