Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id A5652200C3F for ; Tue, 7 Mar 2017 23:10:02 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id A2668160B68; Tue, 7 Mar 2017 22:10:02 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id A0B06160B65 for ; Tue, 7 Mar 2017 23:10:01 +0100 (CET) Received: (qmail 28433 invoked by uid 500); 7 Mar 2017 22:10:00 -0000 Mailing-List: contact commits-help@accumulo.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@accumulo.apache.org Delivered-To: mailing list commits@accumulo.apache.org Received: (qmail 28407 invoked by uid 99); 7 Mar 2017 22:10:00 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 07 Mar 2017 22:10:00 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id B0698DFF39; Tue, 7 Mar 2017 22:10:00 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: kturner@apache.org To: commits@accumulo.apache.org Date: Tue, 07 Mar 2017 22:10:01 -0000 Message-Id: <4740a3c4fbb84b3ebb7cc3ef9595f9b1@git.apache.org> In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [2/3] accumulo git commit: Merge branch '1.7' into 1.8 archived-at: Tue, 07 Mar 2017 22:10:02 -0000 Merge branch '1.7' into 1.8 Conflicts: core/src/main/java/org/apache/accumulo/core/file/rfile/PrintInfo.java core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo Commit: 
http://git-wip-us.apache.org/repos/asf/accumulo/commit/7ca6332d Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/7ca6332d Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/7ca6332d Branch: refs/heads/master Commit: 7ca6332df2f0d26d15d03ed4a74893bc0593ffbc Parents: 4a87fc9 de80cf5 Author: Keith Turner Authored: Tue Mar 7 15:48:46 2017 -0500 Committer: Keith Turner Committed: Tue Mar 7 15:48:46 2017 -0500 ---------------------------------------------------------------------- .../accumulo/core/file/rfile/PrintInfo.java | 7 +- .../apache/accumulo/core/file/rfile/RFile.java | 18 ++++-- .../accumulo/core/util/LocalityGroupUtil.java | 35 ++++++++++ .../core/file/rfile/RFileMetricsTest.java | 68 ++++++++++++++++++-- 4 files changed, 116 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/accumulo/blob/7ca6332d/core/src/main/java/org/apache/accumulo/core/file/rfile/PrintInfo.java ---------------------------------------------------------------------- diff --cc core/src/main/java/org/apache/accumulo/core/file/rfile/PrintInfo.java index cf0d046,366e4a8..7d2a054 --- a/core/src/main/java/org/apache/accumulo/core/file/rfile/PrintInfo.java +++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/PrintInfo.java @@@ -28,11 -27,10 +27,12 @@@ import org.apache.accumulo.core.data.By import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.file.FileSKVIterator; import org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile; import org.apache.accumulo.core.file.rfile.RFile.Reader; + import org.apache.accumulo.core.util.LocalityGroupUtil; import org.apache.accumulo.start.spi.KeywordExecutable; +import org.apache.commons.math3.stat.descriptive.SummaryStatistics; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import 
org.apache.hadoop.fs.Path; @@@ -161,47 -116,24 +161,46 @@@ public class PrintInfo implements Keywo Map> localityGroupCF = null; - if (opts.histogram || opts.dump || opts.vis || opts.hash) { + if (opts.histogram || opts.dump || opts.vis || opts.hash || opts.keyStats) { localityGroupCF = iter.getLocalityGroupCF(); + FileSKVIterator dataIter; + if (opts.useSample) { + dataIter = iter.getSample(); + + if (dataIter == null) { + System.out.println("ERROR : This rfile has no sample data"); + return; + } + } else { + dataIter = iter; + } + + if (opts.keyStats) { + FileSKVIterator indexIter = iter.getIndex(); + while (indexIter.hasTop()) { + indexKeyStats.add(indexIter.getTopKey()); + indexIter.next(); + } + } + - for (Entry> cf : localityGroupCF.entrySet()) { - - dataIter.seek(new Range((Key) null, (Key) null), cf.getValue(), true); + for (String lgName : localityGroupCF.keySet()) { - LocalityGroupUtil.seek(iter, new Range(), lgName, localityGroupCF); - while (iter.hasTop()) { - Key key = iter.getTopKey(); - Value value = iter.getTopValue(); - if (opts.dump) ++ LocalityGroupUtil.seek(dataIter, new Range(), lgName, localityGroupCF); + while (dataIter.hasTop()) { + Key key = dataIter.getTopKey(); + Value value = dataIter.getTopValue(); + if (opts.dump) { System.out.println(key + " -> " + value); + if (System.out.checkError()) + return; + } if (opts.histogram) { - long size = key.getSize() + value.getSize(); - int bucket = (int) Math.log10(size); - countBuckets[bucket]++; - sizeBuckets[bucket] += size; - totalSize += size; + kvHistogram.add(key.getSize() + value.getSize()); } - iter.next(); + if (opts.keyStats) { + dataKeyStats.add(key); + } + dataIter.next(); } } } http://git-wip-us.apache.org/repos/asf/accumulo/blob/7ca6332d/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java ---------------------------------------------------------------------- diff --cc core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java index b11cf1a,bab2266..26343ba --- 
a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java +++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java @@@ -67,15 -62,15 +67,17 @@@ import org.apache.accumulo.core.iterato import org.apache.accumulo.core.iterators.system.InterruptibleIterator; import org.apache.accumulo.core.iterators.system.LocalityGroupIterator; import org.apache.accumulo.core.iterators.system.LocalityGroupIterator.LocalityGroup; +import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl; + import org.apache.accumulo.core.util.LocalityGroupUtil; import org.apache.accumulo.core.util.MutableByteSequence; import org.apache.commons.lang.mutable.MutableLong; -import org.apache.commons.math.stat.descriptive.SummaryStatistics; +import org.apache.commons.math3.stat.descriptive.SummaryStatistics; import org.apache.hadoop.io.Writable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.annotations.VisibleForTesting; + import com.google.common.base.Preconditions; public class RFile { @@@ -1288,9 -1015,15 +1290,14 @@@ @Override public void init(SortedKeyValueIterator source, Map options, IteratorEnvironment env) throws IOException { throw new UnsupportedOperationException(); -- } + /** + * @return map of locality group names to column families. The default locality group will have {@code null} for a name. RFile will only track up to + * {@value Writer#MAX_CF_IN_DLG} families for the default locality group. After this it will stop tracking. For the case where the default group has + * more than {@value Writer#MAX_CF_IN_DLG} families an empty list of families is returned. 
- * @see LocalityGroupUtil#seek(Reader, Range, String, Map) ++ * @see LocalityGroupUtil#seek(FileSKVIterator, Range, String, Map) + */ public Map> getLocalityGroupCF() { Map> cf = new HashMap<>(); http://git-wip-us.apache.org/repos/asf/accumulo/blob/7ca6332d/core/src/main/java/org/apache/accumulo/core/util/LocalityGroupUtil.java ---------------------------------------------------------------------- diff --cc core/src/main/java/org/apache/accumulo/core/util/LocalityGroupUtil.java index 2063855,a99fe01..0c658d5 --- a/core/src/main/java/org/apache/accumulo/core/util/LocalityGroupUtil.java +++ b/core/src/main/java/org/apache/accumulo/core/util/LocalityGroupUtil.java @@@ -36,7 -37,9 +37,10 @@@ import org.apache.accumulo.core.data.By import org.apache.accumulo.core.data.Column; import org.apache.accumulo.core.data.ColumnUpdate; import org.apache.accumulo.core.data.Mutation; + import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.thrift.TMutation; ++import org.apache.accumulo.core.file.FileSKVIterator; + import org.apache.accumulo.core.file.rfile.RFile.Reader; import org.apache.commons.lang.mutable.MutableLong; import org.apache.hadoop.io.Text; @@@ -296,4 -297,35 +300,35 @@@ public class LocalityGroupUtil } } + /** + * This method was created to help seek an rfile for a locality group obtained from {@link Reader#getLocalityGroupCF()}. That method can possibly return an empty + * list for the default locality group. When this happens the default locality group needs to be seeked differently. This method helps do that. + * + *

+ * For the default locality group, this method will seek using the families of all other locality groups non-inclusive. + * + * @see Reader#getLocalityGroupCF() + */ - public static void seek(Reader reader, Range range, String lgName, Map> localityGroupCF) throws IOException { ++ public static void seek(FileSKVIterator reader, Range range, String lgName, Map> localityGroupCF) throws IOException { + + Collection families; + boolean inclusive; + if (lgName == null) { + // this is the default locality group, create a set of all families not in the default group + Set nonDefaultFamilies = new HashSet<>(); + for (Entry> entry : localityGroupCF.entrySet()) { + if (entry.getKey() != null) { + nonDefaultFamilies.addAll(entry.getValue()); + } + } + + families = nonDefaultFamilies; + inclusive = false; + } else { + families = localityGroupCF.get(lgName); + inclusive = true; + } + + reader.seek(range, families, inclusive); + } } http://git-wip-us.apache.org/repos/asf/accumulo/blob/7ca6332d/core/src/test/java/org/apache/accumulo/core/file/rfile/RFileMetricsTest.java ---------------------------------------------------------------------- diff --cc core/src/test/java/org/apache/accumulo/core/file/rfile/RFileMetricsTest.java index b59e95e,7cb0ed6..cc5ca89 --- a/core/src/test/java/org/apache/accumulo/core/file/rfile/RFileMetricsTest.java +++ b/core/src/test/java/org/apache/accumulo/core/file/rfile/RFileMetricsTest.java @@@ -507,4 -508,63 +508,63 @@@ public class RFileMetricsTest trf.closeReader(); } + @Test + public void testManyFamiliesInDefaultLocGroup() throws IOException { + trf.openWriter(false, 1024); + + String fam1 = String.format("%06x", 9000); + String fam2 = String.format("%06x", 9001); + + Set lg1 = new HashSet<>(); + lg1.add(new ArrayByteSequence(fam1)); + lg1.add(new ArrayByteSequence(fam2)); + + trf.writer.startNewLocalityGroup("lg1", lg1); + + for (int row = 0; row < 1100; row++) { + String rs = String.format("%06x", row); + trf.writer.append(new Key(rs, fam1, "q4", "A", 42l), 
new Value("v".getBytes())); + trf.writer.append(new Key(rs, fam2, "q4", "A|B", 42l), new Value("v".getBytes())); + } + + trf.writer.startDefaultLocalityGroup(); + + String vis[] = new String[] {"A", "A&B", "A|C", "B&C", "Boo"}; + + int fam = 0; + for (int row = 0; row < 1000; row++) { + String rs = String.format("%06x", row); + for (int v = 0; v < 5; v++) { + String fs = String.format("%06x", fam++); + trf.writer.append(new Key(rs, fs, "q4", vis[v], 42l), new Value("v".getBytes())); + } + } + + trf.closeWriter(); + + trf.openReader(false); + + VisMetricsGatherer vmg = trf.gatherMetrics(); + + Map expected = new HashMap<>(); + Map expectedBlocks = new HashMap<>(); + for (String v : vis) { + expected.put(v, 1000l); - expectedBlocks.put(v, 67l); ++ expectedBlocks.put(v, 71l); + } + assertEquals(expected, vmg.metric.get(null).asMap()); + assertEquals(expectedBlocks, vmg.blocks.get(null).asMap()); + + expected.clear(); + expectedBlocks.clear(); + expected.put("A", 1100l); + expected.put("A|B", 1100l); + expectedBlocks.put("A", 32l); + expectedBlocks.put("A|B", 32l); + assertEquals(expected, vmg.metric.get("lg1").asMap()); + assertEquals(expectedBlocks, vmg.blocks.get("lg1").asMap()); + + assertEquals(2, vmg.metric.keySet().size()); + assertEquals(2, vmg.blocks.keySet().size()); + } }