accumulo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mwa...@apache.org
Subject [05/13] accumulo-website git commit: Jekyll build from master:7cc70b2
Date Mon, 22 May 2017 17:31:52 GMT
http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/7b2eb317/docs/unreleased/development/sampling.html
----------------------------------------------------------------------
diff --git a/docs/unreleased/development/sampling.html b/docs/unreleased/development/sampling.html
new file mode 100644
index 0000000..24dcdb8
--- /dev/null
+++ b/docs/unreleased/development/sampling.html
@@ -0,0 +1,400 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+<meta charset="utf-8">
+<meta http-equiv="X-UA-Compatible" content="IE=edge">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<link href="https://maxcdn.bootstrapcdn.com/bootswatch/3.3.7/paper/bootstrap.min.css" rel="stylesheet" integrity="sha384-awusxf8AUojygHf2+joICySzB780jVvQaVCAt1clU3QsyAitLGul28Qxb2r1e5g+" crossorigin="anonymous">
+<link href="//netdna.bootstrapcdn.com/font-awesome/4.0.3/css/font-awesome.css" rel="stylesheet">
+<link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/v/bs/jq-2.2.3/dt-1.10.12/datatables.min.css">
+<link href="/css/accumulo.css" rel="stylesheet" type="text/css">
+
+<title>Accumulo Documentation - Sampling</title>
+
+<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.4/jquery.min.js"></script>
+<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha384-Tc5IQib027qvyjSMfHjOMaLkfuWVxZxUPnCJA7l2mCWNIpG9mGCD8wGNIcPD7Txa" crossorigin="anonymous"></script>
+<script type="text/javascript" src="https://cdn.datatables.net/v/bs/jq-2.2.3/dt-1.10.12/datatables.min.js"></script>
+<script>
+  // show location of canonical site if not currently on the canonical site
+  $(function() {
+    var host = window.location.host;
+    if (typeof host !== 'undefined' && host !== 'accumulo.apache.org') {
+      $('#non-canonical').show();
+    }
+  });
+
+  $(function() {
+    // decorate section headers with anchors
+    return $("h2, h3, h4, h5, h6").each(function(i, el) {
+      var $el, icon, id;
+      $el = $(el);
+      id = $el.attr('id');
+      icon = '<i class="fa fa-link"></i>';
+      if (id) {
+        return $el.append($("<a />").addClass("header-link").attr("href", "#" + id).html(icon));
+      }
+    });
+  });
+  
+  // configure Google Analytics
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+  if (ga.hasOwnProperty('loaded') && ga.loaded === true) {
+    ga('create', 'UA-50934829-1', 'apache.org');
+    ga('send', 'pageview');
+  }
+</script>
+
+</head>
+<body style="padding-top: 100px">
+
+  <nav class="navbar navbar-default navbar-fixed-top">
+  <div class="container">
+    <div class="navbar-header">
+      <button type="button" class="navbar-toggle" data-toggle="collapse" data-target="#navbar-items">
+        <span class="sr-only">Toggle navigation</span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+      </button>
+      <a href="/"><img id="nav-logo" alt="Apache Accumulo" class="img-responsive" src="/images/accumulo-logo.png" width="200"
+        /></a>
+    </div>
+    <div class="collapse navbar-collapse" id="navbar-items">
+      <ul class="nav navbar-nav">
+        <li class="nav-link"><a href="/downloads">Download</a></li>
+        <li class="dropdown">
+          <a class="dropdown-toggle" data-toggle="dropdown" href="#">Releases<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            <li><a href="/release/accumulo-1.8.1/">1.8.1 (Latest)</a></li>
+            <li><a href="/release/accumulo-1.7.3/">1.7.3</a></li>
+            <li><a href="/release/accumulo-1.6.6/">1.6.6</a></li>
+            <li><a href="/release/">Archive</a></li>
+          </ul>
+        </li>
+        <li class="dropdown">
+          <a class="dropdown-toggle" data-toggle="dropdown" href="#">Documentation<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            <li><a href="/1.8/accumulo_user_manual.html">User Manual (1.8)</a></li>
+            <li><a href="/1.8/apidocs">Javadocs (1.8)</a></li>
+            <li><a href="/1.8/examples">Examples (1.8)</a></li>
+            <li><a href="/features">Features</a></li>
+            <li><a href="/glossary">Glossary</a></li>
+            <li><a href="/external-docs">External Docs</a></li>
+            <li><a href="/docs-archive/">Archive</a></li>
+          </ul>
+        </li>
+        <li class="dropdown">
+          <a class="dropdown-toggle" data-toggle="dropdown" href="#">Community<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            <li><a href="/get_involved">Get Involved</a></li>
+            <li><a href="/mailing_list">Mailing Lists</a></li>
+            <li><a href="/people">People</a></li>
+            <li><a href="/related-projects">Related Projects</a></li>
+            <li><a href="/contributor/">Contributor Guide</a></li>
+          </ul>
+        </li>
+      </ul>
+      <ul class="nav navbar-nav navbar-right">
+        <li class="dropdown">
+          <a class="dropdown-toggle" data-toggle="dropdown" href="#">Apache Software Foundation<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            <li><a href="https://www.apache.org">Apache Homepage <i class="fa fa-external-link"></i></a></li>
+            <li><a href="https://www.apache.org/licenses/LICENSE-2.0">License <i class="fa fa-external-link"></i></a></li>
+            <li><a href="https://www.apache.org/foundation/sponsorship">Sponsorship <i class="fa fa-external-link"></i></a></li>
+            <li><a href="https://www.apache.org/security">Security <i class="fa fa-external-link"></i></a></li>
+            <li><a href="https://www.apache.org/foundation/thanks">Thanks <i class="fa fa-external-link"></i></a></li>
+            <li><a href="https://www.apache.org/foundation/policies/conduct">Code of Conduct <i class="fa fa-external-link"></i></a></li>
+          </ul>
+        </li>
+      </ul>
+    </div>
+  </div>
+</nav>
+
+  <div class="container">
+    <div class="row">
+      <div class="col-md-12">
+
+        <div id="non-canonical" style="display: none; background-color: #F0E68C; padding-left: 1em;">
+          Visit the official site at: <a href="https://accumulo.apache.org">https://accumulo.apache.org</a>
+        </div>
+        <div id="content">
+          
+          <div class="alert alert-danger" role="alert">This documentation is for an unreleased version of Apache Accumulo that is currently under development! Check out the <a href="/docs-1.8/">documentation for the latest release</a>.</div>
+
+<div class="row">
+  <div class="col-md-3">
+    <div class="panel-group" id="accordion" role="tablist" aria-multiselectable="true">
+      <div class="panel panel-default">
+      
+      
+      
+        
+          
+        
+          
+        
+          
+            <div class="panel-heading" role="tab" id="headingOne">
+              <h4 class="panel-title">
+                <a role="button" data-toggle="collapse" data-parent="#accordion" href="#collapsegetting-started" aria-expanded="false" aria-controls="collapsegetting-started">
+                  Getting started
+                </a>
+              </h4>
+            </div>
+            <div id="collapsegetting-started" class="panel-collapse collapse" role="tabpanel" aria-labelledby="headingOne">
+              <div class="panel-body">
+                
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/design">Accumulo Design</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/clients">Accumulo Clients</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/shell">Accumulo Shell</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/table_design">Table Design</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/table_configuration">Table Configuration</a></div>
+                
+              </div>
+            </div>
+          
+        
+          
+        
+          
+        
+      
+        
+          
+        
+          
+            <div class="panel-heading" role="tab" id="headingOne">
+              <h4 class="panel-title">
+                <a role="button" data-toggle="collapse" data-parent="#accordion" href="#collapsedevelopment" aria-expanded="true" aria-controls="collapsedevelopment">
+                  Development
+                </a>
+              </h4>
+            </div>
+            <div id="collapsedevelopment" class="panel-collapse collapse in" role="tabpanel" aria-labelledby="headingOne">
+              <div class="panel-body">
+                
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/iterator_design">Iterator Design</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/iterator_testing">Iterator Testing</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/development_tools">Development Tools</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/sampling">Sampling</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/summaries">Summary Statistics</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/security">Security</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/high_speed_ingest">High-Speed Ingest</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/analytics">Analytics</a></div>
+                
+              </div>
+            </div>
+          
+        
+          
+        
+          
+        
+          
+        
+      
+        
+          
+            <div class="panel-heading" role="tab" id="headingOne">
+              <h4 class="panel-title">
+                <a role="button" data-toggle="collapse" data-parent="#accordion" href="#collapseadministration" aria-expanded="false" aria-controls="collapseadministration">
+                  Administration
+                </a>
+              </h4>
+            </div>
+            <div id="collapseadministration" class="panel-collapse collapse" role="tabpanel" aria-labelledby="headingOne">
+              <div class="panel-body">
+                
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/overview">Overview</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/configuration-management">Configuration Management</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/configuration-properties">Configuration Properties</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/kerberos">Kerberos</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/replication">Replication</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/fate">FATE</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/multivolume">Multi-Volume Installations</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/ssl">SSL</a></div>
+                
+              </div>
+            </div>
+          
+        
+          
+        
+          
+        
+          
+        
+          
+        
+      
+        
+          
+        
+          
+        
+          
+        
+          
+        
+          
+            <div class="panel-heading" role="tab" id="headingOne">
+              <h4 class="panel-title">
+                <a role="button" data-toggle="collapse" data-parent="#accordion" href="#collapsetroubleshooting" aria-expanded="false" aria-controls="collapsetroubleshooting">
+                  Troubleshooting
+                </a>
+              </h4>
+            </div>
+            <div id="collapsetroubleshooting" class="panel-collapse collapse" role="tabpanel" aria-labelledby="headingOne">
+              <div class="panel-body">
+                
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/troubleshooting/overview">Overview</a></div>
+                
+              </div>
+            </div>
+          
+        
+      
+      </div>
+    </div>
+  </div>
+  <div class="col-md-9">
+    
+    <p><a href="/docs/unreleased/">Accumulo unreleased docs</a> &nbsp;&gt;&gt;&nbsp; Development &nbsp;&gt;&gt;&nbsp; Sampling</p>
+    
+    
+    <h1>Sampling</h1>
+    
+    <h2 id="overview">Overview</h2>
+
+<p>Accumulo has the ability to generate and scan a per table set of sample data.
+This sample data is kept up to date as a table is mutated.  What key values are
+placed in the sample data is configurable per table.</p>
+
+<p>This feature can be used for query estimation and optimization.  For an example
+of estimation assume an Accumulo table is configured to generate a sample
+containing one millionth of a tables data.   If a query is executed against the
+sample and returns one thousand results, then the same query against all the
+data would probably return a billion results.  A nice property of having
+Accumulo generate the sample is that its always up to date.  So estimations
+will be accurate even when querying the most recently written data.</p>
+
+<p>An example of a query optimization is an iterator using sample data to get an
+estimate, and then making decisions based on the estimate.</p>
+
+<h2 id="configuring">Configuring</h2>
+
+<p>Inorder to use sampling, an Accumulo table must be configured with a class that
+implements <code class="highlighter-rouge">org.apache.accumulo.core.sample.Sampler</code> along with options for
+that class.  For guidance on implementing a Sampler see that interface’s
+javadoc.  Accumulo provides a few implementations out of the box.   For
+information on how to use the samplers that ship with Accumulo look in the
+package <code class="highlighter-rouge">org.apache.accumulo.core.sample</code> and consult the javadoc of the
+classes there.  See the <a href="https://github.com/apache/accumulo-examples/blob/master/docs/sample.md">sampling example</a> for examples of how to
+configure a Sampler on a table.</p>
+
+<p>Once a table is configured with a sampler all writes after that point will
+generate sample data.  For data written before sampling was configured sample
+data will not be present.  A compaction can be initiated that only compacts the
+files in the table that do not have sample data.   The example readme shows how
+to do this.</p>
+
+<p>If the sampling configuration of a table is changed, then Accumulo will start
+generating new sample data with the new configuration.   However old data will
+still have sample data generated with the previous configuration.  A selective
+compaction can also be issued in this case to regenerate the sample data.</p>
+
+<h2 id="scanning-sample-data">Scanning sample data</h2>
+
+<p>Inorder to scan sample data, use the <code class="highlighter-rouge">setSamplerConfiguration(...)</code>  method on
+<code class="highlighter-rouge">Scanner</code> or <code class="highlighter-rouge">BatchScanner</code>.  Please consult this methods javadocs for more
+information.</p>
+
+<p>Sample data can also be scanned from within an Accumulo <code class="highlighter-rouge">SortedKeyValueIterator</code>.
+To see how to do this, look at the example iterator referenced in the <a href="https://github.com/apache/accumulo-examples/blob/master/docs/sample.md">sampling example</a>.
+Also, consult the javadoc on <code class="highlighter-rouge">org.apache.accumulo.core.iterators.IteratorEnvironment.cloneWithSamplingEnabled()</code>.</p>
+
+<p>Map reduce jobs using the <code class="highlighter-rouge">AccumuloInputFormat</code> can also read sample data.  See
+the javadoc for the <code class="highlighter-rouge">setSamplerConfiguration()</code> method on
+<code class="highlighter-rouge">AccumuloInputFormat</code>.</p>
+
+<p>Scans over sample data will throw a <code class="highlighter-rouge">SampleNotPresentException</code> in the following cases :</p>
+
+<ol>
+  <li>sample data is not present,</li>
+  <li>sample data is present but was generated with multiple configurations</li>
+  <li>sample data is partially present</li>
+</ol>
+
+<p>So a scan over sample data can only succeed if all data written has sample data
+generated with the same configuration.</p>
+
+<h2 id="bulk-import">Bulk import</h2>
+
+<p>When generating rfiles to bulk import into Accumulo, those rfiles can contain
+sample data.  To use this feature, look at the javadoc on the
+<code class="highlighter-rouge">AccumuloFileOutputFormat.setSampler(...)</code> method.</p>
+
+
+  </div>
+</div>
+
+        </div>
+
+        
+<footer>
+
+  <p><a href="https://www.apache.org/foundation/contributing"><img src="https://www.apache.org/images/SupportApache-small.png" alt="Support the ASF" id="asf-logo" height="100" /></a></p>
+
+  <p>Copyright © 2011-2017 The Apache Software Foundation. Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.</p>
+
+</footer>
+
+
+      </div>
+    </div>
+  </div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/7b2eb317/docs/unreleased/development/security.html
----------------------------------------------------------------------
diff --git a/docs/unreleased/development/security.html b/docs/unreleased/development/security.html
new file mode 100644
index 0000000..1f61c21
--- /dev/null
+++ b/docs/unreleased/development/security.html
@@ -0,0 +1,497 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+<meta charset="utf-8">
+<meta http-equiv="X-UA-Compatible" content="IE=edge">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<link href="https://maxcdn.bootstrapcdn.com/bootswatch/3.3.7/paper/bootstrap.min.css" rel="stylesheet" integrity="sha384-awusxf8AUojygHf2+joICySzB780jVvQaVCAt1clU3QsyAitLGul28Qxb2r1e5g+" crossorigin="anonymous">
+<link href="//netdna.bootstrapcdn.com/font-awesome/4.0.3/css/font-awesome.css" rel="stylesheet">
+<link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/v/bs/jq-2.2.3/dt-1.10.12/datatables.min.css">
+<link href="/css/accumulo.css" rel="stylesheet" type="text/css">
+
+<title>Accumulo Documentation - Security</title>
+
+<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.4/jquery.min.js"></script>
+<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha384-Tc5IQib027qvyjSMfHjOMaLkfuWVxZxUPnCJA7l2mCWNIpG9mGCD8wGNIcPD7Txa" crossorigin="anonymous"></script>
+<script type="text/javascript" src="https://cdn.datatables.net/v/bs/jq-2.2.3/dt-1.10.12/datatables.min.js"></script>
+<script>
+  // show location of canonical site if not currently on the canonical site
+  $(function() {
+    var host = window.location.host;
+    if (typeof host !== 'undefined' && host !== 'accumulo.apache.org') {
+      $('#non-canonical').show();
+    }
+  });
+
+  $(function() {
+    // decorate section headers with anchors
+    return $("h2, h3, h4, h5, h6").each(function(i, el) {
+      var $el, icon, id;
+      $el = $(el);
+      id = $el.attr('id');
+      icon = '<i class="fa fa-link"></i>';
+      if (id) {
+        return $el.append($("<a />").addClass("header-link").attr("href", "#" + id).html(icon));
+      }
+    });
+  });
+  
+  // configure Google Analytics
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+  if (ga.hasOwnProperty('loaded') && ga.loaded === true) {
+    ga('create', 'UA-50934829-1', 'apache.org');
+    ga('send', 'pageview');
+  }
+</script>
+
+</head>
+<body style="padding-top: 100px">
+
+  <nav class="navbar navbar-default navbar-fixed-top">
+  <div class="container">
+    <div class="navbar-header">
+      <button type="button" class="navbar-toggle" data-toggle="collapse" data-target="#navbar-items">
+        <span class="sr-only">Toggle navigation</span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+      </button>
+      <a href="/"><img id="nav-logo" alt="Apache Accumulo" class="img-responsive" src="/images/accumulo-logo.png" width="200"
+        /></a>
+    </div>
+    <div class="collapse navbar-collapse" id="navbar-items">
+      <ul class="nav navbar-nav">
+        <li class="nav-link"><a href="/downloads">Download</a></li>
+        <li class="dropdown">
+          <a class="dropdown-toggle" data-toggle="dropdown" href="#">Releases<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            <li><a href="/release/accumulo-1.8.1/">1.8.1 (Latest)</a></li>
+            <li><a href="/release/accumulo-1.7.3/">1.7.3</a></li>
+            <li><a href="/release/accumulo-1.6.6/">1.6.6</a></li>
+            <li><a href="/release/">Archive</a></li>
+          </ul>
+        </li>
+        <li class="dropdown">
+          <a class="dropdown-toggle" data-toggle="dropdown" href="#">Documentation<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            <li><a href="/1.8/accumulo_user_manual.html">User Manual (1.8)</a></li>
+            <li><a href="/1.8/apidocs">Javadocs (1.8)</a></li>
+            <li><a href="/1.8/examples">Examples (1.8)</a></li>
+            <li><a href="/features">Features</a></li>
+            <li><a href="/glossary">Glossary</a></li>
+            <li><a href="/external-docs">External Docs</a></li>
+            <li><a href="/docs-archive/">Archive</a></li>
+          </ul>
+        </li>
+        <li class="dropdown">
+          <a class="dropdown-toggle" data-toggle="dropdown" href="#">Community<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            <li><a href="/get_involved">Get Involved</a></li>
+            <li><a href="/mailing_list">Mailing Lists</a></li>
+            <li><a href="/people">People</a></li>
+            <li><a href="/related-projects">Related Projects</a></li>
+            <li><a href="/contributor/">Contributor Guide</a></li>
+          </ul>
+        </li>
+      </ul>
+      <ul class="nav navbar-nav navbar-right">
+        <li class="dropdown">
+          <a class="dropdown-toggle" data-toggle="dropdown" href="#">Apache Software Foundation<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            <li><a href="https://www.apache.org">Apache Homepage <i class="fa fa-external-link"></i></a></li>
+            <li><a href="https://www.apache.org/licenses/LICENSE-2.0">License <i class="fa fa-external-link"></i></a></li>
+            <li><a href="https://www.apache.org/foundation/sponsorship">Sponsorship <i class="fa fa-external-link"></i></a></li>
+            <li><a href="https://www.apache.org/security">Security <i class="fa fa-external-link"></i></a></li>
+            <li><a href="https://www.apache.org/foundation/thanks">Thanks <i class="fa fa-external-link"></i></a></li>
+            <li><a href="https://www.apache.org/foundation/policies/conduct">Code of Conduct <i class="fa fa-external-link"></i></a></li>
+          </ul>
+        </li>
+      </ul>
+    </div>
+  </div>
+</nav>
+
+  <div class="container">
+    <div class="row">
+      <div class="col-md-12">
+
+        <div id="non-canonical" style="display: none; background-color: #F0E68C; padding-left: 1em;">
+          Visit the official site at: <a href="https://accumulo.apache.org">https://accumulo.apache.org</a>
+        </div>
+        <div id="content">
+          
+          <div class="alert alert-danger" role="alert">This documentation is for an unreleased version of Apache Accumulo that is currently under development! Check out the <a href="/docs-1.8/">documentation for the latest release</a>.</div>
+
+<div class="row">
+  <div class="col-md-3">
+    <div class="panel-group" id="accordion" role="tablist" aria-multiselectable="true">
+      <div class="panel panel-default">
+      
+      
+      
+        
+          
+        
+          
+        
+          
+            <div class="panel-heading" role="tab" id="headingOne">
+              <h4 class="panel-title">
+                <a role="button" data-toggle="collapse" data-parent="#accordion" href="#collapsegetting-started" aria-expanded="false" aria-controls="collapsegetting-started">
+                  Getting started
+                </a>
+              </h4>
+            </div>
+            <div id="collapsegetting-started" class="panel-collapse collapse" role="tabpanel" aria-labelledby="headingOne">
+              <div class="panel-body">
+                
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/design">Accumulo Design</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/clients">Accumulo Clients</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/shell">Accumulo Shell</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/table_design">Table Design</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/table_configuration">Table Configuration</a></div>
+                
+              </div>
+            </div>
+          
+        
+          
+        
+          
+        
+      
+        
+          
+        
+          
+            <div class="panel-heading" role="tab" id="headingOne">
+              <h4 class="panel-title">
+                <a role="button" data-toggle="collapse" data-parent="#accordion" href="#collapsedevelopment" aria-expanded="true" aria-controls="collapsedevelopment">
+                  Development
+                </a>
+              </h4>
+            </div>
+            <div id="collapsedevelopment" class="panel-collapse collapse in" role="tabpanel" aria-labelledby="headingOne">
+              <div class="panel-body">
+                
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/iterator_design">Iterator Design</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/iterator_testing">Iterator Testing</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/development_tools">Development Tools</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/sampling">Sampling</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/summaries">Summary Statistics</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/security">Security</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/high_speed_ingest">High-Speed Ingest</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/analytics">Analytics</a></div>
+                
+              </div>
+            </div>
+          
+        
+          
+        
+          
+        
+          
+        
+      
+        
+          
+            <div class="panel-heading" role="tab" id="headingOne">
+              <h4 class="panel-title">
+                <a role="button" data-toggle="collapse" data-parent="#accordion" href="#collapseadministration" aria-expanded="false" aria-controls="collapseadministration">
+                  Administration
+                </a>
+              </h4>
+            </div>
+            <div id="collapseadministration" class="panel-collapse collapse" role="tabpanel" aria-labelledby="headingOne">
+              <div class="panel-body">
+                
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/overview">Overview</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/configuration-management">Configuration Management</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/configuration-properties">Configuration Properties</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/kerberos">Kerberos</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/replication">Replication</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/fate">FATE</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/multivolume">Multi-Volume Installations</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/ssl">SSL</a></div>
+                
+              </div>
+            </div>
+          
+        
+          
+        
+          
+        
+          
+        
+          
+        
+      
+        
+          
+        
+          
+        
+          
+        
+          
+        
+          
+            <div class="panel-heading" role="tab" id="headingOne">
+              <h4 class="panel-title">
+                <a role="button" data-toggle="collapse" data-parent="#accordion" href="#collapsetroubleshooting" aria-expanded="false" aria-controls="collapsetroubleshooting">
+                  Troubleshooting
+                </a>
+              </h4>
+            </div>
+            <div id="collapsetroubleshooting" class="panel-collapse collapse" role="tabpanel" aria-labelledby="headingOne">
+              <div class="panel-body">
+                
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/troubleshooting/overview">Overview</a></div>
+                
+              </div>
+            </div>
+          
+        
+      
+      </div>
+    </div>
+  </div>
+  <div class="col-md-9">
+    
+    <p><a href="/docs/unreleased/">Accumulo unreleased docs</a> &nbsp;&gt;&gt;&nbsp; Development &nbsp;&gt;&gt;&nbsp; Security</p>
+    
+    
+    <h1>Security</h1>
+    
+    <p>Accumulo extends the BigTable data model to implement a security mechanism
+known as cell-level security. Every key-value pair has its own security label, stored
+under the column visibility element of the key, which is used to determine whether
+a given user meets the security requirements to read the value. This enables data of
+various security levels to be stored within the same row, and users of varying
+degrees of access to query the same table, while preserving data confidentiality.</p>
+
+<h2 id="security-label-expressions">Security Label Expressions</h2>
+
+<p>When mutations are applied, users can specify a security label for each value. This is
+done as the Mutation is created by passing a ColumnVisibility object to the put()
+method:</p>
+
+<div class="language-java highlighter-rouge"><pre class="highlight"><code><span class="n">Text</span> <span class="n">rowID</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Text</span><span class="o">(</span><span class="s">"row1"</span><span class="o">);</span>
+<span class="n">Text</span> <span class="n">colFam</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Text</span><span class="o">(</span><span class="s">"myColFam"</span><span class="o">);</span>
+<span class="n">Text</span> <span class="n">colQual</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Text</span><span class="o">(</span><span class="s">"myColQual"</span><span class="o">);</span>
+<span class="n">ColumnVisibility</span> <span class="n">colVis</span> <span class="o">=</span> <span class="k">new</span> <span class="n">ColumnVisibility</span><span class="o">(</span><span class="s">"public"</span><span class="o">);</span>
+<span class="kt">long</span> <span class="n">timestamp</span> <span class="o">=</span> <span class="n">System</span><span class="o">.</span><span class="na">currentTimeMillis</span><span class="o">();</span>
+
+<span class="n">Value</span> <span class="n">value</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Value</span><span class="o">(</span><span class="s">"myValue"</span><span class="o">);</span>
+
+<span class="n">Mutation</span> <span class="n">mutation</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Mutation</span><span class="o">(</span><span class="n">rowID</span><span class="o">);</span>
+<span class="n">mutation</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">colFam</span><span class="o">,</span> <span class="n">colQual</span><span class="o">,</span> <span class="n">colVis</span><span class="o">,</span> <span class="n">timestamp</span><span class="o">,</span> <span class="n">value</span><span class="o">);</span>
+</code></pre>
+</div>
+
+<h2 id="security-label-expression-syntax">Security Label Expression Syntax</h2>
+
+<p>Security labels consist of a set of user-defined tokens that are required to read the
+value the label is associated with. The set of tokens required can be specified using
+syntax that supports logical AND <code class="highlighter-rouge">&amp;</code> and OR <code class="highlighter-rouge">|</code> combinations of terms, as
+well as nesting groups <code class="highlighter-rouge">()</code> of terms together.</p>
+
+<p>Each term is comprised of one to many alpha-numeric characters, hyphens, underscores or
+periods. Optionally, each term may be wrapped in quotation marks
+which removes the restriction on valid characters. In quoted terms, quotation marks
+and backslash characters can be used as characters in the term by escaping them
+with a backslash.</p>
+
+<p>For example, suppose within our organization we want to label our data values with
+security labels defined in terms of user roles. We might have tokens such as:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>admin
+audit
+system
+</code></pre>
+</div>
+
+<p>These can be specified alone or combined using logical operators:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>// Users must have admin privileges
+admin
+
+// Users must have admin and audit privileges
+admin&amp;audit
+
+// Users with either admin or audit privileges
+admin|audit
+
+// Users must have audit and one or both of admin or system
+(admin|system)&amp;audit
+</code></pre>
+</div>
+
+<p>When both <code class="highlighter-rouge">|</code> and <code class="highlighter-rouge">&amp;</code> operators are used, parentheses must be used to specify
+precedence of the operators.</p>
+
+<h2 id="authorization">Authorization</h2>
+
+<p>When clients attempt to read data from Accumulo, any security labels present are
+examined against the set of authorizations passed by the client code when the
+Scanner or BatchScanner are created. If the authorizations are determined to be
+insufficient to satisfy the security label, the value is suppressed from the set of
+results sent back to the client.</p>
+
+<p>Authorizations are specified as a comma-separated list of tokens the user possesses:</p>
+
+<div class="language-java highlighter-rouge"><pre class="highlight"><code><span class="c1">// user possesses both admin and system level access</span>
+<span class="n">Authorization</span> <span class="n">auths</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Authorization</span><span class="o">(</span><span class="s">"admin"</span><span class="o">,</span><span class="s">"system"</span><span class="o">);</span>
+
+<span class="n">Scanner</span> <span class="n">s</span> <span class="o">=</span> <span class="n">connector</span><span class="o">.</span><span class="na">createScanner</span><span class="o">(</span><span class="s">"table"</span><span class="o">,</span> <span class="n">auths</span><span class="o">);</span>
+</code></pre>
+</div>
+
+<h2 id="user-authorizations">User Authorizations</h2>
+
+<p>Each Accumulo user has a set of associated security labels. To manipulate
+these in the shell while using the default authorizor, use the setuaths and getauths commands.
+These may also be modified for the default authorizor using the java security operations API.</p>
+
+<p>When a user creates a scanner a set of Authorizations is passed. If the
+authorizations passed to the scanner are not a subset of the users
+authorizations, then an exception will be thrown.</p>
+
+<p>To prevent users from writing data they can not read, add the visibility
+constraint to a table. Use the -evc option in the createtable shell command to
+enable this constraint. For existing tables use the following shell command to
+enable the visibility constraint. Ensure the constraint number does not
+conflict with any existing constraints.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>config -t table -s table.constraint.1=org.apache.accumulo.core.security.VisibilityConstraint
+</code></pre>
+</div>
+
+<p>Any user with the alter table permission can add or remove this constraint.
+This constraint is not applied to bulk imported data, if this a concern then
+disable the bulk import permission.</p>
+
+<h2 id="pluggable-security">Pluggable Security</h2>
+
+<p>New in 1.5 of Accumulo is a pluggable security mechanism. It can be broken into three actions –
+authentication, authorization, and permission handling. By default all of these are handled in
+Zookeeper, which is how things were handled in Accumulo 1.4 and before. It is worth noting at this
+point, that it is a new feature in 1.5 and may be adjusted in future releases without the standard
+deprecation cycle.</p>
+
+<p>Authentication simply handles the ability for a user to verify their integrity. A combination of
+principal and authentication token are used to verify a user is who they say they are. An
+authentication token should be constructed, either directly through its constructor, but it is
+advised to use the <code class="highlighter-rouge">init(Property)</code> method to populate an authentication token. It is expected that a
+user knows what the appropriate token to use for their system is. The default token is
+<code class="highlighter-rouge">PasswordToken</code>.</p>
+
+<p>Once a user is authenticated by the Authenticator, the user has access to the other actions within
+Accumulo. All actions in Accumulo are ACLed, and this ACL check is handled by the Permission
+Handler. This is what manages all of the permissions, which are divided in system and per table
+level. From there, if a user is doing an action which requires authorizations, the Authorizor is
+queried to determine what authorizations the user has.</p>
+
+<p>This setup allows a variety of different mechanisms to be used for handling different aspects of
+Accumulo’s security. A system like Kerberos can be used for authentication, then a system like LDAP
+could be used to determine if a user has a specific permission, and then it may default back to the
+default ZookeeperAuthorizor to determine what Authorizations a user is ultimately allowed to use.
+This is a pluggable system so custom components can be created depending on your need.</p>
+
+<h2 id="secure-authorizations-handling">Secure Authorizations Handling</h2>
+
+<p>For applications serving many users, it is not expected that an Accumulo user
+will be created for each application user. In this case an Accumulo user with
+all authorizations needed by any of the applications users must be created. To
+service queries, the application should create a scanner with the application
+user’s authorizations. These authorizations could be obtained from a trusted 3rd
+party.</p>
+
+<p>Often production systems will integrate with Public-Key Infrastructure (PKI) and
+designate client code within the query layer to negotiate with PKI servers in order
+to authenticate users and retrieve their authorization tokens (credentials). This
+requires users to specify only the information necessary to authenticate themselves
+to the system. Once user identity is established, their credentials can be accessed by
+the client code and passed to Accumulo outside of the reach of the user.</p>
+
+<h2 id="query-services-layer">Query Services Layer</h2>
+
+<p>Since the primary method of interaction with Accumulo is through the Java API,
+production environments often call for the implementation of a Query layer. This
+can be done using web services in containers such as Apache Tomcat, but is not a
+requirement. The Query Services Layer provides a mechanism for providing a
+platform on which user facing applications can be built. This allows the application
+designers to isolate potentially complex query logic, and enables a convenient point
+at which to perform essential security functions.</p>
+
+<p>Several production environments choose to implement authentication at this layer,
+where users identifiers are used to retrieve their access credentials which are then
+cached within the query layer and presented to Accumulo through the
+Authorizations mechanism.</p>
+
+<p>Typically, the query services layer sits between Accumulo and user workstations.</p>
+
+  </div>
+</div>
+
+        </div>
+
+        
+<footer>
+
+  <p><a href="https://www.apache.org/foundation/contributing"><img src="https://www.apache.org/images/SupportApache-small.png" alt="Support the ASF" id="asf-logo" height="100" /></a></p>
+
+  <p>Copyright © 2011-2017 The Apache Software Foundation. Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.</p>
+
+</footer>
+
+
+      </div>
+    </div>
+  </div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/7b2eb317/docs/unreleased/development/summaries.html
----------------------------------------------------------------------
diff --git a/docs/unreleased/development/summaries.html b/docs/unreleased/development/summaries.html
new file mode 100644
index 0000000..d8adda8
--- /dev/null
+++ b/docs/unreleased/development/summaries.html
@@ -0,0 +1,554 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+<meta charset="utf-8">
+<meta http-equiv="X-UA-Compatible" content="IE=edge">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<link href="https://maxcdn.bootstrapcdn.com/bootswatch/3.3.7/paper/bootstrap.min.css" rel="stylesheet" integrity="sha384-awusxf8AUojygHf2+joICySzB780jVvQaVCAt1clU3QsyAitLGul28Qxb2r1e5g+" crossorigin="anonymous">
+<link href="//netdna.bootstrapcdn.com/font-awesome/4.0.3/css/font-awesome.css" rel="stylesheet">
+<link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/v/bs/jq-2.2.3/dt-1.10.12/datatables.min.css">
+<link href="/css/accumulo.css" rel="stylesheet" type="text/css">
+
+<title>Accumulo Documentation - Summary Statistics</title>
+
+<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.4/jquery.min.js"></script>
+<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha384-Tc5IQib027qvyjSMfHjOMaLkfuWVxZxUPnCJA7l2mCWNIpG9mGCD8wGNIcPD7Txa" crossorigin="anonymous"></script>
+<script type="text/javascript" src="https://cdn.datatables.net/v/bs/jq-2.2.3/dt-1.10.12/datatables.min.js"></script>
+<script>
+  // show location of canonical site if not currently on the canonical site
+  $(function() {
+    var host = window.location.host;
+    if (typeof host !== 'undefined' && host !== 'accumulo.apache.org') {
+      $('#non-canonical').show();
+    }
+  });
+
+  $(function() {
+    // decorate section headers with anchors
+    return $("h2, h3, h4, h5, h6").each(function(i, el) {
+      var $el, icon, id;
+      $el = $(el);
+      id = $el.attr('id');
+      icon = '<i class="fa fa-link"></i>';
+      if (id) {
+        return $el.append($("<a />").addClass("header-link").attr("href", "#" + id).html(icon));
+      }
+    });
+  });
+  
+  // configure Google Analytics
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+  if (ga.hasOwnProperty('loaded') && ga.loaded === true) {
+    ga('create', 'UA-50934829-1', 'apache.org');
+    ga('send', 'pageview');
+  }
+</script>
+
+</head>
+<body style="padding-top: 100px">
+
+  <nav class="navbar navbar-default navbar-fixed-top">
+  <div class="container">
+    <div class="navbar-header">
+      <button type="button" class="navbar-toggle" data-toggle="collapse" data-target="#navbar-items">
+        <span class="sr-only">Toggle navigation</span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+      </button>
+      <a href="/"><img id="nav-logo" alt="Apache Accumulo" class="img-responsive" src="/images/accumulo-logo.png" width="200"
+        /></a>
+    </div>
+    <div class="collapse navbar-collapse" id="navbar-items">
+      <ul class="nav navbar-nav">
+        <li class="nav-link"><a href="/downloads">Download</a></li>
+        <li class="dropdown">
+          <a class="dropdown-toggle" data-toggle="dropdown" href="#">Releases<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            <li><a href="/release/accumulo-1.8.1/">1.8.1 (Latest)</a></li>
+            <li><a href="/release/accumulo-1.7.3/">1.7.3</a></li>
+            <li><a href="/release/accumulo-1.6.6/">1.6.6</a></li>
+            <li><a href="/release/">Archive</a></li>
+          </ul>
+        </li>
+        <li class="dropdown">
+          <a class="dropdown-toggle" data-toggle="dropdown" href="#">Documentation<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            <li><a href="/1.8/accumulo_user_manual.html">User Manual (1.8)</a></li>
+            <li><a href="/1.8/apidocs">Javadocs (1.8)</a></li>
+            <li><a href="/1.8/examples">Examples (1.8)</a></li>
+            <li><a href="/features">Features</a></li>
+            <li><a href="/glossary">Glossary</a></li>
+            <li><a href="/external-docs">External Docs</a></li>
+            <li><a href="/docs-archive/">Archive</a></li>
+          </ul>
+        </li>
+        <li class="dropdown">
+          <a class="dropdown-toggle" data-toggle="dropdown" href="#">Community<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            <li><a href="/get_involved">Get Involved</a></li>
+            <li><a href="/mailing_list">Mailing Lists</a></li>
+            <li><a href="/people">People</a></li>
+            <li><a href="/related-projects">Related Projects</a></li>
+            <li><a href="/contributor/">Contributor Guide</a></li>
+          </ul>
+        </li>
+      </ul>
+      <ul class="nav navbar-nav navbar-right">
+        <li class="dropdown">
+          <a class="dropdown-toggle" data-toggle="dropdown" href="#">Apache Software Foundation<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            <li><a href="https://www.apache.org">Apache Homepage <i class="fa fa-external-link"></i></a></li>
+            <li><a href="https://www.apache.org/licenses/LICENSE-2.0">License <i class="fa fa-external-link"></i></a></li>
+            <li><a href="https://www.apache.org/foundation/sponsorship">Sponsorship <i class="fa fa-external-link"></i></a></li>
+            <li><a href="https://www.apache.org/security">Security <i class="fa fa-external-link"></i></a></li>
+            <li><a href="https://www.apache.org/foundation/thanks">Thanks <i class="fa fa-external-link"></i></a></li>
+            <li><a href="https://www.apache.org/foundation/policies/conduct">Code of Conduct <i class="fa fa-external-link"></i></a></li>
+          </ul>
+        </li>
+      </ul>
+    </div>
+  </div>
+</nav>
+
+  <div class="container">
+    <div class="row">
+      <div class="col-md-12">
+
+        <div id="non-canonical" style="display: none; background-color: #F0E68C; padding-left: 1em;">
+          Visit the official site at: <a href="https://accumulo.apache.org">https://accumulo.apache.org</a>
+        </div>
+        <div id="content">
+          
+          <div class="alert alert-danger" role="alert">This documentation is for an unreleased version of Apache Accumulo that is currently under development! Check out the <a href="/docs-1.8/">documentation for the latest release</a>.</div>
+
+<div class="row">
+  <div class="col-md-3">
+    <div class="panel-group" id="accordion" role="tablist" aria-multiselectable="true">
+      <div class="panel panel-default">
+      
+      
+      
+        
+          
+        
+          
+        
+          
+            <div class="panel-heading" role="tab" id="headingOne">
+              <h4 class="panel-title">
+                <a role="button" data-toggle="collapse" data-parent="#accordion" href="#collapsegetting-started" aria-expanded="false" aria-controls="collapsegetting-started">
+                  Getting started
+                </a>
+              </h4>
+            </div>
+            <div id="collapsegetting-started" class="panel-collapse collapse" role="tabpanel" aria-labelledby="headingOne">
+              <div class="panel-body">
+                
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/design">Accumulo Design</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/clients">Accumulo Clients</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/shell">Accumulo Shell</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/table_design">Table Design</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/table_configuration">Table Configuration</a></div>
+                
+              </div>
+            </div>
+          
+        
+          
+        
+          
+        
+      
+        
+          
+        
+          
+            <div class="panel-heading" role="tab" id="headingOne">
+              <h4 class="panel-title">
+                <a role="button" data-toggle="collapse" data-parent="#accordion" href="#collapsedevelopment" aria-expanded="true" aria-controls="collapsedevelopment">
+                  Development
+                </a>
+              </h4>
+            </div>
+            <div id="collapsedevelopment" class="panel-collapse collapse in" role="tabpanel" aria-labelledby="headingOne">
+              <div class="panel-body">
+                
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/iterator_design">Iterator Design</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/iterator_testing">Iterator Testing</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/development_tools">Development Tools</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/sampling">Sampling</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/summaries">Summary Statistics</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/security">Security</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/high_speed_ingest">High-Speed Ingest</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/analytics">Analytics</a></div>
+                
+              </div>
+            </div>
+          
+        
+          
+        
+          
+        
+          
+        
+      
+        
+          
+            <div class="panel-heading" role="tab" id="headingOne">
+              <h4 class="panel-title">
+                <a role="button" data-toggle="collapse" data-parent="#accordion" href="#collapseadministration" aria-expanded="false" aria-controls="collapseadministration">
+                  Administration
+                </a>
+              </h4>
+            </div>
+            <div id="collapseadministration" class="panel-collapse collapse" role="tabpanel" aria-labelledby="headingOne">
+              <div class="panel-body">
+                
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/overview">Overview</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/configuration-management">Configuration Management</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/configuration-properties">Configuration Properties</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/kerberos">Kerberos</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/replication">Replication</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/fate">FATE</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/multivolume">Multi-Volume Installations</a></div>
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/ssl">SSL</a></div>
+                
+              </div>
+            </div>
+          
+        
+          
+        
+          
+        
+          
+        
+          
+        
+      
+        
+          
+        
+          
+        
+          
+        
+          
+        
+          
+            <div class="panel-heading" role="tab" id="headingOne">
+              <h4 class="panel-title">
+                <a role="button" data-toggle="collapse" data-parent="#accordion" href="#collapsetroubleshooting" aria-expanded="false" aria-controls="collapsetroubleshooting">
+                  Troubleshooting
+                </a>
+              </h4>
+            </div>
+            <div id="collapsetroubleshooting" class="panel-collapse collapse" role="tabpanel" aria-labelledby="headingOne">
+              <div class="panel-body">
+                
+                
+                <div class="row doc-sidebar-link"><a href="/docs/unreleased/troubleshooting/overview">Overview</a></div>
+                
+              </div>
+            </div>
+          
+        
+      
+      </div>
+    </div>
+  </div>
+  <div class="col-md-9">
+    
+    <p><a href="/docs/unreleased/">Accumulo unreleased docs</a> &nbsp;&gt;&gt;&nbsp; Development &nbsp;&gt;&gt;&nbsp; Summary Statistics</p>
+    
+    
+    <h1>Summary Statistics</h1>
+    
+    <h2 id="overview">Overview</h2>
+
+<p>Accumulo has the ability to generate summary statistics about data in a table
+using user defined functions.  Currently these statistics are only generated for
+data written to files.  Data recently written to Accumulo that is still in
+memory will not contribute to summary statistics.</p>
+
+<p>This feature can be used to inform a user about what data is in their table.
+Summary statistics can also be used by compaction strategies to make decisions
+about which files to compact.</p>
+
+<p>Summary data is stored in each file Accumulo produces.  Accumulo can gather
+summary information from across a cluster merging it along the way.  In order
+for this to be fast the, summary information should fit in cache.  There is a
+dedicated cache for summary data on each tserver with a configurable size.  In
+order for summary data to fit in cache, it should probably be small.</p>
+
+<p>For information on writing a custom summarizer see the javadoc for
+<code class="highlighter-rouge">org.apache.accumulo.core.client.summary.Summarizer</code>.  The package
+<code class="highlighter-rouge">org.apache.accumulo.core.client.summary.summarizers</code> contains summarizer
+implementations that ship with Accumulo and can be configured for use.</p>
+
+<h2 id="inaccuracies">Inaccuracies</h2>
+
+<p>Summary data can be inaccurate when files are missing summary data or when
+files have extra summary data. Files can contain data outside of a tablets
+boundaries. This can happen as result of bulk imported files and tablet splits.
+When this happens, those files could contain extra summary information.
+Accumulo offsets this some by storing summary information for multiple row
+ranges per a file.  However, the ranges are not granular enough to completely
+offset extra data.</p>
+
+<p>Any source of inaccuracies is reported when summary information is requested.
+In the shell examples below this can be seen on the <code class="highlighter-rouge">File Statistics</code> line.
+For files missing summary information, the compact command in the shell has a
+<code class="highlighter-rouge">--sf-no-summary</code> option.  This options compacts files that do not have the
+summary information configured for the table.  The compact command also has the
+<code class="highlighter-rouge">--sf-extra-summary</code> option which will compact files with extra summary
+information.</p>
+
+<h2 id="configuring">Configuring</h2>
+
+<p>The following tablet server and table properties configure summarization.</p>
+
+<ul>
+  <li><a href="/docs/unreleased/administration/configuration-properties#tserver_cache_summary_size">tserver.cache.summary.size</a></li>
+  <li><a href="/docs/unreleased/administration/configuration-properties#tserver_summary_partition_threads">tserver.summary.partition.threads</a></li>
+  <li><a href="/docs/unreleased/administration/configuration-properties#tserver_summary_remote_threads">tserver.summary.remote.threads</a></li>
+  <li><a href="/docs/unreleased/administration/configuration-properties#tserver_summary_retreival_threads">tserver.summary.retrieval.threads</a></li>
+  <li><a href="/docs/unreleased/administration/configuration-properties#table_summarizer_prefix">table.summarizer.*</a></li>
+  <li><a href="/docs/unreleased/administration/configuration-properties#table_file_summary_maxSize">table.file.summary.maxSize</a></li>
+</ul>
+
+<h2 id="permissions">Permissions</h2>
+
+<p>Because summary data may be derived from sensitive data, requesting summary data
+requires a special permission.  User must have the table permission
+<code class="highlighter-rouge">GET_SUMMARIES</code> in order to retrieve summary data.</p>
+
+<h2 id="bulk-import">Bulk import</h2>
+
+<p>When generating rfiles to bulk import into Accumulo, those rfiles can contain
+summary data.  To use this feature, look at the javadoc on the
+<code class="highlighter-rouge">AccumuloFileOutputFormat.setSummarizers(...)</code> method.  Also,
+<code class="highlighter-rouge">org.apache.accumulo.core.client.rfile.RFile</code> has options for creating RFiles
+with embedded summary data.</p>
+
+<h2 id="examples">Examples</h2>
+
+<p>This example walks through using summarizers in the Accumulo shell.  Below a
+table is created and some data is inserted to summarize.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>root@uno&gt; createtable summary_test
+root@uno summary_test&gt; setauths -u root -s PI,GEO,TIME
+root@uno summary_test&gt; insert 3b503bd name last Doe
+root@uno summary_test&gt; insert 3b503bd name first John
+root@uno summary_test&gt; insert 3b503bd contact address "123 Park Ave, NY, NY" -l PI&amp;GEO
+root@uno summary_test&gt; insert 3b503bd date birth "1/11/1942" -l PI&amp;TIME
+root@uno summary_test&gt; insert 3b503bd date married "5/11/1962" -l PI&amp;TIME
+root@uno summary_test&gt; insert 3b503bd contact home_phone 1-123-456-7890 -l PI
+root@uno summary_test&gt; insert d5d18dd contact address "50 Lake Shore Dr, Chicago, IL" -l PI&amp;GEO
+root@uno summary_test&gt; insert d5d18dd name first Jane
+root@uno summary_test&gt; insert d5d18dd name last Doe
+root@uno summary_test&gt; insert d5d18dd date birth 8/15/1969 -l PI&amp;TIME
+root@uno summary_test&gt; scan -s PI,GEO,TIME
+3b503bd contact:address [PI&amp;GEO]    123 Park Ave, NY, NY
+3b503bd contact:home_phone [PI]    1-123-456-7890
+3b503bd date:birth [PI&amp;TIME]    1/11/1942
+3b503bd date:married [PI&amp;TIME]    5/11/1962
+3b503bd name:first []    John
+3b503bd name:last []    Doe
+d5d18dd contact:address [PI&amp;GEO]    50 Lake Shore Dr, Chicago, IL
+d5d18dd date:birth [PI&amp;TIME]    8/15/1969
+d5d18dd name:first []    Jane
+d5d18dd name:last []    Doe
+</code></pre>
+</div>
+
+<p>After inserting the data, summaries are requested below.  No summaries are returned.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>root@uno summary_test&gt; summaries
+</code></pre>
+</div>
+
+<p>The visibility summarizer is configured below and the table is flushed.
+Flushing the table creates a file creating summary data in the process. The
+summary data returned counts how many times each column visibility occurred.
+The statistics with a <code class="highlighter-rouge">c:</code> prefix are visibilities.  The others are generic
+statistics created by the CountingSummarizer that VisibilitySummarizer extends.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>root@uno summary_test&gt; config -t summary_test -s table.summarizer.vis=org.apache.accumulo.core.client.summary.summarizers.VisibilitySummarizer
+root@uno summary_test&gt; summaries
+root@uno summary_test&gt; flush -w
+2017-02-24 19:54:46,090 [shell.Shell] INFO : Flush of table summary_test completed.
+root@uno summary_test&gt; summaries
+Summarizer         : org.apache.accumulo.core.client.summary.summarizers.VisibilitySummarizer vis {}
+File Statistics    : [total:1, missing:0, extra:0, large:0]
+Summary Statistics : 
+   c:                                                           = 4
+   c:PI                                                         = 1
+   c:PI&amp;GEO                                                     = 2
+   c:PI&amp;TIME                                                    = 3
+   emitted                                                      = 10
+   seen                                                         = 10
+   tooLong                                                      = 0
+   tooMany                                                      = 0
+</code></pre>
+</div>
+
+<p>VisibilitySummarizer has an option <code class="highlighter-rouge">maxCounters</code> that determines the max number
+of column visibilites it will track.  Below this option is set and compaction
+is forced to regenerate summary data.  The new summary data only has three
+visibilites and now the <code class="highlighter-rouge">tooMany</code> statistic is 4.  This is the number of
+visibilites that were not counted.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code> root@uno summary_test&gt; config -t summary_test -s table.summarizer.vis.opt.maxCounters=3
+ root@uno summary_test&gt; compact -w
+ 2017-02-24 19:54:46,267 [shell.Shell] INFO : Compacting table ...
+ 2017-02-24 19:54:47,127 [shell.Shell] INFO : Compaction of table summary_test completed for given range
+ root@uno summary_test&gt; summaries
+  Summarizer         : org.apache.accumulo.core.client.summary.summarizers.VisibilitySummarizer vis {maxCounters=3}
+  File Statistics    : [total:1, missing:0, extra:0, large:0]
+  Summary Statistics : 
+     c:PI                                                         = 1
+     c:PI&amp;GEO                                                     = 2
+     c:PI&amp;TIME                                                    = 3
+     emitted                                                      = 10
+     seen                                                         = 10
+     tooLong                                                      = 0
+     tooMany                                                      = 4
+</code></pre>
+</div>
+
+<p>Another summarizer is configured below that tracks the number of deletes.  Also
+a compaction strategy that uses this summary data is configured.  The
+<code class="highlighter-rouge">TooManyDeletesCompactionStrategy</code> will force a compaction of the tablet when
+the ratio of deletes to non-deletes is over 25%.  This threshold is
+configurable.  Below a delete is added and its reflected in the statistics.  In
+this case there is 1 delete and 10 non-deletes, not enough to force a
+compaction of the tablet.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>root@uno summary_test&gt; config -t summary_test -s table.summarizer.del=org.apache.accumulo.core.client.summary.summarizers.DeletesSummarizer
+root@uno summary_test&gt; compact -w
+2017-02-24 19:54:47,282 [shell.Shell] INFO : Compacting table ...
+2017-02-24 19:54:49,236 [shell.Shell] INFO : Compaction of table summary_test completed for given range
+root@uno summary_test&gt; config -t summary_test -s table.compaction.major.ratio=10
+root@uno summary_test&gt; config -t summary_test -s table.majc.compaction.strategy=org.apache.accumulo.tserver.compaction.strategies.TooManyDeletesCompactionStrategy
+root@uno summary_test&gt; deletemany -r d5d18dd -c date -f
+[DELETED] d5d18dd date:birth [PI&amp;TIME]
+root@uno summary_test&gt; flush -w
+2017-02-24 19:54:49,686 [shell.Shell] INFO : Flush of table summary_test completed.
+root@uno summary_test&gt; summaries
+ Summarizer         : org.apache.accumulo.core.client.summary.summarizers.VisibilitySummarizer vis {maxCounters=3}
+ File Statistics    : [total:2, missing:0, extra:0, large:0]
+ Summary Statistics : 
+    c:PI                                                         = 1
+    c:PI&amp;GEO                                                     = 2
+    c:PI&amp;TIME                                                    = 4
+    emitted                                                      = 11
+    seen                                                         = 11
+    tooLong                                                      = 0
+    tooMany                                                      = 4
+
+ Summarizer         : org.apache.accumulo.core.client.summary.summarizers.DeletesSummarizer del {}
+ File Statistics    : [total:2, missing:0, extra:0, large:0]
+ Summary Statistics : 
+    deletes                                                      = 1
+    total                                                        = 11
+</code></pre>
+</div>
+
+<p>Some more deletes are added and the table is flushed below.  This results in 4
+deletes and 10 non-deletes, which triggers a full compaction.  A full
+compaction of all files is the only time when delete markers are dropped.  The
+compaction ratio was set to 10 above to show that the number of files did not
+trigger the compaction.   After the compaction there no deletes 6 non-deletes.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>root@uno summary_test&gt; deletemany -r d5d18dd -f
+[DELETED] d5d18dd contact:address [PI&amp;GEO]
+[DELETED] d5d18dd name:first []
+[DELETED] d5d18dd name:last []
+root@uno summary_test&gt; flush -w
+2017-02-24 19:54:52,800 [shell.Shell] INFO : Flush of table summary_test completed.
+root@uno summary_test&gt; summaries
+ Summarizer         : org.apache.accumulo.core.client.summary.summarizers.VisibilitySummarizer vis {maxCounters=3}
+ File Statistics    : [total:1, missing:0, extra:0, large:0]
+ Summary Statistics : 
+    c:PI                                                         = 1
+    c:PI&amp;GEO                                                     = 1
+    c:PI&amp;TIME                                                    = 2
+    emitted                                                      = 6
+    seen                                                         = 6
+    tooLong                                                      = 0
+    tooMany                                                      = 2
+
+ Summarizer         : org.apache.accumulo.core.client.summary.summarizers.DeletesSummarizer del {}
+ File Statistics    : [total:1, missing:0, extra:0, large:0]
+ Summary Statistics : 
+    deletes                                                      = 0
+    total                                                        = 6
+root@uno summary_test&gt;   
+</code></pre>
+</div>
+
+
+  </div>
+</div>
+
+        </div>
+
+        
+<footer>
+
+  <p><a href="https://www.apache.org/foundation/contributing"><img src="https://www.apache.org/images/SupportApache-small.png" alt="Support the ASF" id="asf-logo" height="100" /></a></p>
+
+  <p>Copyright © 2011-2017 The Apache Software Foundation. Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.</p>
+
+</footer>
+
+
+      </div>
+    </div>
+  </div>
+</body>
+</html>


Mime
View raw message