incubator-accumulo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From build...@apache.org
Subject svn commit: r797867 [5/12] - in /websites/staging/accumulo/trunk/content: ./ accumulo/ accumulo/css/ accumulo/governance/ accumulo/images/ accumulo/user_manual_1.3-incubating/ accumulo/user_manual_1.3-incubating/examples/ accumulo/user_manual_1.4-incub...
Date Tue, 01 Nov 2011 17:08:19 GMT
Added: websites/staging/accumulo/trunk/content/accumulo/user_manual_1.3-incubating/Table_Configuration.html
==============================================================================
--- websites/staging/accumulo/trunk/content/accumulo/user_manual_1.3-incubating/Table_Configuration.html (added)
+++ websites/staging/accumulo/trunk/content/accumulo/user_manual_1.3-incubating/Table_Configuration.html Tue Nov  1 17:08:17 2011
@@ -0,0 +1,363 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+  <link href="/accumulo/css/accumulo.css" rel="stylesheet" type="text/css">
+  <title>Accumulo User Manual: Table Configuration</title>
+  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+  <script type="text/javascript">
+
+  var _gaq = _gaq || [];
+  _gaq.push(['_setAccount', 'UA-21103458-6']);
+  _gaq.push(['_trackPageview']);
+
+  (function() {
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+  </script>
+</head>
+
+<body>
+  <div id="banner">
+    <img id="logo" alt="Apache accumulo (Incubating)" src="/accumulo/images/accumulo-logo.png"/>
+    <div id="bannertext">
+&nbsp; 
+    </div><br />
+  </div>
+  
+  <div id="navigation">
+  <h1 id="project">Project</h1>
+<ul>
+<li><a href="/accumulo">Home</a></li>
+<li><a href="http://incubator.apache.org/projects/accumulo.html">Incubator page</a>
+<!--  - Download --></li>
+<li><a href="/accumulo/notable_features.html">Features</a></li>
+<li><a href="http://www.apache.org/licenses/LICENSE-2.0">License</a></li>
+</ul>
+<h1 id="community">Community</h1>
+<ul>
+<li><a href="/accumulo/get_involved.html">Get Involved</a></li>
+<li><a href="/accumulo/mailing_list.html">Mailing Lists</a></li>
+<li><a href="https://issues.apache.org/jira/secure/ConfigureReport.jspa?versionId=-2&amp;selectedProjectId=12312121&amp;reportKey=com.sourcelabs.jira.plugin.report.contributions%3Acontributionreport">People</a></li>
+</ul>
+<h1 id="development">Development</h1>
+<ul>
+<li><a href="/accumulo/source.html">Source Code</a></li>
+<li><a href="https://issues.apache.org/jira/browse/accumulo">Issues</a></li>
+<li><a href="https://builds.apache.org/job/Accumulo-Trunk">Builds</a></li>
+</ul>
+<h1 id="documentation">Documentation</h1>
+<ul>
+<li><a href="/accumulo/user_manual_1.3-incubating">Manual v1.3</a><ul>
+<li><a href="/accumulo/user_manual_1.3-incubating/examples.html">Examples v1.3</a></li>
+</ul>
+</li>
+<li><a href="/accumulo/user_manual_1.4-incubating">Manual v1.4</a>
+<!-- - klzzwxh:0005 -->
+<!-- - Javadoc -->
+<!-- - Examples --></li>
+<li><a href="/accumulo/screenshots.html">Screenshots</a></li>
+</ul>
+<!--
+# Development
+ - Source code
+ - Building
+-->
+
+<h1 id="asf_links">ASF links</h1>
+<ul>
+<li><a href="http://www.apache.org">Apache Software Foundation</a></li>
+<li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
+<li><a href="http://www.apache.org/foundation/sponsorship.html">Become a Sponsor</a></li>
+</ul>
+  </div>
+
+  <div id="content">
+    <h1 class="title">Accumulo User Manual: Table Configuration</h1>
+    <p><strong> Next:</strong> <a href="Table_Design.html">Table Design</a> <strong> Up:</strong> <a href="accumulo_user_manual.html">Accumulo User Manual Version 1.3</a> <strong> Previous:</strong> <a href="Writing_Accumulo_Clients.html">Writing Accumulo Clients</a>   <strong> <a href="Contents.html">Contents</a></strong> <br />
+</p>
+<p><a id=CHILD_LINKS></a><strong>Subsections</strong></p>
+<ul>
+<li><a href="Table_Configuration.html#Locality_Groups">Locality Groups</a></li>
+<li><a href="Table_Configuration.html#Constraints">Constraints</a></li>
+<li><a href="Table_Configuration.html#Bloom_Filters">Bloom Filters</a></li>
+<li><a href="Table_Configuration.html#Iterators">Iterators</a></li>
+<li><a href="Table_Configuration.html#Versioning_Iterators_and_Timestamps">Versioning Iterators and Timestamps</a></li>
+<li><a href="Table_Configuration.html#Filtering_Iterators">Filtering Iterators</a></li>
+<li><a href="Table_Configuration.html#Aggregating_Iterators">Aggregating Iterators</a></li>
+</ul>
+<hr />
+<h2 id="a_idtable_configurationa_table_configuration"><a id=Table_Configuration></a> Table Configuration</h2>
+<p>Accumulo tables have a few options that can be configured to alter the default behavior of Accumulo as well as improve performance based on the data stored. These include locality groups, constraints, and iterators. </p>
+<h2 id="a_idlocality_groupsa_locality_groups"><a id=Locality_Groups></a> Locality Groups</h2>
+<p>Accumulo supports storing sets of column families separately on disk to allow clients to efficiently scan over columns that are frequently used together and to avoid scanning over column families that are not requested. After a locality group is set, Scanner and BatchScanner operations will automatically take advantage of it whenever the fetchColumnFamilies() method is used. </p>
+<p>By default tables place all column families into the same "default" locality group. Additional locality groups can be configured anytime via the shell or programmatically as follows: </p>
+<h3 id="a_idmanaging_locality_groups_via_the_shella_managing_locality_groups_via_the_shell"><a id=Managing_Locality_Groups_via_the_Shell></a> Managing Locality Groups via the Shell</h3>
+<div class="codehilite"><pre><span class="err">usage:</span> <span class="err">setgroups</span> <span class="err">&lt;group&gt;=&lt;col</span> <span class="err">fam&gt;{,&lt;col</span> <span class="err">fam&gt;}{</span> <span class="err">&lt;group&gt;=&lt;col</span> <span class="err">fam&gt;{,&lt;col</span>
+<span class="err">fam&gt;}}</span> <span class="err">[-?]</span> <span class="err">-t</span> <span class="err">&lt;table&gt;</span>
+
+<span class="err">user@myinstance</span> <span class="err">mytable&gt;</span> <span class="err">setgroups</span> <span class="err">-t</span> <span class="err">mytable</span> <span class="err">group_one=colf1,colf2</span>
+
+<span class="err">user@myinstance</span> <span class="err">mytable&gt;</span> <span class="err">getgroups</span> <span class="err">-t</span> <span class="err">mytable</span>
+<span class="err">group_one=colf1,colf2</span>
+</pre></div>
+
+
+<h3 id="a_idmanaging_locality_groups_via_the_client_apia_managing_locality_groups_via_the_client_api"><a id=Managing_Locality_Groups_via_the_Client_API></a> Managing Locality Groups via the Client API</h3>
+<div class="codehilite"><pre><span class="n">Connector</span> <span class="n">conn</span><span class="p">;</span>
+
+<span class="n">HashMap</span><span class="sr">&lt;String,Set&lt;Text&gt;</span><span class="o">&gt;</span> <span class="n">localityGroups</span> <span class="o">=</span>
+    <span class="k">new</span> <span class="n">HashMap</span><span class="o">&lt;</span><span class="n">String</span><span class="p">,</span> <span class="n">Set</span><span class="sr">&lt;Text&gt;</span><span class="o">&gt;</span><span class="p">();</span>
+
+<span class="n">HashSet</span><span class="sr">&lt;Text&gt;</span> <span class="n">metadataColumns</span> <span class="o">=</span> <span class="k">new</span> <span class="n">HashSet</span><span class="sr">&lt;Text&gt;</span><span class="p">();</span>
+<span class="n">metadataColumns</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="k">new</span> <span class="n">Text</span><span class="p">(</span><span class="s">&quot;domain&quot;</span><span class="p">));</span>
+<span class="n">metadataColumns</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="k">new</span> <span class="n">Text</span><span class="p">(</span><span class="s">&quot;link&quot;</span><span class="p">));</span>
+
+<span class="n">HashSet</span><span class="sr">&lt;Text&gt;</span> <span class="n">contentColumns</span> <span class="o">=</span> <span class="k">new</span> <span class="n">HashSet</span><span class="sr">&lt;Text&gt;</span><span class="p">();</span>
+<span class="n">contentColumns</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="k">new</span> <span class="n">Text</span><span class="p">(</span><span class="s">&quot;body&quot;</span><span class="p">));</span>
+<span class="n">contentColumns</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="k">new</span> <span class="n">Text</span><span class="p">(</span><span class="s">&quot;images&quot;</span><span class="p">));</span>
+
+<span class="n">localityGroups</span><span class="o">.</span><span class="n">put</span><span class="p">(</span><span class="s">&quot;metadata&quot;</span><span class="p">,</span> <span class="n">metadataColumns</span><span class="p">);</span>
+<span class="n">localityGroups</span><span class="o">.</span><span class="n">put</span><span class="p">(</span><span class="s">&quot;content&quot;</span><span class="p">,</span> <span class="n">contentColumns</span><span class="p">);</span>
+
+<span class="n">conn</span><span class="o">.</span><span class="n">tableOperations</span><span class="p">()</span><span class="o">.</span><span class="n">setLocalityGroups</span><span class="p">(</span><span class="s">&quot;mytable&quot;</span><span class="p">,</span> <span class="n">localityGroups</span><span class="p">);</span>
+
+<span class="sr">//</span> <span class="n">existing</span> <span class="n">locality</span> <span class="n">groups</span> <span class="n">can</span> <span class="n">be</span> <span class="n">obtained</span> <span class="n">as</span> <span class="n">follows</span>
+<span class="n">Map</span><span class="o">&lt;</span><span class="n">String</span><span class="p">,</span> <span class="n">Set</span><span class="sr">&lt;Text&gt;</span><span class="o">&gt;</span> <span class="n">groups</span> <span class="o">=</span>
+    <span class="n">conn</span><span class="o">.</span><span class="n">tableOperations</span><span class="p">()</span><span class="o">.</span><span class="n">getLocalityGroups</span><span class="p">(</span><span class="s">&quot;mytable&quot;</span><span class="p">);</span>
+</pre></div>
+
+
+<p>The assignment of Column Families to Locality Groups can be changed anytime. The physical movement of column families into their new locality groups takes place via the periodic Major Compaction process that takes place continuously in the background. Major Compaction can also be scheduled to take place immediately through the shell: </p>
+<div class="codehilite"><pre><span class="n">user</span><span class="nv">@myinstance</span> <span class="n">mytable</span><span class="o">&gt;</span> <span class="n">compact</span> <span class="o">-</span><span class="n">t</span> <span class="n">mytable</span>
+</pre></div>
+
+
+<h2 id="a_idconstraintsa_constraints"><a id=Constraints></a> Constraints</h2>
+<p>Accumulo supports constraints applied on mutations at insert time. This can be used to disallow certain inserts according to a user defined policy. Any mutation that fails to meet the requirements of the constraint is rejected and sent back to the client. </p>
+<p>Constraints can be enabled by setting a table property as follows: </p>
+<div class="codehilite"><pre><span class="n">user</span><span class="nv">@myinstance</span> <span class="n">mytable</span><span class="o">&gt;</span> <span class="n">config</span> <span class="o">-</span><span class="n">t</span> <span class="n">mytable</span> <span class="o">-</span><span class="n">s</span> <span class="n">table</span><span class="o">.</span><span class="n">constraint</span><span class="mf">.1</span><span class="o">=</span><span class="n">com</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">ExampleConstraint</span>
+<span class="n">user</span><span class="nv">@myinstance</span> <span class="n">mytable</span><span class="o">&gt;</span> <span class="n">config</span> <span class="o">-</span><span class="n">t</span> <span class="n">mytable</span> <span class="o">-</span><span class="n">s</span> <span class="n">table</span><span class="o">.</span><span class="n">constraint</span><span class="mf">.2</span><span class="o">=</span><span class="n">com</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">AnotherConstraint</span>
+<span class="n">user</span><span class="nv">@myinstance</span> <span class="n">mytable</span><span class="o">&gt;</span> <span class="n">config</span> <span class="o">-</span><span class="n">t</span> <span class="n">mytable</span> <span class="o">-</span><span class="n">f</span> <span class="n">constraint</span>
+<span class="o">---------+--------------------------------+----------------------------</span>
+<span class="n">SCOPE</span>    <span class="o">|</span> <span class="n">NAME</span>                           <span class="o">|</span> <span class="n">VALUE</span>
+<span class="o">---------+--------------------------------+----------------------------</span>
+<span class="n">table</span>    <span class="o">|</span> <span class="n">table</span><span class="o">.</span><span class="n">constraint</span><span class="mf">.1</span><span class="o">............</span> <span class="o">|</span> <span class="n">com</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">ExampleConstraint</span>
+<span class="n">table</span>    <span class="o">|</span> <span class="n">table</span><span class="o">.</span><span class="n">constraint</span><span class="mf">.2</span><span class="o">............</span> <span class="o">|</span> <span class="n">com</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">AnotherConstraint</span>
+<span class="o">---------+--------------------------------+----------------------------</span>
+</pre></div>
+
+
+<p>Currently there are no general-purpose constraints provided with the Accumulo distribution. New constraints can be created by writing a Java class that implements the org.apache.accumulo.core.constraints.Constraint interface. </p>
+<p>To deploy a new constraint, create a jar file containing the class implementing the new constraint and place it in the lib directory of the Accumulo installation. New constraint jars can be added to Accumulo and enabled without restarting but any change to an existing constraint class requires Accumulo to be restarted. </p>
+<p>An example of constraints can be found in <br />
+accumulo/docs/examples/README.constraints with corresponding code under <br />
+accumulo/src/examples/main/java/accumulo/examples/constraints . </p>
+<h2 id="a_idbloom_filtersa_bloom_filters"><a id=Bloom_Filters></a> Bloom Filters</h2>
+<p>As mutations are applied to an Accumulo table, several files are created per tablet. If bloom filters are enabled, Accumulo will create and load a small data structure into memory to determine whether a file contains a given key before opening the file. This can speed up lookups considerably. </p>
+<p>To enable bloom filters, enter the following command in the Shell: </p>
+<div class="codehilite"><pre><span class="n">user</span><span class="nv">@myinstance</span><span class="o">&gt;</span> <span class="n">config</span> <span class="o">-</span><span class="n">t</span> <span class="n">mytable</span> <span class="o">-</span><span class="n">s</span> <span class="n">table</span><span class="o">.</span><span class="n">bloom</span><span class="o">.</span><span class="n">enabled</span><span class="o">=</span><span class="n">true</span>
+</pre></div>
+
+
+<p>An extensive example of using Bloom Filters can be found at <br />
+accumulo/docs/examples/README.bloom . </p>
+<h2 id="a_iditeratorsa_iterators"><a id=Iterators></a> Iterators</h2>
+<p>Iterators provide a modular mechanism for adding functionality to be executed by TabletServers when scanning or compacting data. This allows users to efficiently summarize, filter, and aggregate data. In fact, the built-in features of cell-level security and age-off are implemented using Iterators. </p>
+<h3 id="a_idsetting_iterators_via_the_shella_setting_iterators_via_the_shell"><a id=Setting_Iterators_via_the_Shell></a> Setting Iterators via the Shell</h3>
+<div class="codehilite"><pre><span class="err">usage:</span> <span class="err">setiter</span> <span class="err">[-?]</span> <span class="err">-agg</span> <span class="err">|</span> <span class="err">-</span><span class="kd">class</span> <span class="p">&lt;</span><span class="err">name</span><span class="p">&gt;</span> <span class="err">|</span> <span class="err">-filter</span> <span class="err">|</span> <span class="err">-nolabel</span> <span class="err">|</span> 
+<span class="err">-regex</span> <span class="err">|</span> <span class="err">-vers</span> <span class="err">[-majc]</span> <span class="err">[-minc]</span> <span class="err">[-n</span> <span class="err">&lt;itername&gt;]</span> <span class="err">-p</span> <span class="err">&lt;pri&gt;</span> <span class="err">[-scan]</span> 
+<span class="err">[-t</span> <span class="err">&lt;table&gt;]</span>
+
+<span class="err">user@myinstance</span> <span class="err">mytable&gt;</span> <span class="err">setiter</span> <span class="err">-t</span> <span class="err">mytable</span> <span class="err">-scan</span> <span class="err">-p</span> <span class="err">10</span> <span class="err">-n</span> <span class="err">myiter</span>
+</pre></div>
+
+
+<h3 id="a_idsetting_iterators_programmaticallya_setting_iterators_programmatically"><a id=Setting_Iterators_Programmatically></a> Setting Iterators Programmatically</h3>
+<div class="codehilite"><pre><span class="n">scanner</span><span class="o">.</span><span class="n">setScanIterators</span><span class="p">(</span>
+    <span class="mi">15</span><span class="p">,</span> <span class="sr">//</span> <span class="n">priority</span>
+    <span class="s">&quot;com.company.MyIterator&quot;</span><span class="p">,</span> <span class="sr">//</span> <span class="n">class</span> <span class="n">name</span>
+    <span class="s">&quot;myiter&quot;</span><span class="p">);</span> <span class="sr">//</span> <span class="n">name</span> <span class="n">this</span> <span class="n">iterator</span>
+</pre></div>
+
+
+<p>Some iterators take additional parameters from client code, as in the following example: </p>
+<div class="codehilite"><pre><span class="n">bscan</span><span class="o">.</span><span class="n">setIteratorOption</span><span class="p">(</span>
+    <span class="s">&quot;myiter&quot;</span><span class="p">,</span> <span class="sr">//</span> <span class="n">iterator</span> <span class="n">reference</span>
+    <span class="s">&quot;myoptionname&quot;</span><span class="p">,</span>
+    <span class="s">&quot;myoptionvalue&quot;</span><span class="p">);</span>
+</pre></div>
+
+
+<p>Tables support separate Iterator settings to be applied at scan time, upon minor compaction and upon major compaction. For most uses, tables will have identical iterator settings for all three to avoid inconsistent results. </p>
+<h2 id="a_idversioning_iterators_and_timestampsa_versioning_iterators_and_timestamps"><a id=Versioning_Iterators_and_Timestamps></a> Versioning Iterators and Timestamps</h2>
+<p>Accumulo provides the capability to manage versioned data through the use of timestamps within the Key. If a timestamp is not specified in the key created by the client then the system will set the timestamp to the current time. Two keys with identical rowIDs and columns but different timestamps are considered two versions of the same key. If two inserts are made into accumulo with the same rowID, column, and timestamp, then the behavior is non-deterministic. </p>
+<p>Timestamps are sorted in descending order, so the most recent data comes first. Accumulo can be configured to return the top k versions, or versions later than a given date. The default is to return the one most recent version. </p>
+<p>The version policy can be changed by changing the VersioningIterator options for a table as follows: </p>
+<div class="codehilite"><pre><span class="n">user</span><span class="nv">@myinstance</span> <span class="n">mytable</span><span class="o">&gt;</span> <span class="n">config</span> <span class="o">-</span><span class="n">t</span> <span class="n">mytable</span> <span class="o">-</span><span class="n">s</span>
+<span class="n">table</span><span class="o">.</span><span class="n">iterator</span><span class="o">.</span><span class="n">scan</span><span class="o">.</span><span class="n">vers</span><span class="o">.</span><span class="n">opt</span><span class="o">.</span><span class="n">maxVersions</span><span class="o">=</span><span class="mi">3</span>
+
+<span class="n">user</span><span class="nv">@myinstance</span> <span class="n">mytable</span><span class="o">&gt;</span> <span class="n">config</span> <span class="o">-</span><span class="n">t</span> <span class="n">mytable</span> <span class="o">-</span><span class="n">s</span>
+<span class="n">table</span><span class="o">.</span><span class="n">iterator</span><span class="o">.</span><span class="n">minc</span><span class="o">.</span><span class="n">vers</span><span class="o">.</span><span class="n">opt</span><span class="o">.</span><span class="n">maxVersions</span><span class="o">=</span><span class="mi">3</span>
+
+<span class="n">user</span><span class="nv">@myinstance</span> <span class="n">mytable</span><span class="o">&gt;</span> <span class="n">config</span> <span class="o">-</span><span class="n">t</span> <span class="n">mytable</span> <span class="o">-</span><span class="n">s</span>
+<span class="n">table</span><span class="o">.</span><span class="n">iterator</span><span class="o">.</span><span class="n">majc</span><span class="o">.</span><span class="n">vers</span><span class="o">.</span><span class="n">opt</span><span class="o">.</span><span class="n">maxVersions</span><span class="o">=</span><span class="mi">3</span>
+</pre></div>
+
+
+<h3 id="a_idlogical_timea_logical_time"><a id=Logical_Time></a> Logical Time</h3>
+<p>Accumulo 1.2 introduces the concept of logical time. This ensures that timestamps set by accumulo always move forward. This helps avoid problems caused by TabletServers that have different time settings. The per tablet counter gives unique one up time stamps on a per mutation basis. When using time in milliseconds, if two things arrive within the same millisecond then both receive the same timestamp. </p>
+<p>A table can be configured to use logical timestamps at creation time as follows: </p>
+<div class="codehilite"><pre><span class="n">user</span><span class="nv">@myinstance</span><span class="o">&gt;</span> <span class="n">createtable</span> <span class="o">-</span><span class="n">tl</span> <span class="n">logical</span>
+</pre></div>
+
+
+<h3 id="a_iddeletesa_deletes"><a id=Deletes></a> Deletes</h3>
+<p>Deletes are special keys in accumulo that get sorted along with all the other data. When a delete key is inserted, accumulo will not show anything that has a timestamp less than or equal to the delete key. During major compaction, any keys older than a delete key are omitted from the new file created, and the omitted keys are removed from disk as part of the regular garbage collection process. </p>
+<h2 id="a_idfiltering_iteratorsa_filtering_iterators"><a id=Filtering_Iterators></a> Filtering Iterators</h2>
+<p>When scanning over a set of key-value pairs it is possible to apply an arbitrary filtering policy through the use of a FilteringIterator. These types of iterators return only key-value pairs that satisfy the filter logic. Accumulo has two built-in filtering iterators that can be configured on any table: AgeOff and RegEx. More can be added by writing a Java class that implements the <br />
+org.apache.accumulo.core.iterators.filter.Filter interface. </p>
+<p>The AgeOff filter can be configured to remove data older than a certain date or a fixed amount of time from the present. The following example sets a table to delete everything inserted over 30 seconds ago: </p>
+<div class="codehilite"><pre><span class="n">user</span><span class="nv">@myinstance</span><span class="o">&gt;</span> <span class="n">createtable</span> <span class="n">filtertest</span>
+<span class="n">user</span><span class="nv">@myinstance</span> <span class="n">filtertest</span><span class="o">&gt;</span> <span class="n">setiter</span> <span class="o">-</span><span class="n">t</span> <span class="n">filtertest</span> <span class="o">-</span><span class="n">scan</span> <span class="o">-</span><span class="n">minc</span> <span class="o">-</span><span class="n">majc</span> <span class="o">-</span><span class="n">p</span>
+<span class="mi">10</span> <span class="o">-</span><span class="n">n</span> <span class="n">myfilter</span> <span class="o">-</span><span class="n">filter</span>
+
+<span class="n">FilteringIterator</span> <span class="n">uses</span> <span class="n">Filters</span> <span class="n">to</span> <span class="nb">accept</span> <span class="ow">or</span> <span class="n">reject</span> <span class="n">key</span><span class="o">/</span><span class="n">value</span> <span class="n">pairs</span>
+<span class="o">----------&gt;</span> <span class="n">entering</span> <span class="n">options:</span> <span class="sr">&lt;filterPriorityNumber&gt;</span>
+<span class="sr">&lt;ageoff|regex|filterClass&gt;</span>
+
+<span class="o">----------&gt;</span> <span class="n">set</span> <span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">accumulo</span><span class="o">.</span><span class="n">core</span><span class="o">.</span><span class="n">iterators</span><span class="o">.</span><span class="n">FilteringIterator</span> <span class="n">option</span>
+<span class="p">(</span><span class="sr">&lt;name&gt;</span> <span class="sr">&lt;value&gt;</span><span class="p">,</span> <span class="n">hit</span> <span class="n">enter</span> <span class="n">to</span> <span class="n">skip</span><span class="p">):</span> <span class="mi">0</span> <span class="n">ageoff</span>
+
+<span class="o">----------&gt;</span> <span class="n">set</span> <span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">accumulo</span><span class="o">.</span><span class="n">core</span><span class="o">.</span><span class="n">iterators</span><span class="o">.</span><span class="n">FilteringIterator</span> <span class="n">option</span>
+<span class="p">(</span><span class="sr">&lt;name&gt;</span> <span class="sr">&lt;value&gt;</span><span class="p">,</span> <span class="n">hit</span> <span class="n">enter</span> <span class="n">to</span> <span class="n">skip</span><span class="p">):</span>
+<span class="n">AgeOffFilter</span> <span class="n">removes</span> <span class="n">entries</span> <span class="n">with</span> <span class="n">timestamps</span> <span class="n">more</span> <span class="n">than</span> <span class="sr">&lt;ttl&gt;</span>
+<span class="n">milliseconds</span> <span class="n">old</span>
+
+<span class="o">----------&gt;</span> <span class="n">set</span> <span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">accumulo</span><span class="o">.</span><span class="n">core</span><span class="o">.</span><span class="n">iterators</span><span class="o">.</span><span class="n">filter</span><span class="o">.</span><span class="n">AgeOffFilter</span> <span class="n">parameter</span>
+<span class="n">currentTime</span><span class="p">,</span> <span class="k">if</span> <span class="n">set</span><span class="p">,</span> <span class="k">use</span> <span class="n">the</span> <span class="n">given</span> <span class="n">value</span> <span class="n">as</span> <span class="n">the</span> <span class="n">absolute</span> <span class="nb">time</span> <span class="n">in</span>
+<span class="n">milliseconds</span> <span class="n">as</span> <span class="n">the</span> <span class="n">current</span> <span class="nb">time</span> <span class="n">of</span> <span class="n">day:</span>
+
+<span class="o">----------&gt;</span> <span class="n">set</span> <span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">accumulo</span><span class="o">.</span><span class="n">core</span><span class="o">.</span><span class="n">iterators</span><span class="o">.</span><span class="n">filter</span><span class="o">.</span><span class="n">AgeOffFilter</span> <span class="n">parameter</span>
+<span class="n">ttl</span><span class="p">,</span> <span class="nb">time</span> <span class="n">to</span> <span class="n">live</span> <span class="p">(</span><span class="n">milliseconds</span><span class="p">):</span> <span class="mi">30000</span>
+
+<span class="n">user</span><span class="nv">@myinstance</span> <span class="n">filtertest</span><span class="o">&gt;</span>
+<span class="n">user</span><span class="nv">@myinstance</span> <span class="n">filtertest</span><span class="o">&gt;</span> <span class="n">scan</span>
+<span class="n">user</span><span class="nv">@myinstance</span> <span class="n">filtertest</span><span class="o">&gt;</span> <span class="n">insert</span> <span class="n">foo</span> <span class="n">a</span> <span class="n">b</span> <span class="n">c</span>
+<span class="n">insert</span> <span class="n">successful</span>
+<span class="n">user</span><span class="nv">@myinstance</span> <span class="n">filtertest</span><span class="o">&gt;</span> <span class="n">scan</span>
+<span class="n">foo</span> <span class="n">a:b</span> <span class="o">[]</span> <span class="n">c</span>
+
+<span class="o">...</span> <span class="nb">wait</span> <span class="mi">30</span> <span class="n">seconds</span> <span class="o">...</span>
+
+<span class="n">user</span><span class="nv">@myinstance</span> <span class="n">filtertest</span><span class="o">&gt;</span> <span class="n">scan</span>
+<span class="n">user</span><span class="nv">@myinstance</span> <span class="n">filtertest</span><span class="o">&gt;</span>
+</pre></div>
+
+
+<p>To see the iterator settings for a table, use: </p>
+<div class="codehilite"><pre><span class="n">user</span><span class="nv">@example</span> <span class="n">filtertest</span><span class="o">&gt;</span> <span class="n">config</span> <span class="o">-</span><span class="n">t</span> <span class="n">filtertest</span> <span class="o">-</span><span class="n">f</span> <span class="n">iterator</span>
+<span class="o">---------+------------------------------------------+------------------</span>
+<span class="n">SCOPE</span>    <span class="o">|</span> <span class="n">NAME</span>                                     <span class="o">|</span> <span class="n">VALUE</span>
+<span class="o">---------+------------------------------------------+------------------</span>
+<span class="n">table</span>    <span class="o">|</span> <span class="n">table</span><span class="o">.</span><span class="n">iterator</span><span class="o">.</span><span class="n">majc</span><span class="o">.</span><span class="n">myfilter</span> <span class="o">...........</span> <span class="o">|</span>
+<span class="mi">10</span><span class="p">,</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">accumulo</span><span class="o">.</span><span class="n">core</span><span class="o">.</span><span class="n">iterators</span><span class="o">.</span><span class="n">FilteringIterator</span>
+<span class="n">table</span>    <span class="o">|</span> <span class="n">table</span><span class="o">.</span><span class="n">iterator</span><span class="o">.</span><span class="n">majc</span><span class="o">.</span><span class="n">myfilter</span><span class="o">.</span><span class="n">opt</span><span class="mf">.0</span> <span class="o">.....</span> <span class="o">|</span>
+<span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">accumulo</span><span class="o">.</span><span class="n">core</span><span class="o">.</span><span class="n">iterators</span><span class="o">.</span><span class="n">filter</span><span class="o">.</span><span class="n">AgeOffFilter</span>
+<span class="n">table</span>    <span class="o">|</span> <span class="n">table</span><span class="o">.</span><span class="n">iterator</span><span class="o">.</span><span class="n">majc</span><span class="o">.</span><span class="n">myfilter</span><span class="o">.</span><span class="n">opt</span><span class="mf">.0</span><span class="o">.</span><span class="n">ttl</span> <span class="o">.</span> <span class="o">|</span> <span class="mi">30000</span>
+<span class="n">table</span>    <span class="o">|</span> <span class="n">table</span><span class="o">.</span><span class="n">iterator</span><span class="o">.</span><span class="n">minc</span><span class="o">.</span><span class="n">myfilter</span> <span class="o">...........</span> <span class="o">|</span>
+<span class="mi">10</span><span class="p">,</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">accumulo</span><span class="o">.</span><span class="n">core</span><span class="o">.</span><span class="n">iterators</span><span class="o">.</span><span class="n">FilteringIterator</span>
+<span class="n">table</span>    <span class="o">|</span> <span class="n">table</span><span class="o">.</span><span class="n">iterator</span><span class="o">.</span><span class="n">minc</span><span class="o">.</span><span class="n">myfilter</span><span class="o">.</span><span class="n">opt</span><span class="mf">.0</span> <span class="o">.....</span> <span class="o">|</span>
+<span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">accumulo</span><span class="o">.</span><span class="n">core</span><span class="o">.</span><span class="n">iterators</span><span class="o">.</span><span class="n">filter</span><span class="o">.</span><span class="n">AgeOffFilter</span>
+<span class="n">table</span>    <span class="o">|</span> <span class="n">table</span><span class="o">.</span><span class="n">iterator</span><span class="o">.</span><span class="n">minc</span><span class="o">.</span><span class="n">myfilter</span><span class="o">.</span><span class="n">opt</span><span class="mf">.0</span><span class="o">.</span><span class="n">ttl</span> <span class="o">.</span> <span class="o">|</span> <span class="mi">30000</span>
+<span class="n">table</span>    <span class="o">|</span> <span class="n">table</span><span class="o">.</span><span class="n">iterator</span><span class="o">.</span><span class="n">scan</span><span class="o">.</span><span class="n">myfilter</span> <span class="o">...........</span> <span class="o">|</span>
+<span class="mi">10</span><span class="p">,</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">accumulo</span><span class="o">.</span><span class="n">core</span><span class="o">.</span><span class="n">iterators</span><span class="o">.</span><span class="n">FilteringIterator</span>
+<span class="n">table</span>    <span class="o">|</span> <span class="n">table</span><span class="o">.</span><span class="n">iterator</span><span class="o">.</span><span class="n">scan</span><span class="o">.</span><span class="n">myfilter</span><span class="o">.</span><span class="n">opt</span><span class="mf">.0</span> <span class="o">.....</span> <span class="o">|</span>
+<span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">accumulo</span><span class="o">.</span><span class="n">core</span><span class="o">.</span><span class="n">iterators</span><span class="o">.</span><span class="n">filter</span><span class="o">.</span><span class="n">AgeOffFilter</span>
+<span class="n">table</span>    <span class="o">|</span> <span class="n">table</span><span class="o">.</span><span class="n">iterator</span><span class="o">.</span><span class="n">scan</span><span class="o">.</span><span class="n">myfilter</span><span class="o">.</span><span class="n">opt</span><span class="mf">.0</span><span class="o">.</span><span class="n">ttl</span> <span class="o">.</span> <span class="o">|</span> <span class="mi">30000</span>
+<span class="o">---------+------------------------------------------+------------------</span>
+</pre></div>
+
+
+<h2 id="a_idaggregating_iteratorsa_aggregating_iterators"><a id=Aggregating_Iterators></a> Aggregating Iterators</h2>
+<p>Accumulo allows aggregating iterators to be configured on tables and column families. When an aggregating iterator is set, the iterator is applied across the values associated with any keys that share rowID, column family, and column qualifier. This is similar to the reduce step in MapReduce, which applies some function to all the values associated with a particular key. </p>
+<p>For example, if an aggregating iterator were configured on a table and the following mutations were inserted: </p>
+<div class="codehilite"><pre><span class="n">Row</span>     <span class="n">Family</span> <span class="n">Qualifier</span> <span class="n">Timestamp</span>  <span class="n">Value</span>
+<span class="n">rowID1</span>  <span class="n">colfA</span>  <span class="n">colqA</span>     <span class="mi">20100101</span>   <span class="mi">1</span>
+<span class="n">rowID1</span>  <span class="n">colfA</span>  <span class="n">colqA</span>     <span class="mi">20100102</span>   <span class="mi">1</span>
+</pre></div>
+
+
+<p>The table would reflect only one aggregate value: </p>
+<div class="codehilite"><pre><span class="n">rowID1</span>  <span class="n">colfA</span>  <span class="n">colqA</span>     <span class="o">-</span>          <span class="mi">2</span>
+</pre></div>
+
+
+<p>Aggregating iterators can be enabled for a table as follows: </p>
+<div class="codehilite"><pre><span class="n">user</span><span class="nv">@myinstance</span><span class="o">&gt;</span> <span class="n">createtable</span> <span class="n">perDayCounts</span> <span class="o">-</span><span class="n">a</span>
+<span class="n">day</span><span class="o">=</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">accumulo</span><span class="o">.</span><span class="n">core</span><span class="o">.</span><span class="n">iterators</span><span class="o">.</span><span class="n">aggregation</span><span class="o">.</span><span class="n">StringSummation</span>
+
+<span class="n">user</span><span class="nv">@myinstance</span> <span class="n">perDayCounts</span><span class="o">&gt;</span> <span class="n">insert</span> <span class="n">row1</span> <span class="n">day</span> <span class="mi">20080101</span> <span class="mi">1</span>
+<span class="n">user</span><span class="nv">@myinstance</span> <span class="n">perDayCounts</span><span class="o">&gt;</span> <span class="n">insert</span> <span class="n">row1</span> <span class="n">day</span> <span class="mi">20080101</span> <span class="mi">1</span>
+<span class="n">user</span><span class="nv">@myinstance</span> <span class="n">perDayCounts</span><span class="o">&gt;</span> <span class="n">insert</span> <span class="n">row1</span> <span class="n">day</span> <span class="mi">20080103</span> <span class="mi">1</span>
+<span class="n">user</span><span class="nv">@myinstance</span> <span class="n">perDayCounts</span><span class="o">&gt;</span> <span class="n">insert</span> <span class="n">row2</span> <span class="n">day</span> <span class="mi">20080101</span> <span class="mi">1</span>
+<span class="n">user</span><span class="nv">@myinstance</span> <span class="n">perDayCounts</span><span class="o">&gt;</span> <span class="n">insert</span> <span class="n">row2</span> <span class="n">day</span> <span class="mi">20080101</span> <span class="mi">1</span>
+
+<span class="n">user</span><span class="nv">@myinstance</span> <span class="n">perDayCounts</span><span class="o">&gt;</span> <span class="n">scan</span>
+<span class="n">row1</span> <span class="n">day:20080101</span> <span class="o">[]</span> <span class="mi">2</span>
+<span class="n">row1</span> <span class="n">day:20080103</span> <span class="o">[]</span> <span class="mi">1</span>
+<span class="n">row2</span> <span class="n">day:20080101</span> <span class="o">[]</span> <span class="mi">2</span>
+</pre></div>
+
+
+<p>Accumulo includes the following aggregators: </p>
+<ul>
+<li><strong>LongSummation</strong>: expects values of type long and adds them. </li>
+<li><strong>StringSummation</strong>: expects numbers represented as strings and adds them. </li>
+<li><strong>StringMax</strong>: expects numbers as strings and retains the maximum number inserted. </li>
+<li><strong>StringMin</strong>: expects numbers as strings and retains the minimum number inserted. </li>
+</ul>
+<p>Additional Aggregators can be added by creating a Java class that implements <br />
+<strong>org.apache.accumulo.core.iterators.aggregation.Aggregator</strong> and adding a jar containing that class to Accumulo's lib directory. </p>
+<p>An example of an aggregator can be found under <br />
+accumulo/src/examples/main/java/accumulo/examples/aggregation/SortedSetAggregator.java </p>
+<hr />
+<p><strong> Next:</strong> <a href="Table_Design.html">Table Design</a> <strong> Up:</strong> <a href="accumulo_user_manual.html">Accumulo User Manual Version 1.3</a> <strong> Previous:</strong> <a href="Writing_Accumulo_Clients.html">Writing Accumulo Clients</a>   <strong> <a href="Contents.html">Contents</a></strong></p>
+  </div>
+
+  <div id="footer">
+    <div class="copyright">
+      <p>
+        Copyright &copy; 2011 The Apache Software Foundation, Licensed under
+        the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
+        <br />
+        Apache and the Apache feather logos are trademarks of The Apache Software Foundation.
+      </p>
+    </div> 
+    <a href="http://incubator.apache.org">
+      <img id="asf-logo" alt="Apache Incubator" src="/accumulo/images/apache-incubator-logo.png" width="150"/>
+    </a>
+
+  </div>
+
+</body>
+</html>

Added: websites/staging/accumulo/trunk/content/accumulo/user_manual_1.3-incubating/Table_Design.html
==============================================================================
--- websites/staging/accumulo/trunk/content/accumulo/user_manual_1.3-incubating/Table_Design.html (added)
+++ websites/staging/accumulo/trunk/content/accumulo/user_manual_1.3-incubating/Table_Design.html Tue Nov  1 17:08:17 2011
@@ -0,0 +1,254 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+  <link href="/accumulo/css/accumulo.css" rel="stylesheet" type="text/css">
+  <title>Accumulo User Manual: Table Design</title>
+  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+  <script type="text/javascript">
+
+  var _gaq = _gaq || [];
+  _gaq.push(['_setAccount', 'UA-21103458-6']);
+  _gaq.push(['_trackPageview']);
+
+  (function() {
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+  </script>
+</head>
+
+<body>
+  <div id="banner">
+    <img id="logo" alt="Apache accumulo (Incubating)" src="/accumulo/images/accumulo-logo.png"/>
+    <div id="bannertext">
+&nbsp; 
+    </div><br />
+  </div>
+  
+  <div id="navigation">
+  <h1 id="project">Project</h1>
+<ul>
+<li><a href="/accumulo">Home</a></li>
+<li><a href="http://incubator.apache.org/projects/accumulo.html">Incubator page</a>
+<!--  - Download --></li>
+<li><a href="/accumulo/notable_features.html">Features</a></li>
+<li><a href="http://www.apache.org/licenses/LICENSE-2.0">License</a></li>
+</ul>
+<h1 id="community">Community</h1>
+<ul>
+<li><a href="/accumulo/get_involved.html">Get Involved</a></li>
+<li><a href="/accumulo/mailing_list.html">Mailing Lists</a></li>
+<li><a href="https://issues.apache.org/jira/secure/ConfigureReport.jspa?versionId=-2&amp;selectedProjectId=12312121&amp;reportKey=com.sourcelabs.jira.plugin.report.contributions%3Acontributionreport">People</a></li>
+</ul>
+<h1 id="development">Development</h1>
+<ul>
+<li><a href="/accumulo/source.html">Source Code</a></li>
+<li><a href="https://issues.apache.org/jira/browse/accumulo">Issues</a></li>
+<li><a href="https://builds.apache.org/job/Accumulo-Trunk">Builds</a></li>
+</ul>
+<h1 id="documentation">Documentation</h1>
+<ul>
+<li><a href="/accumulo/user_manual_1.3-incubating">Manual v1.3</a><ul>
+<li><a href="/accumulo/user_manual_1.3-incubating/examples.html">Examples v1.3</a></li>
+</ul>
+</li>
+<li><a href="/accumulo/user_manual_1.4-incubating">Manual v1.4</a>
+<!-- - klzzwxh:0005 -->
+<!-- - Javadoc -->
+<!-- - Examples --></li>
+<li><a href="/accumulo/screenshots.html">Screenshots</a></li>
+</ul>
+<!--
+# Development
+ - Source code
+ - Building
+-->
+
+<h1 id="asf_links">ASF links</h1>
+<ul>
+<li><a href="http://www.apache.org">Apache Software Foundation</a></li>
+<li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
+<li><a href="http://www.apache.org/foundation/sponsorship.html">Become a Sponsor</a></li>
+</ul>
+  </div>
+
+  <div id="content">
+    <h1 class="title">Accumulo User Manual: Table Design</h1>
+    <p><strong> Next:</strong> <a href="High_Speed_Ingest.html">High-Speed Ingest</a> <strong> Up:</strong> <a href="accumulo_user_manual.html">Accumulo User Manual Version 1.3</a> <strong> Previous:</strong> <a href="Table_Configuration.html">Table Configuration</a>   <strong> <a href="Contents.html">Contents</a></strong> <br />
+</p>
+<p><a id=CHILD_LINKS></a><strong>Subsections</strong></p>
+<ul>
+<li><a href="Table_Design.html#Basic_Table">Basic Table</a></li>
+<li><a href="Table_Design.html#RowID_Design">RowID Design</a></li>
+<li><a href="Table_Design.html#Indexing">Indexing</a></li>
+<li><a href="Table_Design.html#Entity-Attribute_and_Graph_Tables">Entity-Attribute and Graph Tables</a></li>
+<li><a href="Table_Design.html#Document-Partitioned_Indexing">Document-Partitioned Indexing</a></li>
+</ul>
+<hr />
+<h2 id="a_idtable_designa_table_design"><a id=Table_Design></a> Table Design</h2>
+<h2 id="a_idbasic_tablea_basic_table"><a id=Basic_Table></a> Basic Table</h2>
+<p>Since Accumulo tables are sorted by row ID, each table can be thought of as being indexed by the row ID. Lookups performed by row ID can be executed quickly, by doing a binary search, first across the tablets, and then within a tablet. Clients should choose a row ID carefully in order to support their desired application. A simple rule is to select a unique identifier as the row ID for each entity to be stored and assign all the other attributes to be tracked to be columns under this row ID. For example, if we have the following data in a comma-separated file: </p>
+<div class="codehilite"><pre>    <span class="n">userid</span><span class="p">,</span><span class="n">age</span><span class="p">,</span><span class="n">address</span><span class="p">,</span><span class="n">account</span><span class="o">-</span><span class="n">balance</span>
+</pre></div>
+
+
+<p>We might choose to store this data using the userid as the rowID and the rest of the data in column families: </p>
+<div class="codehilite"><pre><span class="n">Mutation</span> <span class="n">m</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Mutation</span><span class="p">(</span><span class="k">new</span> <span class="n">Text</span><span class="p">(</span><span class="n">userid</span><span class="p">));</span>
+<span class="n">m</span><span class="o">.</span><span class="n">put</span><span class="p">(</span><span class="k">new</span> <span class="n">Text</span><span class="p">(</span><span class="s">&quot;age&quot;</span><span class="p">),</span> <span class="n">age</span><span class="p">);</span>
+<span class="n">m</span><span class="o">.</span><span class="n">put</span><span class="p">(</span><span class="k">new</span> <span class="n">Text</span><span class="p">(</span><span class="s">&quot;address&quot;</span><span class="p">),</span> <span class="n">address</span><span class="p">);</span>
+<span class="n">m</span><span class="o">.</span><span class="n">put</span><span class="p">(</span><span class="k">new</span> <span class="n">Text</span><span class="p">(</span><span class="s">&quot;balance&quot;</span><span class="p">),</span> <span class="n">account_balance</span><span class="p">);</span>
+
+<span class="n">writer</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">m</span><span class="p">);</span>
+</pre></div>
+
+
+<p>We could then retrieve any of the columns for a specific userid by specifying the userid as the range of a scanner and fetching specific columns: </p>
+<div class="codehilite"><pre><span class="n">Range</span> <span class="n">r</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Range</span><span class="p">(</span><span class="n">userid</span><span class="p">,</span> <span class="n">userid</span><span class="p">);</span> <span class="sr">//</span> <span class="n">single</span> <span class="n">row</span>
+<span class="n">Scanner</span> <span class="n">s</span> <span class="o">=</span> <span class="n">conn</span><span class="o">.</span><span class="n">createScanner</span><span class="p">(</span><span class="s">&quot;userdata&quot;</span><span class="p">,</span> <span class="n">auths</span><span class="p">);</span>
+<span class="n">s</span><span class="o">.</span><span class="n">setRange</span><span class="p">(</span><span class="n">r</span><span class="p">);</span>
+<span class="n">s</span><span class="o">.</span><span class="n">fetchColumnFamily</span><span class="p">(</span><span class="k">new</span> <span class="n">Text</span><span class="p">(</span><span class="s">&quot;age&quot;</span><span class="p">));</span>
+
+<span class="k">for</span><span class="p">(</span><span class="n">Entry</span><span class="sr">&lt;Key,Value&gt;</span> <span class="n">entry</span> <span class="p">:</span> <span class="n">s</span><span class="p">)</span>
+    <span class="n">System</span><span class="o">.</span><span class="n">out</span><span class="o">.</span><span class="n">println</span><span class="p">(</span><span class="n">entry</span><span class="o">.</span><span class="n">getValue</span><span class="p">()</span><span class="o">.</span><span class="n">toString</span><span class="p">());</span>
+</pre></div>
+
+
+<h2 id="a_idrowid_designa_rowid_design"><a id=RowID_Design></a> RowID Design</h2>
+<p>Often it is necessary to transform the rowID in order to have rows ordered in a way that is optimal for anticipated access patterns. A good example of this is reversing the order of components of internet domain names in order to group rows of the same parent domain together: </p>
+<div class="codehilite"><pre><span class="n">com</span><span class="o">.</span><span class="n">google</span><span class="o">.</span><span class="n">code</span>
+<span class="n">com</span><span class="o">.</span><span class="n">google</span><span class="o">.</span><span class="n">labs</span>
+<span class="n">com</span><span class="o">.</span><span class="n">google</span><span class="o">.</span><span class="n">mail</span>
+<span class="n">com</span><span class="o">.</span><span class="n">yahoo</span><span class="o">.</span><span class="n">mail</span>
+<span class="n">com</span><span class="o">.</span><span class="n">yahoo</span><span class="o">.</span><span class="n">research</span>
+</pre></div>
+
+
+<p>Some data may result in the creation of very large rows - rows with many columns. In this case the table designer may wish to split up these rows for better load balancing while keeping them sorted together for scanning purposes. This can be done by appending a random substring at the end of the row: </p>
+<div class="codehilite"><pre><span class="n">com</span><span class="o">.</span><span class="n">google</span><span class="o">.</span><span class="n">code_00</span>
+<span class="n">com</span><span class="o">.</span><span class="n">google</span><span class="o">.</span><span class="n">code_01</span>
+<span class="n">com</span><span class="o">.</span><span class="n">google</span><span class="o">.</span><span class="n">code_02</span>
+<span class="n">com</span><span class="o">.</span><span class="n">google</span><span class="o">.</span><span class="n">labs_00</span>
+<span class="n">com</span><span class="o">.</span><span class="n">google</span><span class="o">.</span><span class="n">mail_00</span>
+<span class="n">com</span><span class="o">.</span><span class="n">google</span><span class="o">.</span><span class="n">mail_01</span>
+</pre></div>
+
+
+<p>It could also be done by appending a string representation of some period of time, such as the date truncated to the week or month: </p>
+<div class="codehilite"><pre><span class="n">com</span><span class="o">.</span><span class="n">google</span><span class="o">.</span><span class="n">code_201003</span>
+<span class="n">com</span><span class="o">.</span><span class="n">google</span><span class="o">.</span><span class="n">code_201004</span>
+<span class="n">com</span><span class="o">.</span><span class="n">google</span><span class="o">.</span><span class="n">code_201005</span>
+<span class="n">com</span><span class="o">.</span><span class="n">google</span><span class="o">.</span><span class="n">labs_201003</span>
+<span class="n">com</span><span class="o">.</span><span class="n">google</span><span class="o">.</span><span class="n">mail_201003</span>
+<span class="n">com</span><span class="o">.</span><span class="n">google</span><span class="o">.</span><span class="n">mail_201004</span>
+</pre></div>
+
+
+<p>Appending dates provides the additional capability of restricting a scan to a given date range. </p>
+<h2 id="a_idindexinga_indexing"><a id=Indexing></a> Indexing</h2>
+<p>In order to support lookups via more than one attribute of an entity, additional indexes can be built. However, because Accumulo tables can support any number of columns without specifying them beforehand, a single additional index will often suffice for supporting lookups of records in the main table. Here, the index has, as the rowID, the Value or Term from the main table, the column families are the same, and the column qualifier of the index table contains the rowID from the main table. </p>
+<p><img alt="converted table" src="img2.png" /></p>
+<p>Note: We store rowIDs in the column qualifier rather than the Value so that we can have more than one rowID associated with a particular term within the index. If we stored this in the Value we would only see one of the rows in which the value appears since Accumulo is configured by default to return the one most recent value associated with a key. </p>
+<p>Lookups can then be done by scanning the Index Table first for occurrences of the desired values in the columns specified, which returns a list of row IDs from the main table. These can then be used to retrieve each matching record, in its entirety, or a subset of its columns, from the Main Table. </p>
+<p>To support efficient lookups of multiple rowIDs from the same table, the Accumulo client library provides a BatchScanner. Users specify a set of Ranges to the BatchScanner, which performs the lookups in multiple threads to multiple servers and returns an Iterator over all the rows retrieved. The rows returned are NOT in sorted order, unlike those returned by the basic Scanner interface. </p>
+<div class="codehilite"><pre><span class="sr">//</span> <span class="n">first</span> <span class="n">we</span> <span class="n">scan</span> <span class="n">the</span> <span class="nb">index</span> <span class="k">for</span> <span class="n">IDs</span> <span class="n">of</span> <span class="n">rows</span> <span class="n">matching</span> <span class="k">our</span> <span class="n">query</span>
+
+<span class="n">Text</span> <span class="n">term</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Text</span><span class="p">(</span><span class="s">&quot;mySearchTerm&quot;</span><span class="p">);</span>
+
+<span class="n">HashSet</span><span class="sr">&lt;Text&gt;</span> <span class="n">matchingRows</span> <span class="o">=</span> <span class="k">new</span> <span class="n">HashSet</span><span class="sr">&lt;Text&gt;</span><span class="p">();</span>
+
+<span class="n">Scanner</span> <span class="n">indexScanner</span> <span class="o">=</span> <span class="n">createScanner</span><span class="p">(</span><span class="s">&quot;index&quot;</span><span class="p">,</span> <span class="n">auths</span><span class="p">);</span>
+<span class="n">indexScanner</span><span class="o">.</span><span class="n">setRange</span><span class="p">(</span><span class="k">new</span> <span class="n">Range</span><span class="p">(</span><span class="n">term</span><span class="p">,</span> <span class="n">term</span><span class="p">));</span>
+
+<span class="sr">//</span> <span class="n">we</span> <span class="n">retrieve</span> <span class="n">the</span> <span class="n">matching</span> <span class="n">rowIDs</span> <span class="ow">and</span> <span class="n">create</span> <span class="n">a</span> <span class="n">set</span> <span class="n">of</span> <span class="n">ranges</span>
+<span class="k">for</span><span class="p">(</span><span class="n">Entry</span><span class="sr">&lt;Key,Value&gt;</span> <span class="n">entry</span> <span class="p">:</span> <span class="n">indexScanner</span><span class="p">)</span>
+    <span class="n">matchingRows</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="k">new</span> <span class="n">Text</span><span class="p">(</span><span class="n">entry</span><span class="o">.</span><span class="n">getKey</span><span class="p">()</span><span class="o">.</span><span class="n">getColumnQualifier</span><span class="p">()));</span>
+
+<span class="sr">//</span> <span class="n">now</span> <span class="n">we</span> <span class="n">pass</span> <span class="n">the</span> <span class="n">set</span> <span class="n">of</span> <span class="n">rowIDs</span> <span class="n">to</span> <span class="n">the</span> <span class="n">batch</span> <span class="n">scanner</span> <span class="n">to</span> <span class="n">retrieve</span> <span class="n">them</span>
+<span class="n">BatchScanner</span> <span class="n">bscan</span> <span class="o">=</span> <span class="n">conn</span><span class="o">.</span><span class="n">createBatchScanner</span><span class="p">(</span><span class="s">&quot;table&quot;</span><span class="p">,</span> <span class="n">auths</span><span class="p">,</span> <span class="mi">10</span><span class="p">);</span>
+
+<span class="n">bscan</span><span class="o">.</span><span class="n">setRanges</span><span class="p">(</span><span class="n">matchingRows</span><span class="p">);</span>
+<span class="n">bscan</span><span class="o">.</span><span class="n">fetchFamily</span><span class="p">(</span><span class="s">&quot;attributes&quot;</span><span class="p">);</span>
+
+<span class="k">for</span><span class="p">(</span><span class="n">Entry</span><span class="sr">&lt;Key,Value&gt;</span> <span class="n">entry</span> <span class="p">:</span> <span class="n">bscan</span><span class="p">)</span>
+    <span class="n">System</span><span class="o">.</span><span class="n">out</span><span class="o">.</span><span class="n">println</span><span class="p">(</span><span class="n">entry</span><span class="o">.</span><span class="n">getValue</span><span class="p">());</span>
+</pre></div>
+
+
+<p>One advantage of the dynamic schema capabilities of Accumulo is that different fields may be indexed into the same physical table. However, it may be necessary to create different index tables if the terms must be formatted differently in order to maintain proper sort order. For example, real numbers must be formatted differently than their usual notation in order to be sorted correctly. In these cases, usually one index per unique data type will suffice. </p>
+<h2 id="a_identity-attribute_and_graph_tablesa_entity-attribute_and_graph_tables"><a id=Entity-Attribute_and_Graph_Tables></a> Entity-Attribute and Graph Tables</h2>
+<p>Accumulo is ideal for storing entities and their attributes, especially if the attributes are sparse. It is often useful to join several datasets together on common entities within the same table. This can allow for the representation of graphs, including nodes, their attributes, and connections to other nodes. </p>
+<p>Rather than storing individual events, Entity-Attribute or Graph tables store aggregate information about the entities involved in the events and the relationships between entities. This is often preferable when single events aren't very useful and when a continuously updated summarization is desired. </p>
+<p>The physical schema for an entity-attribute or graph table is as follows: </p>
+<p><img alt="converted table" src="img3.png" /></p>
+<p>For example, to keep track of employees, managers and products the following entity-attribute table could be used. Note that the weights are not always necessary and are set to 0 when not used. </p>
+<p><img alt="converted table" src="img4.png" /> <br />
+</p>
+<p>To allow efficient updating of edge weights, an aggregating iterator can be configured to add the value of all mutations applied with the same key. These types of tables can easily be created from raw events by simply extracting the entities, attributes, and relationships from individual events and inserting the keys into Accumulo each with a count of 1. The aggregating iterator will take care of maintaining the edge weights. </p>
+<h2 id="a_iddocument-partitioned_indexinga_document-partitioned_indexing"><a id=Document-Partitioned_Indexing></a> Document-Partitioned Indexing</h2>
+<p>Using a simple index as described above works well when looking for records that match one of a set of given criteria. When looking for records that match more than one criterion simultaneously, such as when looking for documents that contain all of the words <code>the</code> and <code>white</code> and <code>house</code>, there are several issues. </p>
+<p>First is that the set of all records matching any one of the search terms must be sent to the client, which incurs a lot of network traffic. The second problem is that the client is responsible for performing set intersection on the sets of records returned to eliminate all but the records matching all search terms. The memory of the client may easily be overwhelmed during this operation. </p>
+<p>For these reasons Accumulo includes support for a scheme known as sharded indexing, in which these set operations can be performed at the TabletServers and decisions about which records to include in the result set can be made without incurring network traffic. </p>
+<p>This is accomplished via partitioning records into bins that each reside on at most one TabletServer, and then creating an index of terms per record within each bin as follows: </p>
+<p><img alt="converted table" src="img5.png" /></p>
+<p>Documents or records are mapped into bins by a user-defined ingest application. By storing the BinID as the RowID we ensure that all the information for a particular bin is contained in a single tablet and hosted on a single TabletServer since Accumulo never splits rows across tablets. Storing the Terms as column families serves to enable fast lookups of all the documents within this bin that contain the given term. </p>
+<p>Finally, we perform set intersection operations on the TabletServer via a special iterator called the Intersecting Iterator. Since documents are partitioned into many bins, a search of all documents must search every bin. We can use the BatchScanner to scan all bins in parallel. The Intersecting Iterator should be enabled on a BatchScanner within user query code as follows: </p>
+<div class="codehilite"><pre><span class="n">Text</span><span class="o">[]</span> <span class="n">terms</span> <span class="o">=</span> <span class="p">{</span><span class="k">new</span> <span class="n">Text</span><span class="p">(</span><span class="s">&quot;the&quot;</span><span class="p">),</span> <span class="k">new</span> <span class="n">Text</span><span class="p">(</span><span class="s">&quot;white&quot;</span><span class="p">),</span> <span class="k">new</span> <span class="n">Text</span><span class="p">(</span><span class="s">&quot;house&quot;</span><span class="p">)};</span>
+
+<span class="n">BatchScanner</span> <span class="n">bs</span> <span class="o">=</span> <span class="n">conn</span><span class="o">.</span><span class="n">createBatchScanner</span><span class="p">(</span><span class="n">table</span><span class="p">,</span> <span class="n">auths</span><span class="p">,</span> <span class="mi">20</span><span class="p">);</span>
+<span class="n">bs</span><span class="o">.</span><span class="n">setScanIterators</span><span class="p">(</span><span class="mi">20</span><span class="p">,</span> <span class="n">IntersectingIterator</span><span class="o">.</span><span class="n">class</span><span class="o">.</span><span class="n">getName</span><span class="p">(),</span> <span class="s">&quot;ii&quot;</span><span class="p">);</span>
+
+<span class="sr">//</span> <span class="n">tells</span> <span class="n">scanner</span> <span class="n">to</span> <span class="n">look</span> <span class="k">for</span> <span class="n">terms</span> <span class="n">in</span> <span class="n">the</span> <span class="n">column</span> <span class="n">family</span> <span class="ow">and</span> <span class="n">sends</span> <span class="n">terms</span>
+<span class="n">bs</span><span class="o">.</span><span class="n">setScanIteratorOption</span><span class="p">(</span><span class="s">&quot;ii&quot;</span><span class="p">,</span>
+    <span class="n">IntersectingIterator</span><span class="o">.</span><span class="n">columnFamiliesOptionName</span><span class="p">,</span>
+    <span class="n">IntersectingIterator</span><span class="o">.</span><span class="n">encodeColumns</span><span class="p">(</span><span class="n">terms</span><span class="p">));</span>
+
+<span class="n">bs</span><span class="o">.</span><span class="n">setRanges</span><span class="p">(</span><span class="n">Collections</span><span class="o">.</span><span class="n">singleton</span><span class="p">(</span><span class="k">new</span> <span class="n">Range</span><span class="p">()));</span>
+
+<span class="k">for</span><span class="p">(</span><span class="n">Entry</span><span class="sr">&lt;Key,Value&gt;</span> <span class="n">entry</span> <span class="p">:</span> <span class="n">bs</span><span class="p">)</span> <span class="p">{</span>
+    <span class="n">System</span><span class="o">.</span><span class="n">out</span><span class="o">.</span><span class="n">println</span><span class="p">(</span><span class="s">&quot; &quot;</span> <span class="o">+</span> <span class="n">entry</span><span class="o">.</span><span class="n">getKey</span><span class="p">()</span><span class="o">.</span><span class="n">getColumnQualifier</span><span class="p">());</span>
+<span class="p">}</span>
+</pre></div>
+
+
+<p>This code effectively has the BatchScanner scan all tablets of a table, looking for documents that match all the given terms. Because all tablets are being scanned for every query, each query is more expensive than other Accumulo scans, which typically involve a small number of TabletServers. This reduces the number of concurrent queries supported and is subject to what is known as the "straggler" problem in which every query runs as slow as the slowest server participating. </p>
+<p>Of course, fast servers will return their results to the client which can display them to the user immediately while they wait for the rest of the results to arrive. If the results are unordered this is quite effective as the first results to arrive are as good as any others to the user. </p>
+<hr />
+<p><strong> Next:</strong> <a href="High_Speed_Ingest.html">High-Speed Ingest</a> <strong> Up:</strong> <a href="accumulo_user_manual.html">Accumulo User Manual Version 1.3</a> <strong> Previous:</strong> <a href="Table_Configuration.html">Table Configuration</a>   <strong> <a href="Contents.html">Contents</a></strong></p>
+  </div>
+
+  <div id="footer">
+    <div class="copyright">
+      <p>
+        Copyright &copy; 2011 The Apache Software Foundation, Licensed under
+        the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
+        <br />
+        Apache and the Apache feather logos are trademarks of The Apache Software Foundation.
+      </p>
+    </div> 
+    <a href="http://incubator.apache.org">
+      <img id="asf-logo" alt="Apache Incubator" src="/accumulo/images/apache-incubator-logo.png" width="150"/>
+    </a>
+
+  </div>
+
+</body>
+</html>

Added: websites/staging/accumulo/trunk/content/accumulo/user_manual_1.3-incubating/Writing_Accumulo_Clients.html
==============================================================================
--- websites/staging/accumulo/trunk/content/accumulo/user_manual_1.3-incubating/Writing_Accumulo_Clients.html (added)
+++ websites/staging/accumulo/trunk/content/accumulo/user_manual_1.3-incubating/Writing_Accumulo_Clients.html Tue Nov  1 17:08:17 2011
@@ -0,0 +1,207 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+  <link href="/accumulo/css/accumulo.css" rel="stylesheet" type="text/css">
+  <title>Accumulo User Manual: Writing Accumulo Clients</title>
+  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+  <script type="text/javascript">
+
+  var _gaq = _gaq || [];
+  _gaq.push(['_setAccount', 'UA-21103458-6']);
+  _gaq.push(['_trackPageview']);
+
+  (function() {
+    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+  })();
+
+  </script>
+</head>
+
+<body>
+  <div id="banner">
+    <img id="logo" alt="Apache accumulo (Incubating)" src="/accumulo/images/accumulo-logo.png"/>
+    <div id="bannertext">
+&nbsp; 
+    </div><br />
+  </div>
+  
+  <div id="navigation">
+  <h1 id="project">Project</h1>
+<ul>
+<li><a href="/accumulo">Home</a></li>
+<li><a href="http://incubator.apache.org/projects/accumulo.html">Incubator page</a>
+<!--  - Download --></li>
+<li><a href="/accumulo/notable_features.html">Features</a></li>
+<li><a href="http://www.apache.org/licenses/LICENSE-2.0">License</a></li>
+</ul>
+<h1 id="community">Community</h1>
+<ul>
+<li><a href="/accumulo/get_involved.html">Get Involved</a></li>
+<li><a href="/accumulo/mailing_list.html">Mailing Lists</a></li>
+<li><a href="https://issues.apache.org/jira/secure/ConfigureReport.jspa?versionId=-2&amp;selectedProjectId=12312121&amp;reportKey=com.sourcelabs.jira.plugin.report.contributions%3Acontributionreport">People</a></li>
+</ul>
+<h1 id="development">Development</h1>
+<ul>
+<li><a href="/accumulo/source.html">Source Code</a></li>
+<li><a href="https://issues.apache.org/jira/browse/accumulo">Issues</a></li>
+<li><a href="https://builds.apache.org/job/Accumulo-Trunk">Builds</a></li>
+</ul>
+<h1 id="documentation">Documentation</h1>
+<ul>
+<li><a href="/accumulo/user_manual_1.3-incubating">Manual v1.3</a><ul>
+<li><a href="/accumulo/user_manual_1.3-incubating/examples.html">Examples v1.3</a></li>
+</ul>
+</li>
+<li><a href="/accumulo/user_manual_1.4-incubating">Manual v1.4</a>
+<!-- - klzzwxh:0005 -->
+<!-- - Javadoc -->
+<!-- - Examples --></li>
+<li><a href="/accumulo/screenshots.html">Screenshots</a></li>
+</ul>
+<!--
+# Development
+ - Source code
+ - Building
+-->
+
+<h1 id="asf_links">ASF links</h1>
+<ul>
+<li><a href="http://www.apache.org">Apache Software Foundation</a></li>
+<li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
+<li><a href="http://www.apache.org/foundation/sponsorship.html">Become a Sponsor</a></li>
+</ul>
+  </div>
+
+  <div id="content">
+    <h1 class="title">Accumulo User Manual: Writing Accumulo Clients</h1>
+    <p><strong> Next:</strong> <a href="Table_Configuration.html">Table Configuration</a> <strong> Up:</strong> <a href="accumulo_user_manual.html">Accumulo User Manual Version 1.3</a> <strong> Previous:</strong> <a href="Accumulo_Shell.html">Accumulo Shell</a>   <strong> <a href="Contents.html">Contents</a></strong> <br />
+</p>
+<p><a id=CHILD_LINKS></a><strong>Subsections</strong></p>
+<ul>
+<li><a href="Writing_Accumulo_Clients.html#Writing_Data">Writing Data</a></li>
+<li><a href="Writing_Accumulo_Clients.html#Reading_Data">Reading Data</a></li>
+</ul>
+<hr />
+<h2 id="a_idwriting_accumulo_clientsa_writing_accumulo_clients"><a id=Writing_Accumulo_Clients></a> Writing Accumulo Clients</h2>
+<p>All clients must first identify the Accumulo instance to which they will be communicating. Code to do this is as follows: </p>
+<div class="codehilite"><pre><span class="n">String</span> <span class="n">instanceName</span> <span class="o">=</span> <span class="s">&quot;myinstance&quot;</span><span class="p">;</span>
+<span class="n">String</span> <span class="n">zooServers</span> <span class="o">=</span> <span class="s">&quot;zooserver-one,zooserver-two&quot;</span><span class="p">;</span>
+<span class="n">Instance</span> <span class="n">inst</span> <span class="o">=</span> <span class="k">new</span> <span class="n">ZooKeeperInstance</span><span class="p">(</span><span class="n">instanceName</span><span class="p">,</span> <span class="n">zooServers</span><span class="p">);</span>
+
+<span class="n">Connector</span> <span class="n">conn</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Connector</span><span class="p">(</span><span class="n">inst</span><span class="p">,</span> <span class="s">&quot;user&quot;</span><span class="p">,</span><span class="s">&quot;passwd&quot;</span><span class="o">.</span><span class="n">getBytes</span><span class="p">());</span>
+</pre></div>
+
+
+<h2 id="a_idwriting_dataa_writing_data"><a id=Writing_Data></a> Writing Data</h2>
+<p>Data are written to Accumulo by creating Mutation objects that represent all the changes to the columns of a single row. The changes are made atomically in the TabletServer. Clients then add Mutations to a BatchWriter which submits them to the appropriate TabletServers. </p>
+<p>Mutations can be created thus: </p>
+<div class="codehilite"><pre><span class="n">Text</span> <span class="n">rowID</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Text</span><span class="p">(</span><span class="s">&quot;row1&quot;</span><span class="p">);</span>
+<span class="n">Text</span> <span class="n">colFam</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Text</span><span class="p">(</span><span class="s">&quot;myColFam&quot;</span><span class="p">);</span>
+<span class="n">Text</span> <span class="n">colQual</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Text</span><span class="p">(</span><span class="s">&quot;myColQual&quot;</span><span class="p">);</span>
+<span class="n">ColumnVisibility</span> <span class="n">colVis</span> <span class="o">=</span> <span class="k">new</span> <span class="n">ColumnVisibility</span><span class="p">(</span><span class="s">&quot;public&quot;</span><span class="p">);</span>
+<span class="n">long</span> <span class="n">timestamp</span> <span class="o">=</span> <span class="n">System</span><span class="o">.</span><span class="n">currentTimeMillis</span><span class="p">();</span>
+
+<span class="n">Value</span> <span class="n">value</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Value</span><span class="p">(</span><span class="s">&quot;myValue&quot;</span><span class="o">.</span><span class="n">getBytes</span><span class="p">());</span>
+
+<span class="n">Mutation</span> <span class="n">mutation</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Mutation</span><span class="p">(</span><span class="n">rowID</span><span class="p">);</span>
+<span class="n">mutation</span><span class="o">.</span><span class="n">put</span><span class="p">(</span><span class="n">colFam</span><span class="p">,</span> <span class="n">colQual</span><span class="p">,</span> <span class="n">colVis</span><span class="p">,</span> <span class="n">timestamp</span><span class="p">,</span> <span class="n">value</span><span class="p">);</span>
+</pre></div>
+
+
+<h3 id="a_idbatchwritera_batchwriter"><a id=BatchWriter></a> BatchWriter</h3>
+<p>The BatchWriter is highly optimized to send Mutations to multiple TabletServers and automatically batches Mutations destined for the same TabletServer to amortize network overhead. Care must be taken to avoid changing the contents of any Object passed to the BatchWriter since it keeps objects in memory while batching. </p>
+<p>Mutations are added to a BatchWriter thus: </p>
+<div class="codehilite"><pre><span class="n">long</span> <span class="n">memBuf</span> <span class="o">=</span> <span class="mi">1000000</span><span class="n">L</span><span class="p">;</span> <span class="sr">//</span> <span class="n">bytes</span> <span class="n">to</span> <span class="n">store</span> <span class="n">before</span> <span class="n">sending</span> <span class="n">a</span> <span class="n">batch</span>
+<span class="n">long</span> <span class="n">timeout</span> <span class="o">=</span> <span class="mi">1000</span><span class="n">L</span><span class="p">;</span> <span class="sr">//</span> <span class="n">milliseconds</span> <span class="n">to</span> <span class="nb">wait</span> <span class="n">before</span> <span class="n">sending</span>
+<span class="nb">int</span> <span class="n">numThreads</span> <span class="o">=</span> <span class="mi">10</span><span class="p">;</span>
+
+<span class="n">BatchWriter</span> <span class="n">writer</span> <span class="o">=</span>
+    <span class="n">conn</span><span class="o">.</span><span class="n">createBatchWriter</span><span class="p">(</span><span class="s">&quot;table&quot;</span><span class="p">,</span> <span class="n">memBuf</span><span class="p">,</span> <span class="n">timeout</span><span class="p">,</span> <span class="n">numThreads</span><span class="p">);</span>
+
+<span class="n">writer</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">mutation</span><span class="p">);</span>
+
+<span class="n">writer</span><span class="o">.</span><span class="nb">close</span><span class="p">();</span>
+</pre></div>
+
+
+<p>An example of using the batch writer can be found at <br />
+accumulo/docs/examples/README.batch </p>
+<h2 id="a_idreading_dataa_reading_data"><a id=Reading_Data></a> Reading Data</h2>
+<p>Accumulo is optimized to quickly retrieve the value associated with a given key, and to efficiently return ranges of consecutive keys and their associated values. </p>
+<h3 id="a_idscannera_scanner"><a id=Scanner></a> Scanner</h3>
+<p>To retrieve data, Clients use a Scanner, which acts like an Iterator over keys and values. Scanners can be configured to start and stop at particular keys, and to return a subset of the columns available. </p>
+<div class="codehilite"><pre><span class="sr">//</span> <span class="n">specify</span> <span class="n">which</span> <span class="n">visibilities</span> <span class="n">we</span> <span class="n">are</span> <span class="n">allowed</span> <span class="n">to</span> <span class="n">see</span>
+<span class="n">Authorizations</span> <span class="n">auths</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Authorizations</span><span class="p">(</span><span class="s">&quot;public&quot;</span><span class="p">);</span>
+
+<span class="n">Scanner</span> <span class="n">scan</span> <span class="o">=</span>
+    <span class="n">conn</span><span class="o">.</span><span class="n">createScanner</span><span class="p">(</span><span class="s">&quot;table&quot;</span><span class="p">,</span> <span class="n">auths</span><span class="p">);</span>
+
+<span class="n">scan</span><span class="o">.</span><span class="n">setRange</span><span class="p">(</span><span class="k">new</span> <span class="n">Range</span><span class="p">(</span><span class="s">&quot;harry&quot;</span><span class="p">,</span><span class="s">&quot;john&quot;</span><span class="p">));</span>
+<span class="n">scan</span><span class="o">.</span><span class="n">fetchFamily</span><span class="p">(</span><span class="s">&quot;attributes&quot;</span><span class="p">);</span>
+
+<span class="k">for</span><span class="p">(</span><span class="n">Entry</span><span class="sr">&lt;Key,Value&gt;</span> <span class="n">entry</span> <span class="p">:</span> <span class="n">scan</span><span class="p">)</span> <span class="p">{</span>
+    <span class="n">Text</span> <span class="n">row</span> <span class="o">=</span> <span class="n">entry</span><span class="o">.</span><span class="n">getKey</span><span class="p">()</span><span class="o">.</span><span class="n">getRow</span><span class="p">();</span>
+    <span class="n">Value</span> <span class="n">value</span> <span class="o">=</span> <span class="n">entry</span><span class="o">.</span><span class="n">getValue</span><span class="p">();</span>
+<span class="p">}</span>
+</pre></div>
+
+
+<h3 id="a_idbatchscannera_batchscanner"><a id=BatchScanner></a> BatchScanner</h3>
+<p>For some types of access, it is more efficient to retrieve several ranges simultaneously. This arises when accessing a set of rows that are not consecutive whose IDs have been retrieved from a secondary index, for example. </p>
+<p>The BatchScanner is configured similarly to the Scanner; it can be configured to retrieve a subset of the columns available, but rather than passing a single Range, BatchScanners accept a set of Ranges. It is important to note that the keys returned by a BatchScanner are not in sorted order since the keys streamed are from multiple TabletServers in parallel. </p>
+<div class="codehilite"><pre><span class="n">ArrayList</span><span class="sr">&lt;Range&gt;</span> <span class="n">ranges</span> <span class="o">=</span> <span class="k">new</span> <span class="n">ArrayList</span><span class="sr">&lt;Range&gt;</span><span class="p">();</span>
+<span class="sr">//</span> <span class="n">populate</span> <span class="n">list</span> <span class="n">of</span> <span class="n">ranges</span> <span class="o">...</span>
+
+<span class="n">BatchScanner</span> <span class="n">bscan</span> <span class="o">=</span>
+    <span class="n">conn</span><span class="o">.</span><span class="n">createBatchScanner</span><span class="p">(</span><span class="s">&quot;table&quot;</span><span class="p">,</span> <span class="n">auths</span><span class="p">,</span> <span class="mi">10</span><span class="p">);</span>
+
+<span class="n">bscan</span><span class="o">.</span><span class="n">setRanges</span><span class="p">(</span><span class="n">ranges</span><span class="p">);</span>
+<span class="n">bscan</span><span class="o">.</span><span class="n">fetchFamily</span><span class="p">(</span><span class="s">&quot;attributes&quot;</span><span class="p">);</span>
+
+<span class="k">for</span><span class="p">(</span><span class="n">Entry</span><span class="sr">&lt;Key,Value&gt;</span> <span class="n">entry</span> <span class="p">:</span> <span class="n">bscan</span><span class="p">)</span>
+    <span class="n">System</span><span class="o">.</span><span class="n">out</span><span class="o">.</span><span class="n">println</span><span class="p">(</span><span class="n">entry</span><span class="o">.</span><span class="n">getValue</span><span class="p">());</span>
+</pre></div>
+
+
+<p>An example of the BatchScanner can be found at <br />
+accumulo/docs/examples/README.batch </p>
+<hr />
+<p><strong> Next:</strong> <a href="Table_Configuration.html">Table Configuration</a> <strong> Up:</strong> <a href="accumulo_user_manual.html">Accumulo User Manual Version 1.3</a> <strong> Previous:</strong> <a href="Accumulo_Shell.html">Accumulo Shell</a>   <strong> <a href="Contents.html">Contents</a></strong></p>
+  </div>
+
+  <div id="footer">
+    <div class="copyright">
+      <p>
+        Copyright &copy; 2011 The Apache Software Foundation, Licensed under
+        the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
+        <br />
+        Apache and the Apache feather logos are trademarks of The Apache Software Foundation.
+      </p>
+    </div> 
+    <a href="http://incubator.apache.org">
+      <img id="asf-logo" alt="Apache Incubator" src="/accumulo/images/apache-incubator-logo.png" width="150"/>
+    </a>
+
+  </div>
+
+</body>
+</html>



Mime
View raw message