incubator-blur-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From amccu...@apache.org
Subject [07/12] git commit: Added in documentation pages to be included into a distribution
Date Sat, 17 Aug 2013 16:55:47 GMT
Added in documentation pages to be included into a distribution

Signed-off-by: Aaron McCurry <amccurry@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/commit/079113a8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/079113a8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/079113a8

Branch: refs/heads/master
Commit: 079113a88093d742c21babe7288fdb7e5d85b0ba
Parents: 87881e1
Author: Chris Rohr <crohr@nearinfinity.com>
Authored: Fri Aug 16 23:19:46 2013 -0400
Committer: Aaron McCurry <amccurry@gmail.com>
Committed: Sat Aug 17 12:39:40 2013 -0400

----------------------------------------------------------------------
 docs/cluster-setup.html                | 253 ++++++++
 docs/data-model.html                   | 146 +++++
 docs/extra.html                        |  71 +++
 docs/getting-started.html              | 239 +++++++
 docs/index.html                        |  65 ++
 docs/resources/css/bootstrap.min.css   |   9 +
 docs/resources/css/bs-docs.css         | 929 ++++++++++++++++++++++++++++
 docs/resources/img/BlurShardServer.png | Bin 0 -> 52813 bytes
 docs/resources/js/bootstrap.min.js     |   6 +
 docs/resources/js/docs.js              |  46 ++
 docs/resources/js/jquery-2.0.3.min.js  |   6 +
 docs/resources/js/respond.min.js       |   6 +
 docs/using-blur.html                   | 144 +++++
 13 files changed, 1920 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/079113a8/docs/cluster-setup.html
----------------------------------------------------------------------
diff --git a/docs/cluster-setup.html b/docs/cluster-setup.html
new file mode 100644
index 0000000..eb3432f
--- /dev/null
+++ b/docs/cluster-setup.html
@@ -0,0 +1,253 @@
+<!DOCTYPE html>
+<!-- 
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+  <head>
+    <title>Cluster Setup - Apache Blur (Incubator) Documentation</title>
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <!-- Bootstrap -->
+    <link href="resources/css/bootstrap.min.css" rel="stylesheet" media="screen">
+    <link href="resources/css/bs-docs.css" rel="stylesheet" media="screen">
+  </head>
+  <body>
+    <div class="navbar navbar-inverse navbar-fixed-top">
+      <div class="container">
+        <div class="navbar-header">
+          <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+          </button>
+          <a class="navbar-brand" href="index.html">Apache Blur (Incubator)</a>
+        </div>
+        <div class="collapse navbar-collapse">
+          <ul class="nav navbar-nav">
+            <li><a href="getting-started.html">Getting Started</a></li>
+            <li><a href="data-model.html">Data Model</a></li>
+            <li class="active"><a href="cluster-setup.html">Cluster Setup</a></li>
+            <li><a href="using-blur.html">Using Blur</a></li>
+            <li><a href="extra.html">Extra</a></li>
+          </ul>
+        </div>
+      </div>
+    </div>
+    <div class="container bs-docs-container">
+      <div class="row">
+        <div class="col-md-3">
+          <div class="bs-sidebar hidden-print affix" role="complementary">
+            <ul class="nav bs-sidenav">
+              <li>
+                <a href="#controller">Controller Server Configuration</a>
+                <ul class="nav">
+                  <li><a href="#controller-blur-site">blur-site.properties</a></li>
+                  <li><a href="#controller-blur-env">blur-env.sh</a></li>
+                </ul>
+              </li>
+              <li>
+                <a href="#shard">Shard Server Configuration</a>
+                <ul class="nav">
+                  <li><a href="#shard-blur-site">blur-site.properties</a></li>
+                  <li><a href="#shard-blur-env">blur-env.sh</a></li>
+                  <li><a href="#block-cache">Block Cache Configuration</a></li>
+                </ul>
+              </li>
+              <li>
+                <a href="#metrics">Metrics</a>
+                <ul class="nav">
+                  <li><a href="#shard-mbean">Shard Server - MBean</a></li>
+                  <li><a href="#reporters">Other Reporters</a></li>
+                </ul>
+              </li>
+            </ul>
+          </div>
+        </div>
+        <div class="col-md-9" role="main">
+          <section>
+            <div class="page-header">
+              <h1 id="controller">Controller Server Configuration</h1>
+            </div>
+            <h3 id="controller-blur-site">blur-site.properties</h3>
+            <p>
+              These are the default settings for the controller server that can be overridden
in the blur-site.properties file. Consider increasing the various thread pool counts (*.thread.count).
The blur.controller.server.remote.thread.count is very important to increase for larger clusters,
basically one thread is used per shard server per query. Some production clusters have
set this thread pool to 2000 or more threads.
+            </p>
+            <pre><code class="bash">blur.controller.hostname=
+blur.controller.bind.address=0.0.0.0
+blur.controller.bind.port=40010
+blur.controller.server.thrift.thread.count=32
+blur.controller.server.remote.thread.count=64
+blur.controller.remote.fetch.count=100
+
+blur.controller.retry.max.fetch.retries=3
+blur.controller.retry.max.mutate.retries=3
+blur.controller.retry.max.default.retries=3
+blur.controller.retry.fetch.delay=500
+blur.controller.retry.mutate.delay=500
+blur.controller.retry.default.delay=500
+blur.controller.retry.max.fetch.delay=2000
+blur.controller.retry.max.mutate.delay=2000
+blur.controller.retry.max.default.delay=2000
+
+blur.query.max.results.fetch=1000
+blur.query.max.row.fetch=100
+blur.query.max.record.fetch=1000
+
+blur.gui.controller.port=40080
+
+blur.metrics.reporters=</code></pre>
+            <h3 id="controller-blur-env">blur-env.sh</h3>
+            <pre><code class="bash"># JAVA JVM OPTIONS for the controller servers,
jvm tuning parameters are placed here.
+# Consider adding the -XX:OnOutOfMemoryError="kill -9 %p" option to kill jvms that are failing
due to memory issues.
+export BLUR_CONTROLLER_JVM_OPTIONS="-Xmx1024m -Djava.net.preferIPv4Stack=true "
+
+# Time to sleep between controller server commands.
+export BLUR_CONTROLLER_SLEEP=0.1
+
+# The number of controller servers to spawn per machine.
+export BLUR_NUMBER_OF_CONTROLLER_SERVER_INSTANCES_PER_MACHINE=1</code></pre>
+          </section>
+          <section>
+            <div class="page-header">
+              <h1 id="shard">Shard Server Configuration</h1>
+            </div>
+            <h3 id="shard-blur-site">blur-site.properties</h3>
+            <p>
+              These are the default settings for the shard server that can be overridden
in the blur-site.properties file. Consider increasing the various thread pool counts (*.thread.count).
Also the blur.max.clause.count sets the BooleanQuery max clause count for Lucene queries.
+            </p>
+            <pre><code class="bash">blur.shard.hostname=
+blur.shard.bind.address=0.0.0.0
+blur.shard.bind.port=40020
+blur.shard.data.fetch.thread.count=8
+blur.shard.server.thrift.thread.count=8
+blur.shard.opener.thread.count=8
+blur.shard.cache.max.querycache.elements=128
+blur.shard.cache.max.timetolive=60000
+blur.shard.filter.cache.class=org.apache.blur.manager.DefaultBlurFilterCache
+blur.shard.index.warmup.class=org.apache.blur.manager.indexserver.DefaultBlurIndexWarmup
+blur.shard.blockcache.direct.memory.allocation=true
+blur.shard.blockcache.slab.count=-1
+blur.shard.buffercache.1024=8192
+blur.shard.buffercache.8192=8192
+blur.shard.safemodedelay=5000
+blur.shard.time.between.commits=30000
+blur.shard.time.between.refreshs=3000
+blur.max.clause.count=1024
+blur.indexmanager.search.thread.count=8
+
+blur.query.max.results.fetch=1000
+blur.query.max.row.fetch=100
+blur.query.max.record.fetch=1000
+
+blur.gui.shard.port=40090
+
+blur.metrics.reporters=</code></pre>
+            <h3 id="shard-blur-env">blur-env.sh</h3>
+            <pre><code class="bash"># JAVA JVM OPTIONS for the shard servers,
jvm tuning parameters are placed here.
+export BLUR_SHARD_JVM_OPTIONS="-Xmx1024m -Djava.net.preferIPv4Stack=true -XX:MaxDirectMemorySize=256m
"
+
+# Time to sleep between shard server commands.
+export BLUR_SHARD_SLEEP=0.1
+
+# The number of shard servers to spawn per machine.
+export BLUR_NUMBER_OF_SHARD_SERVER_INSTANCES_PER_MACHINE=1</code></pre>
+
+            <h3 id="block-cache">Block Cache Configuration</h3>
+            <h4>Why</h4>
+            <p>HDFS is a great filesystem for streaming large amounts of data across large
scale clusters. However the random access latency is typically the same performance you would
get in reading from a local drive if the data you are trying to access is not in the operating
system's file cache. In other words every access to HDFS is similar to a local read with a
cache miss. There have been great performance boosts in HDFS over the past few years but it
still can't perform at the level that a search engine needs.</p>
+            <p>Now you might be thinking that Lucene reads from the local hard drive
and performs great, so why wouldn't HDFS perform fairly well on its own? However, most of
the time the Lucene index files are cached by the operating system's file system cache. So Blur
has its own file system cache that allows it to perform low latency data look-ups against HDFS.</p>
+            <h4>How</h4>
+            <p>On shard server start-up Blur creates 1 or more block cache slabs blur.shard.blockcache.slab.count
that are each 128 MB in size. These slabs can be allocated on or off the heap blur.shard.blockcache.direct.memory.allocation.
Each slab is broken up into 16,384 blocks with each block size being 8K. Then on the heap
there is a concurrent LRU cache that tracks what blocks of what files are in which slab(s)
at what offset. So the more slabs of cache you create the more entries there will be in the
LRU thus more heap.</p>
+            <h4>Configuration</h4>
+            <p>Scenario:
+
+            Say the shard server(s) that you are planning to run Blur on have 32G of ram.
These machines are probably also running HDFS data nodes as well with very high xcievers (dfs.datanode.max.xcievers
in hdfs-site.xml) say 8K. If the data nodes are configured with 1G of heap then they may consume
up to 4G of memory due to the high thread count because of the xcievers. Next let's say you
configure Blur to 4G of heap as well, and you want to use 12G of off heap cache.</p>
+            <h5>Auto Configuration</h5>
+            <p>In the blur-env.sh file you would need to change BLUR_SHARD_JVM_OPTIONS
to include "-XX:MaxDirectMemorySize=12g" and possibly "-XX:+UseLargePages" depending on your
Linux setup. If you leave the blur.shard.blockcache.slab.count to the default -1 the shard
startup will automatically detect the -XX:MaxDirectMemorySize size and automatically use almost
all of the memory. By default the JVM has 64m in reserve for direct memory so by default Blur
leaves at least that amount available to the JVM.</p>
+            <h5>Custom Configuration</h5>
+            <p>Again in the blur-env.sh file you would need to change BLUR_SHARD_JVM_OPTIONS
to include "-XX:MaxDirectMemorySize=13g" and possibly "-XX:+UseLargePages" depending on your
Linux setup. I set the MaxDirectMemorySize to more than 12G to make sure we don't hit the
maximum limit and cause an OOM exception. This does not reserve 13G; it's a control to not allow
more than that. Below is a working example, it also contains GC logging and GC configuration:</p>
+            <pre><code class="bash">export BLUR_SHARD_JVM_OPTIONS="-XX:MaxDirectMemorySize=13g
\
+            -XX:+UseLargePages \
+            -Xms4g \
+            -Xmx4g \
+            -Xmn512m \
+            -XX:+UseCompressedOops \
+            -XX:+UseConcMarkSweepGC \
+            -XX:+CMSIncrementalMode \
+            -XX:CMSIncrementalDutyCycleMin=10 \
+            -XX:CMSIncrementalDutyCycle=50 \
+            -XX:ParallelGCThreads=8 \
+            -XX:+UseParNewGC \
+            -XX:MaxGCPauseMillis=200 \
+            -XX:GCTimeRatio=10 \
+            -XX:+DisableExplicitGC \
+            -verbose:gc \
+            -XX:+PrintGCDetails \
+            -XX:+PrintGCDateStamps \
+            -Xloggc:$BLUR_HOME/logs/gc-blur-shard-server_`date +%Y%m%d_%H%M%S`.log"</code></pre>
+            <p>Next you will need to setup blur-site.properties by changing blur.shard.blockcache.slab.count
to 96. This is telling blur to allocate 96 128MB slabs of memory at shard server start-up.
Note that the first time you do this the shard servers may take a long time to allocate
the memory. This is because the OS could be using most of that memory for its own filesystem
caching and it will need to unload it which may cause some IO due to the cache syncing to disk.</p>
+            <p>Also the blur.shard.blockcache.direct.memory.allocation is set to true
by default, this will tell the JVM to try and allocate the memory off heap. If you want to
run the slabs in the heap (which is not recommended) set this value to false.</p>
+          </section>
+          <section>
+            <div class="page-header">
+              <h1 id="metrics">Metrics</h1>
+            </div>
+            <p class="lead">Internally Blur uses the Metrics library from Coda Hale
(<a href="http://metrics.codahale.com/">http://metrics.codahale.com/</a>). So
by default all metrics are available through JMX. Here is a screenshot of what is available
in the Shard server.</p>
+            <h3 id="shard-mbean">Shard Server - MBean Screenshot</h3>
+            <img src="resources/img/BlurShardServer.png" style="max-width:1000px"/>
+            <h3 id="reporters">Configuring Other Reporters</h3>
+            <p class="lead">New reporters can be added and configured in the blur-site.properties file.
Multiple reporters can be configured.</p>
+            <h4>Example</h4>
+            <pre><code class="bash">blur.metrics.reporters=GangliaReporter
+blur.metrics.reporter.ganglia.period=3
+blur.metrics.reporter.ganglia.unit=SECONDS
+blur.metrics.reporter.ganglia.host=ganglia1
+blur.metrics.reporter.ganglia.port=8649</code></pre>
+            <h4>Reporters to Enable</h4>
+            <pre><code class="bash">blur.metrics.reporters=[ConsoleReporter,CsvReporter,GangliaReporter,GraphiteReporter]</code></pre>
+            <h4>ConsoleReporter</h4>
+            <pre><code class="bash">blur.metrics.reporter.console.period=[5]
+blur.metrics.reporter.console.unit=[NANOSECONDS,MICROSECONDS,MILLISECONDS,SECONDS,MINUTES,HOURS,DAYS]</code></pre>
+            <h4>CsvReporter</h4>
+            <pre><code class="bash">blur.metrics.reporter.csv.period=[5]
+blur.metrics.reporter.csv.unit=[NANOSECONDS,MICROSECONDS,MILLISECONDS,SECONDS,MINUTES,HOURS,DAYS]
+blur.metrics.reporter.csv.outputDir=[.]</code></pre>
+            <h4>GangliaReporter</h4>
+            <pre><code class="bash">blur.metrics.reporter.ganglia.period=[5]
+blur.metrics.reporter.ganglia.unit=[NANOSECONDS,MICROSECONDS,MILLISECONDS,SECONDS,MINUTES,HOURS,DAYS]
+blur.metrics.reporter.ganglia.host=[localhost]
+blur.metrics.reporter.ganglia.port=[-1]
+blur.metrics.reporter.ganglia.prefix=[""]
+blur.metrics.reporter.ganglia.compressPackageNames=[false]</code></pre>
+            <h4>GraphiteReporter</h4>
+            <pre><code class="bash">blur.metrics.reporter.graphite.period=[5]
+blur.metrics.reporter.graphite.unit=[NANOSECONDS,MICROSECONDS,MILLISECONDS,SECONDS,MINUTES,HOURS,DAYS]
+blur.metrics.reporter.graphite.host=[localhost]
+blur.metrics.reporter.graphite.port=[-1]
+blur.metrics.reporter.graphite.prefix=[""]</code></pre>
+          </section>
+        </div>
+      </div>
+    </div>
+    
+    <!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
+    <script src="resources/js/jquery-2.0.3.min.js"></script>
+    <!-- Include all compiled plugins (below), or include individual files as needed -->
+    <script src="resources/js/bootstrap.min.js"></script>
+    <!-- Enable responsive features in IE8 with Respond.js (https://github.com/scottjehl/Respond)
-->
+    <script src="resources/js/respond.min.js"></script>
+    <script src="resources/js/docs.js"></script>
+  </body>
+</html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/079113a8/docs/data-model.html
----------------------------------------------------------------------
diff --git a/docs/data-model.html b/docs/data-model.html
new file mode 100644
index 0000000..867c52e
--- /dev/null
+++ b/docs/data-model.html
@@ -0,0 +1,146 @@
+<!DOCTYPE html>
+<!-- 
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+  <head>
+    <title>Data Model - Apache Blur (Incubator) Documentation</title>
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <!-- Bootstrap -->
+    <link href="resources/css/bootstrap.min.css" rel="stylesheet" media="screen">
+    <link href="resources/css/bs-docs.css" rel="stylesheet" media="screen">
+  </head>
+  <body>
+    <div class="navbar navbar-inverse navbar-fixed-top">
+      <div class="container">
+        <div class="navbar-header">
+          <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+          </button>
+          <a class="navbar-brand" href="index.html">Apache Blur (Incubator)</a>
+        </div>
+        <div class="collapse navbar-collapse">
+          <ul class="nav navbar-nav">
+            <li><a href="getting-started.html">Getting Started</a></li>
+            <li class="active"><a href="data-model.html">Data Model</a></li>
+            <li><a href="cluster-setup.html">Cluster Setup</a></li>
+            <li><a href="using-blur.html">Using Blur</a></li>
+            <li><a href="extra.html">Extra</a></li>
+          </ul>
+        </div>
+      </div>
+    </div>
+    <div class="container bs-docs-container">
+      <div class="row">
+        <div class="col-md-3">
+          <div class="bs-sidebar hidden-print affix" role="complementary">
+            <ul class="nav bs-sidenav">
+              <li>
+                <a href="#structure">Structure</a>
+                <ul class="nav">
+                  <li><a href="#columns">Columns</a></li>
+                  <li><a href="#records">Records</a></li>
+                  <li><a href="#rows">Rows</a></li>
+                </ul>
+              </li>
+              <li><a href="#querying">Querying</a></li>
+            </ul>
+          </div>
+        </div>
+        <div class="col-md-9" role="main">
+          <section>
+            <div class="page-header">
+              <h1 id="structure">Data Structure</h1>
+            </div>
+            <p class="lead">
+              Blur is a table based query system. So within a single shard cluster there
can be many different tables, each with a different schema, shard size, analyzers, etc. Each
table contains Rows. A Row contains a row id (Lucene StringField internally) and many Records.
A record has a record id (Lucene StringField internally), a family (Lucene StringField internally),
and many Columns. A column contains a name and value, both are Strings in the API but the
value can be interpreted as different types. All base Lucene Field types are supported, Text,
String, Long, Int, Double, and Float.
+            </p>
+            <p>Starting with the most basic structure and building on it.</p>
+            <h3 id="columns">Columns</h3>
+            <p>
+              Columns contain a name and value, both are strings in the API but can be interpreted
as an Integer, Float, Long, Double, String, or Text. All Column types default to Text and
will be analyzed during the indexing process.
+            </p>
+            <pre><code class="json">Column {"name" => "value"}</code></pre>
+            <h3 id="records">Records</h3>
+            <p>
+              Records contain a Record Id, Family, and one or more Columns.
+            </p>
+            <pre><code class="json">Record {
+  "recordId" => "1234",
+  "family" => "family1",
+  "columns" => [
+    Column {"column1" => "value1"}
+    Column {"column2" => "value2"}
+    Column {"column2" => "value3"}
+    Column {"column3" => "value4"}
+  ]
+}</code></pre>
+            <div class="bs-callout bs-callout-info"><h4>Quick Tip!</h4><p>The
column names do not have to be unique within the Record. So you can treat multiple Columns
with the same name as an array of values. Also the order of the values will be maintained.</p></div>
+            <h3 id="rows">Rows</h3>
+            <p>
+              Rows contain a row id and a list of Records.
+            </p>
+            <pre><code class="json">Row {
+  "id" => "r-5678",
+  "records" => [
+    Record {
+      "recordId" => "1234",
+      "family" => "family1",
+      "columns" => [
+        Column {"column1" => "value1"}
+        Column {"column2" => "value2"}
+        Column {"column2" => "value3"}
+        Column {"column3" => "value4"}
+      ]
+    },
+    Record {
+      "recordId" => "9012",
+      "family" => "family1",
+      "columns" => [
+        Column {"column1" => "value1"}
+      ]
+    },
+    Record {
+      "recordId" => "4321",
+      "family" => "family2",
+      "columns" => [
+        Column {"column16" => "value1"}
+      ]
+    }
+  ]
+}</code></pre>
+          </section>
+          <section>
+            <div class="page-header">
+              <h1 id="querying">Querying</h1>
+              <p class="lead">TODO</p>
+            </div>
+          </section>
+        </div>
+      </div>
+    </div>
+    
+    <!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
+    <script src="resources/js/jquery-2.0.3.min.js"></script>
+    <!-- Include all compiled plugins (below), or include individual files as needed -->
+    <script src="resources/js/bootstrap.min.js"></script>
+    <!-- Enable responsive features in IE8 with Respond.js (https://github.com/scottjehl/Respond)
-->
+    <script src="resources/js/respond.min.js"></script>
+    <script src="resources/js/docs.js"></script>
+  </body>
+</html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/079113a8/docs/extra.html
----------------------------------------------------------------------
diff --git a/docs/extra.html b/docs/extra.html
new file mode 100644
index 0000000..426f13a
--- /dev/null
+++ b/docs/extra.html
@@ -0,0 +1,71 @@
+<!DOCTYPE html>
+<!-- 
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+  <head>
+    <title>Extra - Apache Blur (Incubator) Documentation</title>
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <!-- Bootstrap -->
+    <link href="resources/css/bootstrap.min.css" rel="stylesheet" media="screen">
+    <link href="resources/css/bs-docs.css" rel="stylesheet" media="screen">
+  </head>
+  <body>
+    <div class="navbar navbar-inverse navbar-fixed-top">
+      <div class="container">
+        <div class="navbar-header">
+          <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+          </button>
+          <a class="navbar-brand" href="index.html">Apache Blur (Incubator)</a>
+        </div>
+        <div class="collapse navbar-collapse">
+          <ul class="nav navbar-nav">
+            <li><a href="getting-started.html">Getting Started</a></li>
+            <li><a href="data-model.html">Data Model</a></li>
+            <li><a href="cluster-setup.html">Cluster Setup</a></li>
+            <li><a href="using-blur.html">Using Blur</a></li>
+            <li class="active"><a href="extra.html">Extra</a></li>
+          </ul>
+        </div>
+      </div>
+    </div>
+    <div class="container bs-docs-container">
+      <div class="row">
+        <div class="col-md-3">
+          <div class="bs-sidebar hidden-print affix" role="complementary">
+            <ul class="nav bs-sidenav">
+              
+            </ul>
+          </div>
+        </div>
+        <div class="col-md-9" role="main">
+          
+        </div>
+      </div>
+    </div>
+    
+    <!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
+    <script src="resources/js/jquery-2.0.3.min.js"></script>
+    <!-- Include all compiled plugins (below), or include individual files as needed -->
+    <script src="resources/js/bootstrap.min.js"></script>
+    <!-- Enable responsive features in IE8 with Respond.js (https://github.com/scottjehl/Respond)
-->
+    <script src="resources/js/respond.min.js"></script>
+    <script src="resources/js/docs.js"></script>
+  </body>
+</html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/079113a8/docs/getting-started.html
----------------------------------------------------------------------
diff --git a/docs/getting-started.html b/docs/getting-started.html
new file mode 100644
index 0000000..4e1bcc2
--- /dev/null
+++ b/docs/getting-started.html
@@ -0,0 +1,239 @@
+<!DOCTYPE html>
+<!-- 
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+  <head>
+    <title>Getting started - Apache Blur (Incubator) Documentation</title>
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <!-- Bootstrap -->
+    <link href="resources/css/bootstrap.min.css" rel="stylesheet" media="screen">
+    <link href="resources/css/bs-docs.css" rel="stylesheet" media="screen">
+  </head>
+  <body>
+    <div class="navbar navbar-inverse navbar-fixed-top">
+      <div class="container">
+        <div class="navbar-header">
+          <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+          </button>
+          <a class="navbar-brand" href="index.html">Apache Blur (Incubator)</a>
+        </div>
+        <div class="collapse navbar-collapse">
+          <ul class="nav navbar-nav">
+            <li class="active"><a href="getting-started.html">Getting Started</a></li>
+            <li><a href="data-model.html">Data Model</a></li>
+            <li><a href="cluster-setup.html">Cluster Setup</a></li>
+            <li><a href="using-blur.html">Using Blur</a></li>
+            <li><a href="extra.html">Extra</a></li>
+          </ul>
+        </div>
+      </div>
+    </div>
+    <div class="container bs-docs-container">
+      <div class="row">
+        <div class="col-md-3">
+          <div class="bs-sidebar hidden-print affix" role="complementary">
+            <ul class="nav bs-sidenav">
+              <li><a href="#prerequisites">Prerequisites</a></li>
+              <li><a href="#download">Download</a></li>
+              <li><a href="#compile">Compile</a></li>
+              <li><a href="#install">Install</a></li>
+              <li><a href="#min-config">Minimum Configuration</a></li>
+              <li><a href="#start">Starting Apache Blur</a></li>
+              <li><a href="#shell">Shell</a></li>
+              <li><a href="#shell-example">Simple Shell Example</a></li>
+            </ul>
+          </div>
+        </div>
+        <div class="col-md-9" role="main">
+          <section>
+            <div class="page-header">
+              <h1 id="prerequisites">Prerequisites</h1>
+            </div>
+            <p class="lead">
+              You will at a minimum need the following:
+            </p>
+            <ul>
+              <li>Java 6 installed (Java 7 has not been tested)</li>
+            </ul>
+            <h3>Setup passphraseless ssh</h3>
+            <p>These instructions are taken from the Hadoop Quick Start Guide.</p>
+            <p>Now check that you can ssh to the localhost without a passphrase:</p>
+            <pre><code class="bash">ssh localhost</code></pre>
+            <p>If you cannot ssh to localhost without a passphrase, execute the following
commands:</p>
+            <pre><code class="bash">ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
+cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys</code></pre>
+            <div class="bs-callout bs-callout-info"><h4>Heads Up!</h4><p>Also
you will need to know the location of the JAVA_HOME directory.</p></div>
+          </section>
+          <section>
+            <div class="page-header">
+              <h1 id="download">Download</h1>
+            </div>
+            <p class="lead">
+              There are a few ways to download Blur depending on how involved you want to
be.
+            </p>
+            <h3>Download full source</h3>
+            <p>
+              Until a full release has been made, please download the latest full source
code and look in the <code>/distribution</code> folder.
+              <a href="https://git-wip-us.apache.org/repos/asf/incubator-blur.git" class="btn
btn-large btn-primary">Download Latest Apache Blur 0.2.0</a>
+            </p>
+          </section>
+          <section>
+            <div class="page-header">
+              <h1 id="compile">Compile</h1>
+            </div>
+            <p class="lead">
+              If building from source, the distribution needs to be compiled before use
+            </p>
+            <p>Clone master</p>
+            <pre><code class="bash">git clone https://git-wip-us.apache.org/repos/asf/incubator-blur.git</code></pre>
+            <p>Build the artifacts (if you want to run the tests remove the "-DskipTests")</p>
+            <pre><code class="bash">cd incubator-blur/
+mvn install -DskipTests -P distribution</code></pre>
+            <p>The binary artifact is located at <code>distribution/target/apache-blur-0.2.0-incubating-SNAPSHOT-bin.tar.gz</code>.</p>
+          </section>
+          <section>
+            <div class="page-header">
+              <h1 id="install">Install</h1>
+            </div>
+            <p class="lead">
+              Once a distribution is available, follow the simple steps to install.
+            </p>
+            <p>Extract the contents of the distribution</p>
+            <pre><code class="bash">tar -xzvf apache-blur-*-bin.tar.gz</code></pre>
+            <div class="bs-callout bs-callout-info">While it's not required it is a
good idea to set BLUR_HOME in your environment variables.</div>
+            <p>For bash edit .bash_profile and add:</p>
+            <pre><code class="bash">export BLUR_HOME=&lt;directory where
Blur was extracted&gt;</code></pre>
+          </section>
+          <section>
+            <div class="page-header">
+              <h1 id="min-config">Minimum Configuration</h1>
+            </div>
+            <p class="lead">
+              There are a few things at a minimum that will need to be configured to start
Apache Blur
+            </p>
+            <p>Edit $BLUR_HOME/conf/blur-env.sh and set JAVA_HOME:</p>
+            <pre><code class="bash">export JAVA_HOME=&lt;Java Home Directory&gt;</code></pre>
+            <div class="bs-callout bs-callout-warning"><h4>Caution</h4>If
this variable is not set, then the script will attempt to locate JAVA_HOME by using the location
of the "java" command.</div>
+          </section>
+          <section>
+            <div class="page-header">
+              <h1 id="start">Starting Apache Blur</h1>
+            </div>
+            <p class="lead">
+              Starting Apache Blur is a simple one-command step
+            </p>
+            <p>To start Apache Blur run the following command:</p>
+            <pre><code class="bash">$BLUR_HOME/bin/start-all.sh</code></pre>
+            <p>
+              This will start a single Controller server and a single Shard server on your
localhost.
+            </p>
+            <p>
+              You should see:
+            </p>
+            <pre><code class="bash">blur@blurvm:~$ apache-blur-0.2.0-incubating/bin/start-all.sh

+localhost: ZooKeeper starting as process 6650.
+localhost: Shard [0] starting as process 6783.
+localhost: Controller [0] starting as process 6933.</code></pre>
+            <p>If you run the start command again you should see:</p>
+            <pre><code class="bash">blur@blurvm:~$ apache-blur-0.2.0-incubating/bin/stop-all.sh

+localhost: Stopping Controller [0] server with pid [6933].
+localhost: Stopping Shard [0] server with pid [6783].
+localhost: Stopping ZooKeeper with pid [6650].</code></pre>
+            <p>If you see it starting the servers again, then there is likely some
issue with startup. Look in the $BLUR_HOME/logs directory for log and out files.</p>
+          </section>
+          <section>
+            <div class="page-header">
+              <h1 id="shell">Shell</h1>
+            </div>
+            <p class="lead">
+              Once the servers have been started, you can use the shell to interact with
Blur.
+            </p>
+            <p>The shell command can be found in the bin directory</p>
+            <p>Auto detect the controller servers from the $BLUR_HOME/conf/controllers
file</p>
+            <pre><code class="bash">$BLUR_HOME/bin/blur shell</code></pre>
+            <p>You can also explicitly call out the controller servers.</p>
+            <pre><code class="bash">$BLUR_HOME/bin/blur shell controller1:40010,controller2:40010</code></pre>
+            <p>Once in the shell, tables can be created, enabled, disabled, and removed.
Type help to get a list of the commands.</p>
+          </section>
+          <section>
+            <div class="page-header">
+              <h1 id="shell-example">Shell Example</h1>
+            </div>
+            <p class="lead">
+              The example below creates a table and stores the contents of the table in a
local directory of /data/testtable, which will only work if you are running Blur in a single
instance. Normally, if you are running a Hadoop cluster, this will be an HDFS URI, for example
hdfs://host:port/blur/tables/testtable.
+            </p>
+            <pre><code class="bash">blur> #Creates a table called testtable
in the local directory of /data/testtable with 11 shards
+blur> create testtable file:///data/testtable 11
+blur> 
+blur> #Adds a row to testtable
+blur> mutate testtable 1 1 fam0 col1 value1
+blur> 
+blur> #Runs a query on testtable
+blur> query testtable fam0.col1:value1
+ - Results Summary -
+    total : 1
+    time  : 7.874 ms
+-----------------------------------------------------------------------------------------------------
+      hit : 0
+    score : 1.4142135381698608
+       id : 1
+ recordId : 1
+   family : fam0
+     col1 : value1
+-----------------------------------------------------------------------------------------------------
+ - Results Summary -
+    total : 1
+    time  : 7.874 ms
+blur> 
+blur> #Turns highlighting on
+blur> highlight
+highlight of query command is now on
+blur> 
+blur> #Runs a query on testtable with highlighting on, notice &lt;&lt;&lt;value1&gt;&gt;&gt;
is highlighted 
+blur> query testtable fam0.col1:value1
+ - Results Summary -
+    total : 1
+    time  : 13.395 ms
+-----------------------------------------------------------------------------------------------------
+      hit : 0
+    score : 1.4142135381698608
+       id : 1
+ recordId : 1
+   family : fam0
+     col1 : &lt;&lt;&lt;value1&gt;&gt;&gt;
+-----------------------------------------------------------------------------------------------------
+ - Results Summary -
+    total : 1
+    time  : 13.395 ms
+blur></code></pre>
+          </section>
+        </div>
+      </div>
+    </div>
+    
+    <!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
+    <script src="resources/js/jquery-2.0.3.min.js"></script>
+    <!-- Include all compiled plugins (below), or include individual files as needed -->
+    <script src="resources/js/bootstrap.min.js"></script>
+    <!-- Enable responsive features in IE8 with Respond.js (https://github.com/scottjehl/Respond)
-->
+    <script src="resources/js/respond.min.js"></script>
+    <script src="resources/js/docs.js"></script>
+  </body>
+</html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/079113a8/docs/index.html
----------------------------------------------------------------------
diff --git a/docs/index.html b/docs/index.html
new file mode 100644
index 0000000..c94f50d
--- /dev/null
+++ b/docs/index.html
@@ -0,0 +1,65 @@
+<!DOCTYPE html>
+<!-- 
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+  <head>
+    <title>Apache Blur (Incubator) Documentation</title>
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <!-- Bootstrap -->
+    <link href="resources/css/bootstrap.min.css" rel="stylesheet" media="screen">
+    <link href="resources/css/bs-docs.css" rel="stylesheet" media="screen">
+  </head>
+  <body>
+    <div class="navbar navbar-inverse navbar-fixed-top">
+      <div class="container">
+        <div class="navbar-header">
+          <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+          </button>
+          <a class="navbar-brand" href="index.html">Apache Blur (Incubator)</a>
+        </div>
+        <div class="collapse navbar-collapse">
+          <ul class="nav navbar-nav">
+            <li><a href="getting-started.html">Getting Started</a></li>
+            <li><a href="data-model.html">Data Model</a></li>
+            <li><a href="cluster-setup.html">Cluster Setup</a></li>
+            <li><a href="using-blur.html">Using Blur</a></li>
+            <li><a href="extra.html">Extra</a></li>
+          </ul>
+        </div>
+      </div>
+    </div>
+    <div class="container">
+      <div class="jumbotron">
+        <h1>Apache Blur (Incubator)</h1>
+        <p>
+          Blur is an open source search platform capable of querying massive amounts of data
at incredible speeds. Blur is built on top of Lucene, Hadoop, Thrift, and ZooKeeper. Tables
consist of a series of shards (Lucene indexes) that are distributed across a cluster of commodity
servers. All index information is stored in HDFS, cluster coordination is handled by Apache
ZooKeeper, Apache Thrift is used for the RPC, and Lucene handles all of the indexing and data
storage.
+        </p>
+        <a href="getting-started.html" class="btn btn-primary btn-large">Get Started
&raquo;</a>
+      </div>
+    </div>
+    
+    <!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
+    <script src="resources/js/jquery-2.0.3.min.js"></script>
+    <!-- Include all compiled plugins (below), or include individual files as needed -->
+    <script src="resources/js/bootstrap.min.js"></script>
+    <!-- Enable responsive features in IE8 with Respond.js (https://github.com/scottjehl/Respond)
-->
+    <script src="resources/js/respond.min.js"></script>
+  </body>
+</html>
\ No newline at end of file


Mime
View raw message