gora-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From build...@apache.org
Subject svn commit: r861110 [3/10] - in /websites/staging/gora/trunk/content: ./ current/ resources/ resources/css/ resources/img/ resources/js/
Date Mon, 06 May 2013 19:19:11 GMT
Added: websites/staging/gora/trunk/content/current/tutorial.html
==============================================================================
--- websites/staging/gora/trunk/content/current/tutorial.html (added)
+++ websites/staging/gora/trunk/content/current/tutorial.html Mon May  6 19:19:10 2013
@@ -0,0 +1,1135 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License. 
+-->
+<html>
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=UTF-8" />
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+  <meta name="author" content="dev@gora.apache.org" />
+
+  <META http-equiv="Content-Type" content="text/html;charset=UTF-8" />
+  <META name="Description" content="Apache Gora -- Gora Tutorial" />
+  <META name="Keywords" content="Apache Gora NoSQL Framework" />
+  <META name="Owner" content="dev@gora.apache.org" />
+  <META name="Robots" content="index, follow" />
+  <META name="Security" content="Public" />
+  <META name="Source" content="wiki template" />
+  <META name="DC.Rights" content="Copyright &copy; 2010-2013, The Apache Software Foundation" />
+
+  <!-- The styles -->
+  <link href="./../resources/css/bootstrap.css" rel="stylesheet">
+  <style type="text/css">
+    body {
+      padding-top: 60px;
+      padding-bottom: 40px;
+    }
+  </style>
+  <link href="./../resources/css/bootstrap-responsive.css" rel="stylesheet">
+  <link href="./../resources/css/gora.css" rel="stylesheet">
+
+  <!-- HTML5 shim, for IE6-8 support of HTML5 elements -->
+  <!--[if lt IE 9]>
+    <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
+  <![endif]-->
+
+  <!-- Fav and touch icons -->
+  <link rel="apple-touch-icon-precomposed" sizes="144x144" href="http://twitter.github.com/bootstrap/assets/ico/apple-touch-icon-144-precomposed.png">
+  <link rel="apple-touch-icon-precomposed" sizes="114x114" href="http://twitter.github.com/bootstrap/assets/ico/apple-touch-icon-114-precomposed.png">
+  <link rel="apple-touch-icon-precomposed" sizes="72x72" href="http://twitter.github.com/bootstrap/assets/ico/apple-touch-icon-72-precomposed.png">
+  <link rel="apple-touch-icon-precomposed" href="http://twitter.github.com/bootstrap/assets/ico/apple-touch-icon-57-precomposed.png">
+  <link rel="shortcut icon" href="./../resources/img/feather-small.png">
+
+  <title>Apache Gora&trade;</title>
+</head>
+
+<body>
+  <div class="navbar navbar-inverse navbar-fixed-top">
+    <div class="navbar-inner">
+      <div class="container">
+        <a class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
+          <span class="icon-bar"></span>
+          <span class="icon-bar"></span>
+          <span class="icon-bar"></span>
+        </a>
+        <a class="brand" href="./../index.html"><img src="./..//resources/images/gora-logo.png" alt="Apache Gora" title="Apache Gora" /></a>
+        <div class="nav-collapse collapse">
+          <ul class="nav">
+            <li><a href="./../downloads.html">Downloads</a></li>
+            <li class="dropdown">
+              <a href="#" class="dropdown-toggle" data-toggle="dropdown">Community <b class="caret"></b></a>
+              <ul class="dropdown-menu pull-right">
+                <li><a href="./../board-reports/index.html">Board Reporting</a></li>
+                <li><a href="./../contribute.html">Contribute</a></li>
+                <li><a href="./../mailing_lists.html">Mailing Lists</a></li>
+                <li><a href="./../credits.html">People</a></li>
+                <li><a href="./../related.html">Related Projects</a></li>
+              </ul>
+            </li>
+            <li class="dropdown">
+              <a href="#" class="dropdown-toggle" data-toggle="dropdown">Documentation <b class="caret"></b></a>
+              <ul class="dropdown-menu pull-right">
+                <li><a href="./../about.html">About</a></li>
+                <li><a href="./../current/overview.html">Current Documentation</a></li>
+                <li><a href="./../current/api/index.html">JavaDoc Documentation</a></li>
+              </ul>
+            </li>
+            <li class="dropdown">
+              <a href="#" class="dropdown-toggle" data-toggle="dropdown">Development <b class="caret"></b></a>
+              <ul class="dropdown-menu pull-right">
+                <li><a href="./../issue_tracking.html">Issue Tracking</a></li>
+                <li><a href="./../mailing_lists.html">Mailing Lists</a></li>
+                <li><a href="./../nightly_builds.html">Nightly Builds</a></li>
+                <li><a href="./../version_control.html">Version Control</a></li>
+              </ul>
+            </li>
+            <li class="dropdown">
+              <a href="#" class="dropdown-toggle" data-toggle="dropdown">
+                <img src="./../resources/img/feather-small.png" alt="Apache" title="Apache" /> <b class="caret"></b></a>
+              <ul class="dropdown-menu pull-right">
+                <li><a href="http://www.apache.org">Apache Home</a></li>
+                <li><a href="http://www.apache.org/licenses/">Apache License</a></li>
+                <li><a href="http://www.apache.org/security/">Security</a></li>
+                <li><a href="http://www.apache.org/foundation/sponsorship.html">Support</a></li>
+                <li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
+              </ul>
+            </li>
+          </ul>
+          <form id="search-form" class="navbar-search pull-right" action="http://www.google.com/cse" method="get">
+            <input value="gora.apache.org" name="sitesearch" type="hidden" />
+            <input class="search-query" name="q" id="query" type="text" />
+          </form>
+          <script type="text/javascript" src="http://www.google.com/coop/cse/brand?form=search-form"></script>
+        </div><!--/.nav-collapse -->
+      </div>
+    </div>
+  </div>
+
+  <div class="container" id="Gora_Gora Tutorial">
+
+<p>Author : Enis Söztutar, enis [at] apache [dot] org</p>
+<h2 id="introduction">Introduction</h2>
+<p>This is the official tutorial for Apache Gora. For this tutorial, we 
+will be implementing a system to store our web server logs in Apache HBase,
+and analyze the results using Apache Hadoop and store the results either in HSQLDB or MySQL.</p>
+<p>In this tutorial we will first look at how to set up the environment and 
+configure Gora and the data stores. Later, we will go over the data we will use and
+define the data beans that will be used to interact with the persistency layer. 
+Next, we will go over the API of Gora to do some basic tasks such as storing objects, 
+fetching and querying objects, and deleting objects. Last, we will go over an example 
+program which uses Hadoop MapReduce to analyze the web server logs, and discuss the Gora 
+MapReduce API in some detail.</p>
+<h2 id="introduction-to-gora">Introduction to Gora</h2>
+<p>The Apache Gora open source framework provides an in-memory data 
+model and persistence for big data. Gora supports persisting to 
+column stores, key value stores, document stores and RDBMSs, and 
+analyzing the data with extensive Apache Hadoop MapReduce support. In Avro, the 
+beans to hold the data and RPC interfaces are defined using a JSON 
+schema. In mapping the data beans to data store specific settings, 
+Gora depends on mapping files, which are specific to each data store. 
+Unlike other ORM implementations, in Gora the data bean to data store 
+specific schema mapping is explicit. This has the advantage that, 
+when using data models such as HBase and Cassandra, you can always 
+know how the values are persisted.</p>
+<p>Gora has a modular architecture. Most of the data stores in Gora 
+have their own module, such as gora-hbase, gora-cassandra,
+and gora-sql. In your projects, you need to only include 
+the artifacts from the modules you use. You can consult the <a href="/quickstart.html">quick start</a>
+for setting up your project.</p>
+<h2 id="setting-up-gora">Setting up Gora</h2>
+<p>As a first step, we need to download and compile the Gora source code. The source code 
+for the tutorial is in the gora-tutorial module. If you have
+already downloaded Gora, that's cool, otherwise, please go
+over the steps at the <a href="/quickstart.html">quickstart</a> guide for
+how to download and compile Gora.</p>
+<p>Now, after the source code for Gora is at hand, let's have a look at the files under the 
+directory gora-tutorial. </p>
+<div class="codehilite"><pre><span class="nv">$</span> <span class="nv">cd</span> <span class="n">gora</span><span class="o">-</span><span class="n">tutorial</span>
+<span class="nv">$</span> <span class="nv">tree</span>
+
+<span class="o">|--</span> <span class="n">build</span><span class="o">.</span><span class="n">xml</span>
+<span class="o">|--</span> <span class="n">conf</span>
+<span class="o">|</span>   <span class="o">|--</span> <span class="n">gora</span><span class="o">-</span><span class="n">hbase</span><span class="o">-</span><span class="n">mapping</span><span class="o">.</span><span class="n">xml</span>
+<span class="o">|</span>   <span class="o">|--</span> <span class="n">gora</span><span class="o">-</span><span class="n">sql</span><span class="o">-</span><span class="n">mapping</span><span class="o">.</span><span class="n">xml</span>
+<span class="o">|</span>   <span class="sb">`-- gora.properties</span>
+<span class="sb">|-- ivy</span>
+<span class="sb">|   `</span><span class="o">--</span> <span class="n">ivy</span><span class="o">.</span><span class="n">xml</span>
+<span class="sb">`-- src</span>
+<span class="sb">    |-- examples</span>
+<span class="sb">    |   `</span><span class="o">--</span> <span class="n">java</span>
+    <span class="o">|--</span> <span class="n">main</span>
+    <span class="o">|</span>   <span class="o">|--</span> <span class="n">avro</span>
+    <span class="o">|</span>   <span class="o">|</span>   <span class="o">|--</span> <span class="n">metricdatum</span><span class="o">.</span><span class="n">json</span>
+    <span class="o">|</span>   <span class="o">|</span>   <span class="sb">`-- pageview.json</span>
+<span class="sb">    |   |-- java</span>
+<span class="sb">    |   |   `</span><span class="o">--</span> <span class="n">org</span>
+    <span class="o">|</span>   <span class="o">|</span>       <span class="sb">`-- apache</span>
+<span class="sb">    |   |           `</span><span class="o">--</span> <span class="n">gora</span>
+    <span class="o">|</span>   <span class="o">|</span>               <span class="sb">`-- tutorial</span>
+<span class="sb">    |   |                   `</span><span class="o">--</span> <span class="nb">log</span>
+    <span class="o">|</span>   <span class="o">|</span>                       <span class="o">|--</span> <span class="n">KeyValueWritable</span><span class="o">.</span><span class="n">java</span>
+    <span class="o">|</span>   <span class="o">|</span>                       <span class="o">|--</span> <span class="n">LogAnalytics</span><span class="o">.</span><span class="n">java</span>
+    <span class="o">|</span>   <span class="o">|</span>                       <span class="o">|--</span> <span class="n">LogManager</span><span class="o">.</span><span class="n">java</span>
+    <span class="o">|</span>   <span class="o">|</span>                       <span class="o">|--</span> <span class="n">TextLong</span><span class="o">.</span><span class="n">java</span>
+    <span class="o">|</span>   <span class="o">|</span>                       <span class="sb">`-- generated</span>
+<span class="sb">    |   |                           |-- MetricDatum.java</span>
+<span class="sb">    |   |                           `</span><span class="o">--</span> <span class="n">Pageview</span><span class="o">.</span><span class="n">java</span>
+    <span class="o">|</span>   <span class="sb">`-- resources</span>
+<span class="sb">    |       `</span><span class="o">--</span> <span class="n">access</span><span class="o">.</span><span class="nb">log</span><span class="o">.</span><span class="n">tar</span><span class="o">.</span><span class="n">gz</span>
+    <span class="sb">`-- test</span>
+<span class="sb">        |-- conf</span>
+<span class="sb">        `</span><span class="o">--</span> <span class="n">java</span>
+</pre></div>
+
+
+<p>Since gora-tutorial is a top level module of Gora, it depends on the directory
+structure imposed by Gora's main build scripts (build.xml and 
+build-common.xml with Ivy and pom.xml for Maven). The Java source code resides in directory 
+src/main/java/, avro schemas in src/main/avro/, and data in src/main/resources/.</p>
+<h2 id="setting-up-hbase">Setting up HBase</h2>
+<p>For this tutorial we will be using HBase to 
+store the logs. For those of you not familiar with HBase, it is a NoSQL
+column store with an architecture very similar to Google's BigTable.</p>
+<p>If you don't already have HBase set up, you can go over the steps in the 
+<a href="http://hbase.apache.org/book/quickstart.html">HBase Overview</a>
+documentation. Gora aims to support the most recent HBase versions; however, if you
+find compatibility problems, please <a href="../mailing_lists.html">get in touch</a>.
+So download an <a href="http://hbase.apache.org/releases.html">HBase release</a>. 
+After extracting the file, cd to the hbase-${dist} directory and start the HBase server. </p>
+<div class="codehilite"><pre><span class="nv">$</span> <span class="nv">bin</span><span class="o">/</span><span class="n">start</span><span class="o">-</span><span class="n">hbase</span><span class="o">.</span><span class="n">sh</span>
+</pre></div>
+
+
+<p>and make sure that HBase is available by using the HBase shell. </p>
+<div class="codehilite"><pre><span class="nv">$</span> <span class="nv">bin</span><span class="o">/</span><span class="n">hbase</span> <span class="n">shell</span>
+</pre></div>
+
+
+<h2 id="configuring-gora">Configuring Gora</h2>
+<p>Gora is configured through a file in the classpath named gora.properties. 
+We will be using the following file gora-tutorial/conf/gora.properties</p>
+<div class="codehilite"><pre>  <span class="n">gora</span><span class="o">.</span><span class="n">datastore</span><span class="o">.</span><span class="n">default</span><span class="o">=</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">gora</span><span class="o">.</span><span class="n">hbase</span><span class="o">.</span><span class="n">store</span><span class="o">.</span><span class="n">HBaseStore</span>
+  <span class="n">gora</span><span class="o">.</span><span class="n">datastore</span><span class="o">.</span><span class="n">autocreateschema</span><span class="o">=</span><span class="n">true</span>
+</pre></div>
+
+
+<p>This file states that the default store will be HBaseStore,
+and schemas (tables) should be automatically created.
+More information for configuring different settings in gora.properties 
+can be found <a href="/gora-conf.html">here</a>.</p>
+<h2 id="modelling-the-data">Modelling the data</h2>
+<p>For this tutorial, we will be parsing and storing the logs of a web server. 
+Some example logs are at src/main/resources/access.log.tar.gz, which 
+belongs to the (now shutdown) server at http://www.buldinle.com/. 
+Example logs contain 10,000 lines, between dates 2009/03/10 - 2009/03/15.
+The first thing we need to do is to extract the logs.</p>
+<div class="codehilite"><pre><span class="nv">$</span> <span class="nv">tar</span> <span class="n">zxvf</span> <span class="n">src</span><span class="sr">/main/</span><span class="n">resources</span><span class="sr">/access.log.tar.gz -C src/m</span><span class="n">ain</span><span class="sr">/resources/</span>
+</pre></div>
+
+
+<p>You can also use your own log files, given that the log 
+format is <a href="http://httpd.apache.org/docs/current/logs.html">Combined Log Format</a>.
+Some example lines from the log are:</p>
+<div class="codehilite"><pre><span class="mf">88.254.190.73</span> <span class="o">-</span> <span class="o">-</span> <span class="p">[</span><span class="mi">10</span><span class="sr">/Mar/</span><span class="mi">2009</span><span class="p">:</span><span class="mi">20</span><span class="p">:</span><span class="mi">40</span><span class="p">:</span><span class="mi">26</span> <span class="o">+</span><span class="mo">0200</span><span class="p">]</span> <span class="s">&quot;GET / HTTP/1.1&quot;</span> <span class="mi">200</span> <span class="mi">43</span> <span class="s">&quot;http://www.buldinle.com/&quot;</span> <span class="s">&quot;Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; GTB5; .NET CLR 2.0.50727; InfoPath.2)</span>
+<span class="s">78.179.56.27 - - [11/Mar/2009:00:07:40 +0200] &quot;</span><span class="n">GET</span> <span class="sr">/index.php?i=3&amp;amp;a=1__6x39kovbji8&amp;amp;k=3750105 HTTP/</span><span class="mf">1.1</span><span class="s">&quot; 200 43 &quot;</span><span class="n">http:</span><span class="sr">//</span><span class="n">www</span><span class="o">.</span><span class="n">buldinle</span><span class="o">.</span><span class="n">com</span><span class="sr">/index.php?i=3&amp;amp;a=1__6X39Kovbji8&amp;amp;k=3750105&quot; &quot;Mozilla/</span><span class="mf">4.0</span> <span class="p">(</span><span class="n">compatible</span><span class="p">;</span> <span class="n">MSIE</span> <span class="mf">6.0</span><span class="p">;</span> <span class="n">Windows</span> <span class="n">NT</span> <span class="mf">5.1</span><span class="p">;</span> <span class="n">SV1</span><span class="p">;</span> <span class="o">.</span><span class="n">NET</span> <span class="n">CLR</span> <span class="mf
 ">2.0.50727</span><span class="p">;</span> <span class="n">OfficeLiveConnector</span><span class="mf">.1.3</span><span class="p">;</span> <span class="n">OfficeLivePatch</span><span class="mf">.0.0</span><span class="p">)</span>
+<span class="mf">78.163.99.14</span> <span class="o">-</span> <span class="o">-</span> <span class="p">[</span><span class="mi">12</span><span class="sr">/Mar/</span><span class="mi">2009</span><span class="p">:</span><span class="mi">18</span><span class="p">:</span><span class="mi">18</span><span class="p">:</span><span class="mi">25</span> <span class="o">+</span><span class="mo">0200</span><span class="p">]</span> <span class="s">&quot;GET /index.php?a=3__x7l72c&amp;amp;k=4476881 HTTP/1.1&quot;</span> <span class="mi">200</span> <span class="mi">43</span> <span class="s">&quot;http://www.buldinle.com/index.php?a=3__x7l72c&amp;amp;k=4476881&quot;</span> <span class="err">&quot;</span><span class="n">Mozilla</span><span class="o">/</span><span class="mf">4.0</span> <span class="p">(</span><span class="n">compatible</span><span class="p">;</span> <span class="n">MSIE</span> <span class="mf">7.0</span><span class="p">;</span> <span class="n">Windows</span> <span class="n">NT
 </span> <span class="mf">5.1</span><span class="p">;</span> <span class="n">InfoPath</span><span class="mf">.1</span><span class="p">)</span>
+</pre></div>
+
+
+<p>The first fields in order are: User's ip, ignored, ignored, Date and 
+time, HTTP method, URL, HTTP version, HTTP status code, Number of bytes 
+returned, Referrer, and User Agent.</p>
+<h2 id="defining-data-beans">Defining data beans</h2>
+<p>Data beans are the main way to hold the data in memory and persist in Gora. Gora 
+needs to explicitly keep track of the status of the data in memory, so 
+we use <a href="http://avro.apache.org">Apache Avro</a> for defining the beans. Using 
+Avro gives us the possibility to explicitly keep track of an object's persistency state, 
+and a way to serialize the object's data. 
+Defining data beans is a very easy task, but for the exact syntax, please 
+consult the <a href="http://avro.apache.org/docs/current/spec.html">Avro Specification</a>.
+First, we need to define the bean Pageview to hold a
+single URL access in the logs. Let's go over the class at src/main/avro/pageview.json </p>
+<div class="codehilite"><pre> <span class="p">{</span>
+  <span class="s">&quot;type&quot;</span><span class="p">:</span> <span class="s">&quot;record&quot;</span><span class="p">,</span>
+  <span class="s">&quot;name&quot;</span><span class="p">:</span> <span class="s">&quot;Pageview&quot;</span><span class="p">,</span>
+  <span class="s">&quot;namespace&quot;</span><span class="p">:</span> <span class="s">&quot;org.apache.gora.tutorial.log.generated&quot;</span><span class="p">,</span>
+  <span class="s">&quot;fields&quot;</span> <span class="p">:</span> <span class="p">[</span>
+    <span class="p">{</span><span class="s">&quot;name&quot;</span><span class="p">:</span> <span class="s">&quot;url&quot;</span><span class="p">,</span> <span class="s">&quot;type&quot;</span><span class="p">:</span> <span class="s">&quot;string&quot;</span><span class="p">},</span>
+    <span class="p">{</span><span class="s">&quot;name&quot;</span><span class="p">:</span> <span class="s">&quot;timestamp&quot;</span><span class="p">,</span> <span class="s">&quot;type&quot;</span><span class="p">:</span> <span class="s">&quot;long&quot;</span><span class="p">},</span>
+    <span class="p">{</span><span class="s">&quot;name&quot;</span><span class="p">:</span> <span class="s">&quot;ip&quot;</span><span class="p">,</span> <span class="s">&quot;type&quot;</span><span class="p">:</span> <span class="s">&quot;string&quot;</span><span class="p">},</span>
+    <span class="p">{</span><span class="s">&quot;name&quot;</span><span class="p">:</span> <span class="s">&quot;httpMethod&quot;</span><span class="p">,</span> <span class="s">&quot;type&quot;</span><span class="p">:</span> <span class="s">&quot;string&quot;</span><span class="p">},</span>
+    <span class="p">{</span><span class="s">&quot;name&quot;</span><span class="p">:</span> <span class="s">&quot;httpStatusCode&quot;</span><span class="p">,</span> <span class="s">&quot;type&quot;</span><span class="p">:</span> <span class="s">&quot;int&quot;</span><span class="p">},</span>
+    <span class="p">{</span><span class="s">&quot;name&quot;</span><span class="p">:</span> <span class="s">&quot;responseSize&quot;</span><span class="p">,</span> <span class="s">&quot;type&quot;</span><span class="p">:</span> <span class="s">&quot;int&quot;</span><span class="p">},</span>
+    <span class="p">{</span><span class="s">&quot;name&quot;</span><span class="p">:</span> <span class="s">&quot;referrer&quot;</span><span class="p">,</span> <span class="s">&quot;type&quot;</span><span class="p">:</span> <span class="s">&quot;string&quot;</span><span class="p">},</span>
+    <span class="p">{</span><span class="s">&quot;name&quot;</span><span class="p">:</span> <span class="s">&quot;userAgent&quot;</span><span class="p">,</span> <span class="s">&quot;type&quot;</span><span class="p">:</span> <span class="s">&quot;string&quot;</span><span class="p">}</span>
+  <span class="p">]</span>
+<span class="p">}</span>
+</pre></div>
+
+
+<p>Avro schemas are declared in JSON. 
+<a href="http://avro.apache.org/docs/current/spec.html#schema_record">Records</a>
+are defined with type "record", with a name as the name of the class, and a 
+namespace which is mapped to the package name in Java. The fields 
+are listed in the "fields" element. Each field is given with its type. </p>
+<h2 id="compiling-avro-schemas">Compiling Avro Schemas</h2>
+<p>The next step after defining the data beans is to compile the schemas 
+into Java classes. For that we will use GoraCompiler. 
+Invoking the Gora compiler by (from Gora top level directory)</p>
+<div class="codehilite"><pre><span class="nv">$</span> <span class="nv">bin</span><span class="o">/</span><span class="n">gora</span> <span class="n">compile</span>
+</pre></div>
+
+
+<p>results in:</p>
+<div class="codehilite"><pre><span class="nv">$</span> <span class="nv">Usage:</span> <span class="n">SpecificCompiler</span> <span class="o">&amp;</span><span class="ow">lt</span><span class="p">;</span><span class="n">schema</span> <span class="n">file</span><span class="o">&amp;</span><span class="ow">gt</span><span class="p">;</span> <span class="o">&amp;</span><span class="ow">lt</span><span class="p">;</span><span class="n">output</span> <span class="n">dir</span><span class="o">&amp;</span><span class="ow">gt</span><span class="p">;</span>
+</pre></div>
+
+
+<p>so we will issue :</p>
+<div class="codehilite"><pre><span class="nv">$</span> <span class="nv">bin</span><span class="sr">/gora compile gora-tutorial/s</span><span class="n">rc</span><span class="sr">/main/</span><span class="n">avro</span><span class="sr">/pageview.json gora-tutorial/s</span><span class="n">rc</span><span class="sr">/main/</span><span class="n">java</span><span class="o">/</span>
+</pre></div>
+
+
+<p>to compile the Pageview class into gora-tutorial/src/main/java/org/apache/gora/tutorial/log/generated/Pageview.java. 
+However, the tutorial java classes are already committed, so you do not need to do that now.</p>
+<p>Gora compiler extends Avro's SpecificCompiler to convert JSON definition 
+into a Java class. Generated classes extend the Persistent interface. 
+Most of the methods of the Persistent interface deal with bookkeeping for 
+persistence, and state tracking, so most of the time they are not used explicitly by the
+user. Now, let's look at the internals of the generated class Pageview.java.</p>
+<div class="codehilite"><pre><span class="n">public</span> <span class="n">class</span> <span class="n">Pageview</span> <span class="n">extends</span> <span class="n">PersistentBase</span> <span class="p">{</span>
+
+<span class="n">private</span> <span class="n">Utf8</span> <span class="n">url</span><span class="p">;</span>
+<span class="n">private</span> <span class="n">long</span> <span class="n">timestamp</span><span class="p">;</span>
+<span class="n">private</span> <span class="n">Utf8</span> <span class="n">ip</span><span class="p">;</span>
+<span class="n">private</span> <span class="n">Utf8</span> <span class="n">httpMethod</span><span class="p">;</span>
+<span class="n">private</span> <span class="nb">int</span> <span class="n">httpStatusCode</span><span class="p">;</span>
+<span class="n">private</span> <span class="nb">int</span> <span class="n">responseSize</span><span class="p">;</span>
+<span class="n">private</span> <span class="n">Utf8</span> <span class="n">referrer</span><span class="p">;</span>
+<span class="n">private</span> <span class="n">Utf8</span> <span class="n">userAgent</span><span class="p">;</span>
+
+<span class="o">...</span>
+
+<span class="n">public</span> <span class="n">static</span> <span class="n">final</span> <span class="n">Schema</span> <span class="n">_SCHEMA</span> <span class="o">=</span> <span class="n">Schema</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="s">&quot;{\&quot;type\&quot;:\&quot;record\&quot;, ... &quot;</span><span class="p">);</span>
+  <span class="n">public</span> <span class="n">static</span> <span class="n">enum</span> <span class="n">Field</span> <span class="p">{</span>
+  <span class="n">URL</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="s">&quot;url&quot;</span><span class="p">),</span>
+  <span class="n">TIMESTAMP</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="s">&quot;timestamp&quot;</span><span class="p">),</span>
+  <span class="n">IP</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="s">&quot;ip&quot;</span><span class="p">),</span>
+  <span class="n">HTTP_METHOD</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span><span class="s">&quot;httpMethod&quot;</span><span class="p">),</span>
+  <span class="n">HTTP_STATUS_CODE</span><span class="p">(</span><span class="mi">4</span><span class="p">,</span><span class="s">&quot;httpStatusCode&quot;</span><span class="p">),</span>
+  <span class="n">RESPONSE_SIZE</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span><span class="s">&quot;responseSize&quot;</span><span class="p">),</span>
+  <span class="n">REFERRER</span><span class="p">(</span><span class="mi">6</span><span class="p">,</span><span class="s">&quot;referrer&quot;</span><span class="p">),</span>
+  <span class="n">USER_AGENT</span><span class="p">(</span><span class="mi">7</span><span class="p">,</span><span class="s">&quot;userAgent&quot;</span><span class="p">),</span>
+  <span class="p">;</span>
+  <span class="n">private</span> <span class="nb">int</span> <span class="nb">index</span><span class="p">;</span>
+  <span class="n">private</span> <span class="n">String</span> <span class="n">name</span><span class="p">;</span>
+  <span class="n">Field</span><span class="p">(</span><span class="nb">int</span> <span class="nb">index</span><span class="p">,</span> <span class="n">String</span> <span class="n">name</span><span class="p">)</span> <span class="p">{</span><span class="n">this</span><span class="o">.</span><span class="nb">index</span><span class="o">=</span><span class="nb">index</span><span class="p">;</span><span class="n">this</span><span class="o">.</span><span class="n">name</span><span class="o">=</span><span class="n">name</span><span class="p">;}</span>
+  <span class="n">public</span> <span class="nb">int</span> <span class="n">getIndex</span><span class="p">()</span> <span class="p">{</span><span class="k">return</span> <span class="nb">index</span><span class="p">;}</span>
+  <span class="n">public</span> <span class="n">String</span> <span class="n">getName</span><span class="p">()</span> <span class="p">{</span><span class="k">return</span> <span class="n">name</span><span class="p">;}</span>
+  <span class="n">public</span> <span class="n">String</span> <span class="n">toString</span><span class="p">()</span> <span class="p">{</span><span class="k">return</span> <span class="n">name</span><span class="p">;}</span>
+  <span class="p">};</span>
+<span class="n">public</span> <span class="n">static</span> <span class="n">final</span> <span class="n">String</span><span class="o">[]</span> <span class="n">_ALL_FIELDS</span> <span class="o">=</span> <span class="p">{</span><span class="s">&quot;url&quot;</span><span class="p">,</span><span class="s">&quot;timestamp&quot;</span><span class="p">,</span><span class="s">&quot;ip&quot;</span><span class="p">,</span><span class="s">&quot;httpMethod&quot;</span>
+  <span class="p">,</span><span class="s">&quot;httpStatusCode&quot;</span><span class="p">,</span><span class="s">&quot;responseSize&quot;</span><span class="p">,</span><span class="s">&quot;referrer&quot;</span><span class="p">,</span><span class="s">&quot;userAgent&quot;</span><span class="p">,};</span>
+
+<span class="o">...</span>
+<span class="p">}</span>
+</pre></div>
+
+
+<p>We can see the actual field declarations in the class. Note that Avro uses Utf8 
+class as a placeholder for string fields. We can also see the embedded Avro 
+Schema declaration and an inner enum named Field. This enum and 
+the _ALL_FIELDS field will come in handy when we use them 
+to query the datastore for specific fields. </p>
+<h2 id="defining-data-store-mappings">Defining data store mappings</h2>
+<p>Gora is designed to flexibly work with various types of data modeling, 
+including column stores (such as HBase, Cassandra, etc.), SQL databases, flat files (binary, 
+JSON, XML encoded), and key-value stores. The mapping between the data bean and 
+the data store is thus defined in XML mapping files. Each data store has its own 
+mapping format, so that data-store specific settings can be leveraged more easily.
+The mapping files declare how the fields of the classes declared in Avro schemas 
+are serialized and persisted to the data store.</p>
+<h3 id="hbase-mappings">HBase mappings</h3>
+<p>HBase mappings are stored in a file named gora-hbase-mapping.xml. 
+For this tutorial we will be using the file gora-tutorial/conf/gora-hbase-mapping.xml.</p>
+<div class="codehilite"><pre>  <span class="o">&lt;!--</span>  <span class="n">This</span> <span class="n">is</span> <span class="n">gora</span><span class="o">-</span><span class="n">sql</span><span class="o">-</span><span class="n">mapping</span><span class="o">.</span><span class="n">xml</span>
+</pre></div>
+
+
+<p><source>
+ &lt;gora-orm&gt;
+  &lt;class name="org.apache.gora.tutorial.log.generated.Pageview" keyClass="java.lang.Long" table="AccessLog"&gt;
+    &lt;primarykey column="line"/&gt;
+    &lt;field name="url" column="url" length="512" primarykey="true"/&gt;
+    &lt;field name="timestamp" column="timestamp"/&gt;
+    &lt;field name="ip" column="ip" length="16"/&gt;
+    &lt;field name="httpMethod" column="httpMethod" length="6"/&gt;
+    &lt;field name="httpStatusCode" column="httpStatusCode"/&gt;
+    &lt;field name="responseSize" column="responseSize"/&gt;
+    &lt;field name="referrer" column="referrer" length="512"/&gt;
+    &lt;field name="userAgent" column="userAgent" length="512"/&gt;
+  &lt;/class&gt;</p>
+<p>...</p>
+<p>&lt;/gora-orm&gt;</p>
+<div class="codehilite"><pre>  <span class="sr">&lt;/source&gt;</span>
+  <span class="o">--&gt;</span>
+
+<span class="o">&lt;</span><span class="n">gora</span><span class="o">-</span><span class="n">orm</span><span class="o">&gt;</span>
+  <span class="o">&lt;</span><span class="n">table</span> <span class="n">name</span><span class="o">=</span><span class="s">&quot;Pageview&quot;</span><span class="o">&gt;</span> <span class="o">&lt;!--</span> <span class="n">optional</span> <span class="n">descriptors</span> <span class="k">for</span> <span class="n">tables</span> <span class="o">--&gt;</span>
+    <span class="o">&lt;</span><span class="n">family</span> <span class="n">name</span><span class="o">=</span><span class="s">&quot;common&quot;</span><span class="o">/&gt;</span> <span class="o">&lt;!--</span> <span class="n">This</span> <span class="n">can</span> <span class="n">also</span> <span class="n">have</span> <span class="n">params</span> <span class="n">like</span> <span class="n">compression</span><span class="p">,</span> <span class="n">bloom</span> <span class="n">filters</span> <span class="o">--&gt;</span>
+    <span class="o">&lt;</span><span class="n">family</span> <span class="n">name</span><span class="o">=</span><span class="s">&quot;http&quot;</span><span class="o">/&gt;</span>
+    <span class="o">&lt;</span><span class="n">family</span> <span class="n">name</span><span class="o">=</span><span class="s">&quot;misc&quot;</span><span class="o">/&gt;</span>
+  <span class="o">&lt;/</span><span class="n">table</span><span class="o">&gt;</span>
+
+  <span class="o">&lt;</span><span class="n">class</span> <span class="n">name</span><span class="o">=</span><span class="s">&quot;org.apache.gora.tutorial.log.generated.Pageview&quot;</span> <span class="n">keyClass</span><span class="o">=</span><span class="s">&quot;java.lang.Long&quot;</span> <span class="n">table</span><span class="o">=</span><span class="s">&quot;AccessLog&quot;</span><span class="o">&gt;</span>
+    <span class="o">&lt;</span><span class="n">field</span> <span class="n">name</span><span class="o">=</span><span class="s">&quot;url&quot;</span> <span class="n">family</span><span class="o">=</span><span class="s">&quot;common&quot;</span> <span class="n">qualifier</span><span class="o">=</span><span class="s">&quot;url&quot;</span><span class="o">/&gt;</span>
+    <span class="o">&lt;</span><span class="n">field</span> <span class="n">name</span><span class="o">=</span><span class="s">&quot;timestamp&quot;</span> <span class="n">family</span><span class="o">=</span><span class="s">&quot;common&quot;</span> <span class="n">qualifier</span><span class="o">=</span><span class="s">&quot;timestamp&quot;</span><span class="o">/&gt;</span>
+    <span class="o">&lt;</span><span class="n">field</span> <span class="n">name</span><span class="o">=</span><span class="s">&quot;ip&quot;</span> <span class="n">family</span><span class="o">=</span><span class="s">&quot;common&quot;</span> <span class="n">qualifier</span><span class="o">=</span><span class="s">&quot;ip&quot;</span> <span class="o">/&gt;</span>
+    <span class="o">&lt;</span><span class="n">field</span> <span class="n">name</span><span class="o">=</span><span class="s">&quot;httpMethod&quot;</span> <span class="n">family</span><span class="o">=</span><span class="s">&quot;http&quot;</span> <span class="n">qualifier</span><span class="o">=</span><span class="s">&quot;httpMethod&quot;</span><span class="o">/&gt;</span>
+    <span class="o">&lt;</span><span class="n">field</span> <span class="n">name</span><span class="o">=</span><span class="s">&quot;httpStatusCode&quot;</span> <span class="n">family</span><span class="o">=</span><span class="s">&quot;http&quot;</span> <span class="n">qualifier</span><span class="o">=</span><span class="s">&quot;httpStatusCode&quot;</span><span class="o">/&gt;</span>
+    <span class="o">&lt;</span><span class="n">field</span> <span class="n">name</span><span class="o">=</span><span class="s">&quot;responseSize&quot;</span> <span class="n">family</span><span class="o">=</span><span class="s">&quot;http&quot;</span> <span class="n">qualifier</span><span class="o">=</span><span class="s">&quot;responseSize&quot;</span><span class="o">/&gt;</span>
+    <span class="o">&lt;</span><span class="n">field</span> <span class="n">name</span><span class="o">=</span><span class="s">&quot;referrer&quot;</span> <span class="n">family</span><span class="o">=</span><span class="s">&quot;misc&quot;</span> <span class="n">qualifier</span><span class="o">=</span><span class="s">&quot;referrer&quot;</span><span class="o">/&gt;</span>
+    <span class="o">&lt;</span><span class="n">field</span> <span class="n">name</span><span class="o">=</span><span class="s">&quot;userAgent&quot;</span> <span class="n">family</span><span class="o">=</span><span class="s">&quot;misc&quot;</span> <span class="n">qualifier</span><span class="o">=</span><span class="s">&quot;userAgent&quot;</span><span class="o">/&gt;</span>
+  <span class="o">&lt;/</span><span class="n">class</span><span class="o">&gt;</span>
+
+  <span class="o">...</span>
+
+<span class="o">&lt;/</span><span class="n">gora</span><span class="o">-</span><span class="n">orm</span><span class="o">&gt;</span>
+</pre></div>
+
+
+<p>Every mapping file starts with the top level element &lt;gora-orm&gt;. 
+Gora HBase mapping files can have two types of child elements, table and 
+class declarations. All of the table and class definitions should be 
+listed at this level.</p>
+<p>table declaration is optional and most of the time, Gora infers the table 
+declaration from the class sub elements. However, some of the HBase 
+specific table configuration such as compression, blockCache, etc can be given here, 
+if Gora is used to auto-create the tables. The exact syntax for the file can be found 
+<a href="/gora-hbase.html">here</a>.</p>
+<p>In Gora, data store access is always 
+done in a key-value data model, since most of the target backends support this model.
+DataStore API expects to know the class names of the key and persistent classes, so that 
+they can be instantiated. The key value pair is declared in the class element.
+The name attribute is the fully qualified name of the class, 
+and the keyClass attribute is the fully qualified class name of the key class.</p>
+<p>Children of the &lt;class&gt; element are &lt;field&gt; 
+elements. Each field element has a name and family attribute, and 
+an optional qualifier attribute. name attribute contains the name 
+of the field in the persistent class, and family declares the column family 
+of the HBase data model. If the qualifier is not given, the name of the field is used 
+as the column qualifier. Note that map and array type fields are stored in unique column 
+families, so the configuration should list unique column families for each map and 
+array type, and no qualifier should be given. The exact data model is discussed further 
+at the <a href="/gora-hbase.html">gora-hbase</a> documentation. </p>
+<h2 id="basic-api-wzxhzdk83">Basic API</h2>
+<h3 id="parsing-the-logs">Parsing the logs</h3>
+<p>Now that we have the basic setup, we can see Gora API in action. As you can notice below the API 
+is pretty simple to use. We will be using the class LogManager (which is located at
+gora-tutorial/src/main/java/org/apache/gora/tutorial/log/LogManager.java) for parsing 
+and storing the logs, deleting some lines and querying. </p>
+<p>First of all, let us look at the constructor. The only real thing it does is to call the 
+init() method. init() method constructs the 
+DataStore instance so that it can be used by the LogManager's methods.</p>
+<div class="codehilite"><pre>  <span class="n">public</span> <span class="n">LogManager</span><span class="p">()</span> <span class="p">{</span>
+    <span class="n">try</span> <span class="p">{</span>
+     <span class="n">init</span><span class="p">();</span>
+    <span class="p">}</span> <span class="n">catch</span> <span class="p">(</span><span class="n">IOException</span> <span class="n">ex</span><span class="p">)</span> <span class="p">{</span>
+    <span class="n">throw</span> <span class="k">new</span> <span class="n">RuntimeException</span><span class="p">(</span><span class="n">ex</span><span class="p">);</span>
+    <span class="p">}</span>
+  <span class="p">}</span>
+  <span class="n">private</span> <span class="n">void</span> <span class="n">init</span><span class="p">()</span> <span class="n">throws</span> <span class="n">IOException</span> <span class="p">{</span>
+    <span class="n">dataStore</span> <span class="o">=</span> <span class="n">DataStoreFactory</span><span class="o">.</span><span class="n">getDataStore</span><span class="p">(</span><span class="n">Long</span><span class="o">.</span><span class="n">class</span><span class="p">,</span> <span class="n">Pageview</span><span class="o">.</span><span class="n">class</span><span class="p">);</span>
+  <span class="p">}</span>
+</pre></div>
+
+
+<p>DataStore is probably the most important class in the Gora API. 
+DataStore handles actual object persistence. Objects can be persisted, 
+fetched, queried or deleted by the DataStore methods. Every data store that Gora supports, defines its own subclass 
+of the DataStore class. For example gora-hbase module defines HBaseStore, and 
+gora-sql module defines SqlStore. However, these subclasses are not explicitly 
+used by the user.</p>
+<p>DataStores always have associated key and value(persistent) classes. Key class is the class of the keys of the 
+data store, and the value is the actual data bean's class. The value class is almost always generated by 
+Avro schema definitions using the Gora compiler.</p>
+<p>Data store objects are created by DataStoreFactory. It is necessary to 
+provide the key and value class. The datastore class is optional, 
+and if not specified it will be read from the configuration (gora.properties).</p>
+<p>For this tutorial, we have already defined the avro schema to use and compiled
+our data bean into Pageview class. For keys in the data store, we will be using Longs. 
+The keys will hold the line of the pageview in the data file.</p>
+<p>Next, let's look at the main function of the LogManager class.</p>
+<div class="codehilite"><pre><span class="n">public</span> <span class="n">static</span> <span class="n">void</span> <span class="n">main</span><span class="p">(</span><span class="n">String</span><span class="o">[]</span> <span class="n">args</span><span class="p">)</span> <span class="n">throws</span> <span class="n">Exception</span> <span class="p">{</span>
+  <span class="k">if</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="nb">length</span> <span class="o">&lt;</span> <span class="mi">2</span><span class="p">)</span> <span class="p">{</span>
+    <span class="n">System</span><span class="o">.</span><span class="n">err</span><span class="o">.</span><span class="n">println</span><span class="p">(</span><span class="n">USAGE</span><span class="p">);</span>
+    <span class="n">System</span><span class="o">.</span><span class="nb">exit</span><span class="p">(</span><span class="mi">1</span><span class="p">);</span>
+  <span class="p">}</span>
+
+  <span class="n">LogManager</span> <span class="n">manager</span> <span class="o">=</span> <span class="k">new</span> <span class="n">LogManager</span><span class="p">();</span>
+
+  <span class="k">if</span><span class="p">(</span><span class="s">&quot;-parse&quot;</span><span class="o">.</span><span class="n">equals</span><span class="p">(</span><span class="n">args</span><span class="p">[</span><span class="mi">0</span><span class="p">]))</span> <span class="p">{</span>
+    <span class="n">manager</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">args</span><span class="p">[</span><span class="mi">1</span><span class="p">]);</span>
+  <span class="p">}</span> <span class="k">else</span> <span class="k">if</span><span class="p">(</span><span class="s">&quot;-query&quot;</span><span class="o">.</span><span class="n">equals</span><span class="p">(</span><span class="n">args</span><span class="p">[</span><span class="mi">0</span><span class="p">]))</span> <span class="p">{</span>
+  <span class="k">if</span><span class="p">(</span><span class="n">args</span><span class="o">.</span><span class="nb">length</span> <span class="o">==</span> <span class="mi">2</span><span class="p">)</span> 
+    <span class="n">manager</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">Long</span><span class="o">.</span><span class="n">parseLong</span><span class="p">(</span><span class="n">args</span><span class="p">[</span><span class="mi">1</span><span class="p">]));</span>
+  <span class="k">else</span> 
+    <span class="n">manager</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="n">Long</span><span class="o">.</span><span class="n">parseLong</span><span class="p">(</span><span class="n">args</span><span class="p">[</span><span class="mi">1</span><span class="p">]),</span> <span class="n">Long</span><span class="o">.</span><span class="n">parseLong</span><span class="p">(</span><span class="n">args</span><span class="p">[</span><span class="mi">2</span><span class="p">]));</span>
+  <span class="p">}</span> <span class="k">else</span> <span class="k">if</span><span class="p">(</span><span class="s">&quot;-delete&quot;</span><span class="o">.</span><span class="n">equals</span><span class="p">(</span><span class="n">args</span><span class="p">[</span><span class="mi">0</span><span class="p">]))</span> <span class="p">{</span>
+    <span class="n">manager</span><span class="o">.</span><span class="nb">delete</span><span class="p">(</span><span class="n">Long</span><span class="o">.</span><span class="n">parseLong</span><span class="p">(</span><span class="n">args</span><span class="p">[</span><span class="mi">1</span><span class="p">]));</span>
+  <span class="p">}</span> <span class="k">else</span> <span class="k">if</span><span class="p">(</span><span class="s">&quot;-deleteByQuery&quot;</span><span class="o">.</span><span class="n">equalsIgnoreCase</span><span class="p">(</span><span class="n">args</span><span class="p">[</span><span class="mi">0</span><span class="p">]))</span> <span class="p">{</span>
+    <span class="n">manager</span><span class="o">.</span><span class="n">deleteByQuery</span><span class="p">(</span><span class="n">Long</span><span class="o">.</span><span class="n">parseLong</span><span class="p">(</span><span class="n">args</span><span class="p">[</span><span class="mi">1</span><span class="p">]),</span> <span class="n">Long</span><span class="o">.</span><span class="n">parseLong</span><span class="p">(</span><span class="n">args</span><span class="p">[</span><span class="mi">2</span><span class="p">]));</span>
+  <span class="p">}</span> <span class="k">else</span> <span class="p">{</span>
+    <span class="n">System</span><span class="o">.</span><span class="n">err</span><span class="o">.</span><span class="n">println</span><span class="p">(</span><span class="n">USAGE</span><span class="p">);</span>
+    <span class="n">System</span><span class="o">.</span><span class="nb">exit</span><span class="p">(</span><span class="mi">1</span><span class="p">);</span>
+  <span class="p">}</span>
+
+  <span class="n">manager</span><span class="o">.</span><span class="nb">close</span><span class="p">();</span>
+<span class="p">}</span>
+</pre></div>
+
+
+<p>We can use the example log manager program from the command line (in the top level Gora directory): </p>
+<div class="codehilite"><pre><span class="nv">$</span> <span class="nv">bin</span><span class="o">/</span><span class="n">gora</span> <span class="n">logmanager</span>
+</pre></div>
+
+
+<p>which lists the usage as:</p>
+<div class="codehilite"><pre><span class="n">LogManager</span> <span class="o">-</span><span class="n">parse</span> <span class="o">&amp;</span><span class="ow">lt</span><span class="p">;</span><span class="n">input_log_file</span><span class="o">&amp;</span><span class="ow">gt</span><span class="p">;</span>
+       <span class="o">-</span><span class="n">get</span> <span class="o">&amp;</span><span class="ow">lt</span><span class="p">;</span><span class="n">lineNum</span><span class="o">&amp;</span><span class="ow">gt</span><span class="p">;</span>
+       <span class="o">-</span><span class="n">query</span> <span class="o">&amp;</span><span class="ow">lt</span><span class="p">;</span><span class="n">lineNum</span><span class="o">&amp;</span><span class="ow">gt</span><span class="p">;</span>
+       <span class="o">-</span><span class="n">query</span> <span class="o">&amp;</span><span class="ow">lt</span><span class="p">;</span><span class="n">startLineNum</span><span class="o">&amp;</span><span class="ow">gt</span><span class="p">;</span> <span class="o">&amp;</span><span class="ow">lt</span><span class="p">;</span><span class="n">endLineNum</span><span class="o">&amp;</span><span class="ow">gt</span><span class="p">;</span>
+       <span class="o">-</span><span class="nb">delete</span> <span class="o">&amp;</span><span class="ow">lt</span><span class="p">;</span><span class="n">lineNum</span><span class="o">&amp;</span><span class="ow">gt</span><span class="p">;</span>
+       <span class="o">-</span><span class="n">deleteByQuery</span> <span class="o">&amp;</span><span class="ow">lt</span><span class="p">;</span><span class="n">startLineNum</span><span class="o">&amp;</span><span class="ow">gt</span><span class="p">;</span> <span class="o">&amp;</span><span class="ow">lt</span><span class="p">;</span><span class="n">endLineNum</span><span class="o">&amp;</span><span class="ow">gt</span><span class="p">;</span>
+</pre></div>
+
+
+<p>So to parse and store our logs located at gora-tutorial/src/main/resources/access.log, we will issue:</p>
+<div class="codehilite"><pre><span class="nv">$</span> <span class="nv">bin</span><span class="sr">/gora logmanager -parse gora-tutorial/s</span><span class="n">rc</span><span class="sr">/main/</span><span class="n">resources</span><span class="o">/</span><span class="n">access</span><span class="o">.</span><span class="nb">log</span>
+</pre></div>
+
+
+<p>This should output something like:</p>
+<div class="codehilite"><pre><span class="mi">10</span><span class="sr">/09/</span><span class="mi">30</span> <span class="mi">18</span><span class="p">:</span><span class="mi">30</span><span class="p">:</span><span class="mi">17</span> <span class="n">INFO</span> <span class="nb">log</span><span class="o">.</span><span class="n">LogManager:</span> <span class="n">Parsing</span> <span class="n">file:gora</span><span class="o">-</span><span class="n">tutorial</span><span class="sr">/src/m</span><span class="n">ain</span><span class="sr">/resources/</span><span class="n">access</span><span class="o">.</span><span class="nb">log</span>
+<span class="mi">10</span><span class="sr">/09/</span><span class="mi">30</span> <span class="mi">18</span><span class="p">:</span><span class="mi">30</span><span class="p">:</span><span class="mi">23</span> <span class="n">INFO</span> <span class="nb">log</span><span class="o">.</span><span class="n">LogManager:</span> <span class="n">finished</span> <span class="n">parsing</span> <span class="n">file</span><span class="o">.</span> <span class="n">Total</span> <span class="n">number</span> <span class="n">of</span> <span class="nb">log</span> <span class="n">lines:10000</span>
+</pre></div>
+
+
+<p>Now, let's look at the code which parses the data and stores the logs.</p>
+<div class="codehilite"><pre><span class="n">private</span> <span class="n">void</span> <span class="n">parse</span><span class="p">(</span><span class="n">String</span> <span class="n">input</span><span class="p">)</span> <span class="n">throws</span> <span class="n">IOException</span><span class="p">,</span> <span class="n">ParseException</span> <span class="p">{</span>
+  <span class="n">BufferedReader</span> <span class="n">reader</span> <span class="o">=</span> <span class="k">new</span> <span class="n">BufferedReader</span><span class="p">(</span><span class="k">new</span> <span class="n">FileReader</span><span class="p">(</span><span class="n">input</span><span class="p">));</span>
+  <span class="n">long</span> <span class="n">lineCount</span> <span class="o">=</span> <span class="mi">0</span><span class="p">;</span>
+  <span class="n">try</span> <span class="p">{</span>
+    <span class="n">String</span> <span class="n">line</span> <span class="o">=</span> <span class="n">reader</span><span class="o">.</span><span class="n">readLine</span><span class="p">();</span>
+    <span class="k">do</span> <span class="p">{</span>
+      <span class="n">Pageview</span> <span class="n">pageview</span> <span class="o">=</span> <span class="n">parseLine</span><span class="p">(</span><span class="n">line</span><span class="p">);</span>
+
+      <span class="k">if</span><span class="p">(</span><span class="n">pageview</span> <span class="o">!=</span> <span class="n">null</span><span class="p">)</span> <span class="p">{</span>
+        <span class="sr">//s</span><span class="n">tore</span> <span class="n">the</span> <span class="n">pageview</span> 
+        <span class="n">storePageview</span><span class="p">(</span><span class="n">lineCount</span><span class="o">++</span><span class="p">,</span> <span class="n">pageview</span><span class="p">);</span>
+      <span class="p">}</span>
+
+      <span class="n">line</span> <span class="o">=</span> <span class="n">reader</span><span class="o">.</span><span class="n">readLine</span><span class="p">();</span>
+    <span class="p">}</span> <span class="k">while</span><span class="p">(</span><span class="n">line</span> <span class="o">!=</span> <span class="n">null</span><span class="p">);</span>
+
+  <span class="p">}</span> <span class="n">finally</span> <span class="p">{</span>
+  <span class="n">reader</span><span class="o">.</span><span class="nb">close</span><span class="p">();</span>  
+  <span class="p">}</span>
+<span class="p">}</span>
+</pre></div>
+
+
+<p>The file is iterated line-by-line. Notice that the parseLine(line)
+function does the actual parsing converting the string to a Pageview object 
+defined earlier.</p>
+<div class="codehilite"><pre><span class="n">private</span> <span class="n">Pageview</span> <span class="n">parseLine</span><span class="p">(</span><span class="n">String</span> <span class="n">line</span><span class="p">)</span> <span class="n">throws</span> <span class="n">ParseException</span> <span class="p">{</span>
+  <span class="n">StringTokenizer</span> <span class="n">matcher</span> <span class="o">=</span> <span class="k">new</span> <span class="n">StringTokenizer</span><span class="p">(</span><span class="n">line</span><span class="p">);</span>
+  <span class="sr">//</span><span class="n">parse</span> <span class="n">the</span> <span class="nb">log</span> <span class="n">line</span>
+  <span class="n">String</span> <span class="n">ip</span> <span class="o">=</span> <span class="n">matcher</span><span class="o">.</span><span class="n">nextToken</span><span class="p">();</span>
+  <span class="o">...</span>
+
+  <span class="sr">//co</span><span class="n">nstruct</span> <span class="ow">and</span> <span class="k">return</span> <span class="n">pageview</span> <span class="n">object</span>
+  <span class="n">Pageview</span> <span class="n">pageview</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Pageview</span><span class="p">();</span>
+  <span class="n">pageview</span><span class="o">.</span><span class="n">setIp</span><span class="p">(</span><span class="k">new</span> <span class="n">Utf8</span><span class="p">(</span><span class="n">ip</span><span class="p">));</span>
+  <span class="n">pageview</span><span class="o">.</span><span class="n">setTimestamp</span><span class="p">(</span><span class="n">timestamp</span><span class="p">);</span>
+  <span class="o">...</span>
+
+  <span class="k">return</span> <span class="n">pageview</span><span class="p">;</span>
+<span class="p">}</span>
+</pre></div>
+
+
+<p>parseLine() uses standard StringTokenizers for the job 
+and constructs and returns a Pageview object.</p>
+<h3 id="storing-objects-in-the-datastore">Storing objects in the DataStore</h3>
+<p>If we look back at the parse() method above, we can see that the 
+Pageview objects returned by parseLine() are stored via 
+storePageview() method. </p>
+<p>The storePageview() method is where magic happens, but if we look at the code,
+we can see that it is dead simple.</p>
+<div class="codehilite"><pre><span class="sr">/** Stores the pageview object with the given key */</span>
+<span class="n">private</span> <span class="n">void</span> <span class="n">storePageview</span><span class="p">(</span><span class="n">long</span> <span class="n">key</span><span class="p">,</span> <span class="n">Pageview</span> <span class="n">pageview</span><span class="p">)</span> <span class="n">throws</span> <span class="n">IOException</span> <span class="p">{</span>
+  <span class="n">dataStore</span><span class="o">.</span><span class="n">put</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">pageview</span><span class="p">);</span>
+<span class="p">}</span>
+</pre></div>
+
+
+<p>All we need to do is to call the put() method, which expects a long as key and an instance of Pageview
+as a value.</p>
+<h3 id="closing-the-datastore">Closing the DataStore</h3>
+<p>DataStore implementations can do a lot of caching for performance. 
+However, this means that data is not always flushed to persistent storage right away. 
+So, upon finishing storing objects, we need to close the datastore 
+instance by calling its close() method. 
+LogManager always closes its datastore in its own close() method.<br />
+</p>
+<div class="codehilite"><pre><span class="n">private</span> <span class="n">void</span> <span class="nb">close</span><span class="p">()</span> <span class="n">throws</span> <span class="n">IOException</span> <span class="p">{</span>
+  <span class="sr">//</span><span class="n">It</span> <span class="n">is</span> <span class="n">very</span> <span class="n">important</span> <span class="n">to</span> <span class="nb">close</span> <span class="n">the</span> <span class="n">datastore</span> <span class="n">properly</span><span class="p">,</span> <span class="n">otherwise</span>
+  <span class="sr">//som</span><span class="n">e</span> <span class="n">data</span> <span class="n">loss</span> <span class="n">might</span> <span class="n">occur</span><span class="o">.</span>
+  <span class="k">if</span><span class="p">(</span><span class="n">dataStore</span> <span class="o">!=</span> <span class="n">null</span><span class="p">)</span>
+  <span class="n">dataStore</span><span class="o">.</span><span class="nb">close</span><span class="p">();</span>
+<span class="p">}</span>
+</pre></div>
+
+
+<p>If you are pushing a lot of data, or if you want your data to be accessible before closing 
+the data store, you can also use the flush()
+method which, as expected, flushes the data to the underlying data store. However, the actual flush 
+semantics can vary by the data store backend. For example, in SQL flush calls commit()
+on the jdbc Connection object, whereas in HBase, HTable#flush() is called.
+Also note that even if you call flush() at the end of all data manipulation operations, 
+you still need to call the close() on the datastore.</p>
+<h2 id="persisted-data-in-hbase">Persisted data in HBase</h2>
+<p>Now that we have stored the web access log data in HBase, we can look at
+how the data is stored in HBase. For that, start the HBase shell.</p>
+<div class="codehilite"><pre>$ cd ../hbase-<span class="cp">${</span><span class="n">version</span><span class="cp">}</span>
+$ bin/hbase shell
+</pre></div>
+
+
+<p>If you have a fresh HBase installation, there should be one table.</p>
+<div class="codehilite"><pre><span class="n">hbase</span><span class="p">(</span><span class="n">main</span><span class="p">):</span><span class="mo">010</span><span class="p">:</span><span class="mi">0</span><span class="o">&gt;</span> <span class="n">list</span>
+
+<span class="n">AccessLog</span>                                                                                                     
+<span class="mi">1</span> <span class="n">row</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="n">in</span> <span class="mf">0.0470</span> <span class="n">seconds</span>
+</pre></div>
+
+
+<p>Remember that AccessLog is the name of the table we specified in 
+gora-hbase-mapping.xml. Looking at the contents of the table:</p>
+<div class="codehilite"><pre><span class="n">hbase</span><span class="p">(</span><span class="n">main</span><span class="p">):</span><span class="mo">010</span><span class="p">:</span><span class="mi">0</span><span class="o">&gt;</span> <span class="n">scan</span> <span class="s">&#39;AccessLog&#39;</span><span class="p">,</span> <span class="p">{</span><span class="n">LIMIT</span><span class="o">=&gt;</span><span class="mi">1</span><span class="p">}</span>
+
+<span class="n">ROW</span>                          <span class="n">COLUMN</span><span class="o">+</span><span class="n">CELL</span>                                                                      
+ <span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x0</span> <span class="n">column</span><span class="o">=</span><span class="n">common:ip</span><span class="p">,</span> <span class="n">timestamp</span><span class="o">=</span><span class="mi">1285860617341</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="mf">88.240.129.183</span>                  
+ <span class="mi">0</span><span class="o">\</span><span class="n">x00</span>                                                                                                        
+ <span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x0</span> <span class="n">column</span><span class="o">=</span><span class="n">common:timestamp</span><span class="p">,</span> <span class="n">timestamp</span><span class="o">=</span><span class="mi">1285860617341</span><span class="p">,</span> <span class="n">value</span><span class="o">=\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x01</span><span class="o">\</span><span class="n">x1F</span><span class="o">\</span><span class="n">xF1</span><span class="o">\</span><span class="n">xAEl</span>
+ <span class="mi">0</span><span class="o">\</span><span class="n">x00</span>                       <span class="n">P</span>                                                                                
+ <span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x0</span> <span class="n">column</span><span class="o">=</span><span class="n">common:url</span><span class="p">,</span> <span class="n">timestamp</span><span class="o">=</span><span class="mi">1285860617341</span><span class="p">,</span> <span class="n">value</span><span class="o">=/</span><span class="nb">index</span><span class="o">.</span><span class="n">php</span><span class="p">?</span><span class="n">a</span><span class="o">=</span><span class="mi">1</span><span class="n">__wwv40pdxdpo</span><span class="o">&amp;</span><span class="n">k</span><span class="o">=</span><span class="mi">2</span>
+ <span class="mi">0</span><span class="o">\</span><span class="n">x00</span>                       <span class="mi">18978</span>                                                                            
+ <span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x0</span> <span class="n">column</span><span class="o">=</span><span class="n">http:httpMethod</span><span class="p">,</span> <span class="n">timestamp</span><span class="o">=</span><span class="mi">1285860617341</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="n">GET</span>                       
+ <span class="mi">0</span><span class="o">\</span><span class="n">x00</span>                                                                                                        
+ <span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x0</span> <span class="n">column</span><span class="o">=</span><span class="n">http:httpStatusCode</span><span class="p">,</span> <span class="n">timestamp</span><span class="o">=</span><span class="mi">1285860617341</span><span class="p">,</span> <span class="n">value</span><span class="o">=\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">xC8</span>      
+ <span class="mi">0</span><span class="o">\</span><span class="n">x00</span>                                                                                                        
+ <span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x0</span> <span class="n">column</span><span class="o">=</span><span class="n">http:responseSize</span><span class="p">,</span> <span class="n">timestamp</span><span class="o">=</span><span class="mi">1285860617341</span><span class="p">,</span> <span class="n">value</span><span class="o">=\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">+</span>           
+ <span class="mi">0</span><span class="o">\</span><span class="n">x00</span>                                                                                                        
+ <span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x0</span> <span class="n">column</span><span class="o">=</span><span class="n">misc:referrer</span><span class="p">,</span> <span class="n">timestamp</span><span class="o">=</span><span class="mi">1285860617341</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="n">http:</span><span class="sr">//</span><span class="n">www</span><span class="o">.</span><span class="n">buldinle</span><span class="o">.</span><span class="n">com</span><span class="o">/</span><span class="n">inde</span>
+ <span class="mi">0</span><span class="o">\</span><span class="n">x00</span>                       <span class="n">x</span><span class="o">.</span><span class="n">php</span><span class="p">?</span><span class="n">a</span><span class="o">=</span><span class="mi">1</span><span class="n">__WWV40pdxdpo</span><span class="o">&amp;</span><span class="n">k</span><span class="o">=</span><span class="mi">218978</span>                                                  
+ <span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x00</span><span class="o">\</span><span class="n">x0</span> <span class="n">column</span><span class="o">=</span><span class="n">misc:userAgent</span><span class="p">,</span> <span class="n">timestamp</span><span class="o">=</span><span class="mi">1285860617341</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="n">Mozilla</span><span class="o">/</span><span class="mf">4.0</span> <span class="p">(</span><span class="n">compatible</span><span class="p">;</span> <span class="n">MS</span>
+ <span class="mi">0</span><span class="o">\</span><span class="n">x00</span>                       <span class="n">IE</span> <span class="mf">6.0</span><span class="p">;</span> <span class="n">Windows</span> <span class="n">NT</span> <span class="mf">5.1</span><span class="p">)</span>
+</pre></div>
+
+
+<p>The output shows all the columns matching the first line with key 0. We can see 
+the columns common:ip, common:timestamp, common:url, etc. Remember that 
+these are the columns that we have described in the gora-hbase-mapping.xml file.</p>
+<p>You can also count the number of entries in the table to make sure that all the records
+have been stored.</p>
+<div class="codehilite"><pre><span class="n">hbase</span><span class="p">(</span><span class="n">main</span><span class="p">):</span><span class="mo">010</span><span class="p">:</span><span class="mi">0</span><span class="o">&gt;</span> <span class="n">count</span> <span class="s">&#39;AccessLog&#39;</span>
+  <span class="o">...</span> 
+  <span class="mi">10000</span> <span class="n">row</span><span class="p">(</span><span class="n">s</span><span class="p">)</span> <span class="n">in</span> <span class="mf">1.0580</span> <span class="n">seconds</span>
+</pre></div>
+
+
+<h2 id="fetching-objects-from-data-store">Fetching objects from data store</h2>
+<p>Fetching objects from the data store is as easy as storing them. There are essentially 
+two methods for fetching objects. The first is to fetch a single object given its key. The 
+second method is to run a query through the data store.</p>
+<p>To fetch objects one by one, we can use one of the overloaded 
+get() methods. 
+The method with signature get(K key) returns the object corresponding to the given key fetching all the 
+fields. On the other hand get(K key, String[] fields) returns the object corresponding to the 
+given key, but fetching only the fields given as the second argument.</p>
+<p>When run with the -get argument, the LogManager class fetches the pageview object 
+from the data store and prints the results.</p>
+<div class="codehilite"><pre><span class="sr">/** Fetches a single pageview object and prints it*/</span>
+<span class="n">private</span> <span class="n">void</span> <span class="n">get</span><span class="p">(</span><span class="n">long</span> <span class="n">key</span><span class="p">)</span> <span class="n">throws</span> <span class="n">IOException</span> <span class="p">{</span>
+  <span class="n">Pageview</span> <span class="n">pageview</span> <span class="o">=</span> <span class="n">dataStore</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">key</span><span class="p">);</span>
+  <span class="n">printPageview</span><span class="p">(</span><span class="n">pageview</span><span class="p">);</span>
+<span class="p">}</span>
+</pre></div>
+
+
+<p>To display the 42nd line of the access log:</p>
+<div class="codehilite"><pre><span class="nv">$</span> <span class="nv">bin</span><span class="o">/</span><span class="n">gora</span> <span class="n">logmanager</span> <span class="o">-</span><span class="n">get</span> <span class="mi">42</span>
+
+<span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">gora</span><span class="o">.</span><span class="n">tutorial</span><span class="o">.</span><span class="nb">log</span><span class="o">.</span><span class="n">generated</span><span class="o">.</span><span class="n">Pageview</span><span class="nv">@321ce053</span> <span class="p">{</span>
+  <span class="s">&quot;url&quot;</span><span class="p">:</span><span class="s">&quot;/index.php?i=0&amp;a=1__rntjt9z0q9w&amp;k=398179&quot;</span>
+  <span class="s">&quot;timestamp&quot;</span><span class="p">:</span><span class="s">&quot;1236710649000&quot;</span>
+  <span class="s">&quot;ip&quot;</span><span class="p">:</span><span class="s">&quot;88.240.129.183&quot;</span>
+  <span class="s">&quot;httpMethod&quot;</span><span class="p">:</span><span class="s">&quot;GET&quot;</span>
+  <span class="s">&quot;httpStatusCode&quot;</span><span class="p">:</span><span class="s">&quot;200&quot;</span>
+  <span class="s">&quot;responseSize&quot;</span><span class="p">:</span><span class="s">&quot;43&quot;</span>
+  <span class="s">&quot;referrer&quot;</span><span class="p">:</span><span class="s">&quot;http://www.buldinle.com/index.php?i=0&amp;a=1__RnTjT9z0Q9w&amp;k=398179&quot;</span>
+  <span class="s">&quot;userAgent&quot;</span><span class="p">:</span><span class="s">&quot;Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)&quot;</span>
+<span class="p">}</span>
+</pre></div>
+
+
+<h2 id="querying-objects">Querying objects</h2>
+<p>The DataStore API defines a Query interface to query the objects in the data store. 
+Each data store implementation can use a specific implementation of the Query interface. Queries are 
+instantiated by calling DataStore#newQuery(). When the query is run through the datastore, the results 
+are returned via the Result interface. Let's see below how we can run a query and display the results in 
+the LogManager class.</p>
+<div class="codehilite"><pre><span class="sr">/** Queries and prints pageview object that have keys between startKey and endKey*/</span>
+<span class="n">private</span> <span class="n">void</span> <span class="n">query</span><span class="p">(</span><span class="n">long</span> <span class="n">startKey</span><span class="p">,</span> <span class="n">long</span> <span class="n">endKey</span><span class="p">)</span> <span class="n">throws</span> <span class="n">IOException</span> <span class="p">{</span>
+  <span class="n">Query</span><span class="o">&lt;</span><span class="n">Long</span><span class="p">,</span> <span class="n">Pageview</span><span class="o">&gt;</span> <span class="n">query</span> <span class="o">=</span> <span class="n">dataStore</span><span class="o">.</span><span class="n">newQuery</span><span class="p">();</span>
+  <span class="sr">//s</span><span class="n">et</span> <span class="n">the</span> <span class="n">properties</span> <span class="n">of</span> <span class="n">query</span>
+  <span class="n">query</span><span class="o">.</span><span class="n">setStartKey</span><span class="p">(</span><span class="n">startKey</span><span class="p">);</span>
+  <span class="n">query</span><span class="o">.</span><span class="n">setEndKey</span><span class="p">(</span><span class="n">endKey</span><span class="p">);</span>
+
+  <span class="n">Result</span><span class="o">&lt;</span><span class="n">Long</span><span class="p">,</span> <span class="n">Pageview</span><span class="o">&gt;</span> <span class="n">result</span> <span class="o">=</span> <span class="n">query</span><span class="o">.</span><span class="n">execute</span><span class="p">();</span>
+
+  <span class="n">printResult</span><span class="p">(</span><span class="n">result</span><span class="p">);</span>
+<span class="p">}</span>
+</pre></div>
+
+
+<p>After constructing a Query, its properties 
+are set via the setter methods. Then calling query.execute() returns
+the Result object.</p>
+<p>The Result interface allows us to iterate over the results one by one by calling the 
+next() method. The getKey() method returns the current key and get()
+returns the current persistent object.</p>
+<div class="codehilite"><pre><span class="n">private</span> <span class="n">void</span> <span class="n">printResult</span><span class="p">(</span><span class="n">Result</span><span class="o">&lt;</span><span class="n">Long</span><span class="p">,</span> <span class="n">Pageview</span><span class="o">&gt;</span> <span class="n">result</span><span class="p">)</span> <span class="n">throws</span> <span class="n">IOException</span> <span class="p">{</span>
+
+  <span class="k">while</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="k">next</span><span class="p">())</span> <span class="p">{</span> <span class="sr">//</span><span class="n">advances</span> <span class="n">the</span> <span class="n">Result</span> <span class="n">object</span> <span class="ow">and</span> <span class="n">breaks</span> <span class="k">if</span> <span class="n">at</span> <span class="n">end</span>
+    <span class="n">long</span> <span class="n">resultKey</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">getKey</span><span class="p">();</span> <span class="sr">//o</span><span class="n">btain</span> <span class="n">current</span> <span class="n">key</span>
+    <span class="n">Pageview</span> <span class="n">resultPageview</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">();</span> <span class="sr">//o</span><span class="n">btain</span> <span class="n">current</span> <span class="n">value</span> <span class="n">object</span>
+
+    <span class="sr">//</span><span class="k">print</span> <span class="n">the</span> <span class="n">results</span>
+    <span class="n">System</span><span class="o">.</span><span class="n">out</span><span class="o">.</span><span class="n">println</span><span class="p">(</span><span class="n">resultKey</span> <span class="o">+</span> <span class="s">&quot;:&quot;</span><span class="p">);</span>
+    <span class="n">printPageview</span><span class="p">(</span><span class="n">resultPageview</span><span class="p">);</span>
+  <span class="p">}</span>
+
+  <span class="n">System</span><span class="o">.</span><span class="n">out</span><span class="o">.</span><span class="n">println</span><span class="p">(</span><span class="s">&quot;Number of pageviews from the query:&quot;</span> <span class="o">+</span> <span class="n">result</span><span class="o">.</span><span class="n">getOffset</span><span class="p">());</span>
+<span class="p">}</span>
+</pre></div>
+
+
+<p>With these functions defined, we can run the LogManager class to query the 
+access logs in HBase. For example, to display the log records between lines 10 and 12, 
+we can use:</p>
+<div class="codehilite"><pre><span class="n">bin</span><span class="o">/</span><span class="n">gora</span> <span class="n">logmanager</span> <span class="o">-</span><span class="n">query</span> <span class="mi">10</span> <span class="mi">12</span>
+</pre></div>
+
+
+<p>Which results in:</p>
+<div class="codehilite"><pre><span class="err">10:</span>
+<span class="err">org.apache.gora.tutorial.log.generated.Pageview@d38d0eaa</span> <span class="err">{</span>
+  <span class="err">&quot;url&quot;:&quot;/&quot;</span>
+  <span class="err">&quot;timestamp&quot;:&quot;1236710442000&quot;</span>
+  <span class="err">&quot;ip&quot;:&quot;144.122.180.55&quot;</span>
+  <span class="err">&quot;httpMethod&quot;:&quot;GET&quot;</span>
+  <span class="err">&quot;httpStatusCode&quot;:&quot;200&quot;</span>
+  <span class="err">&quot;responseSize&quot;:&quot;43&quot;</span>
+  <span class="err">&quot;referrer&quot;:&quot;http:</span><span class="c1">//buldinle.com/&quot;</span>
+  <span class="err">&quot;userAgent&quot;:&quot;Mozilla/5.0</span> <span class="err">(X11;</span> <span class="err">U;</span> <span class="err">Linux</span> <span class="err">x86_64;</span> <span class="err">en-US;</span> <span class="err">rv:1.9.0.6)</span> <span class="err">Gecko/2009020911</span> <span class="err">Ubuntu/8.10</span> <span class="err">(intrepid)</span> <span class="err">Firefox/3.0.6&quot;</span>
+<span class="err">}</span>
+<span class="err">11:</span>
+<span class="err">org.apache.gora.tutorial.log.generated.Pageview@b513110a</span> <span class="err">{</span>
+  <span class="err">&quot;url&quot;:&quot;/index.php?i=7&amp;a=1__gefuumyhl5c&amp;k=5143555&quot;</span>
+  <span class="err">&quot;timestamp&quot;:&quot;1236710453000&quot;</span>
+  <span class="err">&quot;ip&quot;:&quot;85.100.75.104&quot;</span>
+  <span class="err">&quot;httpMethod&quot;:&quot;GET&quot;</span>
+  <span class="err">&quot;httpStatusCode&quot;:&quot;200&quot;</span>
+  <span class="err">&quot;responseSize&quot;:&quot;43&quot;</span>
+  <span class="err">&quot;referrer&quot;:&quot;http:</span><span class="c1">//www.buldinle.com/index.php?i=7&amp;a=1__GeFUuMyHl5c&amp;k=5143555&quot;</span>
+  <span class="err">&quot;userAgent&quot;:&quot;Mozilla/5.0</span> <span class="err">(Windows;</span> <span class="err">U;</span> <span class="err">Windows</span> <span class="err">NT</span> <span class="err">5.1;</span> <span class="err">tr;</span> <span class="err">rv:1.9.0.7)</span> <span class="err">Gecko/2009021910</span> <span class="err">Firefox/3.0.7&quot;</span>
+<span class="err">}</span>
+</pre></div>
+
+
+<h2 id="deleting-objects">Deleting objects</h2>
+<p>Just like fetching objects, there are two main methods to delete 
+objects from the data store. The first one is to delete objects one by 
+one using the DataStore#delete(K) method, which takes the key of the object. 
+Alternatively, we can delete all of the data that matches a given query by 
+calling the DataStore#deleteByQuery(Query) method. By using deleteByQuery, we can 
+do fine-grained deletes, for example deleting just a specific field 
+from several records. 
+Continuing from the LogManager class, the APIs for both are given below.</p>
+<div class="codehilite"><pre><span class="sr">/**Deletes the pageview with the given line number */</span>
+<span class="n">private</span> <span class="n">void</span> <span class="nb">delete</span><span class="p">(</span><span class="n">long</span> <span class="n">lineNum</span><span class="p">)</span> <span class="n">throws</span> <span class="n">Exception</span> <span class="p">{</span>
+  <span class="n">dataStore</span><span class="o">.</span><span class="nb">delete</span><span class="p">(</span><span class="n">lineNum</span><span class="p">);</span>
+  <span class="n">dataStore</span><span class="o">.</span><span class="n">flush</span><span class="p">();</span> <span class="sr">//</span><span class="nb">write</span> <span class="n">changes</span> <span class="n">may</span> <span class="n">need</span> <span class="n">to</span> <span class="n">be</span> <span class="n">flushed</span> <span class="n">before</span> <span class="n">they</span> <span class="n">are</span> <span class="n">committed</span> 
+<span class="p">}</span>
+
+<span class="sr">/** This method illustrates delete by query call */</span>
+<span class="n">private</span> <span class="n">void</span> <span class="n">deleteByQuery</span><span class="p">(</span><span class="n">long</span> <span class="n">startKey</span><span class="p">,</span> <span class="n">long</span> <span class="n">endKey</span><span class="p">)</span> <span class="n">throws</span> <span class="n">IOException</span> <span class="p">{</span>
+  <span class="sr">//</span><span class="n">Constructs</span> <span class="n">a</span> <span class="n">query</span> <span class="n">from</span> <span class="n">the</span> <span class="n">dataStore</span><span class="o">.</span> <span class="n">The</span> <span class="n">matching</span> <span class="n">rows</span> <span class="n">to</span> <span class="n">this</span> <span class="n">query</span> <span class="n">will</span> <span class="n">be</span> <span class="n">deleted</span>

[... 340 lines stripped ...]


Mime
View raw message