hbase-commits mailing list archives

From: st...@apache.org
Subject: svn commit: r1345788 [10/10] - in /hbase/trunk: ./ hbase-assembly/ hbase-common/ hbase-server/ hbase-site/ src/ src/assembly/ src/docbkx/ src/site/ src/site/resources/ src/site/resources/css/ src/site/resources/images/ src/site/xdoc/ src/xslt/
Date: Sun, 03 Jun 2012 21:59:52 GMT
Added: hbase/trunk/src/site/xdoc/old_news.xml
URL: http://svn.apache.org/viewvc/hbase/trunk/src/site/xdoc/old_news.xml?rev=1345788&view=auto
==============================================================================
--- hbase/trunk/src/site/xdoc/old_news.xml (added)
+++ hbase/trunk/src/site/xdoc/old_news.xml Sun Jun  3 21:59:50 2012
@@ -0,0 +1,66 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Copyright 2010 The Apache Software Foundation
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+          "http://forrest.apache.org/dtd/document-v20.dtd">
+
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+  <properties>
+    <title> 
+      Old News
+    </title>
+  </properties>
+  <body>
+  <section name="Old News">
+            <p>March 27th, 2012 <a href="http://www.meetup.com/hbaseusergroup/events/56021562/">Meetup @ StumbleUpon</a> in San Francisco</p>
+
+            <p>January 23rd, 2012 HBase 0.92.0 released. <a href="http://www.apache.org/dyn/closer.cgi/hbase/">Download it!</a></p>
+            <p>January 19th, 2012 <a href="http://www.meetup.com/hbaseusergroup/events/46702842/">Meetup @ EBay</a></p>
+            <p>December 23rd, 2011 HBase 0.90.5 released. <a href="http://www.apache.org/dyn/closer.cgi/hbase/">Download it!</a></p>
+            <p>November 29th, 2011 <a href="http://www.meetup.com/hackathon/events/41025972/">Developer Pow-Wow in SF</a> at Salesforce HQ</p>
+            <p>November 7th, 2011 <a href="http://www.meetup.com/hbaseusergroup/events/35682812/">HBase Meetup in NYC (6PM)</a> at the AppNexus office</p>
+            <p>August 22nd, 2011 <a href="http://www.meetup.com/hbaseusergroup/events/28518471/">HBase Hackathon (11AM) and Meetup (6PM)</a> at FB in PA</p>
+            <p>June 30th, 2011 <a href="http://www.meetup.com/hbaseusergroup/events/20572251/">HBase Contributor Day</a>, the day after the <a href="http://developer.yahoo.com/events/hadoopsummit2011/">Hadoop Summit</a> hosted by Y!</p>
+            <p>June 8th, 2011 <a href="http://berlinbuzzwords.de/wiki/hbase-workshop-and-hackathon">HBase Hackathon</a> in Berlin to coincide with <a href="http://berlinbuzzwords.de/">Berlin Buzzwords</a></p>
+            <p>May 19th, 2011 HBase 0.90.3 released. <a href="http://www.apache.org/dyn/closer.cgi/hbase/">Download it!</a></p>
+            <p>April 12th, 2011 HBase 0.90.2 released. <a href="http://www.apache.org/dyn/closer.cgi/hbase/">Download it!</a></p>
+            <p>March 21st, <a href="http://www.meetup.com/hackathon/events/16770852/">HBase 0.92 Hackathon at StumbleUpon, SF</a></p>
+            <p>February 22nd, <a href="http://www.meetup.com/hbaseusergroup/events/16492913/">HUG12: February HBase User Group at StumbleUpon SF</a></p>
+            <p>December 13th, <a href="http://www.meetup.com/hackathon/calendar/15597555/">HBase Hackathon: Coprocessor Edition</a></p>
+      <p>November 19th, <a href="http://huguk.org/">Hadoop HUG in London</a> is all about HBase</p>
+      <p>November 15-19th, <a href="http://www.devoxx.com/display/Devoxx2K10/Home">Devoxx</a> features HBase Training and multiple HBase presentations</p>
+      <p>October 12th, HBase-related presentations by core contributors and users at <a href="http://www.cloudera.com/company/press-center/hadoop-world-nyc/">Hadoop World 2010</a></p>
+      <p>October 11th, <a href="http://www.meetup.com/hbaseusergroup/calendar/14606174/">HUG-NYC: HBase User Group NYC Edition</a> (Night before Hadoop World)</p>
+      <p>June 30th, <a href="http://www.meetup.com/hbaseusergroup/calendar/13562846/">HBase Contributor Workshop</a> (Day after Hadoop Summit)</p>
+      <p>May 10th, 2010: HBase graduates from Hadoop sub-project to Apache Top Level Project</p>
+      <p>Sign up for <a href="http://www.meetup.com/hbaseusergroup/calendar/12689490/">HBase User Group Meeting, HUG10</a> hosted by Trend Micro, April 19th, 2010</p>
+
+      <p><a href="http://www.meetup.com/hbaseusergroup/calendar/12689351/">HBase User Group Meeting, HUG9</a> hosted by Mozilla, March 10th, 2010</p>
+      <p>Sign up for the <a href="http://www.meetup.com/hbaseusergroup/calendar/12241393/">HBase User Group Meeting, HUG8</a>, January 27th, 2010 at StumbleUpon in SF</p>
+      <p>September 8th, 2009: HBase 0.20.0 is faster, stronger, slimmer, and sweeter tasting than any previous HBase release.  Get it off the <a href="http://www.apache.org/dyn/closer.cgi/hbase/">Releases</a> page.</p>
+      <p><a href="http://dev.us.apachecon.com/c/acus2009/">ApacheCon</a>
in Oakland: November 2-6th, 2009: 
+      The Apache Foundation will be celebrating its 10th anniversary in beautiful Oakland
by the Bay. Lots of good talks and meetups including an HBase presentation by a couple of
the lads.</p>
+      <p>HBase at Hadoop World in NYC: October 2nd, 2009: A few of us will be talking
on Practical HBase out east at <a href="http://www.cloudera.com/hadoop-world-nyc">Hadoop
World: NYC</a>.</p>
+      <p>HUG7 and HBase Hackathon: August 7th-9th, 2009 at StumbleUpon in SF: Sign
up for the <a href="http://www.meetup.com/hbaseusergroup/calendar/10950511/">HBase User
Group Meeting, HUG7</a> or for the <a href="http://www.meetup.com/hackathon/calendar/10951718/">Hackathon</a>
or for both (all are welcome!).</p>
+      <p>June, 2009 -- HBase at HadoopSummit2009 and at NOSQL: See the <a href="http://wiki.apache.org/hadoop/HBase/HBasePresentations">presentations</a></p>
+      <p>March 3rd, 2009 -- HUG6: <a href="http://www.meetup.com/hbaseusergroup/calendar/9764004/">HBase
User Group 6</a></p>
+      <p>January 30th, 2009 -- LA Hbackathon:<a href="http://www.meetup.com/hbasela/calendar/9450876/">HBase
January Hackathon Los Angeles</a> at <a href="http://streamy.com" >Streamy</a>
in Manhattan Beach</p>
+  </section>
+  </body>
+</document>

Added: hbase/trunk/src/site/xdoc/pseudo-distributed.xml
URL: http://svn.apache.org/viewvc/hbase/trunk/src/site/xdoc/pseudo-distributed.xml?rev=1345788&view=auto
==============================================================================
--- hbase/trunk/src/site/xdoc/pseudo-distributed.xml (added)
+++ hbase/trunk/src/site/xdoc/pseudo-distributed.xml Sun Jun  3 21:59:50 2012
@@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Copyright 2010 The Apache Software Foundation
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+          "http://forrest.apache.org/dtd/document-v20.dtd">
+
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+  <properties>
+    <title> 
+Running HBase in pseudo-distributed mode
+    </title>
+  </properties>
+
+  <body>
+      <p>This page has been retired.  The contents have been moved to the
+      <a href="http://hbase.apache.org/book.html#distributed">Distributed Operation: Pseudo- and Fully-distributed modes</a> section in the Reference Guide.
+      </p>
+
+  </body>
+
+</document>
+

Added: hbase/trunk/src/site/xdoc/replication.xml
URL: http://svn.apache.org/viewvc/hbase/trunk/src/site/xdoc/replication.xml?rev=1345788&view=auto
==============================================================================
--- hbase/trunk/src/site/xdoc/replication.xml (added)
+++ hbase/trunk/src/site/xdoc/replication.xml Sun Jun  3 21:59:50 2012
@@ -0,0 +1,401 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Copyright 2010 The Apache Software Foundation
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+          "http://forrest.apache.org/dtd/document-v20.dtd">
+
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+  <properties>
+    <title>
+      HBase Replication
+    </title>
+  </properties>
+  <body>
+    <section name="Overview">
+      <p>
+        HBase replication is a way to copy data between HBase deployments. It
+        can serve as a disaster recovery solution and can contribute to higher
+        availability at the HBase layer. It also has more practical uses; for
+        example, it is an easy way to copy edits from a web-facing cluster to a
+        "MapReduce" cluster that processes old and new data and ships the
+        results back automatically.
+      </p>
+      <p>
+        The basic architecture pattern used for HBase replication is (HBase cluster) master-push;
+        this makes it much easier to keep track of what’s currently being replicated, since
+        each region server has its own write-ahead-log (aka WAL or HLog), much as in
+        other well-known solutions such as MySQL master/slave replication, where
+        there’s only one binlog to keep track of. One master cluster can
+        replicate to any number of slave clusters, and each region server
+        participates in replicating its own stream of edits. For more information
+        on the different properties of master/slave replication and other types
+        of replication, please consult <a href="http://highscalability.com/blog/2009/8/24/how-google-serves-data-from-multiple-datacenters.html">
+        How Google Serves Data From Multiple Datacenters</a>.
+      </p>
+      <p>
+        The replication is done asynchronously, meaning that the clusters can
+        be geographically distant, the links between them can be offline for
+        some time, and rows inserted on the master cluster won’t be
+        available at the same time on the slave clusters (eventual consistency).
+      </p>
+      <p>
+        The replication format used in this design is conceptually the same as
+        <a href="http://dev.mysql.com/doc/refman/5.1/en/replication-formats.html">
+        MySQL’s statement-based replication </a>. Instead of SQL statements, whole
+        WALEdits (consisting of multiple cell inserts coming from the clients'
+        Put and Delete) are replicated in order to maintain atomicity.
+      </p>
+      <p>
+        The HLogs from each region server are the basis of HBase replication,
+        and must be kept in HDFS as long as they are needed to replicate data
+        to any slave cluster. Each RS reads from the oldest log it needs to
+        replicate and keeps the current position inside ZooKeeper to simplify
+        failure recovery. That position can be different for every slave
+        cluster, as can the queue of HLogs to process.
+      </p>
+      <p>
+        The clusters participating in replication can be of asymmetric sizes
+        and the master cluster will do its “best effort” to balance the stream
+        of replication on the slave clusters by relying on randomization.
+      </p>
+      <p>
+        As of version 0.92, HBase supports master/master and cyclic replication as
+        well as replication to multiple slaves.
+      </p>
+      <img src="images/replication_overview.png"/>
+    </section>
+    <section name="Enabling replication">
+      <p>
+        The guide on enabling and using cluster replication is contained
+        in the API documentation shipped with your HBase distribution.
+      </p>
+      <p>
+        The most up-to-date documentation is
+        <a href="apidocs/org/apache/hadoop/hbase/replication/package-summary.html#requirements">
+        available at this address</a>.
+      </p>
+    </section>
+    <section name="Life of a log edit">
+      <p>
+        The following sections describe the life of a single edit going from a
+        client that communicates with a master cluster all the way to a single
+        slave cluster.
+      </p>
+      <section name="Normal processing">
+        <p>
+          The client uses an HBase API that sends a Put, Delete or ICV to a region
+          server. The key values are transformed into a WALEdit by the region
+          server, and the WALEdit is inspected by the replication code, which, for each family
+          that is scoped for replication, adds the scope to the edit. The edit
+          is appended to the current WAL and is then applied to the MemStore.
+        </p>
+        <p>
+          In a separate thread, the edit is read from the log (as part of a batch)
+          and only the KVs that are replicable are kept (that is, those that are part
+          of a family scoped GLOBAL in the family's schema, are non-catalog so not from
+          .META. or -ROOT-, and did not originate in the target slave cluster, in the
+          case of cyclic replication).
+        </p>
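+        <p>
+          As a toy model of that filter (names are illustrative; the real code
+          lives in the replication source and does not take this exact form):
+        </p>
+        <pre><![CDATA[
+import java.util.Set;
+import java.util.UUID;
+
+public class EditFilterModel {
+  // Keep a KV only if its family is scoped GLOBAL, its table is not a catalog
+  // table, and the edit did not originate in the cluster being shipped to.
+  static boolean isReplicable(String table, String family,
+                              Set<String> globalScopedFamilies,
+                              UUID editOrigin, UUID targetClusterId) {
+    if ("-ROOT-".equals(table) || ".META.".equals(table)) return false; // catalog
+    if (!globalScopedFamilies.contains(family)) return false;           // scope
+    return !targetClusterId.equals(editOrigin);                         // cyclic guard
+  }
+}
+]]></pre>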
+        <p>
+          The edit is then tagged with the master's cluster UUID.
+          When the buffer is filled, or the reader hits the end of the file,
+          the buffer is sent to a random region server on the slave cluster.
+        </p>
+        <p>
+          Synchronously, the region server that receives the edits reads them
+          sequentially and separates each of them into buffers, one per table.
+          Once all edits are read, each buffer is flushed using the normal HBase
+          client (HTables managed by an HTablePool). This is done in order to
+          leverage parallel insertion (MultiPut).
+          The master's cluster UUID is retained in the edits applied at the
+          slave cluster in order to allow cyclic replication.
+        </p>
+        <p>
+          Back in the master cluster's region server, the offset for the current
+          WAL that's being replicated is registered in ZooKeeper.
+        </p>
+      </section>
+      <section name="Non-responding slave clusters">
+        <p>
+          The edit is inserted in the same way.
+        </p>
+        <p>
+          In the separate thread, the region server reads, filters and buffers
+          the log edits the same way as during normal processing. The slave
+          region server that's contacted doesn't answer the RPC, so the master
+          region server will sleep and retry up to a configured number of times.
+          If the slave RS still isn't available, the master cluster RS will select a
+          new subset of RSs to replicate to and will retry sending the buffer of
+          edits.
+        </p>
+        <p>
+          In the meantime, the WALs will be rolled and stored in a queue in
+          ZooKeeper. Logs that are archived by their region server (archiving is
+          basically moving a log from the region server's logs directory to a
+          central logs archive directory) will update their paths in the in-memory
+          queue of the replicating thread.
+        </p>
+        <p>
+          When the slave cluster is finally available, the buffer will be applied
+          the same way as during normal processing. The master cluster RS will then
+          replicate the backlog of logs.
+        </p>
+      </section>
+    </section>
+    <section name="Internals">
+      <p>
+        This section describes in depth how each of replication's internal
+        features operate.
+      </p>
+      <section name="Choosing region servers to replicate to">
+        <p>
+          When a master cluster RS initiates a replication source to a slave cluster,
+          it first connects to the slave's ZooKeeper ensemble using the provided
+          cluster key (that key is composed of the value of hbase.zookeeper.quorum,
+          zookeeper.znode.parent and hbase.zookeeper.property.clientPort). It
+          then scans the "rs" directory to discover all the available sinks
+          (region servers that are accepting incoming streams of edits to replicate)
+          and will randomly choose a subset of them using a configured
+          ratio (which has a default value of 10%). For example, if a slave
+          cluster has 150 machines, 15 will be chosen as potential recipients for
+          edits that this master cluster RS will be sending. Since this is done by all
+          master cluster RSs, the probability that all slave RSs are used is very high,
+          and this method works for clusters of any size. For example, a master cluster
+          of 10 machines replicating to a slave cluster of 5 machines with a ratio
+          of 10% means that each master cluster RS will choose one machine at
+          random; the choices may overlap, but the whole slave cluster is still
+          likely to be used.
+        </p>
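+        <p>
+          A toy model of the selection step (class and method names are
+          illustrative; a cluster key looks like "zk1,zk2,zk3:2181:/hbase" and
+          the ratio corresponds to the 10% default mentioned above):
+        </p>
+        <pre><![CDATA[
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+public class SinkSelectionModel {
+  // Pick a random subset of the slave's region servers (the listing of its
+  // "rs" znode) to act as potential sinks, keeping at least one of them.
+  static List<String> chooseSinks(List<String> slaveRegionServers, float ratio) {
+    List<String> shuffled = new ArrayList<String>(slaveRegionServers);
+    Collections.shuffle(shuffled);
+    int nbSinks = Math.max(1, Math.round(shuffled.size() * ratio));
+    return shuffled.subList(0, nbSinks);
+  }
+}
+]]></pre>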
+      </section>
+      <section name="Keeping track of logs">
+        <p>
+          Every master cluster RS has its own znode in the replication znodes hierarchy.
+          It contains one znode per peer cluster (if there are 5 slave clusters, 5 znodes
+          are created), and each of these contains a queue
+          of HLogs to process. Each of these queues will track the HLogs created
+          by that RS, but they can differ in size. For example, if one slave
+          cluster becomes unavailable for some time, then its HLogs should not be deleted,
+          so they need to stay in the queue (while the others are processed).
+          See the section named "Region server failover" for an example.
+        </p>
+        <p>
+          When a source is instantiated, it contains the current HLog that the
+          region server is writing to. During log rolling, the new file is added
+          to the queue of each slave cluster's znode just before it's made available.
+          This ensures that all the sources are aware that a new log exists
+          before the HLog is able to append edits into it, though this makes the
+          operation more expensive.
+          The queue items are discarded when the replication thread cannot read
+          more entries from a file (because it reached the end of the last block)
+          and there are other files in the queue.
+          This means that if a source is up-to-date and replicates from the log
+          that the region server writes to, reading up to the "end" of the
+          current file won't delete the item in the queue.
+        </p>
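+        <p>
+          In ZooKeeper terms, the rolling step amounts to one create per peer
+          queue. A minimal sketch, with illustrative names (paths follow the
+          layout shown in the failover section below):
+        </p>
+        <pre><![CDATA[
+import java.util.List;
+import org.apache.zookeeper.CreateMode;
+import org.apache.zookeeper.ZooDefs.Ids;
+import org.apache.zookeeper.ZooKeeper;
+
+public class LogRollEnqueueModel {
+  // Queue the newly rolled HLog under every peer's queue znode before the
+  // WAL starts taking edits, so that no source can miss it.
+  static void addLogToQueues(ZooKeeper zk, String rsZnode, List<String> peerIds,
+                             String newLogName) throws Exception {
+    for (String peerId : peerIds) {
+      zk.create(rsZnode + "/" + peerId + "/" + newLogName, new byte[0],
+                Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
+    }
+  }
+}
+]]></pre>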
+        <p>
+          When a log is archived (because it's not used anymore or because there are
+          too many of them per hbase.regionserver.maxlogs, typically because the insertion
+          rate is faster than region flushing), it will notify the source threads that the path
+          for that log changed. If a particular source is already done with
+          it, it will just ignore the message. If the log is in the queue, the path
+          will be updated in memory. If the log is currently being replicated,
+          the change will be done atomically so that the reader doesn't try to
+          open the file when it has already been moved. Also, moving a file is a NameNode
+          operation, so if the reader is currently reading the log, it won't
+          generate any exception.
+        </p>
+      </section>
+      <section name="Reading, filtering and sending edits">
+        <p>
+          By default, a source will try to read from a log file and ship log
+          entries as fast as possible to a sink. This is first limited by the
+          filtering of log entries; only KeyValues that are scoped GLOBAL and
+          that don't belong to catalog tables will be retained. A second limit
+          is imposed on the total size of the list of edits to replicate per slave,
+          which by default is 64MB. This means that a master cluster RS with 3 slaves
+          will use at most 192MB to store data to replicate. This doesn't account
+          for data that was filtered out but not yet garbage collected.
+        </p>
+        <p>
+          Once the maximum size of edits has been buffered or the reader hits the end
+          of the log file, the source thread will stop reading and will choose
+          at random a sink to replicate to (from the list that was generated by
+          keeping only a subset of slave RSs). It will directly issue an RPC to
+          the chosen machine and will wait for the method to return. If the RPC
+          is successful, the source will determine whether the current file has been emptied
+          or whether it should continue to read from it. If the former, it will delete
+          the znode in the queue. If the latter, it will register the new offset
+          in the log's znode. If the RPC threw an exception, the source will retry
+          10 times before trying to find a different sink.
+        </p>
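+        <p>
+          A toy model of the ship-with-retry step (the Sink interface stands in
+          for the slave region server RPC; constants mirror the defaults above):
+        </p>
+        <pre><![CDATA[
+import java.util.List;
+import java.util.Random;
+
+public class ShipperModel {
+  static final int MAX_RETRIES = 10;  // attempts before picking new sinks
+
+  interface Sink { void replicate(List<byte[]> batch) throws Exception; }
+
+  final Random rng = new Random();
+
+  // Ship one buffered batch of edits. On success the caller records the new
+  // offset in the log's znode; on false it re-selects a subset of slave RSs.
+  boolean ship(List<byte[]> batch, List<Sink> sinks) throws InterruptedException {
+    for (int attempt = 0; attempt < MAX_RETRIES; attempt++) {
+      Sink sink = sinks.get(rng.nextInt(sinks.size()));
+      try {
+        sink.replicate(batch);  // one blocking RPC
+        return true;
+      } catch (Exception e) {
+        Thread.sleep(1000);     // sleep, then retry
+      }
+    }
+    return false;
+  }
+}
+]]></pre>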
+      </section>
+      <section name="Cleaning logs">
+        <p>
+          If replication isn't enabled, the master's log-cleaning thread will
+          delete old logs using a configured TTL. This doesn't work well with
+          replication, since archived logs past their TTL may still be in a
+          queue. Thus, the default behavior is augmented so that if a log is
+          past its TTL, the cleaning thread will look up every queue until it
+          finds the log (while caching the ones it finds). If it's not found,
+          the log will be deleted. The next time it has to look for a log,
+          it will first use its cache.
+        </p>
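+        <p>
+          In outline, the cleaner's decision looks like the following sketch
+          (illustrative names; the real cleaner is a master-side plugin):
+        </p>
+        <pre><![CDATA[
+import java.util.HashSet;
+import java.util.Set;
+
+public class ReplicationLogCleanerModel {
+  // Logs already known to sit in some replication queue.
+  private final Set<String> queuedLogsCache = new HashSet<String>();
+
+  // A log past its TTL is deletable only if no queue still references it.
+  boolean isDeletable(String logName, Iterable<Set<String>> allQueues) {
+    if (queuedLogsCache.contains(logName)) return false;  // cache hit: keep
+    for (Set<String> queue : allQueues) {
+      if (queue.contains(logName)) {
+        queuedLogsCache.add(logName);  // remember for the next pass
+        return false;
+      }
+    }
+    return true;  // referenced nowhere: safe to delete
+  }
+}
+]]></pre>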
+      </section>
+      <section name="Region server failover">
+        <p>
+          As long as region servers don't fail, keeping track of the logs in ZK
+          doesn't add any value. Unfortunately, they do fail, and since ZooKeeper
+          is highly available, we can count on it and its semantics to help us
+          manage the transfer of the queues.
+        </p>
+        <p>
+          All the master cluster RSs keep a watcher on every other one of them to be
+          notified when one dies (just like the master does). When it happens,
+          they all race to create a znode called "lock" inside the dead RS' znode
+          that contains its queues. The one that creates it successfully will
+          proceed by transferring all the queues to its own znode (one by one
+          since ZK doesn't support the rename operation) and will delete all the
+          old ones when it's done. The recovered queues' znodes will be named
+          with the id of the slave cluster appended with the name of the dead
+          server. 
+        </p>
+        <p>
+          Once that is done, the master cluster RS will create one new source thread per
+          copied queue, and each of them will follow the read/filter/ship pattern.
+          The main difference is that those queues will never have new data since
+          they don't belong to their new region server, which means that when
+          the reader hits the end of the last log, the queue's znode will be
+          deleted and the master cluster RS will close that replication source.
+        </p>
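+        <p>
+          The race itself maps onto a single ZooKeeper create: whoever manages
+          to create the "lock" znode wins. A minimal sketch (the queue-transfer
+          logic that follows a win is omitted):
+        </p>
+        <pre><![CDATA[
+import org.apache.zookeeper.CreateMode;
+import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.ZooDefs.Ids;
+import org.apache.zookeeper.ZooKeeper;
+
+public class QueueLockModel {
+  // Returns true if this RS won the race and may copy the dead RS's queues.
+  static boolean tryLock(ZooKeeper zk, String deadRsZnode) throws Exception {
+    try {
+      zk.create(deadRsZnode + "/lock", new byte[0],
+                Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
+      return true;
+    } catch (KeeperException.NodeExistsException e) {
+      return false;  // another survivor got there first
+    }
+  }
+}
+]]></pre>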
+        <p>
+          For example, consider a master cluster with 3 region servers that's
+          replicating to a single slave with id '2'. The following hierarchy
+          represents what the znode layout could be at some point in time. We
+          can see that each RS's znode contains one queue znode per peer (here,
+          a single queue for slave '2'). The znode names in the queues represent the actual file
+          names on HDFS in the form "address,port.timestamp".
+        </p>
+        <pre>
+/hbase/replication/rs/
+                      1.1.1.1,60020,123456780/
+                          2/
+                              1.1.1.1,60020.1234  (Contains a position)
+                              1.1.1.1,60020.1265
+                      1.1.1.2,60020,123456790/
+                          2/
+                              1.1.1.2,60020.1214  (Contains a position)
+                              1.1.1.2,60020.1248
+                              1.1.1.2,60020.1312
+                      1.1.1.3,60020,123456630/
+                          2/
+                              1.1.1.3,60020.1280  (Contains a position)
+        </pre>
+        <p>
+          Now let's say that 1.1.1.2 loses its ZK session. The survivors will race
+          to create a lock, and for some reason 1.1.1.3 wins. It will then start
+          transferring all the queues to its own znode, appending the
+          name of the dead server. Right before 1.1.1.3 is able to clean up the
+          old znodes, the layout will look like the following:
+        </p>
+        <pre>
+/hbase/replication/rs/
+                      1.1.1.1,60020,123456780/
+                          2/
+                              1.1.1.1,60020.1234  (Contains a position)
+                              1.1.1.1,60020.1265
+                      1.1.1.2,60020,123456790/
+                          lock
+                          2/
+                              1.1.1.2,60020.1214  (Contains a position)
+                              1.1.1.2,60020.1248
+                              1.1.1.2,60020.1312
+                      1.1.1.3,60020,123456630/
+                          2/
+                              1.1.1.3,60020.1280  (Contains a position)
+
+                          2-1.1.1.2,60020,123456790/
+                              1.1.1.2,60020.1214  (Contains a position)
+                              1.1.1.2,60020.1248
+                              1.1.1.2,60020.1312
+        </pre>
+        <p>
+          Some time later, but before 1.1.1.3 is able to finish replicating the
+          last HLog from 1.1.1.2, let's say that it dies too (also some new logs
+          were created in the normal queues). The last RS will then try to lock
+          1.1.1.3's znode and will begin transferring all the queues. The new
+          layout will be:
+        </p>
+        <pre>
+/hbase/replication/rs/
+                      1.1.1.1,60020,123456780/
+                          2/
+                              1.1.1.1,60020.1378  (Contains a position)
+
+                          2-1.1.1.3,60020,123456630/
+                              1.1.1.3,60020.1325  (Contains a position)
+                              1.1.1.3,60020.1401
+
+                          2-1.1.1.2,60020,123456790-1.1.1.3,60020,123456630/
+                              1.1.1.2,60020.1312  (Contains a position)
+                      1.1.1.3,60020,123456630/
+                          lock
+                          2/
+                              1.1.1.3,60020.1325  (Contains a position)
+                              1.1.1.3,60020.1401
+
+                          2-1.1.1.2,60020,123456790/
+                              1.1.1.2,60020.1312  (Contains a position)
+        </pre>
+      </section>
+    </section>
+    <section name="FAQ">
+      <section name="GLOBAL means replicate? Any provision to replicate only to cluster
X and not to cluster Y? or is that for later?">
+        <p>
+          Yes, this is for much later.
+        </p>
+      </section>
+      <section name="You need a bulk edit shipper? Something that allows you transfer
64MB of edits in one go?">
+        <p>
+          You can use the HBase-provided utility called CopyTable from the package
+          org.apache.hadoop.hbase.mapreduce in order to have a distcp-like tool to
+          bulk copy data.
+        </p>
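+        <p>
+          For example (the table name and cluster key are placeholders):
+        </p>
+        <pre>
+$ hbase org.apache.hadoop.hbase.mapreduce.CopyTable \
+    --peer.adr=zk1.example.com,zk2.example.com,zk3.example.com:2181:/hbase TestTable
+        </pre>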
+      </section>
+      <section name="Is it a mistake that WALEdit doesn't carry Put and Delete objects,
that we have to reinstantiate not only when replicating but when replaying edits also?">
+        <p>
+          Yes, this behavior would help a lot but it's not currently available
+          in HBase (BatchUpdate had that, but it was lost in the new API).
+        </p>
+      </section>
+    </section>
+    <section name="Known bugs/missing features">
+      <p>
+        Here's a list of all the JIRAs that relate to major issues or missing
+        features in the replication implementation.
+      </p>
+      <ol>
+        <li>
+            HBASE-2611: basically, if a region server dies while recovering the
+            queues of another dead RS, we will miss the data from the queues
+            that weren't copied.
+        </li>
+      </ol>
+    </section>
+  </body>
+</document>

Added: hbase/trunk/src/site/xdoc/sponsors.xml
URL: http://svn.apache.org/viewvc/hbase/trunk/src/site/xdoc/sponsors.xml?rev=1345788&view=auto
==============================================================================
--- hbase/trunk/src/site/xdoc/sponsors.xml (added)
+++ hbase/trunk/src/site/xdoc/sponsors.xml Sun Jun  3 21:59:50 2012
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+  <properties>
+    <title>Sponsors</title>
+  </properties>
+
+<body>
+<section name="Sponsors">
+<p>The companies below have been gracious enough to provide their commercial tool offerings free of charge to the Apache HBase project.</p>
+<ul>
+  <li>The crew at <a href="http://www.ej-technologies.com/">ej-technologies</a> have
+        let us use <a href="http://www.ej-technologies.com/products/jprofiler/overview.html">JProfiler</a> for years now.</li>
+  <li>The lads at <a href="http://headwaysoftware.com/">headway software</a> have
+        given us a license for <a href="http://headwaysoftware.com/products/?code=Restructure101">Restructure101</a>
+        so we can untangle our interdependency mess.</li>
+</ul>
+</section>
+</body>
+</document>

Added: hbase/trunk/src/xslt/configuration_to_docbook_section.xsl
URL: http://svn.apache.org/viewvc/hbase/trunk/src/xslt/configuration_to_docbook_section.xsl?rev=1345788&view=auto
==============================================================================
--- hbase/trunk/src/xslt/configuration_to_docbook_section.xsl (added)
+++ hbase/trunk/src/xslt/configuration_to_docbook_section.xsl Sun Jun  3 21:59:50 2012
@@ -0,0 +1,68 @@
+<?xml version="1.0"?>
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+<xsl:output method="xml"/>
+<xsl:template match="configuration">
+<!--
+/**
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+This stylesheet is used to make an HTML version of hbase-default.xml.
+-->
+<section xml:id="hbase_default_configurations"
+version="5.0" xmlns="http://docbook.org/ns/docbook"
+      xmlns:xlink="http://www.w3.org/1999/xlink"
+      xmlns:xi="http://www.w3.org/2001/XInclude"
+      xmlns:svg="http://www.w3.org/2000/svg"
+      xmlns:m="http://www.w3.org/1998/Math/MathML"
+      xmlns:html="http://www.w3.org/1999/xhtml"
+      xmlns:db="http://docbook.org/ns/docbook">
+<title>HBase Default Configuration</title>
+<para>
+</para>
+
+<glossary xmlns='http://docbook.org/ns/docbook' xml:id="hbase.default.configuration">
+<title>HBase Default Configuration</title>
+<para>
+The documentation below is generated using the default hbase configuration file,
+<filename>hbase-default.xml</filename>, as source.
+</para>
+
+<xsl:for-each select="property">
+<xsl:if test="not(@skipInDoc)">
+<glossentry>
+  <xsl:attribute name="id">
+    <xsl:value-of select="name" />
+  </xsl:attribute>
+  <glossterm>
+    <varname><xsl:value-of select="name"/></varname>
+  </glossterm>
+  <glossdef>
+  <para><xsl:value-of select="description"/></para>
+  <para>Default: <varname><xsl:value-of select="value"/></varname></para>
+  </glossdef>
+</glossentry>
+</xsl:if>
+</xsl:for-each>
+
+</glossary>
+
+</section>
+</xsl:template>
+</xsl:stylesheet>


