db-derby-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From j..@apache.org
Subject svn commit: r278552 - in /db/derby/site/trunk: build/site/ build/site/papers/ src/documentation/content/xdocs/ src/documentation/content/xdocs/papers/
Date Sun, 04 Sep 2005 03:11:32 GMT
Author: jta
Date: Sat Sep  3 20:11:25 2005
New Revision: 278552

URL: http://svn.apache.org/viewcvs?rev=278552&view=rev
Log:
Added BTree writeup to the Papers tab that Dibyendu Majumdar 
<dibyendu@mazumdar.demon.co.uk> posted to derby-dev.

Added:
    db/derby/site/trunk/build/site/papers/btree_package.html   (with props)
    db/derby/site/trunk/src/documentation/content/xdocs/papers/btree_package.html   (with props)
Modified:
    db/derby/site/trunk/build/site/linkmap.html
    db/derby/site/trunk/build/site/papers/ApacheConUs04.html
    db/derby/site/trunk/build/site/papers/DerbyClientSpec.html
    db/derby/site/trunk/build/site/papers/Intersect-design.html
    db/derby/site/trunk/build/site/papers/JDBCImplementation.html
    db/derby/site/trunk/build/site/papers/MiscPresentations.html
    db/derby/site/trunk/build/site/papers/derby_arch.html
    db/derby/site/trunk/build/site/papers/derby_htw.html
    db/derby/site/trunk/build/site/papers/derby_web.html
    db/derby/site/trunk/build/site/papers/fortune_tut.html
    db/derby/site/trunk/build/site/papers/index.html
    db/derby/site/trunk/build/site/papers/logformats.html
    db/derby/site/trunk/build/site/papers/optimizer.html
    db/derby/site/trunk/build/site/papers/pageformats.html
    db/derby/site/trunk/build/site/papers/recovery.html
    db/derby/site/trunk/build/site/papers/versionupgrade.html
    db/derby/site/trunk/src/documentation/content/xdocs/papers/index.xml
    db/derby/site/trunk/src/documentation/content/xdocs/site.xml

Modified: db/derby/site/trunk/build/site/linkmap.html
URL: http://svn.apache.org/viewcvs/db/derby/site/trunk/build/site/linkmap.html?rev=278552&r1=278551&r2=278552&view=diff
==============================================================================
--- db/derby/site/trunk/build/site/linkmap.html (original)
+++ db/derby/site/trunk/build/site/linkmap.html Sat Sep  3 20:11:25 2005
@@ -123,6 +123,9 @@
 <a href="papers/derby_arch.html">Architecture</a>
 </div>
 <div class="menuitem">
+<a href="papers/btree_package.html">BTree</a>
+</div>
+<div class="menuitem">
 <a href="papers/pageformats.html">Disk Page Format</a>
 </div>
 <div class="menuitem">
@@ -654,6 +657,12 @@
 <ul>
 <li>
 <a href="papers/derby_arch.html">Architecture</a>&nbsp;&nbsp;&nbsp;_________________________&nbsp;&nbsp;<em>index</em>
+</li>
+</ul>
+      
+<ul>
+<li>
+<a href="papers/btree_package.html">BTree</a>&nbsp;&nbsp;&nbsp;_________________________&nbsp;&nbsp;<em>index</em>
 </li>
 </ul>
       

Modified: db/derby/site/trunk/build/site/papers/ApacheConUs04.html
URL: http://svn.apache.org/viewcvs/db/derby/site/trunk/build/site/papers/ApacheConUs04.html?rev=278552&r1=278551&r2=278552&view=diff
==============================================================================
--- db/derby/site/trunk/build/site/papers/ApacheConUs04.html (original)
+++ db/derby/site/trunk/build/site/papers/ApacheConUs04.html Sat Sep  3 20:11:25 2005
@@ -81,6 +81,9 @@
 <a href="../papers/derby_arch.html">Architecture</a>
 </div>
 <div class="menuitem">
+<a href="../papers/btree_package.html">BTree</a>
+</div>
+<div class="menuitem">
 <a href="../papers/pageformats.html">Disk Page Format</a>
 </div>
 <div class="menuitem">

Modified: db/derby/site/trunk/build/site/papers/DerbyClientSpec.html
URL: http://svn.apache.org/viewcvs/db/derby/site/trunk/build/site/papers/DerbyClientSpec.html?rev=278552&r1=278551&r2=278552&view=diff
==============================================================================
--- db/derby/site/trunk/build/site/papers/DerbyClientSpec.html (original)
+++ db/derby/site/trunk/build/site/papers/DerbyClientSpec.html Sat Sep  3 20:11:25 2005
@@ -84,6 +84,9 @@
 <a href="../papers/derby_arch.html">Architecture</a>
 </div>
 <div class="menuitem">
+<a href="../papers/btree_package.html">BTree</a>
+</div>
+<div class="menuitem">
 <a href="../papers/pageformats.html">Disk Page Format</a>
 </div>
 <div class="menuitem">

Modified: db/derby/site/trunk/build/site/papers/Intersect-design.html
URL: http://svn.apache.org/viewcvs/db/derby/site/trunk/build/site/papers/Intersect-design.html?rev=278552&r1=278551&r2=278552&view=diff
==============================================================================
--- db/derby/site/trunk/build/site/papers/Intersect-design.html (original)
+++ db/derby/site/trunk/build/site/papers/Intersect-design.html Sat Sep  3 20:11:25 2005
@@ -83,6 +83,9 @@
 <a href="../papers/derby_arch.html">Architecture</a>
 </div>
 <div class="menuitem">
+<a href="../papers/btree_package.html">BTree</a>
+</div>
+<div class="menuitem">
 <a href="../papers/pageformats.html">Disk Page Format</a>
 </div>
 <div class="menuitem">

Modified: db/derby/site/trunk/build/site/papers/JDBCImplementation.html
URL: http://svn.apache.org/viewcvs/db/derby/site/trunk/build/site/papers/JDBCImplementation.html?rev=278552&r1=278551&r2=278552&view=diff
==============================================================================
--- db/derby/site/trunk/build/site/papers/JDBCImplementation.html (original)
+++ db/derby/site/trunk/build/site/papers/JDBCImplementation.html Sat Sep  3 20:11:25 2005
@@ -88,6 +88,9 @@
 <a href="../papers/derby_arch.html">Architecture</a>
 </div>
 <div class="menuitem">
+<a href="../papers/btree_package.html">BTree</a>
+</div>
+<div class="menuitem">
 <a href="../papers/pageformats.html">Disk Page Format</a>
 </div>
 <div class="menuitem">

Modified: db/derby/site/trunk/build/site/papers/MiscPresentations.html
URL: http://svn.apache.org/viewcvs/db/derby/site/trunk/build/site/papers/MiscPresentations.html?rev=278552&r1=278551&r2=278552&view=diff
==============================================================================
--- db/derby/site/trunk/build/site/papers/MiscPresentations.html (original)
+++ db/derby/site/trunk/build/site/papers/MiscPresentations.html Sat Sep  3 20:11:25 2005
@@ -81,6 +81,9 @@
 <a href="../papers/derby_arch.html">Architecture</a>
 </div>
 <div class="menuitem">
+<a href="../papers/btree_package.html">BTree</a>
+</div>
+<div class="menuitem">
 <a href="../papers/pageformats.html">Disk Page Format</a>
 </div>
 <div class="menuitem">

Added: db/derby/site/trunk/build/site/papers/btree_package.html
URL: http://svn.apache.org/viewcvs/db/derby/site/trunk/build/site/papers/btree_package.html?rev=278552&view=auto
==============================================================================
--- db/derby/site/trunk/build/site/papers/btree_package.html (added)
+++ db/derby/site/trunk/build/site/papers/btree_package.html Sat Sep  3 20:11:25 2005
@@ -0,0 +1,295 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.7">
+<meta name="Forrest-skin-name" content="pelt">
+<meta name="generator" content="">
+<meta name="" content="">
+<title>org.apache.derby.impl.store.access.btree</title>
+<link type="text/css" href="../skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="../skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="../skin/print.css" rel="stylesheet">
+<link type="text/css" href="../skin/profile.css" rel="stylesheet">
+<script src="../skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="../skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="../skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="../">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<div class="breadtrail">
+<a href="http://www.apache.org/">apache</a> &gt; <a href="http://db.apache.org/">db</a><script src="../skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<div class="header">
+<div class="grouplogo">
+<a href="http://db.apache.org"><img class="logoImage" alt="" src="../images/db-logo-white.png" title=""></a>
+</div>
+<div class="projectlogo">
+<a href="http://db.apache.org/derby/"><img class="logoImage" alt="Derby" src="../images/derby-logo.jpg" title="Derby is a zero admin Java RDBMS."></a>
+</div>
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input attr="value" name="Search" value="Search" type="submit">
+</form>
+</div>
+<ul id="tabs">
+<li>
+<a class="base-not-selected" href="../index.html">Home</a>
+</li>
+<li>
+<a class="base-not-selected" href="../derby_comm.html">Community</a>
+</li>
+<li>
+<a class="base-not-selected" href="../manuals/index.html">Manuals</a>
+</li>
+<li class="current">
+<a class="base-selected" href="../papers/index.html">Papers</a>
+</li>
+<li>
+<a class="base-not-selected" href="../integrate/index.html">Integration</a>
+</li>
+</ul>
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<div id="level2tabs"></div>
+<script type="text/javascript"><!--
+document.write("<text>Last Published:</text> " + document.lastModified);
+//  --></script>
+</div>
+<div class="breadtrail">
+             
+             &nbsp;
+           </div>
+<div id="menu">
+<div onclick="SwitchMenu('menu_1.1', '../skin/')" id="menu_1.1Title" class="menutitle">About</div>
+<div id="menu_1.1" class="menuitemgroup">
+<div class="menuitem">
+<a href="../papers/index.html">Index</a>
+</div>
+<div class="menuitem">
+<a href="../papers/derby_web.html">Derby Web Site</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_selected_1.2', '../skin/')" id="menu_selected_1.2Title" class="menutitle" style="background-image: url('../skin/images/chapter_open.gif');">Derby Engine</div>
+<div id="menu_selected_1.2" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="http://db.apache.org/derby/javadoc/engine">Javadoc</a>
+</div>
+<div class="menuitem">
+<a href="../papers/derby_arch.html">Architecture</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">BTree</div>
+</div>
+<div class="menuitem">
+<a href="../papers/pageformats.html">Disk Page Format</a>
+</div>
+<div class="menuitem">
+<a href="../papers/derby_htw.html">How Things Work</a>
+</div>
+<div class="menuitem">
+<a href="../papers/Intersect-design.html">Intersect &amp; Except</a>
+</div>
+<div class="menuitem">
+<a href="../papers/JDBCImplementation.html">JDBC</a>
+</div>
+<div class="menuitem">
+<a href="../papers/logformats.html">Log Format</a>
+</div>
+<div class="menuitem">
+<a href="../papers/recovery.html">Logging &amp; Recovery</a>
+</div>
+<div class="menuitem">
+<a href="../papers/optimizer.html">Optimizer</a>
+</div>
+<div class="menuitem">
+<a href="http://svn.apache.org/repos/asf/db/derby/code/trunk/java/engine/org/apache/derby/iapi/types/package.html">Type System</a>
+</div>
+<div class="menuitem">
+<a href="../papers/versionupgrade.html">Versioning</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.3', '../skin/')" id="menu_1.3Title" class="menutitle">Derby Network Client</div>
+<div id="menu_1.3" class="menuitemgroup">
+<div class="menuitem">
+<a href="../papers/DerbyClientSpec.html">Functional Spec</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.4', '../skin/')" id="menu_1.4Title" class="menutitle">Presentations</div>
+<div id="menu_1.4" class="menuitemgroup">
+<div class="menuitem">
+<a href="../papers/MiscPresentations.html#OSCON+2005">OSCON 2005</a>
+</div>
+<div class="menuitem">
+<a href="../papers/ApacheConUs04.html">ApacheCon US '04</a>
+</div>
+<div class="menuitem">
+<a href="../papers/MiscPresentations.html#Colorado+Software+Summit+2004">Colorado 2004</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.5', '../skin/')" id="menu_1.5Title" class="menutitle">Instruction</div>
+<div id="menu_1.5" class="menuitemgroup">
+<div class="menuitem">
+<a href="http://coffeecode.net/archives/16-Apache-Derby-tutorial-OSCON-2005-materials.html">Derby+Perl/PHP/Python</a>
+</div>
+<div class="menuitem">
+<a href="../papers/fortune_tut.html">Embedded Tutorial</a>
+</div>
+<div class="menuitem">
+<a href="http://objectinnovations.com/CourseOutlines/168.html">Object Innovations JDBC Course</a>
+</div>
+<div class="menuitem">
+<a href="http://www.ibm.com/developerworks/edu/dm-dw-dm-0412kubasta-i.html?S_TACT=104AHW11&S_CMP=LIB">Cloudscape Detective</a>
+</div>
+<div class="menuitem">
+<a href="http://www.eclipse.org/birt/tutorial/basic/">Eclipse BIRT</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.6', '../skin/')" id="menu_1.6Title" class="menutitle">Articles</div>
+<div id="menu_1.6" class="menuitemgroup">
+<div class="menuitem">
+<a href="http://www.db2mag.com/story/showArticle.jhtml?articleID=51000456">Double Take</a>
+</div>
+<div class="menuitem">
+<a href="http://www.vsj.co.uk/databases/display.asp?id=466">Flying out of the gate with Derby</a>
+</div>
+<div class="menuitem">
+<a href="http://www.devx.com/IBMCloudscape/Article/28526/1763">IBM's Cloudscape Versus MySQL</a>
+</div>
+<div class="menuitem">
+<a href="http://www.ibm.com/developerworks/db2/zones/cloudscape/">developerWorks</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="../skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<div id="credit2"></div>
+</div>
+<div id="content">
+<div class="trail">
+<text>Font size:</text> 
+	          &nbsp;<input value="Reset" class="resetfont" title="Reset text" onclick="ndeSetTextSize('reset'); return false;" type="button">      
+	          &nbsp;<input value="-a" class="smallerfont" title="Shrink text" onclick="ndeSetTextSize('decr'); return false;" type="button">
+	          &nbsp;<input value="+a" class="biggerfont" title="Enlarge text" onclick="ndeSetTextSize('incr'); return false;" type="button">
+</div>
+<h1>org.apache.derby.impl.store.access.btree</h1>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#Overview">Overview</a>
+</li>
+<li>
+<a href="#High+level+structure+of+the+B%2BTree">High level structure of the B+Tree</a>
+</li>
+<li>
+<a href="#Latching+implementation">Latching implementation</a>
+</li>
+<li>
+<a href="#Locking+and+Isolation+Levels">Locking and Isolation Levels</a>
+</li>
+<li>
+<a href="#BTree+Structure+Modifications">BTree Structure Modifications</a>
+</li>
+<li>
+<a href="#Logical+Key+Deletes">Logical Key Deletes</a>
+</li>
+<li>
+<a href="#Garbage+Collection+of+deleted+keys">Garbage Collection of deleted keys</a>
+</li>
+<li>
+<a href="#Logging+and+Recovery">Logging and Recovery</a>
+</li>
+</ul>
+</div>
+<a name="N10019"></a><a name="Overview"></a>
+<h2 class="boxed">Overview</h2>
+<div class="section">
+<p>Derby implements secondary SQL indexes as BTrees. The high level features of the BTree implementation are:</p>
+<ol>
+<li>Derby implements the standard B+Tree algorithm, in which all keys are stored in leaf pages, and the upper levels are organized as a redundant index for enabling rapid location of a particular key. See <a class="external" href="http://portal.acm.org/citation.cfm?id=356776"><cite>The Ubiquitous B-Tree, Douglas Comer</cite></a> for a definition of the B+Tree.</li>
+<li>The BTree implementation supports concurrency using page level latches, and recovery using a Write Ahead Log.</li>
+<li>Derby uses data only locking for its logical row level locking.</li>
+<li>Derby supports all four SQL isolation levels, serializable, repeatable read, read committed and read uncommitted.</li>
+<li>Derby uses logical key deletes. This enables it to perform undos during rollbacks and restart recovery as single page operations.</li>
+</ol>
+</div>
+<a name="N1002F"></a><a name="High+level+structure+of+the+B%2BTree"></a>
+<h2 class="boxed">High level structure of the B+Tree</h2>
+<div class="section">
+<ul>
+<li>The first row in all pages, branch or leaf, is a control row. In branch pages, this is a {@link org.apache.derby.impl.store.access.btree.BranchControlRow BranchControlRow} and in leaf pages, it is a {@link org.apache.derby.impl.store.access.btree.LeafControlRow LeafControlRow}.</li>
+<li>In addition to the BranchControlRow, branch level pages contain {@link org.apache.derby.impl.store.access.btree.BranchRow BranchRows}.</li>
+<li>At branch level, a page is linked to is left and right siblings, parent, as well as children. The number of child pages is 1 greater than the number of BranchRows in the page. The leftmost child pointer is stored in the BranchControlRow itself. Each BranchRow contains a child pointer as its last column.</li>
+<li>At leaf level also, a page is linked to its left and right siblings, and the parent. In addition to the LeafControlRow, leaf level pages contain {@link org.apache.derby.impl.sql.execute.IndexRow IndexRows}. The last column in an IndexRow is a {@link org.apache.derby.iapi.types.RowLocation RowLocation}.</li>
+<li>IndexRows are generated by the {@link org.apache.derby.iapi.sql.dictionary.IndexRowGenerator IndexRowGenerator}.</li>
+</ul>
+</div>
+<a name="N1003E"></a><a name="Latching+implementation"></a>
+<h2 class="boxed">Latching implementation</h2>
+<div class="section">
+<p>Derby uses latches on pages to get exclusive access to the page while reading or writing the page (Derby only uses exclusive latches, no shared latches are used). In order to prevent deadlocks latches requested while holding other latches are always requested top/down and left to right. Btree splits are always left to right. If for any reason the code needs to break this protocol then it will first request the latch NOWAIT and if it can't get the latch it will release all current latches and wait for the latch it is trying to get, and then after obtaining it go about getting the other latches it needs for the particular operation. While traversing down the tree Derby may hold 2 latches: one on parent and one on child. It then continues doing "ladder" latching down the tree releasing the highest node when it has successfully got a new lower node latch. Latches are short term, only held while reading/modifying the page, never held while an I/O is happening. Structure modifications are all isolated from other operations through the use of latches.</p>
+</div>
+<a name="N10044"></a><a name="Locking+and+Isolation+Levels"></a>
+<h2 class="boxed">Locking and Isolation Levels</h2>
+<div class="section">
+<p>Derby uses data only locking for its logical row level locking. All isolation level implementation is done using logical locks (Derby does not support non-locking isolation such as multi-versioning).</p>
+<p>In data only locking the key for all locks whether obtained in the BTree or in the base table is the address of the base table row. In the BTree case the address of the base table row ({@link org.apache.derby.iapi.types.RowLocation RowLocation}) is always the last column of the leaf index entry ({@link org.apache.derby.impl.sql.execute.IndexRow IndexRows}).</p>
+<dl>
+<dt>Serializable</dt>
+<dd>Derby uses "previous key" locking to prevent phantoms from being inserted into a range being accessed by a scan. A scan always locks the index row just "previous" to the first entry that satisfies the scan condition and then all index entries going forward until the end of the scan. All row locks are held until end of transaction. Inserts also get previous key locks, which will block if the existing row is locked.</dd>
+<dt>Repeatable Read</dt>
+<dd>Same as serializable except that no previous key locking is done.</dd>
+<dt>Read Committed</dt>
+<dd>Write locks are held until end of transaction. Read locks are released once the query in question no longer needs them.</dd>
+<dt>Read Uncommitted</dt>
+<dd>No row locks are acquired. The code still gets table level intent locks to prevent concurrent DDL during the query.</dd>
+</dl>
+</div>
+<a name="N1005D"></a><a name="BTree+Structure+Modifications"></a>
+<h2 class="boxed">BTree Structure Modifications</h2>
+<div class="section">
+<p>In Derby, SMOs (structure modification operations - ie. page splits), only happen top down. This is not as concurrent as bottom up in <a class="external" href="http://www.almaden.ibm.com/u/mohan/RJ6846.pdf">ARIES/IM</a>, but is simpler. As in ARIES/IM <q>Not more than 2 index pages are held latched simultaneously at anytime. In order to improve concurrency and to avoid deadlocks involving latches, even those latches are not held while waiting for a lock wich is not immediately grantable. No data page latch is held or acquired during an index access. Latch coupling is used while traversing the tree - ie. the latch on a parent page is held while requesting a latch on a child page.</q>
+</p>
+<p>In the case of a simple split, exclusive latch is held on P (parent), and C (child). If there is room in P, then new page C1 (child right) is created, new descriminator is inserted into P, and rows moved to C1. Latches are held on P and C for duration of split, and then released.</p>
+<p>The hard case is when P does not have room for descriminator key. In this case all latches are released, and Derby does a split pass from top to bottom, and will split the internal nodes that do not have room for the decrimator key. Note this may result in more splits than necessary for this particular insert, but the assumption is that the splits will have to happen eventually anyway. After this split pass is done, the search for the insert starts again from top down, but it must once again check for space because it has given up all its latches and some other transaction may have acquired the space in the meantime.</p>
+<p>Optimization is possible to remember C and see if it is right location, and/or use sideway pointers to search right rather than do research of tree.</p>
+</div>
+<a name="N1006F"></a><a name="Logical+Key+Deletes"></a>
+<h2 class="boxed">Logical Key Deletes</h2>
+<div class="section">
+<p>In both the BTree and the Heap, deletes are first executed by marking a "deleted" bit. This is to insure space on the page for abort, since row level locking will allow other rows on the page to be modified conncurrently with the transaction executing the delete. The system uses a background daemon to schedule work after commit to reclaim the space of the deleted rows. A row marked deleted can be "purged" if one can obtain a lock on it (if it was an uncommitted delete then the transaction doing the commit would still have an exclusive lock on the row).</p>
+</div>
+<a name="N10075"></a><a name="Garbage+Collection+of+deleted+keys"></a>
+<h2 class="boxed">Garbage Collection of deleted keys</h2>
+<div class="section">
+<p>Since rows are only marked as "deleted", and not physically removed, it is necessary to perform space reclamation on deleted rows.</p>
+</div>
+<a name="N1007B"></a><a name="Logging+and+Recovery"></a>
+<h2 class="boxed">Logging and Recovery</h2>
+<div class="section">
+<p>Derby uses physical redo and logical undo for BTree inserts and deletes. Logical undo is simplified as a result of using logical key deletes. If keys were physically removed during deletes, then the undo of a key delete would have required an insert operation which can potentially lead to page splits at various levels within the tree. Since the key is not physically removed, but only marked as "deleted", undoing a key delete is accomplished easily. However, since the page where the insert or delete should take place may have moved, it may be necessary to search for the page.</p>
+<p>Structure modification operations (SMOs) are handled as independent internal transactions and commit separately from the transaction that initiated the SMO. Once an SMO has been completed successfully, it is not undone, even if the transaction that caused it decides to abort. During restart recovery undo phase, incomplete internal transactions are undone BEFORE any regular transactions. This ensures that the BTrees are structurally consistent before normal undo begins.</p>
+</div>
+</div>
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("<text>Last Published:</text> " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2004-2005 Apache Software Foundation</div>
+<div id="feedback">
+    Send feedback about the website to:
+  <a id="feedbackto" href="mailto:derby-dev@db.apache.org?subject=Feedback%C2%A0papers/btree_package.html">derby-dev@db.apache.org</a>
+</div>
+</div>
+</body>
+</html>

Propchange: db/derby/site/trunk/build/site/papers/btree_package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: db/derby/site/trunk/build/site/papers/derby_arch.html
URL: http://svn.apache.org/viewcvs/db/derby/site/trunk/build/site/papers/derby_arch.html?rev=278552&r1=278551&r2=278552&view=diff
==============================================================================
--- db/derby/site/trunk/build/site/papers/derby_arch.html (original)
+++ db/derby/site/trunk/build/site/papers/derby_arch.html Sat Sep  3 20:11:25 2005
@@ -81,6 +81,9 @@
 <div class="menupagetitle">Architecture</div>
 </div>
 <div class="menuitem">
+<a href="../papers/btree_package.html">BTree</a>
+</div>
+<div class="menuitem">
 <a href="../papers/pageformats.html">Disk Page Format</a>
 </div>
 <div class="menuitem">

Modified: db/derby/site/trunk/build/site/papers/derby_htw.html
URL: http://svn.apache.org/viewcvs/db/derby/site/trunk/build/site/papers/derby_htw.html?rev=278552&r1=278551&r2=278552&view=diff
==============================================================================
--- db/derby/site/trunk/build/site/papers/derby_htw.html (original)
+++ db/derby/site/trunk/build/site/papers/derby_htw.html Sat Sep  3 20:11:25 2005
@@ -81,6 +81,9 @@
 <a href="../papers/derby_arch.html">Architecture</a>
 </div>
 <div class="menuitem">
+<a href="../papers/btree_package.html">BTree</a>
+</div>
+<div class="menuitem">
 <a href="../papers/pageformats.html">Disk Page Format</a>
 </div>
 <div class="menupage">

Modified: db/derby/site/trunk/build/site/papers/derby_web.html
URL: http://svn.apache.org/viewcvs/db/derby/site/trunk/build/site/papers/derby_web.html?rev=278552&r1=278551&r2=278552&view=diff
==============================================================================
--- db/derby/site/trunk/build/site/papers/derby_web.html (original)
+++ db/derby/site/trunk/build/site/papers/derby_web.html Sat Sep  3 20:11:25 2005
@@ -81,6 +81,9 @@
 <a href="../papers/derby_arch.html">Architecture</a>
 </div>
 <div class="menuitem">
+<a href="../papers/btree_package.html">BTree</a>
+</div>
+<div class="menuitem">
 <a href="../papers/pageformats.html">Disk Page Format</a>
 </div>
 <div class="menuitem">

Modified: db/derby/site/trunk/build/site/papers/fortune_tut.html
URL: http://svn.apache.org/viewcvs/db/derby/site/trunk/build/site/papers/fortune_tut.html?rev=278552&r1=278551&r2=278552&view=diff
==============================================================================
--- db/derby/site/trunk/build/site/papers/fortune_tut.html (original)
+++ db/derby/site/trunk/build/site/papers/fortune_tut.html Sat Sep  3 20:11:25 2005
@@ -81,6 +81,9 @@
 <a href="../papers/derby_arch.html">Architecture</a>
 </div>
 <div class="menuitem">
+<a href="../papers/btree_package.html">BTree</a>
+</div>
+<div class="menuitem">
 <a href="../papers/pageformats.html">Disk Page Format</a>
 </div>
 <div class="menuitem">

Modified: db/derby/site/trunk/build/site/papers/index.html
URL: http://svn.apache.org/viewcvs/db/derby/site/trunk/build/site/papers/index.html?rev=278552&r1=278551&r2=278552&view=diff
==============================================================================
--- db/derby/site/trunk/build/site/papers/index.html (original)
+++ db/derby/site/trunk/build/site/papers/index.html Sat Sep  3 20:11:25 2005
@@ -81,6 +81,9 @@
 <a href="../papers/derby_arch.html">Architecture</a>
 </div>
 <div class="menuitem">
+<a href="../papers/btree_package.html">BTree</a>
+</div>
+<div class="menuitem">
 <a href="../papers/pageformats.html">Disk Page Format</a>
 </div>
 <div class="menuitem">
@@ -221,6 +224,7 @@
 
 </tr>
 
+
 <tr>
 <td colspan="1" rowspan="1"> Architecture</td>
     <td colspan="1" rowspan="1"> <a href="derby_arch.html">Derby Engine Architecture Overview</a></td>
@@ -228,6 +232,13 @@
 </tr>
 
 <tr>
+<td colspan="1" rowspan="1"> BTree</td>
+    <td colspan="1" rowspan="1"> <a href="btree_package.html">BTree package documentation</a></td>
+
+</tr>
+
+
+<tr>
 <td colspan="1" rowspan="1">Disk Page Format</td>
     <td colspan="1" rowspan="1"><a href="pageformats.html">Derby On Disk Page Format</a></td>
 
@@ -289,7 +300,7 @@
 </div>
 
 
-<a name="N100CA"></a><a name="Derby+Network+Client"></a>
+<a name="N100D9"></a><a name="Derby+Network+Client"></a>
 <h2 class="boxed">Derby Network Client</h2>
 <div class="section">
 <p>
@@ -312,7 +323,7 @@
 
 
 
-<a name="N100F0"></a><a name="Presentations"></a>
+<a name="N100FF"></a><a name="Presentations"></a>
 <h2 class="boxed">Presentations</h2>
 <div class="section">
 <ul>
@@ -386,7 +397,7 @@
 </div>
 
 
-<a name="N1016D"></a><a name="Instruction"></a>
+<a name="N1017C"></a><a name="Instruction"></a>
 <h2 class="boxed">Instruction</h2>
 <div class="section">
 <ul>
@@ -427,7 +438,7 @@
 </div>
 
 
-<a name="N1019A"></a><a name="How+to+Contribute+Papers"></a>
+<a name="N101A9"></a><a name="How+to+Contribute+Papers"></a>
 <h2 class="boxed">How to Contribute Papers</h2>
 <div class="section">
 <p>
@@ -519,7 +530,7 @@
 
 
 <p>
-<em>Last Updated: August 24, 2005</em>
+<em>Last Updated: September 3, 2005</em>
 </p>
 
 </div>

Modified: db/derby/site/trunk/build/site/papers/logformats.html
URL: http://svn.apache.org/viewcvs/db/derby/site/trunk/build/site/papers/logformats.html?rev=278552&r1=278551&r2=278552&view=diff
==============================================================================
--- db/derby/site/trunk/build/site/papers/logformats.html (original)
+++ db/derby/site/trunk/build/site/papers/logformats.html Sat Sep  3 20:11:25 2005
@@ -81,6 +81,9 @@
 <a href="../papers/derby_arch.html">Architecture</a>
 </div>
 <div class="menuitem">
+<a href="../papers/btree_package.html">BTree</a>
+</div>
+<div class="menuitem">
 <a href="../papers/pageformats.html">Disk Page Format</a>
 </div>
 <div class="menuitem">

Modified: db/derby/site/trunk/build/site/papers/optimizer.html
URL: http://svn.apache.org/viewcvs/db/derby/site/trunk/build/site/papers/optimizer.html?rev=278552&r1=278551&r2=278552&view=diff
==============================================================================
--- db/derby/site/trunk/build/site/papers/optimizer.html (original)
+++ db/derby/site/trunk/build/site/papers/optimizer.html Sat Sep  3 20:11:25 2005
@@ -81,6 +81,9 @@
 <a href="../papers/derby_arch.html">Architecture</a>
 </div>
 <div class="menuitem">
+<a href="../papers/btree_package.html">BTree</a>
+</div>
+<div class="menuitem">
 <a href="../papers/pageformats.html">Disk Page Format</a>
 </div>
 <div class="menuitem">

Modified: db/derby/site/trunk/build/site/papers/pageformats.html
URL: http://svn.apache.org/viewcvs/db/derby/site/trunk/build/site/papers/pageformats.html?rev=278552&r1=278551&r2=278552&view=diff
==============================================================================
--- db/derby/site/trunk/build/site/papers/pageformats.html (original)
+++ db/derby/site/trunk/build/site/papers/pageformats.html Sat Sep  3 20:11:25 2005
@@ -80,6 +80,9 @@
 <div class="menuitem">
 <a href="../papers/derby_arch.html">Architecture</a>
 </div>
+<div class="menuitem">
+<a href="../papers/btree_package.html">BTree</a>
+</div>
 <div class="menupage">
 <div class="menupagetitle">Disk Page Format</div>
 </div>

Modified: db/derby/site/trunk/build/site/papers/recovery.html
URL: http://svn.apache.org/viewcvs/db/derby/site/trunk/build/site/papers/recovery.html?rev=278552&r1=278551&r2=278552&view=diff
==============================================================================
--- db/derby/site/trunk/build/site/papers/recovery.html (original)
+++ db/derby/site/trunk/build/site/papers/recovery.html Sat Sep  3 20:11:25 2005
@@ -81,6 +81,9 @@
 <a href="../papers/derby_arch.html">Architecture</a>
 </div>
 <div class="menuitem">
+<a href="../papers/btree_package.html">BTree</a>
+</div>
+<div class="menuitem">
 <a href="../papers/pageformats.html">Disk Page Format</a>
 </div>
 <div class="menuitem">

Modified: db/derby/site/trunk/build/site/papers/versionupgrade.html
URL: http://svn.apache.org/viewcvs/db/derby/site/trunk/build/site/papers/versionupgrade.html?rev=278552&r1=278551&r2=278552&view=diff
==============================================================================
--- db/derby/site/trunk/build/site/papers/versionupgrade.html (original)
+++ db/derby/site/trunk/build/site/papers/versionupgrade.html Sat Sep  3 20:11:25 2005
@@ -81,6 +81,9 @@
 <a href="../papers/derby_arch.html">Architecture</a>
 </div>
 <div class="menuitem">
+<a href="../papers/btree_package.html">BTree</a>
+</div>
+<div class="menuitem">
 <a href="../papers/pageformats.html">Disk Page Format</a>
 </div>
 <div class="menuitem">

Added: db/derby/site/trunk/src/documentation/content/xdocs/papers/btree_package.html
URL: http://svn.apache.org/viewcvs/db/derby/site/trunk/src/documentation/content/xdocs/papers/btree_package.html?rev=278552&view=auto
==============================================================================
--- db/derby/site/trunk/src/documentation/content/xdocs/papers/btree_package.html (added)
+++ db/derby/site/trunk/src/documentation/content/xdocs/papers/btree_package.html Sat Sep  3 20:11:25 2005
@@ -0,0 +1,177 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
+<title>org.apache.derby.impl.store.access.btree</title>
+</head>
+<body>
+<p>Implements BTree access method, which is the basis for SQL indexes.</p>
+<h1>Overview</h1>
+<p>
+Derby implements secondary SQL indexes as BTrees. The high level
+features of the BTree implementation are:
+</p>
+<ol>
+	<li>Derby implements the standard B+Tree algorithm, in which all keys
+	are stored in leaf pages, and the upper levels are organized as a
+	redundant index for enabling rapid location of a particular key. See <a
+		href="http://portal.acm.org/citation.cfm?id=356776"> <cite>The
+	Ubiquitous B-Tree, Douglas Comer</cite></a> for a definition of the
+	B+Tree.</li>
+	<li>The BTree implementation supports concurrency using page level
+	latches, and recovery using a Write Ahead Log.</li>
+	<li>Derby uses data only locking for its logical row level locking.</li>
+	<li>Derby supports all four SQL isolation levels, serializable,
+	repeatable read, read committed and read uncommitted.</li>
+	<li>Derby uses logical key deletes. This enables it to perform undos
+	during rollbacks and restart recovery as single page operations.</li>
+</ol>
+<h1>High level structure of the B+Tree</h1>
+<ul>
+	<li>The first row in all pages, branch or leaf, is a control row. In
+	branch pages, this is a {@link
+	org.apache.derby.impl.store.access.btree.BranchControlRow
+	BranchControlRow} and in leaf pages, it is a {@link
+	org.apache.derby.impl.store.access.btree.LeafControlRow
+	LeafControlRow}.</li>
+	<li>In addition to the BranchControlRow, branch level pages contain
+	{@link org.apache.derby.impl.store.access.btree.BranchRow BranchRows}.
+	</li>
+	<li>At branch level, a page is linked to is left and right siblings,
+	parent, as well as children. The number of child pages is 1 greater
+	than the number of BranchRows in the page. The leftmost child pointer
+	is stored in the BranchControlRow itself. Each BranchRow contains a
+	child pointer as its last column.</li>
+	<li>At leaf level also, a page is linked to its left and right
+	siblings, and the parent. In addition to the LeafControlRow, leaf level
+	pages contain {@link org.apache.derby.impl.sql.execute.IndexRow
+	IndexRows}. The last column in an IndexRow is a {@link
+	org.apache.derby.iapi.types.RowLocation RowLocation}.</li>
+	<li>IndexRows are generated by the {@link
+	org.apache.derby.iapi.sql.dictionary.IndexRowGenerator
+	IndexRowGenerator}.</li>
+</ul>
+<h1>Latching implementation</h1>
+<p>Derby uses latches on pages to get exclusive access to the page while
+reading or writing the page (Derby only uses exclusive latches, no
+shared latches are used). In order to prevent deadlocks latches
+requested while holding other latches are always requested top/down and
+left to right. Btree splits are always left to right. If for any reason
+the code needs to break this protocol then it will first request the
+latch NOWAIT and if it can't get the latch it will release all current
+latches and wait for the latch it is trying to get, and then after
+obtaining it go about getting the other latches it needs for the
+particular operation. While traversing down the tree Derby may hold 2
+latches: one on parent and one on child. It then continues doing
+"ladder" latching down the tree releasing the highest node when it has
+successfully got a new lower node latch. Latches are short term, only
+held while reading/modifying the page, never held while an I/O is
+happening. Structure modifications are all isolated from other
+operations through the use of latches.</p>
+<h1>Locking and Isolation Levels</h1>
+<p>Derby uses data only locking for its logical row level locking. All
+isolation level implementation is done using logical locks (Derby does
+not support non-locking isolation such as multi-versioning).</p>
+<p>In data only locking the key for all locks whether obtained in the
+BTree or in the base table is the address of the base table row. In the
+BTree case the address of the base table row ({@link
+org.apache.derby.iapi.types.RowLocation RowLocation}) is always the last
+column of the leaf index entry ({@link
+org.apache.derby.impl.sql.execute.IndexRow IndexRows}).</p>
+<dl>
+	<dt>Serializable</dt>
+	<dd>Derby uses "previous key" locking to prevent phantoms from being
+	inserted into a range being accessed by a scan. A scan always locks the
+	index row just "previous" to the first entry that satisfies the scan
+	condition and then all index entries going forward until the end of the
+	scan. All row locks are held until end of transaction. Inserts also get
+	previous key locks, which will block if the existing row is locked.</dd>
+	<dt>Repeatable Read</dt>
+	<dd>Same as serializable except that no previous key locking is done.</dd>
+	<dt>Read Committed</dt>
+	<dd>Write locks are held until end of transaction. Read locks are
+	released once the query in question no longer needs them.</dd>
+	<dt>Read Uncommitted</dt>
+	<dd>No row locks are acquired. The code still gets table level intent
+	locks to prevent concurrent DDL during the query.</dd>
+</dl>
+<h1>BTree Structure Modifications</h1>
+<p>In Derby, SMOs (structure modification operations - ie. page splits),
+only happen top down. This is not as concurrent as bottom up in <a
+	href="http://www.almaden.ibm.com/u/mohan/RJ6846.pdf">ARIES/IM</a>, but
+is simpler. As in ARIES/IM <q>Not more than 2 index pages are held
+latched simultaneously at anytime. In order to improve concurrency and
+to avoid deadlocks involving latches, even those latches are not held
+while waiting for a lock wich is not immediately grantable. No data page
+latch is held or acquired during an index access. Latch coupling is used
+while traversing the tree - ie. the latch on a parent page is held while
+requesting a latch on a child page.</q></p>
+<p>In the case of a simple split, exclusive latch is held on P (parent),
+and C (child). If there is room in P, then new page C1 (child right) is
+created, new descriminator is inserted into P, and rows moved to C1.
+Latches are held on P and C for duration of split, and then released.</p>
+<p>The hard case is when P does not have room for descriminator key. In
+this case all latches are released, and Derby does a split pass from top
+to bottom, and will split the internal nodes that do not have room for
+the decrimator key. Note this may result in more splits than necessary
+for this particular insert, but the assumption is that the splits will
+have to happen eventually anyway. After this split pass is done, the
+search for the insert starts again from top down, but it must once again
+check for space because it has given up all its latches and some other
+transaction may have acquired the space in the meantime.</p>
+<p>Optimization is possible to remember C and see if it is right
+location, and/or use sideway pointers to search right rather than do
+research of tree.</p>
+<h1>Logical Key Deletes</h1>
+<p>In both the BTree and the Heap, deletes are first executed by marking
+a "deleted" bit. This is to insure space on the page for abort, since
+row level locking will allow other rows on the page to be modified
+conncurrently with the transaction executing the delete. The system uses
+a background daemon to schedule work after commit to reclaim the space
+of the deleted rows. A row marked deleted can be "purged" if one can
+obtain a lock on it (if it was an uncommitted delete then the
+transaction doing the commit would still have an exclusive lock on the
+row).</p>
+<h1>Garbage Collection of deleted keys</h1>
+<p>Since rows are only marked as "deleted", and not physically removed,
+it is necessary to perform space reclamation on deleted rows.</p>
+<p></p>
+<h3>Online during BTREE split</h3>
+<p>Whenever there is not enough room on a leaf to do an insert the code
+attempts to find space on the leaf, by checking if it can reclaim any
+committed deletes on that leaf. That work only requires the latch on the
+leaf and NOWAIT row write locks. It is expected that most of the space
+reclaim done in the BTree goes through this path. Most of this work is
+done in {@link
+org.apache.derby.impl.store.access.btree.BTreeController.reclaim_deleted_rows}.</p>
+<h3>BTREE post commit work</h3>
+<p>Whenever a delete operation deletes the last row from a leaf page
+then a BtreePostCommit job is queued to be executed after the
+transaction which did the delete commits. This work currently requires a
+table level lock as page merges have not been implemented to be allowed
+concurrent with other operations. Many DBMSes don't even try to do page
+merges except when called from some sort of reorg utility. If all rows
+on page are purged, then the page will move to the free list and perform
+a merge to the tree.</p>
+<p>It is expected that the normal space reclamation happens with row
+locks during btree split, which is why not much work has been done to
+optimize btree post commit path</p>
+<h1>Logging and Recovery</h1>
+<p>Derby uses physical redo and logical undo for BTree inserts and
+deletes. Logical undo is simplified as a result of using logical key
+deletes. If keys were physically removed during deletes, then the undo
+of a key delete would have required an insert operation which can
+potentially lead to page splits at various levels within the tree. Since
+the key is not physically removed, but only marked as "deleted", undoing
+a key delete is accomplished easily. However, since the page where the
+insert or delete should take place may have moved, it may be necessary
+to search for the page.</p>
+<p>Structure modification operations (SMOs) are handled as independent
+internal transactions and commit separately from the transaction that
+initiated the SMO. Once an SMO has been completed successfully, it is
+not undone, even if the transaction that caused it decides to abort.
+During restart recovery undo phase, incomplete internal transactions are
+undone BEFORE any regular transactions. This ensures that the BTrees are
+structurally consistent before normal undo begins.</p>
+</body>
+</html>

Propchange: db/derby/site/trunk/src/documentation/content/xdocs/papers/btree_package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: db/derby/site/trunk/src/documentation/content/xdocs/papers/index.xml
URL: http://svn.apache.org/viewcvs/db/derby/site/trunk/src/documentation/content/xdocs/papers/index.xml?rev=278552&r1=278551&r2=278552&view=diff
==============================================================================
--- db/derby/site/trunk/src/documentation/content/xdocs/papers/index.xml (original)
+++ db/derby/site/trunk/src/documentation/content/xdocs/papers/index.xml Sat Sep  3 20:11:25 2005
@@ -24,9 +24,14 @@
 <tr><td> Javadoc</td>
     <td> <a href="http://db.apache.org/derby/javadoc/engine/">Apache Derby Engine Documentation</a></td>
 </tr>
+
 <tr><td> Architecture</td>
     <td> <a href="derby_arch.html">Derby Engine Architecture Overview</a></td>
 </tr>
+<tr><td> BTree</td>
+    <td> <a href="btree_package.html">BTree package documentation</a></td>
+</tr>
+
 <tr><td>Disk Page Format</td>
     <td><a href="pageformats.html">Derby On Disk Page Format</a></td>
 </tr>
@@ -254,6 +259,6 @@
 
 </section>
 
-<p><em>Last Updated: August 24, 2005</em></p>
+<p><em>Last Updated: September 3, 2005</em></p>
 </body>
 </document>

Modified: db/derby/site/trunk/src/documentation/content/xdocs/site.xml
URL: http://svn.apache.org/viewcvs/db/derby/site/trunk/src/documentation/content/xdocs/site.xml?rev=278552&r1=278551&r2=278552&view=diff
==============================================================================
--- db/derby/site/trunk/src/documentation/content/xdocs/site.xml (original)
+++ db/derby/site/trunk/src/documentation/content/xdocs/site.xml Sat Sep  3 20:11:25 2005
@@ -28,6 +28,7 @@
   <engine label="Derby Engine" href="papers/" tab="papers">
       <javadoc2  label="Javadoc"                href="http://db.apache.org/derby/javadoc/engine"/>
       <index     label="Architecture"           href="derby_arch.html"/>
+      <index     label="BTree"                  href="btree_package.html"/>
       <pformat   label="Disk Page Format"       href="pageformats.html"/>
       <misc      label="How Things Work"        href="derby_htw.html"/>
       <intersect label="Intersect &amp; Except" href="Intersect-design.html"/>



Mime
View raw message