db-general mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rhille...@apache.org
Subject svn commit: r843115 [26/44] - in /websites/production/db/content/derby: ./ binaries/ blogs/ blogs/images/ dev/ docs/ images/ integrate/ integrate/plugin_help/ integrate/plugin_help/images/ logo/ manuals/ papers/ papers/DerbyTut/ releases/ skin/ skin/cs...
Date Wed, 19 Dec 2012 18:20:28 GMT
Added: websites/production/db/content/derby/papers/pageformats.html
==============================================================================
--- websites/production/db/content/derby/papers/pageformats.html (added)
+++ websites/production/db/content/derby/papers/pageformats.html Wed Dec 19 18:20:21 2012
@@ -0,0 +1,1440 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Derby On Disk Page Format</title>
+<link type="text/css" href="../skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="../skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="../skin/print.css" rel="stylesheet">
+<link type="text/css" href="../skin/profile.css" rel="stylesheet">
+<script src="../skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="../skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="../skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="../">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">apache</a> &gt; <a href="http://db.apache.org/">db</a><script src="../skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://db.apache.org/derby"><img class="logoImage" alt="Apache Derby" src="../images/derby-logo-web.png" title="Derby is a zero-admin Java RDBMS"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogoA1">
+<a href="http://db.apache.org"><img class="logoImage" alt="Apache DB Project" src="../images/db-logo-white.png" title="Apache DB creates and maintains database solutions."></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="../index.html">Home</a>
+</li>
+<li>
+<a class="unselected" href="../quick_start.html">Quick Start</a>
+</li>
+<li>
+<a class="unselected" href="../derby_downloads.html">Download</a>
+</li>
+<li>
+<a class="unselected" href="../derby_comm.html">Community</a>
+</li>
+<li>
+<a class="unselected" href="../manuals/index.html">Documentation</a>
+</li>
+<li class="current">
+<a class="selected" href="../blogs/index.html">Resources</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_1.1', '../skin/')" id="menu_1.1Title" class="menutitle">Blogs and Articles About Derby</div>
+<div id="menu_1.1" class="menuitemgroup">
+<div class="menuitem">
+<a href="../blogs/index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#blogs">Blogs</a>
+</div>
+<div onclick="SwitchMenu('menu_1.1.3', '../skin/')" id="menu_1.1.3Title" class="menutitle">Articles</div>
+<div id="menu_1.1.3" class="menuitemgroup">
+<div onclick="SwitchMenu('menu_1.1.3.1', '../skin/')" id="menu_1.1.3.1Title" class="menutitle">Tutorials, Tips and Tuning</div>
+<div id="menu_1.1.3.1" class="menuitemgroup">
+<div class="menuitem">
+<a href="../blogs/index.html#getstarted">Getting Started</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#features">Features, Hints and Tips</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#security">Security</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#performance">Performance and Tuning</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.1.3.2', '../skin/')" id="menu_1.1.3.2Title" class="menutitle">Tools and Migration</div>
+<div id="menu_1.1.3.2" class="menuitemgroup">
+<div class="menuitem">
+<a href="../blogs/index.html#tools">Tools</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#migration">Migration</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.1.3.3', '../skin/')" id="menu_1.1.3.3Title" class="menutitle">Applications</div>
+<div id="menu_1.1.3.3" class="menuitemgroup">
+<div class="menuitem">
+<a href="../blogs/index.html#client">Client</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#middletier">Middle Tier</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#persistence">Persistence</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#scalability">Scalability and Failover</a>
+</div>
+</div>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.2', '../skin/')" id="menu_1.2Title" class="menutitle">Integration With Other Products</div>
+<div id="menu_1.2" class="menuitemgroup">
+<div class="menuitem">
+<a href="../integrate/index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="../integrate/index.html#uses">What works with Derby?</a>
+</div>
+<div class="menuitem">
+<a href="../integrate/index.html#products">Product Writeups</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.3', '../skin/')" id="menu_1.3Title" class="menutitle">Eclipse Plug-ins</div>
+<div id="menu_1.3" class="menuitemgroup">
+<div class="menuitem">
+<a href="../integrate/derby_plugin_info.html">Info</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_selected_1.4', '../skin/')" id="menu_selected_1.4Title" class="menutitle" style="background-image: url('../skin/images/chapter_open.gif');">Papers and Presentations</div>
+<div id="menu_selected_1.4" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="../papers/index.html">Overview</a>
+</div>
+<div onclick="SwitchMenu('menu_selected_1.4.2', '../skin/')" id="menu_selected_1.4.2Title" class="menutitle" style="background-image: url('../skin/images/chapter_open.gif');">Derby Engine</div>
+<div id="menu_selected_1.4.2" class="selectedmenuitemgroup" style="display: block;">
+<div onclick="SwitchMenu('menu_1.4.2.1', '../skin/')" id="menu_1.4.2.1Title" class="menutitle">Javadoc</div>
+<div id="menu_1.4.2.1" class="menuitemgroup">
+<div class="menuitem">
+<a href="http://db.apache.org/derby/javadoc/engine">Engine</a>
+</div>
+<div class="menuitem">
+<a href="http://db.apache.org/derby/javadoc/language">Language</a>
+</div>
+<div class="menuitem">
+<a href="http://db.apache.org/derby/javadoc/tools">Tools</a>
+</div>
+<div class="menuitem">
+<a href="http://db.apache.org/derby/javadoc/publishedapi">API</a>
+</div>
+</div>
+<div class="menuitem">
+<a href="../papers/derby_arch.html">Architecture</a>
+</div>
+<div class="menuitem">
+<a href="../papers/btree_package.html">BTree</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">Disk Page Format</div>
+</div>
+<div class="menuitem">
+<a href="../papers/derby_htw.html">How Things Work</a>
+</div>
+<div class="menuitem">
+<a href="../papers/Intersect-design.html">Intersect &amp; Except</a>
+</div>
+<div class="menuitem">
+<a href="../papers/JDBCImplementation.html">JDBC</a>
+</div>
+<div class="menuitem">
+<a href="../papers/logformats.html">Log Format</a>
+</div>
+<div class="menuitem">
+<a href="../papers/recovery.html">Logging &amp; Recovery</a>
+</div>
+<div class="menuitem">
+<a href="../papers/optimizer.html">Optimizer</a>
+</div>
+<div class="menuitem">
+<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/types/package-summary.html#package_description">Type System</a>
+</div>
+<div class="menuitem">
+<a href="../papers/versionupgrade.html">Versioning</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.4.3', '../skin/')" id="menu_1.4.3Title" class="menutitle">Derby Network Client</div>
+<div id="menu_1.4.3" class="menuitemgroup">
+<div class="menuitem">
+<a href="../papers/DerbyClientSpec.html">Functional Spec</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.4.4', '../skin/')" id="menu_1.4.4Title" class="menutitle">Derby Tutorial</div>
+<div id="menu_1.4.4" class="menuitemgroup">
+<div class="menuitem">
+<a href="../papers/DerbyTut/index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="../papers/DerbyTut/install_software.html">Step 1: Install Software</a>
+</div>
+<div class="menuitem">
+<a href="../papers/DerbyTut/ij_intro.html">Step 2: ij Basics</a>
+</div>
+<div class="menuitem">
+<a href="../papers/DerbyTut/embedded_intro.html">Step 3: Embedded Derby</a>
+</div>
+<div class="menuitem">
+<a href="../papers/DerbyTut/ns_intro.html">Step 4: Derby Network Server</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.4.5', '../skin/')" id="menu_1.4.5Title" class="menutitle">Presentations</div>
+<div id="menu_1.4.5" class="menuitemgroup">
+<div class="menuitem">
+<a href="../papers/ApacheCon.html">ApacheCon</a>
+</div>
+<div class="menuitem">
+<a href="../papers/MiscPresentations.html#Victorian+Java+User+Group">Victorian JUG 2008</a>
+</div>
+<div class="menuitem">
+<a href="../papers/MiscPresentations.html#OSCON+2005">OSCON 2005</a>
+</div>
+<div class="menuitem">
+<a href="../papers/MiscPresentations.html#Colorado+Software+Summit+2004">Colorado 2004</a>
+</div>
+</div>
+</div>
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<hr>
+<form action="http://www.google.com/search" method="get">
+<input value="db.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="18" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                  <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="../skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div class="trail">Font size: 
+	          &nbsp;<input value="Reset" class="resetfont" title="Reset text" onclick="ndeSetTextSize('reset'); return false;" type="button">      
+	          &nbsp;<input value="-a" class="smallerfont" title="Shrink text" onclick="ndeSetTextSize('decr'); return false;" type="button">
+	          &nbsp;<input value="+a" class="biggerfont" title="Enlarge text" onclick="ndeSetTextSize('incr'); return false;" type="button">
+</div>
+<h1>Derby On Disk Page Format</h1>
+<div class="abstract">This document describes the storage format of Derby disk pages. 
+
+    This is a work-in-progress derived from Javadoc comments and 
+
+    from explanations Mike Matrigali posted to the Derby lists. 
+
+    Please post questions, comments, and corrections to 
+
+    derby-dev@db.apache.org.
+
+    </div>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#introduction"> Introduction </a>
+</li>
+<li>
+<a href="#storedpage">Data Page Format</a>
+<ul class="minitoc">
+<li>
+<a href="#formatid">Format Id </a>
+</li>
+<li>
+<a href="#pageheader"> Page Header </a>
+</li>
+<li>
+<a href="#records"> Records </a>
+</li>
+<li>
+<a href="#slottable">Slot Offset Table</a>
+</li>
+<li>
+<a href="#checksum">Checksum</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#allocpage">Allocation Page</a>
+<ul class="minitoc">
+<li>
+<a href="#Alloc+Page+detailed+implementation+notes">
+
+	Alloc Page detailed implementation notes</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#Allocation+Extent">Allocation Extent</a>
+</li>
+</ul>
+</div>
+    
+<a name="N10010"></a><a name="introduction"></a>
+<h2 class="boxed"> Introduction </h2>
+<div class="section">
+<p>Derby stores table and index data in Containers, which currently map 
+
+        to files in the <span class="codefrag">seg0</span>
+
+        directory of the database. In the current Derby implementation there is a 1 to 1 mapping of
+
+        containers to files. Two containers never map to a single file and 1
+
+	  container never maps to multiple files.</p>
+<p>
+
+       Data is stored in pages within the container.</p>
+<p>A page contains a set of records, which can be accessed by "slot", which 
+
+        defines the order of the records on the page, or by "id" which defines 
+
+        the identity of the records on the page. Clients access records by both 
+
+        slot and id, depending on their needs.</p>
+<p>A Table or a BTree index provides a row-based access mechanism (row-based 
+
+        access interface is known as conglomerate). Rows are mapped to records 
+
+        in data pages; in case of a table, a single row can span multiple records in 
+
+        multiple pages.</p>
+<p>A container can have three types of pages:</p>
+<ul>
+        
+<li>Header Page - which is just a specialized version of the Alloc Page.</li>
+        
+<li>Data Pages which hold data, and</li>
+        
+<li>Alloc Pages which hold page allocation information. An Alloc page is a specialized version of the Data page.</li>
+      
+</ul>
+<p>The container can be visualised as:<br>
+<img alt="" src="container-format.png"></p>
+<p>
+
+Header Page is currently always page 0 of the container.  It
+
+contains information that raw store needs to maintain about the
+
+container once per container, and is currently implemented as an Alloc
+
+Page which "borrows" space from the alloc page for its information.
+
+The original decision was that the designers did not want to waste a whole page for
+
+header information, so a part of the page was used and the first allocation
+
+map was put on the second half of it. See <span class="codefrag">AllocPage.java</span> for info about layout and
+
+borrowing.
+
+</p>
+<p>
+        
+<a href="#allocpage"> Allocation Page</a> - After page 0, all subsequent Allocation pages only
+
+have allocation bit maps.
+
+</p>
+</div>
+    
+<a name="N10049"></a><a name="storedpage"></a>
+<h2 class="boxed">Data Page Format</h2>
+<div class="section">
+<p>A data page is broken into five sections. 
+
+        <img alt="" src="page-format.png"></p>
+<a name="N10055"></a><a name="formatid"></a>
+<h3 class="boxed">Format Id </h3>
+<p> The formatId is a 4-byte array; it contains the format Id of this 
+
+          page. The possible values are RAW_STORE_STORED_PAGE or RAW_STORE_ALLOC_PAGE.</p>
+<a name="N1005F"></a><a name="pageheader"></a>
+<h3 class="boxed"> Page Header </h3>
+<p> The page header is a fixed size, 56 bytes. </p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+          
+<tr>
+            
+<th colspan="1" rowspan="1">Size</th>
+            <th colspan="1" rowspan="1">Type</th>
+            <th colspan="1" rowspan="1">Description</th>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">1 byte</td>
+            <td colspan="1" rowspan="1">boolean</td>
+            <td colspan="1" rowspan="1">is page an overflow page</td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">1 byte</td>
+            <td colspan="1" rowspan="1">byte</td>
+            <td colspan="1" rowspan="1">
+              
+<p>page status is either VALID_PAGE or INVALID_PAGE (a field 
+
+                  maintained in base page)</p>
+              
+<p>page goes thru the following transition: 
+
+                  <br>
+
+                  VALID_PAGE &lt;-&gt; deallocated page -&gt; free page &lt;-&gt; 
+
+                  VALID_PAGE</p>
+              
+<p>deallocated and free page are both INVALID_PAGE as far as BasePage 
+
+                  is concerned. 
+
+                  <br>
+
+                  When a page is deallocated, it transitions from VALID_PAGE 
+
+                  to INVALID_PAGE. 
+
+                  <br>
+
+                  When a page is allocated, it transitions from INVALID_PAGE 
+
+                  to VALID_PAGE.</p>
+            
+</td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">8 bytes</td>
+            <td colspan="1" rowspan="1">long</td>
+            <td colspan="1" rowspan="1">pageVersion (a field maintained in base page)</td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">2 bytes</td>
+            <td colspan="1" rowspan="1">unsigned short</td>
+            <td colspan="1" rowspan="1">number of slots in slot offset table</td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">4 bytes</td>
+            <td colspan="1" rowspan="1">integer</td>
+            <td colspan="1" rowspan="1">next record identifier</td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">4 bytes</td>
+            <td colspan="1" rowspan="1">integer</td>
+            <td colspan="1" rowspan="1">generation number of this page (Future Use)</td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">4 bytes</td>
+            <td colspan="1" rowspan="1">integer</td>
+            <td colspan="1" rowspan="1">previous generation of this page (Future Use)</td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">8 bytes</td>
+            <td colspan="1" rowspan="1">bipLocation</td>
+            <td colspan="1" rowspan="1">the location of the beforeimage page (Future Use)</td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">2 bytes</td>
+            <td colspan="1" rowspan="1">unsigned short</td>
+            <td colspan="1" rowspan="1">number of deleted rows on page. (new release 2.0)</td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">2 bytes</td>
+            <td colspan="1" rowspan="1">short</td>
+            <td colspan="1" rowspan="1">spare for future use</td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">4 bytes</td>
+            <td colspan="1" rowspan="1">integer</td>
+            <td colspan="1" rowspan="1">spare for future use (encryption uses to write random bytes 
+
+                here).</td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">8 bytes</td>
+            <td colspan="1" rowspan="1">long</td>
+            <td colspan="1" rowspan="1">spare for future use</td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">8 bytes</td>
+            <td colspan="1" rowspan="1">long</td>
+            <td colspan="1" rowspan="1">spare for future use</td>
+          
+</tr>
+        
+</table>
+<div class="note">
+<div class="label">Note</div>
+<div class="content">Spare space is guaranteed to be written with "0", so that future 
+
+            use of field should either not use "0" as a valid data item or 
+
+            pick 0 as a valid default value so that on the fly upgrade can assume 
+
+            that 0 means field was never assigned. </div>
+</div>
+<a name="N1017A"></a><a name="records"></a>
+<h3 class="boxed"> Records </h3>
+<p>The records section contains zero or more records. Each record starts 
+
+            with a Record Header</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+          
+<caption>Record Header</caption>
+          
+<tr>
+            
+<th colspan="1" rowspan="1">Type</th>
+            <th colspan="1" rowspan="1">Description</th>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">1 byte</td>
+            <td colspan="1" rowspan="1">
+              
+<p>Status bits for the record header</p>
+              
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+                
+<tr>
+                  
+<td colspan="1" rowspan="1">RECORD_DELETED</td>
+                  <td colspan="1" rowspan="1">used to indicate the record has been deleted</td>
+                
+</tr>
+                
+<tr>
+                  
+<td colspan="1" rowspan="1">RECORD_OVERFLOW</td>
+                  <td colspan="1" rowspan="1">used to indicate the record has been overflowed, it will 
+
+                      point to the overflow page and ID</td>
+                
+</tr>
+                
+<tr>
+                  
+<td colspan="1" rowspan="1">RECORD_HAS_FIRST_FIELD</td>
+                  <td colspan="1" rowspan="1">used to indicate that firstField is stored. 
+
+                      When RECORD_OVERFLOW and RECORD_HAS_FIRST_FIELD both are 
+
+                      set, part of record is on the page, the record header also 
+
+                      stores the overflow point to the next part of the record.</td>
+                
+</tr>
+                
+<tr>
+                  
+<td colspan="1" rowspan="1">RECORD_VALID_MASK</td>
+                  <td colspan="1" rowspan="1">A mask of valid bits that can be set currently, such that 
+
+                      the following assert can be made: </td>
+                
+</tr>
+              
+</table>
+            
+</td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">compressed int</td>
+            <td colspan="1" rowspan="1">record identifier</td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">compressed long</td>
+            <td colspan="1" rowspan="1">overflow page only if RECORD_OVERFLOW is set</td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">compressed int</td>
+            <td colspan="1" rowspan="1">overflow id only if RECORD_OVERFLOW is set</td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">compressed int</td>
+            <td colspan="1" rowspan="1">first field only if RECORD_HAS_FIRST_FIELD is set - otherwise 
+
+                0</td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">compressed int</td>
+            <td colspan="1" rowspan="1">number of fields in this portion - only if RECORD_OVERFLOW is 
+
+                false OR RECORD_HAS_FIRST_FIELD is true - otherwise 0</td>
+          
+</tr>
+        
+</table>
+<div class="note">
+<div class="label">Long Rows</div>
+<div class="content"> A row is long if all of its columns can't fit on a single page. 
+
+            When storing a long row, the segment of the row which fits on the 
+
+            page is left there, and a pointer column is added at the end of the 
+
+            row. It points to another row in the same container on a different 
+
+            page. That row will contain the next set of columns and a continuation 
+
+            pointer if necessary. The overflow portion will be on an "overflow" 
+
+            page, and that page may have overflow portions of other rows on it 
+
+            (unlike overflow columns). </div>
+</div>
+<p>The Record Header is followed by one or more fields. Each field contains 
+
+            a Field Header and optional Field Data.</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+          
+<caption>Stored Field Header Format</caption>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">status</td>
+            <td colspan="1" rowspan="1">
+              
+<p> The status is 1 byte, it indicates the state of the field. 
+
+                  A FieldHeader can be in the following states: </p>
+              
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+                
+<tr>
+                  
+<td colspan="1" rowspan="1">NULL</td>
+                  <td colspan="1" rowspan="1">if the field is NULL, no field data length is stored</td>
+                
+</tr>
+                
+<tr>
+                  
+<td colspan="1" rowspan="1">OVERFLOW</td>
+                  <td colspan="1" rowspan="1">indicates the field has been overflowed to another page. 
+
+                        overflow page and overflow ID is stored at the end of 
+
+                        the user data. field data length must be a number greater 
+
+                        or equal to 0, indicating the length of the field that 
+
+                        is stored on the current page. The format looks like this: 
+
+                        <img alt="" src="field-header-overflow.png">
+
+                        overflowPage will be written as compressed long, overflowId 
+
+                        will be written as compressed Int</td>
+                
+</tr>
+                
+<tr>
+                  
+<td colspan="1" rowspan="1">NONEXISTENT</td>
+                  <td colspan="1" rowspan="1">the field no longer exists, e.g. column has been dropped 
+
+                        during an alter table</td>
+                
+</tr>
+                
+<tr>
+                  
+<td colspan="1" rowspan="1">EXTENSIBLE</td>
+                  <td colspan="1" rowspan="1">the field is of user defined data type. The field may 
+
+                        be tagged.</td>
+                
+</tr>
+                
+<tr>
+                  
+<td colspan="1" rowspan="1">TAGGED</td>
+                  <td colspan="1" rowspan="1">the field is TAGGED if and only if it is EXTENSIBLE.</td>
+                
+</tr>
+                
+<tr>
+                  
+<td colspan="1" rowspan="1">FIXED</td>
+                  <td colspan="1" rowspan="1">the field is FIXED if and only if it is used in the 
+
+                        log records for version 1.2 and higher.</td>
+                
+</tr>
+              
+</table>
+            
+</td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">fieldDataLength</td>
+            <td colspan="1" rowspan="1"> The fieldDataLength is only set if the field is not NULL. It 
+
+                is the length of the field that is stored on the current page. 
+
+                The fieldDataLength is a variable length CompressedInt. </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">fieldData</td>
+            <td colspan="1" rowspan="1">
+              
+<p> Overflow page and overflow id are stored as field data. If 
+
+                the overflow bit in status is set, the field data is the overflow 
+
+                information. When the overflow bit is not set in status, then, 
+
+                fieldData is the actual user data for the field. That means, 
+
+                field header consists only of field status, and field data length. 
+
+                <br>
+
+                A non-overflow field: 
+
+                <br>
+<img alt="" src="field-header-non-overflow.png"><br>
+
+                An overflow field: 
+
+                <br>
+<img alt="" src="field-header-overflow.png"><br>
+<strong>overflowPage 
+
+                  and overflowID</strong>
+<br>
+
+                The overflowPage is a variable length CompressedLong, overflowID 
+
+                is a variable Length CompressedInt. They are only stored when 
+
+                the field state is OVERFLOW. And they are not stored in the field 
+
+                header. Instead, they are stored at the end of the field data. 
+
+                The reason we do that is to save a copy if the field has to overflow. </p>
+            
+</td>
+          
+</tr>
+        
+</table>
+<div class="note">
+<div class="label">Long Columns</div>
+<div class="content"> A column is long if it can't fit on a single page. A long column 
+
+            is marked as long in the base row, and its field contains a pointer 
+
+            to a chain of other rows in the same container which contain the data 
+
+            of the row. Each of the subsequent rows is on a page to itself. Each 
+
+            subsequent row, except for the last piece has 2 columns, the first 
+
+            is the next segment of the row and the second is the pointer to the 
+
+            following segment. The last segment only has the data segment. 
+
+          </div>
+</div>
+<a name="N102C3"></a><a name="slottable"></a>
+<h3 class="boxed">Slot Offset Table</h3>
+<p>The slot offset table is a table of 6 or 12 bytes per record, depending 
+
+          on the pageSize being less or greater than 64K: </p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+          
+<caption>Slot Table Record</caption>
+          
+<tr>
+            
+<th colspan="1" rowspan="1">Size</th>
+            <th colspan="1" rowspan="1">Content</th>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">2 bytes (unsigned short) or 4 bytes (int)</td>
+            <td colspan="1" rowspan="1">page offset for the record that is assigned to the slot</td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">2 bytes (unsigned short) or 4 bytes (int)</td>
+            <td colspan="1" rowspan="1">the length of the record on this page.</td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">2 bytes (unsigned short) or 4 bytes (int)</td>
+            <td colspan="1" rowspan="1">the length of the reserved number of bytes for this record on 
+
+                this page.</td>
+          
+</tr>
+        
+</table>
+<p>
+
+          First slot is slot 0. The slot table grows backwards. Slots are never 
+
+          left empty. </p>
+<a name="N1030A"></a><a name="checksum"></a>
+<h3 class="boxed">Checksum</h3>
+<p>8 bytes of a java.util.zip.CRC32 checksum of the entire page's contents 
+
+          without the 8 bytes representing the checksum.</p>
+</div>
+    
+<a name="N10315"></a><a name="allocpage"></a>
+<h2 class="boxed">Allocation Page</h2>
+<div class="section">
+<p> An allocation page of the file container extends a normal Stored page, 
+
+        with the exception that a hunk of space may be 'borrowed' by the file 
+
+        container to store the file header.</p>
+<p> The borrowed space is not visible to the alloc page even though it is 
+
+        present in the page data array. It is accessed directly by the FileContainer. 
+
+        Any change made to the borrowed space is not managed or seen by the allocation 
+
+        page.</p>
+<p> The reason for having this borrowed space is so that the container header 
+
+        does not need to have a page of its own. </p>
+<p>
+        
+<strong>Page Format</strong>
+        
+<br>
+
+        An allocation page extends a stored page, the on disk format is different 
+
+        from a stored page in that N bytes are 'borrowed' by the container and 
+
+        the page header of an allocation page will be slightly bigger than a normal 
+
+        stored page. These N bytes are stored between the page header and the record 
+
+        space.</p>
+<p> The reason why these N bytes can't simply be a row is that they need 
+
+        to be statically accessible by the container object to avoid a chicken 
+
+        and egg problem of the container object needing to instantiate an alloc 
+
+        page object before it can be objectified, and an alloc page object needing 
+
+        to instantiate a container object before it can be objectified. So these 
+
+        N bytes must be stored outside of the normal record interface yet they must 
+
+        be settable because only the first alloc page has this borrowed space. 
+
+        Other (non-first) alloc pages have N == 0. 
+
+        <br>
+<img alt="" src="alloc-page.png"></p>
+<p>
+
+	N is a byte that indicates the size of the borrowed space.  Once an alloc
+
+	page is initialized, the value of N cannot change.
+
+	</p>
+<p>
+
+	The maximum space that can be borrowed by the container is 256 bytes.
+
+      </p>
+<p>
+
+	The allocation pages are of the same page size as any other pages in the
+
+	container. The first allocation page of the FileContainer starts at the
+
+	first physical byte of the container.  Subsequent allocation pages are
+
+	chained via the nextAllocPageOffset.  Each allocation page is expected to
+
+	manage at least 1000 user pages (for 1K page size) so this chaining may not
+
+	be a severe performance hit.  The logical -&gt; physical mapping of an
+
+	allocation page is stored in the previous allocation page.  The container
+
+	object will need to maintain this mapping.</p>
+<p>
+
+	The following fields are stored in the page header:
+
+      </p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<caption>
+
+                Format of Alloc Page
+
+            </caption>
+        
+<tr>
+          
+<th colspan="1" rowspan="1">
+
+                Type
+
+             </th>
+          <th colspan="1" rowspan="1">
+
+                Description
+
+            </th>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">
+
+                int
+
+            </td>
+          <td colspan="1" rowspan="1">
+
+                FormatId (Although 4 bytes are allocated, this uses only the first 2 bytes. Next 2 bytes are unused.)
+
+            </td>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">StoredPageHeader</td>
+          <td colspan="1" rowspan="1">see <a href="#storedpage">Stored Page Header</a></td>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">long</td>
+          <td colspan="1" rowspan="1">nextAllocPageNumber - the next allocation page's number</td>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">long</td>
+          <td colspan="1" rowspan="1">nextAllocPageOffset - the file offset of the next allocation page</td>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">long</td>
+          <td colspan="1" rowspan="1">reserved1 - reserved for future usage</td>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">long</td>
+          <td colspan="1" rowspan="1">reserved2 - reserved for future usage</td>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">long</td>
+          <td colspan="1" rowspan="1">reserved3 - reserved for future usage</td>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">long</td>
+          <td colspan="1" rowspan="1">reserved4 - reserved for future usage</td>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">byte</td>
+          <td colspan="1" rowspan="1">N - the size of the borrowed container info</td>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">byte[N]</td>
+          <td colspan="1" rowspan="1">containerInfo - the content of the borrowed container info</td>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">AllocExtent</td>
+          <td colspan="1" rowspan="1">The one and only extent on this alloc page.</td>
+        
+</tr>
+      
+</table>
+<p>
+
+	The allocation page contains allocation extent rows.  In this first cut
+
+	implementation, there is only 1 allocation extent row per allocation page.
+
+	</p>
+<p>
+
+	The allocation extent row is an externalizable object and is directly
+
+	written on to the page by the alloc page.  In other words, it will not be
+
+	converted in to a storeableRow.  This is to cut down overhead, enhance
+
+	performance and give more control of the size and layout of the allocation
+
+	extent row to the alloc page.
+
+	</p>
+<a name="N103EA"></a><a name="Alloc+Page+detailed+implementation+notes"></a>
+<h3 class="boxed">
+
+	Alloc Page detailed implementation notes</h3>
+<p>
+
+	Create Container - an embryonic allocation page is formatted on disk by a
+
+	special static function to avoid instantiating a full AllocPage object.
+
+	This embryonic allocation has enough information that it can find the
+
+	file header and not much else.  Then the allocation page is properly
+
+	initialized by creating the first extent.
+
+      </p>
+<p>
+
+	Open Container - A static AllocPage method will be used to read off the
+
+	container information directly from disk.  Even if
+
+	the first alloc page (page 0) is already in the page cache, it will not be
+
+	used because cleaning the alloc page will introduce a deadlock if the
+
+	container is not in the container cache.  Long term, the first alloc page
+
+	should probably live in the container cache rather than in the page cache.
+
+      </p>
+<p>
+
+	Get Page - The first alloc page (page 0) will be read into the page cache.
+
+	Continue to follow the alloc page chain until the alloc page that manages
+
+	the specified page is found.  From the alloc page, the physical offset of
+
+	the specified page is located.
+
+      </p>
+<p>
+
+	Cleaning alloc page - the alloc page is written out the same way any page
+
+	is written out.  The container object will provide a call back to the alloc
+
+	page to write the current version of the container object back into the
+
+	borrowed space before the alloc page itself is written out.
+
+	</p>
+<p>
+
+	Cleaning the container object - get the first alloc page, dirty it and
+
+	clean it (which will cause it to call the container object to write itself
+
+	out into the borrowed space).  The versioning of the container is
+
+	independent of the versioning of the alloc page.  The container version is
+
+	stored inside the borrowed space and is opaque to the alloc page.
+
+	</p>
+<p>For the fields in an allocation extent row, see <a href="#Allocation+Extent">Allocation Extent</a>.</p>
+</div>
+    
+<a name="N10404"></a><a name="Allocation+Extent"></a>
+<h2 class="boxed">Allocation Extent</h2>
+<div class="section">
+<p>
+
+	An allocation extent row manages the page status of pages in the extent.
+
+	AllocExtent is externalizable and is written to the AllocPage directly,
+
+	without being converted to a row first.
+
+	</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<caption>Format of Allocation Extent</caption>
+        
+<tr>
+          
+<th colspan="1" rowspan="1">Type</th>
+          <th colspan="1" rowspan="1">Description</th>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">long</td>
+          <td colspan="1" rowspan="1">extentOffset - the begin physical byte offset of the first page of this extent</td>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">long</td>
+          <td colspan="1" rowspan="1">extentStart - the first logical page managed by this extent.</td>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">long</td>
+          <td colspan="1" rowspan="1">extentEnd - the last page this extent can ever hope to manage.</td>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">int</td>
+          <td colspan="1" rowspan="1">extentLength - the number of pages allocated in this extent</td>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">int</td>
+          <td colspan="1" rowspan="1">
+            
+<p>extentStatus - status bits for the whole extent.
+
+				<br>HAS_DEALLOCATED - most likely, this extent has a deallocated 
+
+                        page somewhere. If !HAS_DEALLOCATED, the extent has no deallocated page.
+
+				<br>HAS_FREE - most likely, this extent has a free page somewhere.
+
+						If !HAS_FREE, there is no free page in the extent.
+
+				<br>ALL_FREE - most likely, this extent only has free pages, good 
+
+                        candidate for shrinking the file.
+
+						If !ALL_FREE, the extent is not all free.
+
+				<br>HAS_UNFILLED_PAGES - most likely, this extent has unfilled pages.
+
+						if !HAS_UNFILLED_PAGES, all pages are filled.
+
+				<br>KEEP_UNFILLED_PAGES - this extent keeps track of unfilled pages
+
+						(post v1.3).  If not set, this extent has no notion of
+
+						unfilled page and has no unFilledPage bitmap.
+
+				<br>NO_DEALLOC_PAGE_MAP - this extent does not have separate dealloc and
+
+						free page bit maps.  Prior to 2.0, there are 2 bit
+
+						maps, a deallocate page bit map and a free page bit
+
+						map.  Cloudscape 2.0 and later merged the dealloc page
+
+						bit map into the free page bit map.
+
+				<br>RETIRED - this extent contains only 'retired' pages, never use 
+
+                        any page from this extent.  The pages don't actually 
+
+                        exist, i.e., it maps to nothing (physicalOffset is 
+
+                        garbage).  The purpose of this extent is to blot out a 
+
+                        range of logical page numbers that no longer exists 
+
+                        for this container.  Use this to reuse a physical page
+
+                        when a logical page has exhausted all recordId or for
+
+                        logical pages that have been shrunk out.
+
+               </p>
+          
+</td>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">int</td>
+          <td colspan="1" rowspan="1">preAllocLength - the number of pages that have been preallocated</td>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">int</td>
+          <td colspan="1" rowspan="1">reserved1</td>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">long</td>
+          <td colspan="1" rowspan="1">reserved2 - reserved for future use</td>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">long</td>
+          <td colspan="1" rowspan="1">reserved3 - reserved for future use</td>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">FreePages(bit)</td>
+          <td colspan="1" rowspan="1">Bitmap of free pages. Bit[i] is ON if page is free for immediate (re)use.</td>
+        
+</tr>
+        
+<tr>
+          
+<td colspan="1" rowspan="1">unFilledPages(bit)</td>
+          <td colspan="1" rowspan="1">Bitmap of pages that have free space. Bit[i] is ON if page is likely to be &lt; 1/2 full.</td>
+        
+</tr>
+      
+</table>
+<p>
+
+		org.apache.derby.iapi.services.io.FormatableBitSet is used to store the bit map.  
+
+            FormatableBitSet is an externalizable class.
+
+         </p>
+<p>
+
+	A page can have the following logical state:
+
+	<br>Free - a page that is free to be used
+
+	<br>Valid - a page that is currently in use
+
+      </p>
+<p>
+
+	There is another, transitional type of page: pages that have been
+
+	allocated on disk but have not yet been used.  These pages are Free.
+
+	</p>
+<p>
+
+	Bit[K] freePages
+
+		Bit[i] is ON iff page i may be free for reuse.  User must get the
+
+		dealloc page lock on the free page to make sure the transaction.
+
+	</p>
+<p>
+
+	K is the size of the bit array, it must be &gt;= length.
+
+      </p>
+</div>
+  
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2004-2012 Apache Software Foundation</div>
+<div id="feedback">
+    Send feedback about the website to:
+  <a id="feedbackto" href="mailto:derby-user@db.apache.org?subject=Feedback%C2%A0papers/pageformats.html">derby-user@db.apache.org</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

Added: websites/production/db/content/derby/papers/recovery.html
==============================================================================
--- websites/production/db/content/derby/papers/recovery.html (added)
+++ websites/production/db/content/derby/papers/recovery.html Wed Dec 19 18:20:21 2012
@@ -0,0 +1,918 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Derby Logging and Recovery</title>
+<link type="text/css" href="../skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="../skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="../skin/print.css" rel="stylesheet">
+<link type="text/css" href="../skin/profile.css" rel="stylesheet">
+<script src="../skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="../skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="../skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="../">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">apache</a> &gt; <a href="http://db.apache.org/">db</a><script src="../skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://db.apache.org/derby"><img class="logoImage" alt="Apache Derby" src="../images/derby-logo-web.png" title="Derby is a zero-admin Java RDBMS"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogoA1">
+<a href="http://db.apache.org"><img class="logoImage" alt="Apache DB Project" src="../images/db-logo-white.png" title="Apache DB creates and maintains database solutions."></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="../index.html">Home</a>
+</li>
+<li>
+<a class="unselected" href="../quick_start.html">Quick Start</a>
+</li>
+<li>
+<a class="unselected" href="../derby_downloads.html">Download</a>
+</li>
+<li>
+<a class="unselected" href="../derby_comm.html">Community</a>
+</li>
+<li>
+<a class="unselected" href="../manuals/index.html">Documentation</a>
+</li>
+<li class="current">
+<a class="selected" href="../blogs/index.html">Resources</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_1.1', '../skin/')" id="menu_1.1Title" class="menutitle">Blogs and Articles About Derby</div>
+<div id="menu_1.1" class="menuitemgroup">
+<div class="menuitem">
+<a href="../blogs/index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#blogs">Blogs</a>
+</div>
+<div onclick="SwitchMenu('menu_1.1.3', '../skin/')" id="menu_1.1.3Title" class="menutitle">Articles</div>
+<div id="menu_1.1.3" class="menuitemgroup">
+<div onclick="SwitchMenu('menu_1.1.3.1', '../skin/')" id="menu_1.1.3.1Title" class="menutitle">Tutorials, Tips and Tuning</div>
+<div id="menu_1.1.3.1" class="menuitemgroup">
+<div class="menuitem">
+<a href="../blogs/index.html#getstarted">Getting Started</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#features">Features, Hints and Tips</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#security">Security</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#performance">Performance and Tuning</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.1.3.2', '../skin/')" id="menu_1.1.3.2Title" class="menutitle">Tools and Migration</div>
+<div id="menu_1.1.3.2" class="menuitemgroup">
+<div class="menuitem">
+<a href="../blogs/index.html#tools">Tools</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#migration">Migration</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.1.3.3', '../skin/')" id="menu_1.1.3.3Title" class="menutitle">Applications</div>
+<div id="menu_1.1.3.3" class="menuitemgroup">
+<div class="menuitem">
+<a href="../blogs/index.html#client">Client</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#middletier">Middle Tier</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#persistence">Persistence</a>
+</div>
+<div class="menuitem">
+<a href="../blogs/index.html#scalability">Scalability and Failover</a>
+</div>
+</div>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.2', '../skin/')" id="menu_1.2Title" class="menutitle">Integration With Other Products</div>
+<div id="menu_1.2" class="menuitemgroup">
+<div class="menuitem">
+<a href="../integrate/index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="../integrate/index.html#uses">What works with Derby?</a>
+</div>
+<div class="menuitem">
+<a href="../integrate/index.html#products">Product Writeups</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.3', '../skin/')" id="menu_1.3Title" class="menutitle">Eclipse Plug-ins</div>
+<div id="menu_1.3" class="menuitemgroup">
+<div class="menuitem">
+<a href="../integrate/derby_plugin_info.html">Info</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_selected_1.4', '../skin/')" id="menu_selected_1.4Title" class="menutitle" style="background-image: url('../skin/images/chapter_open.gif');">Papers and Presentations</div>
+<div id="menu_selected_1.4" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="../papers/index.html">Overview</a>
+</div>
+<div onclick="SwitchMenu('menu_selected_1.4.2', '../skin/')" id="menu_selected_1.4.2Title" class="menutitle" style="background-image: url('../skin/images/chapter_open.gif');">Derby Engine</div>
+<div id="menu_selected_1.4.2" class="selectedmenuitemgroup" style="display: block;">
+<div onclick="SwitchMenu('menu_1.4.2.1', '../skin/')" id="menu_1.4.2.1Title" class="menutitle">Javadoc</div>
+<div id="menu_1.4.2.1" class="menuitemgroup">
+<div class="menuitem">
+<a href="http://db.apache.org/derby/javadoc/engine">Engine</a>
+</div>
+<div class="menuitem">
+<a href="http://db.apache.org/derby/javadoc/language">Language</a>
+</div>
+<div class="menuitem">
+<a href="http://db.apache.org/derby/javadoc/tools">Tools</a>
+</div>
+<div class="menuitem">
+<a href="http://db.apache.org/derby/javadoc/publishedapi">API</a>
+</div>
+</div>
+<div class="menuitem">
+<a href="../papers/derby_arch.html">Architecture</a>
+</div>
+<div class="menuitem">
+<a href="../papers/btree_package.html">BTree</a>
+</div>
+<div class="menuitem">
+<a href="../papers/pageformats.html">Disk Page Format</a>
+</div>
+<div class="menuitem">
+<a href="../papers/derby_htw.html">How Things Work</a>
+</div>
+<div class="menuitem">
+<a href="../papers/Intersect-design.html">Intersect &amp; Except</a>
+</div>
+<div class="menuitem">
+<a href="../papers/JDBCImplementation.html">JDBC</a>
+</div>
+<div class="menuitem">
+<a href="../papers/logformats.html">Log Format</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">Logging &amp; Recovery</div>
+</div>
+<div class="menuitem">
+<a href="../papers/optimizer.html">Optimizer</a>
+</div>
+<div class="menuitem">
+<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/types/package-summary.html#package_description">Type System</a>
+</div>
+<div class="menuitem">
+<a href="../papers/versionupgrade.html">Versioning</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.4.3', '../skin/')" id="menu_1.4.3Title" class="menutitle">Derby Network Client</div>
+<div id="menu_1.4.3" class="menuitemgroup">
+<div class="menuitem">
+<a href="../papers/DerbyClientSpec.html">Functional Spec</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.4.4', '../skin/')" id="menu_1.4.4Title" class="menutitle">Derby Tutorial</div>
+<div id="menu_1.4.4" class="menuitemgroup">
+<div class="menuitem">
+<a href="../papers/DerbyTut/index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="../papers/DerbyTut/install_software.html">Step 1: Install Software</a>
+</div>
+<div class="menuitem">
+<a href="../papers/DerbyTut/ij_intro.html">Step 2: ij Basics</a>
+</div>
+<div class="menuitem">
+<a href="../papers/DerbyTut/embedded_intro.html">Step 3: Embedded Derby</a>
+</div>
+<div class="menuitem">
+<a href="../papers/DerbyTut/ns_intro.html">Step 4: Derby Network Server</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.4.5', '../skin/')" id="menu_1.4.5Title" class="menutitle">Presentations</div>
+<div id="menu_1.4.5" class="menuitemgroup">
+<div class="menuitem">
+<a href="../papers/ApacheCon.html">ApacheCon</a>
+</div>
+<div class="menuitem">
+<a href="../papers/MiscPresentations.html#Victorian+Java+User+Group">Victorian JUG 2008</a>
+</div>
+<div class="menuitem">
+<a href="../papers/MiscPresentations.html#OSCON+2005">OSCON 2005</a>
+</div>
+<div class="menuitem">
+<a href="../papers/MiscPresentations.html#Colorado+Software+Summit+2004">Colorado 2004</a>
+</div>
+</div>
+</div>
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<hr>
+<form action="http://www.google.com/search" method="get">
+<input value="db.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="18" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                  <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="../skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div class="trail">Font size: 
+	          &nbsp;<input value="Reset" class="resetfont" title="Reset text" onclick="ndeSetTextSize('reset'); return false;" type="button">      
+	          &nbsp;<input value="-a" class="smallerfont" title="Shrink text" onclick="ndeSetTextSize('decr'); return false;" type="button">
+	          &nbsp;<input value="+a" class="biggerfont" title="Enlarge text" onclick="ndeSetTextSize('incr'); return false;" type="button">
+</div>
+<h1>Derby Logging and Recovery</h1>
+<div class="abstract">This document describes how Derby implements logging and recovery. 
+        This is a work-in-progress derived from Javadoc comments and from explanations 
+        Mike Matrigali and others posted to the Derby lists. Please post questions, 
+        comments, and corrections to derby-dev@db.apache.org. </div>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#introduction"> Introduction </a>
+</li>
+<li>
+<a href="#ARIES+-+An+Overview">ARIES - An Overview</a>
+</li>
+<li>
+<a href="#Features+of+ARIES">Features of ARIES</a>
+</li>
+<li>
+<a href="#References">References</a>
+</li>
+<li>
+<a href="#Derby+implementation+of+ARIES">Derby implementation of ARIES</a>
+</li>
+<li>
+<a href="#Derby+recovery+process">Derby recovery process</a>
+</li>
+<li>
+<a href="#Recovery+Redo+pass">Recovery Redo pass</a>
+</li>
+<li>
+<a href="#Recovery+Undo+pass">Recovery Undo pass</a>
+</li>
+<li>
+<a href="#Checkpoints">Checkpoints</a>
+</li>
+<li>
+<a href="#Derby+Logging+Overview">Derby Logging Overview</a>
+</li>
+<li>
+<a href="#Loggable+Interface+Hierarchy">Loggable Interface Hierarchy</a>
+</li>
+<li>
+<a href="#Container+Log+Operations+Hierarchy">Container Log Operations Hierarchy</a>
+</li>
+<li>
+<a href="#Transaction+Management+Log+Operations+Hierarchy">Transaction Management Log Operations Hierarchy</a>
+</li>
+<li>
+<a href="#Page+Level+Log+Operations+Hierarchy">Page Level Log Operations Hierarchy</a>
+</li>
+</ul>
+</div>
+    
+<a name="N10010"></a><a name="introduction"></a>
+<h2 class="boxed"> Introduction </h2>
+<div class="section">
+<p>Derby transaction logging and recovery is based upon the ARIES algorithm.</p>
+</div>
+    
+<a name="N1001A"></a><a name="ARIES+-+An+Overview"></a>
+<h2 class="boxed">ARIES - An Overview</h2>
+<div class="section">
+<p>Following is a brief description of the main principles behind ARIES.</p>
+<p>Firstly, in ARIES, changes always take the system forward. That is to say,
+even transaction rollbacks are treated as if they are updates to the system.
+This is counter-intuitive to what the user thinks, because when a user asks for a
+transaction to be rolled back, they assume that the system is going back
+to a previous state of affairs. However, from the perspective of ARIES, there
+is no such thing as going back. For example, if a transaction changes A to B
+and then rolls back, ARIES treats the rollback as simply an update that
+changes B to A. The forward change from A to B (redo) and the reversal of B to
+A (undo) are both recorded as updates to the system. Changes during normal
+operations are recorded as Redo-Undo log records. As the name implies, these
+log records can be 'redone' in case of a system crash, or 'undone' in case a
+rollback is required. Changes made during rollbacks, however, are recorded as
+Redo-only log records. These log records are called Compensation Log Records
+(CLRs). The reason these are redo only is that by definition a rollback does
+not need to be undone, whereas normal updates need to be undone if the
+transaction decides to rollback.
+</p>
+<p>The second basic principle of ARIES is that during recovery, history 
+        is repeated. This can be explained as follows.</p>
+<p>When a system crashes, there would be some transactions that have completed 
+        (committed or aborted), and others that are still active. The WAL protocol 
+        ensures that changes made by completed transactions have been recorded 
+        in the Log. Changes made by incomplete transactions may also be present 
+        in the Log, because Log Records are created in the same order as the changes 
+        are made by the system.</p>
+<p>During recovery, ARIES initially replays the Log to bring the system 
+        back to a state close to that when the crash occurred. This means that 
+        ARIES replays the effects of not only those transactions that committed 
+        or aborted, but also those that were active at the time of the crash. 
+        Having brought the system to this state, ARIES then identifies transactions 
+        that were incomplete, and rolls them back. The basic idea is to repeat 
+        the entire history up to the point of crash, and then undo failed transactions.</p>
+<p>This approach has the advantage that during the redo phase, changes can 
+        be replayed at a fairly low level, for example, the level of a disk page. 
+        ARIES calls this page oriented redo. This feature is significant because 
+        it means that until the redo phase is over, the system does not need to 
+        know about higher level data structures such as Indexes. Only during the 
+        undo phase, when incomplete transactions are being rolled back, does the 
+        system need to know about high level data structures. </p>
+</div>
+    
+<a name="N10033"></a><a name="Features+of+ARIES"></a>
+<h2 class="boxed">Features of ARIES</h2>
+<div class="section">
+<p>ARIES includes a number of optimisations to reduce the amount of work 
+        required during normal operations and recovery.</p>
+<p>One optimisation is to avoid application of log records unnecessarily. 
+        The LSN of the most recently generated log record is stored in each disk 
+        page. This is known as the PageLsn. The PageLsn allows ARIES to determine 
+        during the redo phase, whether the changes represented by a log record 
+        have been applied to the page or not.</p>
+<p>ARIES chains log records for transactions in such a way that those records 
+        that are no longer necessary, are skipped during recovery. For example, 
+        if a transaction changed A to B, and then rolled back, generating a log 
+        record for changing B to A, then during recovery, ARIES would automatically 
+        skip the log record that represents the change from A to B. This is made 
+        possible by maintaining an UndoLsn pointer in every Log Record. The UndoLsn 
+        normally points to the previous log record generated by the transaction. 
+        However, in log records generated during Rollback (known as Compensation 
+        Log Records), the UndoLsn is made to point to the Log record preceding 
+        the one that is being undone. To take an example, let us assume that a 
+        transaction generated log record 1, containing change from A to B, then 
+        log record 2 containing change from B to C. At this point the transaction 
+        decides to rollback the change from B to C. It therefore generates a new 
+        log record 3, containing a change from C to B. The UndoLsn of this log 
+        record is made to point at log record 1, instead of log record 2. When 
+        following the UndoLsn chain, ARIES would skip log record 2.</p>
+<p>ARIES also supports efficient checkpoints. During a checkpoint, it is 
+        not necessary to flush all database pages to disk. Instead ARIES records 
+        a list of dirty buffer pages along with their RecoveryLsn(s). The RecoveryLsn 
+        of a page is the LSN of the earliest log record that represents a change 
+        to the page since it was read from disk. By using this list, ARIES is 
+        able to determine during recovery, where to start replaying the Log.</p>
+<p>ARIES supports nested top-level action concept whereby part of a transaction 
+        can be committed even if the transaction aborts. This is useful for situations 
+        where a structural change should not be undone even if the transaction 
+        aborts. Nested top level actions are implemented using Dummy Compensation 
+        Log Records - and make use of the ability to skip log records using the 
+        UndoLsn pointer as described previously.</p>
+</div>
+    
+<a name="N10049"></a><a name="References"></a>
+<h2 class="boxed">References</h2>
+<div class="section">
+<ol>
+      
+<li>
+<p> For a full description of ARIES, please see 
+        <em>Mohan, C., Haderle, D., Lindsay, B., Pirahesh, H., Schwarz, P. ARIES: 
+          A Transaction Recovery Method Supporting Fine-Granularity Locking and 
+          Partial Rollbacks Using Write-Ahead Logging, ACM Transactions on Database 
+          Systems, Vol. 17, No. 1, March 1992, pp94-162.</em>
+        A version of this document is freely available as 
+        <a class="external" href="http://www.almaden.ibm.com/u/mohan/RJ6649Rev.pdf">IBM Research 
+          Report RJ6649</a>.</p>
+      
+</li>
+      
+<li>
+<p> A good description of Write Ahead Logging, and how a log is typically 
+          implemented, can be found in 
+          <em> 
+            <a class="external" href="http://portal.acm.org/citation.cfm?id=573304">Transaction 
+              Processing: Concepts and Techniques</a>
+            , by Jim Gray and Andreas Reuter, 1993, Morgan Kaufmann Publishers</em>
+          .</p>
+      
+</li>
+      
+</ol>
+</div>
+    
+<a name="N1006B"></a><a name="Derby+implementation+of+ARIES"></a>
+<h2 class="boxed">Derby implementation of ARIES</h2>
+<div class="section">
+<p>I shall only describe how Derby differs from standard ARIES implementation. 
+        Therefore, for a full understanding of the logging and recovery mechanisms 
+        in Derby, it is necessary to consult the above-mentioned papers on ARIES.</p>
+<p>Derby uses Log Sequence Numbers to identify Log records. In Derby terminology, 
+        LSNs are called LogInstants. LogCounter is an implementation of LogInstant.</p>
+<p>Although Derby uses LogInstant, it does not save this with the page data. 
+        Instead, a page version number is stored. The page version number is also 
+        stored in the log records associated with the page. During recovery (redo), 
+        Derby uses the page version to determine whether the page needs redo or 
+        not. Here is a comment on the rationale behind this:</p>
+<p class="quote"> 
+        
+<em>Mike Matrigali:</em>
+        
+<br>
+        Am going to defer on page version vs. LSN question, but at least mention 
+        some guesses, not researched info. You are right about what exists. I spoke 
+        with some colleagues and the best we can come up with is that the implementor 
+        wanted to separate the page and the log, in case we ever did a different 
+        log format. I will try to do some more research here. I also vaguely remember 
+        the implementor mentioning if we ever wanted to implement the LSN on the 
+        page, we had space to do so. It may simply have been easier to code the 
+        page versions, since in the current system the LSN is the address in the 
+        log file (which and it may not be available when the code wants to write 
+        it on the page). 
+        <br>
+        As you say in derby all the log records are associated with a page, and 
+        thus have a page version number. That page version number in the log is 
+        compared with the page version number of the page during redo to determine 
+        if the log record needs to be applied. This also has helped us in the 
+        past to catch some bugs as we can sanity check during redo that the page 
+        is progressing along the expected path, ie. it is a bug during redo to 
+        be applying a page version 10 log record to page that is at page version 
+        8. I haven't seen this sanity check in many years, but was useful when 
+        the product was first coded. </p>
+<p>Derby does not write the dirty pages list within a Checkpoint record. 
+        Instead, during checkpoint, Derby flushes all database pages to 
+        disk. The redo Low Water Mark (redoLWM) is set to the current LSN when the
+        checkpoint starts. The undo Low Water Mark (undoLWM) is set to the 
+        starting LSN of the oldest active transaction. At restart, Derby replays
+        the log from redoLWM or undoLWM whichever is earlier. For a good description
+        of concepts behind the checkpoint method used by Derby, and the use of redo/undo Low
+        Water Marks, please refer to the TPCT book (Section 11.3).</p>
+<p>Derby uses 'internal' transactions instead of nested top-level actions 
+        to separate structural changes from normal operations. Internal transactions 
+        have the property that they are always page-oriented and do not require 
+        logical undo, ie, undo is always physical. Also, during recovery, incomplete 
+        internal transactions 
+        are undone before any regular transactions. In ARIES, no special processing 
+        is required to handle this, as nested top-level actions are automatically 
+        handled as part of normal redo, and are skipped during undo unless they 
+        are incomplete, in which case they are undone.</p>
+<p>ARIES uses three passes during recovery. The first pass is the analysis 
+        pass when ARIES collects information and determines where redo must start. 
+        This is followed by the redo pass, and then by the undo pass. Derby omits 
+        the analysis pass as this is not required due to the way checkpoints are 
+        done.</p>
+</div>
+    
+<a name="N1008F"></a><a name="Derby+recovery+process"></a>
+<h2 class="boxed">Derby recovery process</h2>
+<div class="section">
+<p>Implemented in <span class="codefrag">org.apache.derby.impl.store.raw.log.LogToFile.recover()</span>
+</p>
+<p>Following is a high-level description of the recovery process in Derby.</p>
+<p> In this implementation, the log is a stream of log records stored in 
+        one or more flat files. Recovery is done in 2 passes: redo and undo. </p>
+<dl> 
+        
+<dt>Redo pass </dt>
+        
+<dd> In the redo pass, reconstruct the state of the rawstore by repeating 
+          exactly what happened before as recorded in the log. </dd>
+        
+<dt>Undo pass </dt>
+        
+<dd> In the undo pass, all incomplete transactions are rolled back in 
+          the order from the most recently started to the oldest.</dd>
+      
+</dl>
+</div>
+    
+<a name="N100B0"></a><a name="Recovery+Redo+pass"></a>
+<h2 class="boxed">Recovery Redo pass</h2>
+<div class="section">
+<p>Implemented in <span class="codefrag">org.apache.derby.impl.store.raw.log.FileLogger.redo()</span>
+</p>
+<p> The log stream is scanned from the beginning (or
+	from the undo low water mark of a checkpoint) forward until the end.
+	The purpose of the redo pass is to repeat history, i.e, to repeat
+	exactly the same set of changes the rawStore went thru right before it
+	stopped.   With each log record that is encountered in the redo pass:</p>
+<ol>
+	
+<li>if it isFirst(), then the transaction factory is called upon to
+	    create a new transaction object.</li>
+	
+<li>if it needsRedo(), its doMe() is called (if it is a compensation
+	    operation, then the undoable operation needs to be created first
+           before the doMe is called).</li>
+	
+<li>if it isComplete(), then the transaction object is closed.</li>
+	
+</ol>
+</div>
+    
+<a name="N100CB"></a><a name="Recovery+Undo+pass"></a>
+<h2 class="boxed">Recovery Undo pass</h2>
+<div class="section">
+<p>Implemented in <span class="codefrag">org.apache.derby.impl.store.raw.xact.XactFactory.rollbackAllTransactions()</span>
+</p>
+<p>	Roll back all active transactions that have updated the raw store.
+	Transactions are rolled back in the following order:</p>
+<ol>
+	
+<li>Internal transactions in reversed beginXact chronological order</li>
+	
+<li>all other transactions in reversed beginXact chronological order</li>
+	
+</ol>
+</div>
+    
+<a name="N100E3"></a><a name="Checkpoints"></a>
+<h2 class="boxed">Checkpoints</h2>
+<div class="section">
+<p>Implemented in <span class="codefrag">org.apache.derby.impl.store.raw.log.LogToFile.checkpoint()</span>
+</p>
+<p>Only one checkpoint may take place at any given time.</p>
+<p>The steps of a checkpoint are:</p>
+<ol>
+	
+<li>
+<p>Switch to a new log file if possible.</p>
+            
+<ol>
+            
+<li>Freeze the log (for the transition to a new log file)</li>
+            
+<li>Flush current log file</li>
+		
+<li>Create and flush the new log file (with file number 1 higher
+            than the previous log file). The new log file becomes the
+            current log file.</li>
+		
+<li>Unfreeze the log</li>
+		
+</ol>
+	 
+</li>
+	 
+<li>Start checkpoint transaction</li>
+	 
+<li>
+<p>Gather interesting information about the rawStore: </p>
+           
+<ol>
+           
+<li>The current log instant (redoLWM)</li>
+           
+<li>The earliest active transaction begin tran log record instant (undoLWM)
+              , all the truncation LWM set by clients of raw store
+	        (replication)</li>
+           
+</ol>
+           
+</li>
+       
+<li>Clean the buffer cache</li>
+	 
+<li>Log the next checkpoint log record, which contains (repPoint,
+	     undoLWM, redoLWM) and commit checkpoint transaction.</li>
+	 
+<li>Synchronously write the control file containing the next checkpoint
+	     log record log instant</li>
+	 
+<li>The new checkpoint becomes the current checkpoint. Somewhere near
+	     the beginning of each log file should be a checkpoint log record (not
+	     guaranteed to be there)</li>
+	 
+<li>See if the log can be truncated</li>
+	 
+</ol>
+<p>The earliest useful log record is determined by the repPoint and the
+	 undoLWM, whichever is earlier.</p>
+<p>Every log file whose log file number is smaller than the earliest useful
+	 log record's log file number can be deleted.</p>
+<p>Transactions can be at the following states w/r to a checkpoint -
+	 consider the log as a continuous stream and not as a series of log files for
+	 the sake of clarity:<br>
+       <!-- <img src="checkpoint.png" alt=""/> -->
+       
+</p>
+<pre class="code">
+|(BT)-------(ET)| marks the begin and end of a transaction.
+.                          checkpoint started
+.       |__undoLWM          |
+.       V                   |___redoLWM
+.                           |___TruncationLWM
+.                           |
+.                           V
+1 |-----------------|
+2       |--------------------------------|
+3           |-------|
+4               |--------------------------------------(end of log)
+5                                       |-^-|
+.                                   Checkpoint Log Record
+---A---&gt;|&lt;-------B---------&gt;|&lt;-------------C-----------
+</pre>
+<p>
+	 There are only 3 periods of interest :<br>
+	 A) before undoLWM, B) between undo and redo LWM, C) after redoLWM.
+	 </p>
+<p>
+	 Transaction 1 started in A and terminates in B.<br>
+	 During redo, we should only see log records and endXact from this
+	 transaction in the first phase (between undoLWM and redoLWM). No
+	 beginXact log record for this transaction will be seen.
+	 </p>
+<p>
+	 Transaction 2 started in B (right on the undoLWM) and terminated in C.
+	 <br>
+	 Any transaction that terminates in C must have a beginXact at or after
+	 undoLWM. In other words, no transaction can span A, B and C. During redo,
+	 we will see beginXact, other log records and endXact for this
+	 transaction.
+	 </p>
+<p>
+	 Transaction 3 started in B and ended in B.<br>
+	 During redo, we will see beginXact, other log records and endXact for
+	 this transaction.
+	 </p>
+<p>
+	 Transaction 4 begins in B and never ends. <br>
+	 During redo, we will see beginXact, other log records. In undo, this
+	 loser transaction will be rolled back.
+	 </p>
+<p>
+	 Transaction 5 is the transaction taking the checkpoint. <br>
+	 The checkpoint action started way back in time but the checkpoint log
+	 record is only written after the buffer cache has been flushed.
+	 </p>
+<p>
+	 Note that if any time elapses between taking the undoLWM and the redoLWM,
+	 then it will create a 4th period of interest.      
+       </p>
+</div>
+    
+<a name="N1015E"></a><a name="Derby+Logging+Overview"></a>
+<h2 class="boxed">Derby Logging Overview</h2>
+<div class="section">
+<p>A loggable action in Derby is redoable. If the action implements the Undoable interface, then it is also
+	    undoable. When an undoable action is rolled back, it must generate a Compensation log which represents 
+	    the action necessary to repeat the undo.
+		</p>
+<p>Normally a logged action is rolled back on the same page that it was originally applied to. This is
+		called physical or physiological undo. If the undo needs to be applied to a different page (such as due to
+		a page split in a BTree), then it is called
+    	a Logical Undo. In Derby, BTree inserts and deletes require logical undo.</p>
+<p>When performing a loggable action, Derby follows this sequence:</p>
+<ol>
+    		
+<li>Convert the action into a corresponding log operation. Most BTree and Heap operations are
+   		    translated to Page level actions - ie - the action involves updating one or more pages. For example,
+   		    a single Heap row insert may be translated to inserts on several pages. Each page insert
+   		    will be a separate loggable action.</li>
+    		
+<li>Generate the log data that describes the page level action.</li>
+    		
+<li>Perform the action <em>after</em> it has been logged. Also, the action is 
+    		performed using the logged data, in the same way as it would be performed during recovery.
+    		In other words, the logged data is used both for normal operations as well as for repeating
+    		history. This has the advantage that the recovery execution path is the same as the execution
+    		path during normal execution.</li>    		  
+    		
+<li>If a transaction is being rolled back, first the loggable action is asked to generate
+    		the corresponding undo (Compensation) log data. This is then logged, and after that it is performed.
+    		As described before, a Compensation action is only redoable, because by definition, an undo
+    		action does not need to be undone.
+    		</li>
+    	
+</ol>
+</div>
+    
+<a name="N10180"></a><a name="Loggable+Interface+Hierarchy"></a>
+<h2 class="boxed">Loggable Interface Hierarchy</h2>
+<div class="section">
+<ul>
+			
+<li>interface org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Loggable.html" title="interface in org.apache.derby.iapi.store.raw"><strong>Loggable</strong></a>
+				
+<ul>
+					
+<li>interface org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Compensation.html" title="interface in org.apache.derby.iapi.store.raw"><strong>Compensation</strong></a>
+</li>
+					
+<li>interface org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Undoable.html" title="interface in org.apache.derby.iapi.store.raw"><strong>Undoable</strong></a>
+						
+<ul>
+							
+<li>interface org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/LogicalUndoable.html" title="interface in org.apache.derby.iapi.store.raw"><strong>LogicalUndoable</strong></a>
+</li>
+						
+</ul>
+					
+</li>
+				
+</ul>
+			
+</li>
+		
+</ul>
+</div>
+    
+<a name="N101B2"></a><a name="Container+Log+Operations+Hierarchy"></a>
+<h2 class="boxed">Container Log Operations Hierarchy</h2>
+<div class="section">
+<ul>
+			
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/ContainerBasicOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>ContainerBasicOperation</strong></a> (implements org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Loggable.html" title="interface in org.apache.derby.iapi.store.raw">Loggable</a>)
+				<ul>
+					
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/ContainerOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>ContainerOperation</strong></a> (implements org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Undoable.html" title="interface in org.apache.derby.iapi.store.raw">Undoable</a>)</li>
+					
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/ContainerUndoOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>ContainerUndoOperation</strong></a> (implements org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Compensation.html" title="interface in org.apache.derby.iapi.store.raw">Compensation</a>)</li>
+				
+</ul>
+			
+</li>
+			
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/RemoveFileOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>RemoveFileOperation</strong></a> (implements org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Undoable.html" title="interface in org.apache.derby.iapi.store.raw">Undoable</a>)</li>
+		
+</ul>
+</div>
+    
+<a name="N101F7"></a><a name="Transaction+Management+Log+Operations+Hierarchy"></a>
+<h2 class="boxed">Transaction Management Log Operations Hierarchy</h2>
+<div class="section">
+<ul>
+			
+<li>class org.apache.derby.impl.store.raw.xact.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/xact/BeginXact.html" title="class in org.apache.derby.impl.store.raw.xact"><strong>BeginXact</strong></a> (implements org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Loggable.html" title="interface in org.apache.derby.iapi.store.raw">Loggable</a>)</li>
+			
+<li>class org.apache.derby.impl.store.raw.xact.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/xact/EndXact.html" title="class in org.apache.derby.impl.store.raw.xact"><strong>EndXact</strong></a> (implements org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Loggable.html" title="interface in org.apache.derby.iapi.store.raw">Loggable</a>)</li>
+			
+<li>class org.apache.derby.impl.store.raw.log.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/log/CheckpointOperation.html" title="class in org.apache.derby.impl.store.raw.log"><strong>CheckpointOperation</strong></a> (implements org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Loggable.html" title="interface in org.apache.derby.iapi.store.raw">Loggable</a>)</li>
+		
+</ul>
+</div>
+    
+<a name="N1022B"></a><a name="Page+Level+Log+Operations+Hierarchy"></a>
+<h2 class="boxed">Page Level Log Operations Hierarchy</h2>
+<div class="section">
+<ul>
+			
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/PageBasicOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>PageBasicOperation</strong></a> (implements org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Loggable.html" title="interface in org.apache.derby.iapi.store.raw">Loggable</a>, org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/RePreparable.html" title="interface in org.apache.derby.iapi.store.raw">RePreparable</a>)
+				<ul>
+					
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/LogicalPageOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>LogicalPageOperation</strong></a> (implements org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/LogicalUndoable.html" title="interface in org.apache.derby.iapi.store.raw">LogicalUndoable</a>)
+					<ul>
+						
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/DeleteOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>DeleteOperation</strong></a>
+</li>
+						
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/InsertOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>InsertOperation</strong></a>
+</li>
+						
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/UpdateFieldOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>UpdateFieldOperation</strong></a>
+</li>
+					
+</ul>
+					
+</li>
+					
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/LogicalUndoOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>LogicalUndoOperation</strong></a> (implements org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Compensation.html" title="interface in org.apache.derby.iapi.store.raw">Compensation</a>)</li>
+					
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/PhysicalPageOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>PhysicalPageOperation</strong></a> (implements org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Undoable.html" title="interface in org.apache.derby.iapi.store.raw">Undoable</a>)
+						<ul>
+							
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/AllocPageOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>AllocPageOperation</strong></a>
+</li>
+							
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/ChainAllocPageOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>ChainAllocPageOperation</strong></a>
+</li>
+							
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/CopyRowsOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>CopyRowsOperation</strong></a>
+</li>
+							
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/InitPageOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>InitPageOperation</strong></a>
+</li>
+							
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/InvalidatePageOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>InvalidatePageOperation</strong></a>
+</li>
+							
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/PurgeOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>PurgeOperation</strong></a>
+</li>
+							
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/UpdateOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>UpdateOperation</strong></a>
+</li>
+						
+</ul>
+					
+</li>
+					
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/PhysicalUndoOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>PhysicalUndoOperation</strong></a> (implements org.apache.derby.iapi.store.raw.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/iapi/store/raw/Compensation.html" title="interface in org.apache.derby.iapi.store.raw">Compensation</a>)</li>
+					
+<li>class org.apache.derby.impl.store.raw.data.<a href="http://db.apache.org/derby/javadoc/engine/org/apache/derby/impl/store/raw/data/SetReservedSpaceOperation.html" title="class in org.apache.derby.impl.store.raw.data"><strong>SetReservedSpaceOperation</strong></a>
+</li>
+				
+</ul>
+			
+</li>
+		
+</ul>
+</div>
+    
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2004-2012 Apache Software Foundation</div>
+<div id="feedback">
+    Send feedback about the website to:
+  <a id="feedbackto" href="mailto:derby-user@db.apache.org?subject=Feedback%C2%A0papers/recovery.html">derby-user@db.apache.org</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>



Mime
View raw message