subversion-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From stef...@apache.org
Subject svn commit: r1516325 - in /subversion/branches/log-addressing/subversion/libsvn_fs_fs: index.c index.h structure-indexes
Date Wed, 21 Aug 2013 23:14:21 GMT
Author: stefan2
Date: Wed Aug 21 23:14:21 2013
New Revision: 1516325

URL: http://svn.apache.org/r1516325
Log:
On the log-addressing branch:  Store a checksum with each item
in the phys-to-log index.  Code to set and use will be added in
following commits.

Also, add precise file length information to that index.  We will
need that later during verification.

* subversion/libsvn_fs_fs/index.h
  (svn_fs_fs__p2l_entry_t): add checksum member
  (svn_fs_fs__p2l_get_max_offset): update docstring

* subversion/libsvn_fs_fs/index.c
  (p2l_header_t): add file size member
  (svn_fs_fs__p2l_index_create): store file size in header;
                                 add checksums to items
  (get_p2l_header): read file size as well
  (read_entry): read checksum as well
  (p2l_get_max_offset_func,
   svn_fs_fs__p2l_get_max_offset): return exact pack / rev file size

* subversion/libsvn_fs_fs/structure-indexes
  (Phys-to-log index): update

Modified:
    subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.c
    subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.h
    subversion/branches/log-addressing/subversion/libsvn_fs_fs/structure-indexes

Modified: subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.c
URL: http://svn.apache.org/viewvc/subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.c?rev=1516325&r1=1516324&r2=1516325&view=diff
==============================================================================
--- subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.c (original)
+++ subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.c Wed Aug 21 23:14:21 2013
@@ -119,6 +119,9 @@ typedef struct p2l_header_t
   /* number of pages / clusters in that rev file */
   apr_size_t page_count;
 
+  /* number of bytes in the rev file */
+  apr_uint64_t file_size;
+
   /* offsets of the pages / cluster descriptions within the index file */
   apr_off_t *offsets;
 } p2l_header_t;
@@ -1528,6 +1531,7 @@ svn_fs_fs__p2l_index_create(svn_fs_t *fs
   apr_uint64_t last_page_end = 0;
   apr_size_t last_buffer_size = 0;  /* byte offset in the spill buffer at
                                        the begin of the current revision */
+  apr_uint64_t file_size = 0;
 
   /* temporary data structures that collect the data which will be moved
      to the target file in a second step */
@@ -1566,6 +1570,8 @@ svn_fs_fs__p2l_index_create(svn_fs_t *fs
         at the end of the last page. */
       if (eof)
         {
+          file_size = last_entry_end;
+
           entry.offset = last_entry_end;
           entry.size = APR_ALIGN(entry.offset, page_size) - entry.offset;
           entry.type = 0;
@@ -1621,6 +1627,9 @@ svn_fs_fs__p2l_index_create(svn_fs_t *fs
       SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded,
                                   encode_int(encoded, rev_diff),
                                   iter_pool));
+      SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded,
+                                  encode_uint(encoded, entry.fnv1_checksum),
+                                  iter_pool));
      
       last_entry_end = entry_end;
 
@@ -1636,11 +1645,14 @@ svn_fs_fs__p2l_index_create(svn_fs_t *fs
                            | APR_CREATE | APR_TRUNCATE | APR_BUFFERED,
                            APR_OS_DEFAULT, local_pool));
 
-  /* write the start revision and page size */
+  /* write the start revision, file size and page size */
   SVN_ERR(svn_io_file_write_full(index_file, encoded,
                                  encode_uint(encoded, revision),
                                  NULL, local_pool));
   SVN_ERR(svn_io_file_write_full(index_file, encoded,
+                                 encode_uint(encoded, file_size),
+                                 NULL, local_pool));
+  SVN_ERR(svn_io_file_write_full(index_file, encoded,
                                  encode_uint(encoded, page_size),
                                  NULL, local_pool));
 
@@ -1721,6 +1733,8 @@ get_p2l_header(p2l_header_t **header,
   SVN_ERR(packed_stream_get(&value, *stream));
   result->first_revision = (svn_revnum_t)value;
   SVN_ERR(packed_stream_get(&value, *stream));
+  result->file_size = value;
+  SVN_ERR(packed_stream_get(&value, *stream));
   result->page_size = value;
   SVN_ERR(packed_stream_get(&value, *stream));
   result->page_count = (apr_size_t)value;
@@ -1905,6 +1919,9 @@ read_entry(packed_number_stream_t *strea
   *last_revision += (svn_revnum_t)decode_int(value);
   entry.item.revision = *last_revision;
 
+  SVN_ERR(packed_stream_get(&value, stream));
+  entry.fnv1_checksum = (apr_uint32_t)value;
+
   APR_ARRAY_PUSH(result, svn_fs_fs__p2l_entry_t) = entry;
   *item_offset += entry.size;
 
@@ -2347,7 +2364,7 @@ p2l_get_max_offset_func(void **out,
                         apr_pool_t *result_pool)
 {
   const p2l_header_t *header = data;
-  apr_off_t max_offset = header->page_size * header->page_count;
+  apr_off_t max_offset = header->file_size;
   *out = apr_pmemdup(result_pool, &max_offset, sizeof(max_offset));
 
   return SVN_NO_ERROR;
@@ -2380,7 +2397,7 @@ svn_fs_fs__p2l_get_max_offset(apr_off_t 
     }
 
   SVN_ERR(get_p2l_header(&header, &stream, fs, revision, pool, pool));
-  *offset = header->page_count * header->page_size;
+  *offset = header->file_size;
   
   /* make sure we close files after usage */
   SVN_ERR(packed_stream_close(stream));

Modified: subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.h
URL: http://svn.apache.org/viewvc/subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.h?rev=1516325&r1=1516324&r2=1516325&view=diff
==============================================================================
--- subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.h (original)
+++ subversion/branches/log-addressing/subversion/libsvn_fs_fs/index.h Wed Aug 21 23:14:21 2013
@@ -62,6 +62,9 @@ typedef struct svn_fs_fs__p2l_entry_t
   /* type of the item (see SVN_FS_FS__ITEM_TYPE_*) defines */
   unsigned type;
 
+  /* modified FNV-1a checksum.  0 if unknown checksum */
+  apr_uint32_t fnv1_checksum;
+
   /* item in that block */
   svn_fs_fs__id_part_t item;
 } svn_fs_fs__p2l_entry_t;
@@ -193,8 +196,7 @@ svn_fs_fs__l2p_get_max_ids(apr_array_hea
                            apr_size_t count,
                            apr_pool_t *pool);
 
-/* In *OFFSET, return the first OFFSET in the pack / rev file containing
- * REVISION in FS not covered by the log-to-phys index.
+/* In *OFFSET, return the size of the pack / rev file containing REVISION in FS.
  * Use POOL for allocations.
  */
 svn_error_t *

Modified: subversion/branches/log-addressing/subversion/libsvn_fs_fs/structure-indexes
URL: http://svn.apache.org/viewvc/subversion/branches/log-addressing/subversion/libsvn_fs_fs/structure-indexes?rev=1516325&r1=1516324&r2=1516325&view=diff
==============================================================================
--- subversion/branches/log-addressing/subversion/libsvn_fs_fs/structure-indexes (original)
+++ subversion/branches/log-addressing/subversion/libsvn_fs_fs/structure-indexes Wed Aug 21 23:14:21 2013
@@ -196,7 +196,7 @@ All entries are pairs of 64 bit unsigned
 Phys-to-log index
 =================
 
-This index has to map offset -> (rev, item_index, type, len).
+This index has to map offset -> (rev, item_index, type, len, checksum).
 
 
 Index data model
@@ -228,6 +228,7 @@ hierarchy:
 header:
 
   <first revision>   ... first revision covered by this index file
+  <file size>        ... size of the rev / pack file in bytes
   <page size>        ... number of bytes in the rev / pack file covered by
                          each index page
   <page count>       ... number of pages
@@ -245,6 +246,9 @@ entry:
   <offset>           ... absolute position in the pack / rev file
   <size>             ... on-disk size of the item in the pack / rev file
   <type>             ... item type
+  <FNV checksum>     ... modified 32 bit FNV-1a checksum of that section
+                         of the pack / rev file (see below). 0 for empty
+                         or zero-length items
   <revision>         ... revision that this item belongs to
   <item_index>       ... item_index within that revision
 
@@ -255,17 +259,19 @@ Index file format
   file := header pages items
 
   header := u(<header>.<first revision>) \
+            u(<header>.<file size>) \
             u(<header>.<page size>) \
             u(<header>.<page count>)
 
   pages := u(<header>.<offsets>[k+1] - <header>.<offsets>[k]),
            for k in 0 .. <header>.<page count> -1
 
-  items := u(<items in page k>[0].<offset>)
+  items := u(<items in page k>[0].<offset>) \
            u(<items in page k>[l].<size>) \
            i(c(<items in page k>[l]) - c(<items of page k>[l-1])) \
            i(  <items in page k>[l].<revision>
-             - <items in page k>[l-1].<revision>),
+             - <items in page k>[l-1].<revision>) \
+           u(<items in page k>[l].<FNV checksum>),
            for l in 0 .. s(<items in page k>)-1,
            for k in 0 .. <header>.<page count>-1
 
@@ -299,3 +305,27 @@ the proto index file (e.g. while still w
 with revision set to SVN_INVALID_REVNUM will therefore be automatically
 updated when creating the index file.  This is possible in conjunction
 with rev files but not for pack files.
+
+
+FNV checksum
+------------
+
+FNV-1a can be found here: http://www.isthe.com/chongo/tech/comp/fnv/
+For performance reasons we use a modified version:
+
+* split the input byte stream [b0 .. bN] into 4 sub-streams of equal
+  length and up to 3 remnant bytes:
+
+  [b0 b4 b8 ..], [b1 b5 b9 ..], [b2 b6 b10 ..], [b3 b7 b11 ..], [remnant]
+
+* calculate 32 bit FNV-1a checksums for the 4 substreams:
+
+  h0 = fnv_1a([b0 b4 b8 ..]), ..., h3 = fnv_1a([b3 b7 b11 ..])
+
+* combine the big endian representation of these checksums (4 bytes each)
+  plus the remnant of the original stream into a 16 to 19 byte intermediate
+
+  [i0 .. iK], 15 <= K <= 18
+
+* FNV checksum = fnv_1a([i0 .. iK]) in big endian representation
+



Mime
View raw message