subversion-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From stef...@apache.org
Subject svn commit: r1634875 - /subversion/trunk/subversion/libsvn_fs_fs/revprops.c
Date Tue, 28 Oct 2014 13:19:30 GMT
Author: stefan2
Date: Tue Oct 28 13:19:30 2014
New Revision: 1634875

URL: http://svn.apache.org/r1634875
Log:
Speed up packed revprop access by tuning the manifest file parser.

The core loop of this accounted for approx. 40% of 'svn log -v' with
50M iterations/s.  Because the number of entries to parse is fixed
by the number of revisions in the shard, the only option here is to
rewrite that loop eliminating any overhead in it.

We put file size and contents guarantees in front of the loop.  Some
major contribution comes from the fact that (usually) all lines have
the same predictable length (the few that don't are slightly longer).
So, we can efficiently replace strchr().  Finally, we access source
and target buffers as plain C arrays.

* subversion/libsvn_fs_fs/revprops.c
  (get_min_filename_len): New utility function.
  (get_revprop_packname): Mainly rewrite.

Modified:
    subversion/trunk/subversion/libsvn_fs_fs/revprops.c

Modified: subversion/trunk/subversion/libsvn_fs_fs/revprops.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/revprops.c?rev=1634875&r1=1634874&r2=1634875&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_fs_fs/revprops.c (original)
+++ subversion/trunk/subversion/libsvn_fs_fs/revprops.c Tue Oct 28 13:19:30 2014
@@ -280,6 +280,19 @@ read_non_packed_revprop(apr_hash_t **pro
   return SVN_NO_ERROR;
 }
 
+/* Return the minimum length of any packed revprop file name in REVPROPS. */
+static apr_size_t
+get_min_filename_len(packed_revprops_t *revprops)
+{
+  char number_buffer[SVN_INT64_BUFFER_SIZE];
+
+  /* The revprop filenames have the format <REV>.<COUNT> - with <REV> being
+   * at least the first rev in the shard and <COUNT> having at least one
+   * digit.  Thus, the minimum is 2 + #decimal places in the start rev.
+   */
+  return svn__i64toa(number_buffer, revprops->manifest_start) + 2;
+}
+
 /* Given FS and REVPROPS->REVISION, fill the FILENAME, FOLDER and MANIFEST
  * members. Use POOL for allocating results and SCRATCH_POOL for temporaries.
  */
@@ -292,9 +305,27 @@ get_revprop_packname(svn_fs_t *fs,
   fs_fs_data_t *ffd = fs->fsap_data;
   svn_stringbuf_t *content = NULL;
   const char *manifest_file_path;
-  int idx;
+  int idx, rev_count;
+  char *buffer, *buffer_end;
+  const char **filenames, **filenames_end;
+  apr_size_t min_filename_len;
+
+  /* Determine the dimensions. Rev 0 is excluded from the first shard. */
+  rev_count = ffd->max_files_per_dir;
+  revprops->manifest_start
+    = revprops->revision - (revprops->revision % rev_count);
+  if (revprops->manifest_start == 0)
+    {
+      ++revprops->manifest_start;
+      --rev_count;
+    }
+
+  revprops->manifest = apr_array_make(pool, rev_count, sizeof(const char*));
 
-  /* read content of the manifest file */
+  /* No line in the file can be less than this number of chars long. */
+  min_filename_len = get_min_filename_len(revprops);
+
+  /* Read the content of the manifest file */
   revprops->folder
     = svn_fs_fs__path_revprops_pack_shard(fs, revprops->revision, pool);
   manifest_file_path
@@ -302,37 +333,68 @@ get_revprop_packname(svn_fs_t *fs,
 
   SVN_ERR(svn_fs_fs__read_content(&content, manifest_file_path, pool));
 
-  /* parse the manifest. Every line is a file name */
-  revprops->manifest = apr_array_make(pool, ffd->max_files_per_dir,
-                                      sizeof(const char*));
-
-  /* Read all lines.  Since the last line ends with a newline, we will
-     end up with a valid but empty string after the last entry. */
-  while (content->data && *content->data)
+  /* There CONTENT must have a certain minimal size and there no
+   * unterminated lines at the end of the file.  Both guarantees also
+   * simplify the parser loop below.
+   */
+  if (   content->len < rev_count * (min_filename_len + 1)
+      || content->data[content->len - 1] != '\n')
+    return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
+                             _("Packed revprop manifest for r%ld not "
+                               "properly terminated"), revprops->revision);
+
+  /* Chop (parse) the manifest CONTENT into filenames, one per line.
+   * We only have to replace all newlines with NUL and add all line
+   * starts to REVPROPS->MANIFEST.
+   *
+   * There must be exactly REV_COUNT lines and that is the number of
+   * lines we parse from BUFFER to FILENAMES.  Set the end pointer for
+   * the source BUFFER such that BUFFER+MIN_FILENAME_LEN is still valid
+   * BUFFER_END is always valid due to CONTENT->LEN > MIN_FILENAME_LEN.
+   *
+   * Please note that this loop is performance critical for e.g. 'svn log'.
+   * It is run 1000x per revprop access, i.e. per revision and about
+   * 50 million times per sec (and CPU core).
+   */
+  for (filenames = (const char **)revprops->manifest->elts,
+       filenames_end = filenames + rev_count,
+       buffer = content->data,
+       buffer_end = buffer + content->len - min_filename_len;
+       (filenames < filenames_end) && (buffer < buffer_end);
+       ++filenames)
     {
-      APR_ARRAY_PUSH(revprops->manifest, const char*) = content->data;
-      content->data = strchr(content->data, '\n');
-      if (content->data)
-        {
-          *content->data = 0;
-          content->data++;
-        }
+      /* BUFFER always points to the start of the next line / filename. */
+      *filenames = buffer;
+
+      /* Find the next EOL.  This is guaranteed to stay within the CONTENT
+       * buffer because we left enough room after BUFFER_END and we know
+       * we will always see a newline as the last non-NUL char. */
+      buffer += min_filename_len;
+      while (*buffer != '\n')
+        ++buffer;
+
+      /* Found EOL.  Turn it into the filename terminator and move BUFFER
+       * to the start of the next line or CONTENT buffer end. */
+      *buffer = '\0';
+      ++buffer;
     }
-  content = NULL; /* No longer a valid stringbuf. */
 
-  /* Index for our revision. Rev 0 is excluded from the first shard. */
-  revprops->manifest_start = revprops->revision
-                           - (revprops->revision % ffd->max_files_per_dir);
-  if (revprops->manifest_start == 0)
-    ++revprops->manifest_start;
-  idx = (int)(revprops->revision - revprops->manifest_start);
+  /* We must have reached the end of both buffers. */
+  if (buffer < content->data + content->len)
+    return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
+                             _("Packed revprop manifest for r%ld "
+                               "has too many entries"), revprops->revision);
 
-  if (revprops->manifest->nelts <= idx)
+  if (filenames < filenames_end)
     return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
-                             _("Packed revprop manifest for r%ld too "
-                               "small"), revprops->revision);
+                             _("Packed revprop manifest for r%ld "
+                               "has too few entries"), revprops->revision);
+
+  /* The target array has now exactly one entry per revision. */
+  revprops->manifest->nelts = rev_count;
 
   /* Now get the file name */
+  idx = (int)(revprops->revision - revprops->manifest_start);
   revprops->filename = APR_ARRAY_IDX(revprops->manifest, idx, const char*);
 
   return SVN_NO_ERROR;



Mime
View raw message