subversion-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jcor...@apache.org
Subject svn commit: r1062532 - /subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff_file.c
Date Sun, 23 Jan 2011 21:02:00 GMT
Author: jcorvel
Date: Sun Jan 23 21:02:00 2011
New Revision: 1062532

URL: http://svn.apache.org/viewvc?rev=1062532&view=rev
Log:
On the diff-optimizations-bytes branch:

Speed up prefix/suffix scanning by processing data within chunks in separate
loops, and reading and comparing with machine-word granularity.

This is still implemented in a generic way, for an arbitrary number of files
(no special case code for file_len == 2). However, it's quite heavy code this
way (and a lot slower than having special case code), so I'm mainly
recording this for posterity :-), and will probably replace it soon with
three special case versions (for 2, 3 and 4 files).

* subversion/libsvn_diff/diff_file.c
  (contains_eol): Quickly check if an apr_size_t chunk contains an eol
   character (mainly copy-n-paste from eol.c#svn_eol__find_eol_start).
  (find_identical_prefix): Inside a chunk, process data between eol characters
   in a separate loop, reading and comparing with machine-word granularity.
  (find_identical_suffix): Inside a chunk, process data in separate loops,
   reading and comparing with machine-word granularity.

Patch by: stefan2
(Generified by me to implement it for N files.)

Modified:
    subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff_file.c

Modified: subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff_file.c
URL: http://svn.apache.org/viewvc/subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff_file.c?rev=1062532&r1=1062531&r2=1062532&view=diff
==============================================================================
--- subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff_file.c (original)
+++ subversion/branches/diff-optimizations-bytes/subversion/libsvn_diff/diff_file.c Sun Jan
23 21:02:00 2011
@@ -375,6 +375,32 @@ is_one_at_eof(struct file_info file[], a
   return FALSE;
 }
 
+/* Quickly determine whether there is a eol char in CHUNK.
+ * (mainly copy-n-paste from eol.c#svn_eol__find_eol_start).
+ */
+#if APR_SIZEOF_VOIDP == 8
+#  define LOWER_7BITS_SET 0x7f7f7f7f7f7f7f7f
+#  define BIT_7_SET       0x8080808080808080
+#  define R_MASK          0x0a0a0a0a0a0a0a0a
+#  define N_MASK          0x0d0d0d0d0d0d0d0d
+#else
+#  define LOWER_7BITS_SET 0x7f7f7f7f
+#  define BIT_7_SET       0x80808080
+#  define R_MASK          0x0a0a0a0a
+#  define N_MASK          0x0d0d0d0d
+#endif
+
+static svn_boolean_t contains_eol(apr_size_t chunk)
+{
+  apr_size_t r_test = chunk ^ R_MASK;
+  apr_size_t n_test = chunk ^ N_MASK;
+
+  r_test |= (r_test & LOWER_7BITS_SET) + LOWER_7BITS_SET;
+  n_test |= (n_test & LOWER_7BITS_SET) + LOWER_7BITS_SET;
+
+  return (r_test & n_test & BIT_7_SET) != BIT_7_SET;
+}
+
 /* Find the prefix which is identical between all elements of the FILE array.
  * Return the number of prefix lines in PREFIX_LINES.  REACHED_ONE_EOF will be
  * set to TRUE if one of the FILEs reached its end while scanning prefix,
@@ -389,7 +415,7 @@ find_identical_prefix(svn_boolean_t *rea
                       apr_pool_t *pool)
 {
   svn_boolean_t had_cr = FALSE;
-  svn_boolean_t is_match;
+  svn_boolean_t is_match, can_read_word;
   apr_off_t lines = 0;
   apr_size_t i;
 
@@ -416,6 +442,31 @@ find_identical_prefix(svn_boolean_t *rea
 
       INCREMENT_POINTERS(file, file_len, pool);
 
+#if SVN_UNALIGNED_ACCESS_IS_OK
+
+      /* Skip quickly over the stuff between EOLs. */
+      for (i = 0, can_read_word = TRUE; i < file_len; i++)
+        can_read_word = can_read_word 
+                        && (file[i].curp + sizeof(apr_size_t) < file[i].endp);
+      while (can_read_word)
+        {
+          for (i = 1, is_match = TRUE; i < file_len; i++)
+            is_match = is_match
+                       && (   *(const apr_size_t *)file[0].curp
+                           == *(const apr_size_t *)file[i].curp);
+
+          if (!is_match || contains_eol(*(const apr_size_t *)file[0].curp))
+            break;
+
+          for (i = 0; i < file_len; i++)
+            file[i].curp += sizeof(apr_size_t);
+          for (i = 0, can_read_word = TRUE; i < file_len; i++)
+            can_read_word = can_read_word 
+                        && (file[i].curp + sizeof(apr_size_t) < file[i].endp);
+        }
+
+#endif
+
       /* curp == endp indicates EOF (this can only happen with last chunk) */
       *reached_one_eof = is_one_at_eof(file, file_len);
       if (*reached_one_eof)
@@ -477,7 +528,7 @@ find_identical_suffix(struct file_info f
   apr_off_t suffix_min_offset0;
   apr_off_t min_file_size;
   int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP;
-  svn_boolean_t is_match, reached_prefix;
+  svn_boolean_t is_match, can_read, can_read_word, reached_prefix;
   apr_size_t i;
 
   memset(file_for_suffix, 0, sizeof(file_for_suffix));
@@ -527,23 +578,74 @@ find_identical_suffix(struct file_info f
     }
 
   /* Scan backwards until mismatch or until we reach the prefix. */
-  for (i = 1, is_match = TRUE; i < file_len; i++)
-    is_match =
-      is_match && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
-  while (is_match)
+  while (1)
     {
+      /* Initialize the minimum pointer positions. */
+      const char *min_curp[4];
+
+      min_curp[0] = file_for_suffix[0].chunk == suffix_min_chunk0
+                  ? file_for_suffix[0].buffer + suffix_min_offset0 + 1
+                  : file_for_suffix[0].buffer + 1;
+      for (i = 1; i < file_len; i++)
+        min_curp[i] = file_for_suffix[i].buffer + 1;
+
+#if SVN_UNALIGNED_ACCESS_IS_OK
+
+      /* Scan quickly by reading with machine-word granularity. */
+      for (i = 0, can_read_word = TRUE; i < file_len; i++)
+        can_read_word = can_read_word 
+                        && (   file_for_suffix[i].curp - sizeof(apr_size_t)
+                            >= min_curp[i]);
+      if (can_read_word)
+        {
+          do
+            {
+              for (i = 0; i < file_len; i++)
+                file_for_suffix[i].curp -= sizeof(apr_size_t);
+
+              for (i = 0, can_read_word = TRUE; i < file_len; i++)
+                can_read_word = can_read_word 
+                                && (   file_for_suffix[i].curp - sizeof(apr_size_t)
+                                    >= min_curp[i]);
+              for (i = 1, is_match = TRUE; i < file_len; i++)
+                is_match = is_match
+                           && (   *(const apr_size_t *)(file_for_suffix[0].curp +
1)
+                               == *(const apr_size_t *)(file_for_suffix[i].curp + 1));
+            } while (can_read_word && is_match);
+
+            for (i = 0; i < file_len; i++)
+              file_for_suffix[i].curp += sizeof(apr_size_t);
+        }
+
+#endif
+
+      for (i = 1, is_match = TRUE; i < file_len; i++)
+        is_match =
+          is_match && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
+      for (i = 0, can_read = TRUE; i < file_len; i++)
+        can_read = can_read && (file_for_suffix[i].curp > min_curp[i]);
+      while (is_match && can_read)
+        {
+          for (i = 0; i < file_len; i++)
+            file_for_suffix[i].curp--;
+          for (i = 1, is_match = TRUE; i < file_len; i++)
+            is_match =
+              is_match && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
+          for (i = 0, can_read = TRUE; i < file_len; i++)
+            can_read = can_read && (file_for_suffix[i].curp > min_curp[i]);
+        }
+
+      if (!is_match)
+        break;
+
       DECREMENT_POINTERS(file_for_suffix, file_len, pool);
-      
+
       reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0 
                        && (file_for_suffix[0].curp - file_for_suffix[0].buffer)
                           == suffix_min_offset0;
 
       if (reached_prefix || is_one_at_bof(file_for_suffix, file_len))
         break;
-      else
-        for (i = 1, is_match = TRUE; i < file_len; i++)
-          is_match =
-            is_match && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
     }
 
   /* Slide one byte forward, to point at the first byte of identical suffix */



Mime
View raw message