subversion-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From stef...@apache.org
Subject svn commit: r1688924 - in /subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer: logic.c mergeinfo-normalizer.h missing-branches.c
Date Thu, 02 Jul 2015 22:59:35 GMT
Author: stefan2
Date: Thu Jul  2 22:59:34 2015
New Revision: 1688924

URL: http://svn.apache.org/r1688924
Log:
On the svn-mergeinfo-normalizer branch:
Add a utility object that allows us to skip most repository lookups to
check whether a branch still exists.

This is an important performance improvement because each lookup requires
a full network roundtrip and there can be hundreds of paths to check per
working copy node - painful in a WAN.  We now simply keep a list of all
paths known to exist and those known to not exist.  Only if a lookup in
that list fails, will we actually contact the repository.

* tools/client-side/svn-mergeinfo-normalizer/mergeinfo-normalizer.h
  (svn_min__branch_lookup_t,
   svn_min__branch_lookup_create,
   svn_min__branch_lookup): Declare new internal API.

* tools/client-side/svn-mergeinfo-normalizer/missing-branches.c
  New file implementing the new internal API.

* tools/client-side/svn-mergeinfo-normalizer/logic.c
  (remove_obsolete_lines): Take the new lookup structure instead of a plain
                           session and use it for efficient path checks.
  (normalize): Update session / lookup pass-through.
  (svn_min__run_normalize): Wrap the session into the new lookup structure.

Added:
    subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/missing-branches.c
  (with props)
Modified:
    subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/logic.c
    subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/mergeinfo-normalizer.h

Modified: subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/logic.c
URL: http://svn.apache.org/viewvc/subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/logic.c?rev=1688924&r1=1688923&r2=1688924&view=diff
==============================================================================
--- subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/logic.c
(original)
+++ subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/logic.c
Thu Jul  2 22:59:34 2015
@@ -129,7 +129,7 @@ typedef struct progress_t
 } progress_t;
 
 static svn_error_t *
-remove_obsolete_lines(svn_ra_session_t *session,
+remove_obsolete_lines(svn_min__branch_lookup_t *lookup,
                       svn_mergeinfo_t mergeinfo,
                       svn_min__opt_state_t *opt_state,
                       progress_t *progress,
@@ -151,12 +151,11 @@ remove_obsolete_lines(svn_ra_session_t *
        hi = apr_hash_next(hi))
     {
       const char *path = apr_hash_this_key(hi);
-      svn_node_kind_t kind;
+      svn_boolean_t deleted;
 
-      SVN_ERR_ASSERT(*path == '/');
-      SVN_ERR(svn_ra_check_path(session, path + 1, SVN_INVALID_REVNUM, &kind,
-                                scratch_pool));
-      if (kind == svn_node_none)
+      SVN_ERR(svn_min__branch_lookup(&deleted, lookup, path, FALSE,
+                                     scratch_pool));
+      if (deleted)
         APR_ARRAY_PUSH(to_remove, const char *) = path;
     }
 
@@ -290,7 +289,7 @@ progress_string(const progress_t *progre
 static svn_error_t *
 normalize(apr_array_header_t *wc_mergeinfo,
           svn_min__log_t *log,
-          svn_ra_session_t *session,
+          svn_min__branch_lookup_t *lookup,
           svn_min__opt_state_t *opt_state,
           apr_pool_t *scratch_pool)
 {
@@ -310,7 +309,7 @@ normalize(apr_array_header_t *wc_mergein
       progress.nodes_todo = i;
 
       /* Eliminate entries for deleted branches. */
-      SVN_ERR(remove_obsolete_lines(session,
+      SVN_ERR(remove_obsolete_lines(lookup,
                                     svn_min__get_mergeinfo(wc_mergeinfo, i),
                                     opt_state, &progress, iterpool));
 
@@ -325,7 +324,7 @@ normalize(apr_array_header_t *wc_mergein
 
           /* Eliminate entries for deleted branches such that parent and
              sub-node mergeinfo align again. */
-          SVN_ERR(remove_obsolete_lines(session, parent_mergeinfo,
+          SVN_ERR(remove_obsolete_lines(lookup, parent_mergeinfo,
                                         opt_state, &progress, iterpool));
 
           parent_mergeinfo_copy = svn_mergeinfo_dup(parent_mergeinfo,
@@ -419,7 +418,7 @@ svn_min__run_normalize(apr_getopt_t *os,
     {
       apr_array_header_t *wc_mergeinfo;
       svn_min__log_t *log = NULL;
-      svn_ra_session_t *session = NULL;
+      svn_min__branch_lookup_t *lookup = NULL;
       const char *url;
       const char *common_path;
 
@@ -450,11 +449,14 @@ svn_min__run_normalize(apr_getopt_t *os,
       /* open RA session */
       if (needs_session(cmd_baton->opt_state))
         {
+          svn_ra_session_t *session;
+
           svn_pool_clear(subpool);
           SVN_ERR(svn_min__add_wc_info(baton, i, iterpool, subpool));
           SVN_ERR(svn_client_open_ra_session2(&session, cmd_baton->repo_root,
                                               NULL, cmd_baton->ctx, iterpool,
                                               subpool));
+          lookup = svn_min__branch_lookup_create(session, iterpool);
         }
 
       /* actual normalization */
@@ -464,7 +466,7 @@ svn_min__run_normalize(apr_getopt_t *os,
                                                    subpool),
                                   stdout, subpool));
 
-      SVN_ERR(normalize(wc_mergeinfo, log, session, cmd_baton->opt_state,
+      SVN_ERR(normalize(wc_mergeinfo, log, lookup, cmd_baton->opt_state,
                         subpool));
 
       /* write results to disk */

Modified: subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/mergeinfo-normalizer.h
URL: http://svn.apache.org/viewvc/subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/mergeinfo-normalizer.h?rev=1688924&r1=1688923&r2=1688924&view=diff
==============================================================================
--- subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/mergeinfo-normalizer.h
(original)
+++ subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/mergeinfo-normalizer.h
Thu Jul  2 22:59:34 2015
@@ -179,11 +179,25 @@ svn_error_t *
 svn_min__print_log_stats(svn_min__log_t *log,
                          apr_pool_t *scratch_pool);
 
+typedef struct svn_min__branch_lookup_t svn_min__branch_lookup_t;
+
+svn_min__branch_lookup_t *
+svn_min__branch_lookup_create(svn_ra_session_t *session,
+                              apr_pool_t *result_pool);
+
+svn_error_t *
+svn_min__branch_lookup(svn_boolean_t *deleted,
+                       svn_min__branch_lookup_t *lookup,
+                       const char *branch,
+                       svn_boolean_t local_only,
+                       apr_pool_t *scratch_pool);
+
 svn_error_t *
 svn_min__run_normalize(apr_getopt_t *os,
                        void *baton,
                        apr_pool_t *pool);
 
+
 #ifdef __cplusplus
 }
 #endif /* __cplusplus */

Added: subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/missing-branches.c
URL: http://svn.apache.org/viewvc/subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/missing-branches.c?rev=1688924&view=auto
==============================================================================
--- subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/missing-branches.c
(added)
+++ subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/missing-branches.c
Thu Jul  2 22:59:34 2015
@@ -0,0 +1,266 @@
+/*
+ * missing-branches.c -- Efficiently scan for missing branches.
+ *
+ * ====================================================================
+ *    Licensed to the Apache Software Foundation (ASF) under one
+ *    or more contributor license agreements.  See the NOTICE file
+ *    distributed with this work for additional information
+ *    regarding copyright ownership.  The ASF licenses this file
+ *    to you under the Apache License, Version 2.0 (the
+ *    "License"); you may not use this file except in compliance
+ *    with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *    Unless required by applicable law or agreed to in writing,
+ *    software distributed under the License is distributed on an
+ *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *    KIND, either express or implied.  See the License for the
+ *    specific language governing permissions and limitations
+ *    under the License.
+ * ====================================================================
+ */
+
+/* ==================================================================== */
+
+
+
+/*** Includes. ***/
+
+#include <assert.h>
+
+#include "svn_hash.h"
+#include "svn_pools.h"
+#include "private/svn_subr_private.h"
+
+#include "mergeinfo-normalizer.h"
+
+
+/*** Code. ***/
+
+struct svn_min__branch_lookup_t
+{
+  /* Connection to the repository where we are looking for paths. */
+  svn_ra_session_t *session;
+
+  /* Keyed by const char * FS paths that are known not to exist.
+     It is implied that sub-paths won't and can't exist either. */
+  apr_hash_t *deleted;
+
+  /* Keyed by const char * FS paths that are known to exist. */
+  apr_hash_t *existing;
+};
+
+/* Return the location of the last '/' in PATH before LEN.
+   Return 0 for root and empty paths.  PATH must be a canonical FS path. */
+static apr_size_t
+parent_segment(const char *path,
+               apr_size_t len)
+{
+  assert(path[0] == '/');
+
+  if (len <= 1)
+    return 0;
+
+  --len;
+  while (path[len] != '/')
+    --len;
+
+  return len;
+}
+
+/* Look for BRANCH in LOOKUP without connecting to the server.  Return
+ * svn_tristate_true, if it is known to exist, svn_tristate_false if it is
+ * known to not exist.  Otherwise return svn_tristate_unknown. */
+static svn_tristate_t
+local_lookup(const svn_min__branch_lookup_t *lookup,
+             const char *branch)
+{
+  apr_size_t len;
+
+  /* Non-canonical paths are bad but we let the remote lookup take care of
+   * them.  Our hashes simply have no info on them. */
+  if (branch[0] != '/')
+    return svn_tristate_unknown;
+
+  /* Hard-coded: "/" always exists. */
+  if (branch[1] == '\0')
+    return svn_tristate_true;
+
+  /* For every existing path that we encountered, there is an entry in the
+     EXISTING hash.  So, we can just use that. */
+  len = strlen(branch);
+  if (apr_hash_get(lookup->existing, branch, len))
+    return svn_tristate_true;
+
+  /* Not known to exist and might be known to not exist.  We only record
+     the top level deleted directory for DELETED branches, so we need to
+     walk up the path until we either find that deletion or an existing
+     path.  In the latter case, we don't know what happened to the levels
+     below that, including BRANCH. */
+  while (len > 0)
+    {
+      /* Known deleted?  Note that we checked BRANCH for existence but not
+         for deletion, yet. */
+      if (apr_hash_get(lookup->deleted, branch, len))
+        return svn_tristate_false;
+
+      /* Parent known to exist?
+         Then, we don't know what happened to the BRANCH. */
+      len = parent_segment(branch, len);
+
+      if (apr_hash_get(lookup->existing, branch, len))
+        return svn_tristate_unknown;
+    }
+
+  /* We don't know. */
+  return svn_tristate_unknown;
+}
+
+/* Set *DELETED to TRUE, if PATH can't be found at HEAD in SESSION.
+   Use SCRATCH_POOL for temporary allocations. */
+static svn_error_t *
+path_deleted(svn_boolean_t *deleted,
+            svn_ra_session_t *session,
+            const char *path,
+            apr_pool_t *scratch_pool)
+{
+  svn_node_kind_t kind;
+
+  SVN_ERR_ASSERT(*path == '/');
+  SVN_ERR(svn_ra_check_path(session, path + 1, SVN_INVALID_REVNUM, &kind,
+                            scratch_pool));
+  *deleted = kind == svn_node_none;
+
+  return SVN_NO_ERROR;
+}
+
+/* Chop the last segment off PATH.  PATH must be a canonical FS path.
+   No-op for the root path. */
+static void
+to_parent(svn_stringbuf_t *path)
+{
+  path->len = parent_segment(path->data, path->len);
+  if (path->len == 0)
+    path->len = 1;
+
+  path->data[path->len] = '\0';
+}
+
+/* Contact the repository used by LOOKUP and set *DELETED to TRUE, if path
+   BRANCH does not exist at HEAD.  Cache the lookup results in LOOKUP and
+   use SCRATCH_POOL for temporary allocations.  Call this only if
+   local_lookup returned svn_tristate_unknown. */
+static svn_error_t *
+remote_lookup(svn_boolean_t *deleted,
+              const svn_min__branch_lookup_t *lookup,
+              const char *branch,
+              apr_pool_t *scratch_pool)
+{
+  svn_stringbuf_t *path = svn_stringbuf_create(branch, scratch_pool);
+  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
+
+  /* We shall call this function only after the local lookup failed. */
+  assert(local_lookup(lookup, branch) == svn_tristate_unknown);
+
+  /* Actual repository lookup. */
+  SVN_ERR(path_deleted(deleted, lookup->session, branch, scratch_pool));
+
+  /* If the path did not exist, store the furthest non-existent parent. */
+  if (*deleted)
+    {
+      svn_boolean_t parent_deleted;
+      const char *deleted_path;
+      apr_size_t len;
+
+      /* Find the closest parent that does exist.
+        "/" exists, hence, this will terminate. */
+      do
+        {
+          svn_pool_clear(iterpool);
+
+          len = path->len;
+          to_parent(path);
+
+          /* We often know "/branches" etc. to exist.  So, we can skip
+             the final lookup in that case. */
+          if (local_lookup(lookup, path->data) == svn_tristate_true)
+            break;
+
+          /* Get the info from the repository. */
+          SVN_ERR(path_deleted(&parent_deleted, lookup->session, path->data,
+                               iterpool));
+        }
+      while (parent_deleted);
+
+      /* PATH exists, its sub-path of length LEN does not. */
+      deleted_path = apr_pstrmemdup(apr_hash_pool_get(lookup->deleted),
+                                    branch, len);
+      apr_hash_set(lookup->deleted, deleted_path, len, deleted_path);
+    }
+
+  /* PATH and all its parents exist. Add them to the EXISTING hash.
+     Make sure to allocate only the longest path and then reference
+     sub-sequences of it to keep memory usage in check. */
+  if (!apr_hash_get(lookup->existing, path->data, path->len))
+    {
+      const char *hash_path
+        = apr_pstrmemdup(apr_hash_pool_get(lookup->existing), path->data,
+                         path->len);
+
+      /* Note that we don't need to check for existing entries here because
+         the APR hash will reuse existing nodes and we are not allocating
+         anything else here.  So, this does not allocate duplicate nodes. */
+      for (; path->len > 1; to_parent(path))
+        apr_hash_set(lookup->existing, hash_path, path->len, hash_path);
+    }
+
+  svn_pool_destroy(iterpool);
+
+  return SVN_NO_ERROR;
+}
+
+svn_min__branch_lookup_t *
+svn_min__branch_lookup_create(svn_ra_session_t *session,
+                              apr_pool_t *result_pool)
+{
+  svn_min__branch_lookup_t *result = apr_pcalloc(result_pool,
+                                                 sizeof(*result));
+  result->session = session;
+  result->deleted = svn_hash__make(result_pool);
+  result->existing = svn_hash__make(result_pool);
+
+  return result;
+}
+
+svn_error_t *
+svn_min__branch_lookup(svn_boolean_t *deleted,
+                       svn_min__branch_lookup_t *lookup,
+                       const char *branch,
+                       svn_boolean_t local_only,
+                       apr_pool_t *scratch_pool)
+{
+  switch (local_lookup(lookup, branch))
+    {
+      case svn_tristate_false:
+        *deleted = TRUE;
+        return SVN_NO_ERROR;
+
+      case svn_tristate_true:
+        *deleted = FALSE;
+        return SVN_NO_ERROR;
+
+      default:
+        /* If the state is unknown and we are only allowed to do a local
+           lookup, default to a possible false negative. */
+        if (local_only)
+          {
+            *deleted = FALSE;
+            return SVN_NO_ERROR;
+          }
+    }
+
+  return svn_error_trace(remote_lookup(deleted, lookup, branch,
+                                       scratch_pool));
+}
+

Propchange: subversion/branches/svn-mergeinfo-normalizer/tools/client-side/svn-mergeinfo-normalizer/missing-branches.c
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message