subversion-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gst...@apache.org
Subject svn commit: r952493 - in /subversion/trunk/subversion/libsvn_wc: wc-queries.sql wc_db.c
Date Tue, 08 Jun 2010 00:47:22 GMT
Author: gstein
Date: Tue Jun  8 00:47:22 2010
New Revision: 952493

URL: http://svn.apache.org/viewvc?rev=952493&view=rev
Log:
The query that we used to fetch all children in BASE_NODE and WORKING_NODE
used a UNION between two SELECT statements. The idea was to have SQLite
remove all duplicates for us in a single query. Unfortunately, this caused
SQLite to create an ephemeral (temporary) table and place the results of
each query into that table. It created an index to remove dupliates. Then
it returned the values in that ephemeral table. For large numbers of
nodes, the construction of the table and its index becomes very costly.

This change rebuilds gather_children() in wc_db.c to do the duplicate
removal manually using a hash table. It does some simple scanning straight
into an array when it knows duplicates cannot exist (one of BASE or
WORKING is empty).

The performance problem of svn_wc__db_read_children() was first observed
in issue #3499. The actual performance improvement is untested so far, but
I'm assuming pburba can pick up this change and try in his scenario.

* subversion/libsvn_wc/wc_db.c:
  (count_children): new helper to count the number of children of a given
    PARENT_RELPATH within a specific table.
  (add_children_to_hash): new helper to scan children, placing their names
    into a hash table as keys (and the mapped values).
  (union_children): new helper to scan both BASE_NODE and WORKING_NODE and
    manually create a union of the resulting names using a hash table.
  (single_table_children): new helper to return the children from a single
    table.
  (gather_children): rebuilt in terms of the above helpers

* subversion/libsvn_wc/wc-queries.sql:
  (STMT_COUNT_BASE_NODE_CHILDREN, STMT_COUNT_WORKING_NODE_CHILDREN): new
    statements to count the number of children for a given parent_relpath
  (STMT_SELECT_WORKING_CHILDREN): removed
  (STMT_SELECT_WORKING_NODE_CHILDREN): like STMT_SELECT_BASE_NODE_CHILDREN,
    this scan all children in WORKING_NODE for a given parent_relpath

Modified:
    subversion/trunk/subversion/libsvn_wc/wc-queries.sql
    subversion/trunk/subversion/libsvn_wc/wc_db.c

Modified: subversion/trunk/subversion/libsvn_wc/wc-queries.sql
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_wc/wc-queries.sql?rev=952493&r1=952492&r2=952493&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_wc/wc-queries.sql (original)
+++ subversion/trunk/subversion/libsvn_wc/wc-queries.sql Tue Jun  8 00:47:22 2010
@@ -87,16 +87,21 @@ INSERT OR IGNORE INTO WORKING_NODE (
   wc_id, local_relpath, parent_relpath, presence, kind)
 VALUES (?1, ?2, ?3, 'incomplete', 'unknown');
 
+-- STMT_COUNT_BASE_NODE_CHILDREN
+SELECT COUNT(*) FROM BASE_NODE
+WHERE wc_id = ?1 AND parent_relpath = ?2;
+
+-- STMT_COUNT_WORKING_NODE_CHILDREN
+SELECT COUNT(*) FROM WORKING_NODE
+WHERE wc_id = ?1 AND parent_relpath = ?2;
+
 -- STMT_SELECT_BASE_NODE_CHILDREN
 select local_relpath from base_node
 where wc_id = ?1 and parent_relpath = ?2;
 
--- STMT_SELECT_WORKING_CHILDREN
-select local_relpath from base_node
-where wc_id = ?1 and parent_relpath = ?2
-union
-select local_relpath from working_node
-where wc_id = ?1 and parent_relpath = ?2;
+-- STMT_SELECT_WORKING_NODE_CHILDREN
+SELECT local_relpath FROM WORKING_NODE
+WHERE wc_id = ?1 AND parent_relpath = ?2;
 
 -- STMT_SELECT_WORKING_IS_FILE
 select kind == 'file' from working_node

Modified: subversion/trunk/subversion/libsvn_wc/wc_db.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_wc/wc_db.c?rev=952493&r1=952492&r2=952493&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_wc/wc_db.c (original)
+++ subversion/trunk/subversion/libsvn_wc/wc_db.c Tue Jun  8 00:47:22 2010
@@ -813,40 +813,97 @@ insert_working_node(void *baton,
 }
 
 
-/* */
 static svn_error_t *
-gather_children(const apr_array_header_t **children,
-                svn_boolean_t base_only,
-                svn_wc__db_t *db,
-                const char *local_abspath,
-                apr_pool_t *result_pool,
-                apr_pool_t *scratch_pool)
+count_children(int *count,
+               int stmt_idx,
+               svn_sqlite__db_t *sdb,
+               apr_int64_t wc_id,
+               const char *parent_relpath)
+{
+  svn_sqlite__stmt_t *stmt;
+
+  SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, stmt_idx));
+  SVN_ERR(svn_sqlite__bindf(stmt, "is", wc_id, parent_relpath));
+  SVN_ERR(svn_sqlite__step_row(stmt));
+  *count = svn_sqlite__column_int(stmt, 0);
+  return svn_error_return(svn_sqlite__reset(stmt));
+}
+
+
+/* Each name is allocated in RESULT_POOL and stored into CHILDREN as a key
+   pointed to the same name.  */
+static svn_error_t *
+add_children_to_hash(apr_hash_t *children,
+                     int stmt_idx,
+                     svn_sqlite__db_t *sdb,
+                     apr_int64_t wc_id,
+                     const char *parent_relpath,
+                     apr_pool_t *result_pool)
 {
-  svn_wc__db_pdh_t *pdh;
-  const char *local_relpath;
   svn_sqlite__stmt_t *stmt;
-  apr_array_header_t *child_names;
   svn_boolean_t have_row;
 
-  SVN_ERR_ASSERT(svn_dirent_is_absolute(local_abspath));
+  SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, stmt_idx));
+  SVN_ERR(svn_sqlite__bindf(stmt, "is", wc_id, parent_relpath));
+  SVN_ERR(svn_sqlite__step(&have_row, stmt));
+  while (have_row)
+    {
+      const char *child_relpath = svn_sqlite__column_text(stmt, 0, NULL);
+      const char *name = svn_relpath_basename(child_relpath, result_pool);
 
-  SVN_ERR(svn_wc__db_pdh_parse_local_abspath(&pdh, &local_relpath, db,
-                              local_abspath, svn_sqlite__mode_readonly,
-                              scratch_pool, scratch_pool));
-  VERIFY_USABLE_PDH(pdh);
+      apr_hash_set(children, name, APR_HASH_KEY_STRING, name);
 
-  SVN_ERR(svn_sqlite__get_statement(&stmt, pdh->wcroot->sdb,
-                                    base_only
-                                      ? STMT_SELECT_BASE_NODE_CHILDREN
-                                      : STMT_SELECT_WORKING_CHILDREN));
-  SVN_ERR(svn_sqlite__bindf(stmt, "is", pdh->wcroot->wc_id, local_relpath));
+      SVN_ERR(svn_sqlite__step(&have_row, stmt));
+    }
+
+  return svn_sqlite__reset(stmt);
+}
+
+
+static svn_error_t *
+union_children(const apr_array_header_t **children,
+               svn_sqlite__db_t *sdb,
+               apr_int64_t wc_id,
+               const char *parent_relpath,
+               apr_pool_t *result_pool,
+               apr_pool_t *scratch_pool)
+{
+  /* ### it would be nice to pre-size this hash table.  */
+  apr_hash_t *names = apr_hash_make(scratch_pool);
+  apr_array_header_t *names_array;
+
+  /* All of the names get allocated in RESULT_POOL.  */
+  SVN_ERR(add_children_to_hash(names, STMT_SELECT_BASE_NODE_CHILDREN,
+                               sdb, wc_id, parent_relpath, result_pool));
+  SVN_ERR(add_children_to_hash(names, STMT_SELECT_WORKING_NODE_CHILDREN,
+                               sdb, wc_id, parent_relpath, result_pool));
+
+  SVN_ERR(svn_hash_keys(&names_array, names, result_pool));
+  *children = names_array;
+
+  return SVN_NO_ERROR;
+}
+
+
+static svn_error_t *
+single_table_children(const apr_array_header_t **children,
+                      int stmt_idx,
+                      int start_size,
+                      svn_sqlite__db_t *sdb,
+                      apr_int64_t wc_id,
+                      const char *parent_relpath,
+                      apr_pool_t *result_pool)
+{
+  svn_sqlite__stmt_t *stmt;
+  apr_array_header_t *child_names;
+  svn_boolean_t have_row;
+
+  SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, stmt_idx));
+  SVN_ERR(svn_sqlite__bindf(stmt, "is", wc_id, parent_relpath));
 
   /* ### should test the node to ensure it is a directory */
 
-  /* ### 10 is based on Subversion's average of 8.5 files per versioned
-     ### directory in its repository. maybe use a different value? or
-     ### count rows first?  */
-  child_names = apr_array_make(result_pool, 10, sizeof(const char *));
+  child_names = apr_array_make(result_pool, start_size, sizeof(const char *));
 
   SVN_ERR(svn_sqlite__step(&have_row, stmt));
   while (have_row)
@@ -866,6 +923,76 @@ gather_children(const apr_array_header_t
 
 
 /* */
+static svn_error_t *
+gather_children(const apr_array_header_t **children,
+                svn_boolean_t base_only,
+                svn_wc__db_t *db,
+                const char *local_abspath,
+                apr_pool_t *result_pool,
+                apr_pool_t *scratch_pool)
+{
+  svn_wc__db_pdh_t *pdh;
+  const char *local_relpath;
+  int base_count;
+  int working_count;
+
+  SVN_ERR_ASSERT(svn_dirent_is_absolute(local_abspath));
+
+  SVN_ERR(svn_wc__db_pdh_parse_local_abspath(&pdh, &local_relpath, db,
+                                             local_abspath,
+                                             svn_sqlite__mode_readonly,
+                                             scratch_pool, scratch_pool));
+  VERIFY_USABLE_PDH(pdh);
+
+  if (base_only)
+    {
+      /* 10 is based on Subversion's average of 8.7 files per versioned
+         directory in its repository.
+
+         ### note "files". should redo count with subdirs included */
+      return svn_error_return(single_table_children(
+                                children, STMT_SELECT_BASE_NODE_CHILDREN,
+                                10 /* start_size */,
+                                pdh->wcroot->sdb, pdh->wcroot->wc_id,
+                                local_relpath, result_pool));
+    }
+
+  SVN_ERR(count_children(&base_count, STMT_COUNT_BASE_NODE_CHILDREN,
+                         pdh->wcroot->sdb, pdh->wcroot->wc_id, local_relpath));
+  SVN_ERR(count_children(&working_count, STMT_COUNT_WORKING_NODE_CHILDREN,
+                         pdh->wcroot->sdb, pdh->wcroot->wc_id, local_relpath));
+
+  if (base_count == 0)
+    {
+      if (working_count == 0)
+        {
+          *children = apr_array_make(result_pool, 0, sizeof(const char *));
+          return SVN_NO_ERROR;
+        }
+
+      return svn_error_return(single_table_children(
+                                children, STMT_SELECT_WORKING_NODE_CHILDREN,
+                                working_count,
+                                pdh->wcroot->sdb, pdh->wcroot->wc_id,
+                                local_relpath, result_pool));
+    }
+  if (working_count == 0)
+    return svn_error_return(single_table_children(
+                              children, STMT_SELECT_BASE_NODE_CHILDREN,
+                              base_count,
+                              pdh->wcroot->sdb, pdh->wcroot->wc_id,
+                              local_relpath, result_pool));
+
+  /* ### it would be nice to pass BASE_COUNT and WORKING_COUNT, but there is
+     ### nothing union_children() can do with those.  */
+  return svn_error_return(union_children(children, 
+                                         pdh->wcroot->sdb, pdh->wcroot->wc_id,
+                                         local_relpath,
+                                         result_pool, scratch_pool));
+}
+
+
+/* */
 static void
 flush_entries(const svn_wc__db_pdh_t *pdh)
 {



Mime
View raw message