Return-Path: X-Original-To: apmail-subversion-commits-archive@minotaur.apache.org Delivered-To: apmail-subversion-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 0EBA4116AD for ; Sun, 18 May 2014 12:05:42 +0000 (UTC) Received: (qmail 90775 invoked by uid 500); 18 May 2014 11:40:42 -0000 Delivered-To: apmail-subversion-commits-archive@subversion.apache.org Received: (qmail 82657 invoked by uid 500); 18 May 2014 11:15:41 -0000 Mailing-List: contact commits-help@subversion.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@subversion.apache.org Delivered-To: mailing list commits@subversion.apache.org Received: (qmail 81910 invoked by uid 99); 18 May 2014 11:14:26 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 18 May 2014 11:14:26 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 18 May 2014 11:14:24 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 0DCF22388999; Sun, 18 May 2014 11:14:00 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1595589 - in /subversion/trunk/subversion/libsvn_fs_x: ./ cached_data.c cached_data.h transaction.c Date: Sun, 18 May 2014 11:13:59 -0000 To: commits@subversion.apache.org From: stefan2@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20140518111400.0DCF22388999@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: stefan2 Date: Sun May 18 11:13:59 2014 New Revision: 1595589 URL: http://svn.apache.org/r1595589 Log: Sync'ing FSX with FSFS: Merge 
r1554942,1555297,1555340,1575453,1575622,1580406,1580623,1580629, 1590383 from /subversion/libsvn_fs_fs into subversion/libsvn_fs_x. Conflicts were due to structural and naming differences between FSX and FSFS. This ports the delta window caching and usage patches. Modified: subversion/trunk/subversion/libsvn_fs_x/ (props changed) subversion/trunk/subversion/libsvn_fs_x/cached_data.c subversion/trunk/subversion/libsvn_fs_x/cached_data.h subversion/trunk/subversion/libsvn_fs_x/transaction.c Propchange: subversion/trunk/subversion/libsvn_fs_x/ ------------------------------------------------------------------------------ Merged /subversion/trunk/subversion/libsvn_fs_fs:r1554942,1555297,1555340,1575453,1575622,1580406,1580623,1580629,1590383 Modified: subversion/trunk/subversion/libsvn_fs_x/cached_data.c URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_x/cached_data.c?rev=1595589&r1=1595588&r2=1595589&view=diff ============================================================================== --- subversion/trunk/subversion/libsvn_fs_x/cached_data.c (original) +++ subversion/trunk/subversion/libsvn_fs_x/cached_data.c Sun May 18 11:13:59 2014 @@ -723,13 +723,21 @@ svn_fs_x__check_rep(representation_t *re Do any allocations in POOL. */ svn_error_t * svn_fs_x__rep_chain_length(int *chain_length, + int *shard_count, representation_t *rep, svn_fs_t *fs, apr_pool_t *pool) { - int count = 0; + fs_x_data_t *ffd = fs->fsap_data; + svn_revnum_t shard_size = ffd->max_files_per_dir + ? ffd->max_files_per_dir + : 1; apr_pool_t *sub_pool = svn_pool_create(pool); svn_boolean_t is_delta = FALSE; + int count = 0; + int shards = 1; + svn_revnum_t revision = svn_fs_x__get_revnum(rep->id.change_set); + svn_revnum_t last_shard = revision / shard_size; /* Check whether the length of the deltification chain is acceptable. 
* Otherwise, shared reps may form a non-skipping delta chain in @@ -746,6 +754,13 @@ svn_fs_x__rep_chain_length(int *chain_le do { rep_state_t *rep_state; + revision = svn_fs_x__get_revnum(base_rep.id.change_set); + if (revision / shard_size != last_shard) + { + last_shard = revision / shard_size; + ++shards; + } + SVN_ERR(create_rep_state_body(&rep_state, &header, &file_hint, @@ -769,6 +784,7 @@ svn_fs_x__rep_chain_length(int *chain_le while (is_delta && base_rep.id.change_set); *chain_length = count; + *shard_count = shards; svn_pool_destroy(sub_pool); return SVN_NO_ERROR; @@ -1501,22 +1517,33 @@ init_rep_state(rep_state_t *rs, /* Walk through all windows in the representation addressed by RS in FS * (excluding the delta bases) and put those not already cached into the - * window caches. As a side effect, return the total sum of all expanded - * window sizes in *FULLTEXT_LEN. Use POOL for temporary allocations. + * window caches. If MAX_OFFSET is not -1, don't read windows that start + * at or beyond that offset. As a side effect, return the total sum of all + * expanded window sizes in *FULLTEXT_LEN. + * Use POOL for temporary allocations. 
*/ static svn_error_t * cache_windows(svn_filesize_t *fulltext_len, svn_fs_t *fs, rep_state_t *rs, + apr_off_t max_offset, apr_pool_t *pool) { + apr_pool_t *iterpool = svn_pool_create(pool); *fulltext_len = 0; while (rs->current < rs->size) { svn_boolean_t is_cached = FALSE; window_sizes_t *window_sizes; - + + svn_pool_clear(iterpool); + if (max_offset != -1 && rs->start + rs->current >= max_offset) + { + svn_pool_destroy(iterpool); + return SVN_NO_ERROR; + } + /* efficiently skip windows that are still being cached instead * of fully decoding them */ SVN_ERR(get_cached_window_sizes(&window_sizes, rs, &is_cached, pool)); @@ -1559,6 +1586,8 @@ cache_windows(svn_filesize_t *fulltext_l rs->chunk_index++; } + svn_pool_destroy(iterpool); + return SVN_NO_ERROR; } @@ -1623,7 +1652,7 @@ svn_fs_x__get_representation_length(svn_ /* RS->FILE may be shared between RS instances -> make sure we point * to the right data. */ *packed_len = rs.size; - SVN_ERR(cache_windows(expanded_len, fs, &rs, pool)); + SVN_ERR(cache_windows(expanded_len, fs, &rs, -1, pool)); return SVN_NO_ERROR; } @@ -1902,6 +1931,48 @@ struct delta_read_baton unsigned char md5_digest[APR_MD5_DIGESTSIZE]; }; +/* This implements the svn_txdelta_next_window_fn_t interface. */ +static svn_error_t * +delta_read_next_window(svn_txdelta_window_t **window, void *baton, + apr_pool_t *pool) +{ + struct delta_read_baton *drb = baton; + + *window = NULL; + if (drb->rs->current < drb->rs->size) + { + SVN_ERR(read_delta_window(window, drb->rs->chunk_index, drb->rs, pool)); + drb->rs->chunk_index++; + } + + return SVN_NO_ERROR; +} + +/* This implements the svn_txdelta_md5_digest_fn_t interface. */ +static const unsigned char * +delta_read_md5_digest(void *baton) +{ + struct delta_read_baton *drb = baton; + return drb->md5_digest; +} + +/* Return a txdelta stream for on-disk representation REP_STATE + * of TARGET. Allocate the result in POOL. 
+ */ +static svn_txdelta_stream_t * +get_storaged_delta_stream(rep_state_t *rep_state, + node_revision_t *target, + apr_pool_t *pool) +{ + /* Create the delta read baton. */ + struct delta_read_baton *drb = apr_pcalloc(pool, sizeof(*drb)); + drb->rs = rep_state; + memcpy(drb->md5_digest, target->data_rep->md5_digest, + sizeof(drb->md5_digest)); + return svn_txdelta_stream_create(drb, delta_read_next_window, + delta_read_md5_digest, pool); +} + svn_error_t * svn_fs_x__get_file_delta_stream(svn_txdelta_stream_t **stream_p, svn_fs_t *fs, @@ -1910,6 +1981,54 @@ svn_fs_x__get_file_delta_stream(svn_txde apr_pool_t *pool) { svn_stream_t *source_stream, *target_stream; + rep_state_t *rep_state; + svn_fs_x__rep_header_t *rep_header; + fs_x_data_t *ffd = fs->fsap_data; + + /* Try a shortcut: if the target is stored as a delta against the source, + then just use that delta. However, prefer using the fulltext cache + whenever that is available. */ + if (target->data_rep && (source || !ffd->fulltext_cache)) + { + /* Read target's base rep if any. */ + SVN_ERR(create_rep_state(&rep_state, &rep_header, NULL, + target->data_rep, fs, pool)); + + /* Try a shortcut: if the target is stored as a delta against the source, + then just use that delta. */ + if (source && source->data_rep && target->data_rep) + { + /* If that matches source, then use this delta as is. + Note that we want an actual delta here. E.g. a self-delta would + not be good enough. */ + if (rep_header->type == svn_fs_x__rep_delta + && rep_header->base_revision + == svn_fs_x__get_revnum(source->data_rep->id.change_set) + && rep_header->base_item_index == source->data_rep->id.number) + { + *stream_p = get_storaged_delta_stream(rep_state, target, pool); + return SVN_NO_ERROR; + } + } + else if (!source) + { + /* We want a self-delta. There is a fair chance that TARGET got + added in this revision and is already stored in the requested + format. 
*/ + if (rep_header->type == svn_fs_x__rep_self_delta) + { + *stream_p = get_storaged_delta_stream(rep_state, target, pool); + return SVN_NO_ERROR; + } + } + + /* Don't keep file handles open for longer than necessary. */ + if (rep_state->file->file) + { + SVN_ERR(svn_io_file_close(rep_state->file->file, pool)); + rep_state->file->file = NULL; + } + } /* Read both fulltexts and construct a delta. */ if (source) @@ -2374,45 +2493,37 @@ svn_fs_x__get_changes(apr_array_header_t return SVN_NO_ERROR; } +/* Fetch the representation data (header, txdelta / plain windows) + * addressed by ENTRY->ITEM in FS and cache it if caches are enabled. + * Read the data from the already open FILE and the wrapping + * STREAM object. If MAX_OFFSET is not -1, don't read windows that start + * at or beyond that offset. Use POOL for allocations. + */ static svn_error_t * -block_read_windows(svn_fs_x__rep_header_t *rep_header, - svn_fs_t *fs, - apr_file_t *file, - svn_stream_t *stream, - svn_fs_x__p2l_entry_t* entry, - apr_pool_t *pool) -{ - fs_x_data_t *ffd = fs->fsap_data; - rep_state_t rs = { 0 }; - svn_filesize_t fulltext_len; - - if (!ffd->txdelta_window_cache || !ffd->combined_window_cache) - return SVN_NO_ERROR; - - SVN_ERR(init_rep_state(&rs, rep_header, fs, file, stream, entry, pool)); - SVN_ERR(cache_windows(&fulltext_len, fs, &rs, pool)); - - return SVN_NO_ERROR; -} - -static svn_error_t * -block_read_contents(svn_stringbuf_t **item, - svn_fs_t *fs, +block_read_contents(svn_fs_t *fs, apr_file_t *file, svn_stream_t *stream, svn_fs_x__p2l_entry_t* entry, pair_cache_key_t *key, + apr_off_t max_offset, apr_pool_t *pool) { + fs_x_data_t *ffd = fs->fsap_data; representation_cache_key_t header_key = { 0 }; + rep_state_t rs = { 0 }; + svn_filesize_t fulltext_len; svn_fs_x__rep_header_t *rep_header; + if (!ffd->txdelta_window_cache || !ffd->combined_window_cache) + return SVN_NO_ERROR; + header_key.revision = (apr_int32_t)key->revision; header_key.is_packed = svn_fs_x__is_packed_rev(fs, 
header_key.revision); header_key.item_index = key->second; SVN_ERR(read_rep_header(&rep_header, fs, stream, &header_key, pool)); - SVN_ERR(block_read_windows(rep_header, fs, file, stream, entry, pool)); + SVN_ERR(init_rep_state(&rs, rep_header, fs, file, stream, entry, pool)); + SVN_ERR(cache_windows(&fulltext_len, fs, &rs, max_offset, pool)); return SVN_NO_ERROR; } @@ -2709,7 +2820,7 @@ block_read(void **result, /* read all items from the block */ for (i = 0; i < entries->nelts; ++i) { - svn_boolean_t is_result; + svn_boolean_t is_result, is_wanted; apr_pool_t *pool; svn_fs_x__p2l_entry_t* entry @@ -2720,11 +2831,11 @@ block_read(void **result, continue; /* the item / container we were looking for? */ - is_result = result - && entry->offset == wanted_offset + is_wanted = entry->offset == wanted_offset && entry->item_count >= wanted_sub_item && svn_fs_x__id_part_eq(entry->items + wanted_sub_item, id); + is_result = result && is_wanted; /* select the pool that we want the item to be allocated in */ pool = is_result ? result_pool : iterpool; @@ -2748,9 +2859,12 @@ block_read(void **result, case SVN_FS_X__ITEM_TYPE_DIR_REP: case SVN_FS_X__ITEM_TYPE_FILE_PROPS: case SVN_FS_X__ITEM_TYPE_DIR_PROPS: - SVN_ERR(block_read_contents((svn_stringbuf_t **)&item, - fs, revision_file, stream, - entry, &key, pool)); + SVN_ERR(block_read_contents(fs, revision_file, stream, + entry, &key, + is_wanted + ? 
-1 + : block_start + ffd->block_size, + pool)); break; case SVN_FS_X__ITEM_TYPE_NODEREV: Modified: subversion/trunk/subversion/libsvn_fs_x/cached_data.h URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_x/cached_data.h?rev=1595589&r1=1595588&r2=1595589&view=diff ============================================================================== --- subversion/trunk/subversion/libsvn_fs_x/cached_data.h (original) +++ subversion/trunk/subversion/libsvn_fs_x/cached_data.h Sun May 18 11:13:59 2014 @@ -67,9 +67,11 @@ svn_fs_x__check_rep(representation_t *re /* Follow the representation delta chain in FS starting with REP. The number of reps (including REP) in the chain will be returned in - *CHAIN_LENGTH. Do any allocations in POOL. */ + *CHAIN_LENGTH. *SHARD_COUNT will be set to the number of shards + accessed. Do any allocations in POOL. */ svn_error_t * svn_fs_x__rep_chain_length(int *chain_length, + int *shard_count, representation_t *rep, svn_fs_t *fs, apr_pool_t *pool); Modified: subversion/trunk/subversion/libsvn_fs_x/transaction.c URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_x/transaction.c?rev=1595589&r1=1595588&r2=1595589&view=diff ============================================================================== --- subversion/trunk/subversion/libsvn_fs_x/transaction.c (original) +++ subversion/trunk/subversion/libsvn_fs_x/transaction.c Sun May 18 11:13:59 2014 @@ -1920,6 +1920,37 @@ rep_write_contents(void *baton, return svn_stream_write(b->rep_stream, data, len); } +/* Set *SPANNED to the number of shards touched when walking WALK steps on + * NODEREV's predecessor chain in FS. Use POOL for temporary allocations. + */ +static svn_error_t * +shards_spanned(int *spanned, + svn_fs_t *fs, + node_revision_t *noderev, + int walk, + apr_pool_t *pool) +{ + fs_x_data_t *ffd = fs->fsap_data; + int shard_size = ffd->max_files_per_dir ? ffd->max_files_per_dir : 1; + + int count = walk ? 
1 : 0; /* The start of a walk already touches a shard. */ + svn_revnum_t shard, last_shard = ffd->youngest_rev_cache / shard_size; + while (walk-- && noderev->predecessor_count) + { + SVN_ERR(svn_fs_x__get_node_revision(&noderev, fs, + noderev->predecessor_id, pool)); + shard = svn_fs_x__id_rev(noderev->id) / shard_size; + if (shard != last_shard) + { + ++count; + last_shard = shard; + } + } + + *spanned = count; + return SVN_NO_ERROR; +} + /* Given a node-revision NODEREV in filesystem FS, return the representation in *REP to use as the base for a text representation delta if PROPS is FALSE. If PROPS has been set, a suitable props @@ -1941,10 +1972,9 @@ choose_delta_base(representation_t **rep int walk; node_revision_t *base; fs_x_data_t *ffd = fs->fsap_data; - svn_boolean_t maybe_shared_rep = FALSE; - /* If we have no predecessors, then use the empty stream as a - base. */ + /* If we have no predecessors, or that one is empty, then use the empty + * stream as a base. */ if (! noderev->predecessor_count) { *rep = NULL; @@ -1958,72 +1988,80 @@ choose_delta_base(representation_t **rep count = noderev->predecessor_count; count = count & (count - 1); - /* We use skip delta for limiting the number of delta operations - along very long node histories. Close to HEAD however, we create - a linear history to minimize delta size. */ - walk = noderev->predecessor_count - count; - if (walk < (int)ffd->max_linear_deltification) - count = noderev->predecessor_count - 1; - /* Finding the delta base over a very long distance can become extremely expensive for very deep histories, possibly causing client timeouts etc. OTOH, this is a rare operation and its gains are minimal. Lets simply start deltification anew close every other 1000 changes or so. */ + walk = noderev->predecessor_count - count; if (walk > (int)ffd->max_deltification_walk) { *rep = NULL; return SVN_NO_ERROR; } + /* We use skip delta for limiting the number of delta operations + along very long node histories. 
Close to HEAD however, we create + a linear history to minimize delta size. */ + if (walk < (int)ffd->max_linear_deltification) + { + int shards; + SVN_ERR(shards_spanned(&shards, fs, noderev, walk, pool)); + + /* We also don't want the linear deltification to span more shards + than if deltas were used in a simple skip-delta scheme. */ + if ((1 << (--shards)) <= walk) + count = noderev->predecessor_count - 1; + } + /* Walk back a number of predecessors equal to the difference between count and the original predecessor count. (For example, if noderev has ten predecessors and we want the eighth file rev, walk back two predecessors.) */ base = noderev; while ((count++) < noderev->predecessor_count) - { - svn_revnum_t base_revision; - SVN_ERR(svn_fs_x__get_node_revision(&base, fs, - base->predecessor_id, pool)); - - /* If there is a shared rep along the way, we need to limit the - * length of the deltification chain. - * - * Please note that copied nodes - such as branch directories - will - * look the same (false positive) while reps shared within the same - * revision will not be caught (false negative). - * - * Message-ID: - */ - base_revision = svn_fs_x__id_rev(base->id); - if (props) - { - if (base->prop_rep && - base_revision > svn_fs_x__get_revnum(base->prop_rep->id.change_set)) - maybe_shared_rep = TRUE; - } - else - { - if (base->data_rep && - base_revision > svn_fs_x__get_revnum(base->data_rep->id.change_set)) - maybe_shared_rep = TRUE; - } - } + SVN_ERR(svn_fs_x__get_node_revision(&base, fs, + base->predecessor_id, pool)); /* return a suitable base representation */ *rep = props ? base->prop_rep : base->data_rep; /* if we encountered a shared rep, its parent chain may be different * from the node-rev parent chain. 
*/ - if (*rep && maybe_shared_rep) + if (*rep) { int chain_length = 0; - SVN_ERR(svn_fs_x__rep_chain_length(&chain_length, *rep, fs, pool)); + int shard_count = 0; + + /* Very short rep bases are simply not worth it as we are unlikely + * to re-coup the deltification space overhead of 20+ bytes. */ + svn_filesize_t rep_size = (*rep)->expanded_size + ? (*rep)->expanded_size + : (*rep)->size; + if (rep_size < 64) + { + *rep = NULL; + return SVN_NO_ERROR; + } + + /* Check whether the length of the deltification chain is acceptable. + * Otherwise, shared reps may form a non-skipping delta chain in + * extreme cases. */ + SVN_ERR(svn_fs_x__rep_chain_length(&chain_length, &shard_count, + *rep, fs, pool)); /* Some reasonable limit, depending on how acceptable longer linear * chains are in this repo. Also, allow for some minimal chain. */ if (chain_length >= 2 * (int)ffd->max_linear_deltification + 2) *rep = NULL; + else + /* To make it worth opening additional shards / pack files, we + * require that the reps have a certain minimal size. To deltify + * against a rep in different shard, the lower limit is 512 bytes + * and doubles with every extra shard to visit along the delta + * chain. */ + if ( shard_count > 1 + && ((svn_filesize_t)128 << shard_count) >= rep_size) + *rep = NULL; } return SVN_NO_ERROR;