pull: Redo logic for "scanning"

What in the code is called "scanning" is ensuring (potentially
recursively) have an object, and if not, fetching it.  And then if
it's metadata, parsing it and finding new objects to fetch.

This logic has grown fairly complex.  What I'm trying to fix
right now is that if we're doing a pull-local to a remote repository
via `sshfs` (FUSE) we still end up scanning, which is inefficient.

We can take advantage of the "commitpartial" logic here - if a commit
isn't partial, it's complete, hence we don't need to scan it.

At the same time, I'm changing the logic here to *always* do scans for
dirtree objects.  This will fix cases where multiple commits share
dirtree objects.  We have "commitpartial" metadata, but no such concept
of partial/complete for dirtrees.

But, we'll only ever scan dirtrees if we scan commits, which is
what the section above fixes.

Closes: https://github.com/ostreedev/ostree/issues/543

Closes: #564
Approved by: alexlarsson
This commit is contained in:
Colin Walters 2016-11-01 13:51:55 -04:00 committed by Atomic Bot
parent 37c07d2f1c
commit 814aa96825
1 changed files with 26 additions and 54 deletions

View File

@ -102,7 +102,6 @@ typedef struct {
gboolean is_untrusted;
GPtrArray *dirs;
gboolean commitpartial_exists;
gboolean have_previous_bytes;
guint64 previous_bytes_sec;
@ -1036,11 +1035,17 @@ scan_commit_object (OtPullData *pull_data,
GError **error)
{
gboolean ret = FALSE;
/* If we found a legacy transaction flag, assume we have to scan.
* We always do a scan of dirtree objects; see
* https://github.com/ostreedev/ostree/issues/543
*/
OstreeRepoCommitState commitstate;
g_autoptr(GVariant) commit = NULL;
g_autoptr(GVariant) parent_csum = NULL;
const guchar *parent_csum_bytes = NULL;
gpointer depthp;
gint depth;
gboolean is_partial;
if (recursion_depth > OSTREE_MAX_RECURSION)
{
@ -1089,6 +1094,13 @@ scan_commit_object (OtPullData *pull_data,
}
}
if (!ostree_repo_load_commit (pull_data->repo, checksum, &commit, &commitstate, error))
goto out;
/* If we found a legacy transaction flag, assume all commits are partial */
is_partial = pull_data->legacy_transaction_resuming
|| (commitstate & OSTREE_REPO_COMMIT_STATE_PARTIAL) > 0;
if (!ostree_repo_load_variant (pull_data->repo, OSTREE_OBJECT_TYPE_COMMIT, checksum,
&commit, error))
goto out;
@ -1137,7 +1149,11 @@ scan_commit_object (OtPullData *pull_data,
}
}
if (!pull_data->is_commit_only)
/* We only recurse to looking whether we need dirtree/dirmeta
* objects if the commit is partial, and we're not doing a
* commit-only fetch.
*/
if (is_partial && !pull_data->is_commit_only)
{
g_autoptr(GVariant) tree_contents_csum = NULL;
g_autoptr(GVariant) tree_meta_csum = NULL;
@ -1251,8 +1267,11 @@ scan_one_metadata_object_c (OtPullData *pull_data,
do_fetch_detached = (objtype == OSTREE_OBJECT_TYPE_COMMIT);
enqueue_one_object_request (pull_data, tmp_checksum, objtype, path, do_fetch_detached, FALSE);
}
else if (objtype == OSTREE_OBJECT_TYPE_COMMIT && pull_data->is_commit_only)
else if (is_stored && objtype == OSTREE_OBJECT_TYPE_COMMIT)
{
/* For commits, always refetch detached metadata. */
enqueue_one_object_request (pull_data, tmp_checksum, objtype, path, TRUE, TRUE);
if (!scan_commit_object (pull_data, tmp_checksum, recursion_depth,
pull_data->cancellable, error))
goto out;
@ -1260,59 +1279,12 @@ scan_one_metadata_object_c (OtPullData *pull_data,
g_hash_table_insert (pull_data->scanned_metadata, g_variant_ref (object), object);
pull_data->n_scanned_metadata++;
}
else if (is_stored)
else if (is_stored && objtype == OSTREE_OBJECT_TYPE_DIR_TREE)
{
gboolean do_scan = pull_data->legacy_transaction_resuming || is_requested || pull_data->commitpartial_exists;
/* For commits, always refetch detached metadata. */
if (objtype == OSTREE_OBJECT_TYPE_COMMIT)
enqueue_one_object_request (pull_data, tmp_checksum, objtype, path, TRUE, TRUE);
/* For commits, check whether we only had a partial fetch */
if (!do_scan && objtype == OSTREE_OBJECT_TYPE_COMMIT)
{
OstreeRepoCommitState commitstate;
if (!ostree_repo_load_commit (pull_data->repo, tmp_checksum, NULL, &commitstate, error))
goto out;
if (commitstate & OSTREE_REPO_COMMIT_STATE_PARTIAL)
{
do_scan = TRUE;
pull_data->commitpartial_exists = TRUE;
}
else if (pull_data->maxdepth != 0)
{
/* Not fully accurate, but the cost here of scanning all
* input commit objects if we're doing a depth fetch is
* pretty low. We'll do more accurate handling of depth
* when parsing the actual commit.
*/
do_scan = TRUE;
}
}
if (do_scan)
{
switch (objtype)
{
case OSTREE_OBJECT_TYPE_COMMIT:
if (!scan_commit_object (pull_data, tmp_checksum, recursion_depth,
pull_data->cancellable, error))
goto out;
break;
case OSTREE_OBJECT_TYPE_DIR_META:
break;
case OSTREE_OBJECT_TYPE_DIR_TREE:
if (!scan_dirtree_object (pull_data, tmp_checksum, path, recursion_depth,
pull_data->cancellable, error))
goto out;
break;
default:
g_assert_not_reached ();
break;
}
}
g_hash_table_insert (pull_data->scanned_metadata, g_variant_ref (object), object);
pull_data->n_scanned_metadata++;
}