pull: Only download summary if we need it for the pull operation

If we have a commit id for all the refs we're pulling, and if we
don't need the summary to list all the refs when mirroring then the
only reason to download the summary is for the list of deltas.

With the new "indexed-deltas" property in the config file (and mirrored
to the summary file) we can detect when we don't need the summary for
deltas and completely avoid downloading it then.
This commit is contained in:
Alexander Larsson 2020-10-23 13:05:25 +02:00
parent 6c8e6539e2
commit 125ed2b199
2 changed files with 489 additions and 444 deletions

View File

@ -80,6 +80,7 @@ typedef struct {
GVariant *summary;
GHashTable *summary_deltas_checksums; /* Filled from summary and delta indexes */
gboolean summary_has_deltas; /* True if the summary existed and had a delta index */
gboolean has_indexed_deltas;
GHashTable *ref_original_commits; /* Maps checksum to commit, used by timestamp checks */
GHashTable *verified_commits; /* Set<checksum> of commits that have been verified */
GHashTable *signapi_verified_commits; /* Map<checksum,verification> of commits that have been signapi verified */

View File

@ -3535,19 +3535,18 @@ initiate_request (OtPullData *pull_data,
return FALSE;
}
/* If we have a summary, we can use the newer logic */
if (pull_data->summary)
{
if (!pull_data->summary_has_deltas)
/* If we have a summary or delta index, we can use the newer logic.
* We prefer the index as it might have more deltas than the summary
* (i.e. leave some deltas out of summary to make it smaller). */
if (pull_data->has_indexed_deltas)
{
enqueue_one_static_delta_index_request (pull_data, to_revision, delta_from_revision, ref);
}
else
else if (pull_data->summary_has_deltas)
{
if (!initiate_delta_request (pull_data, ref, to_revision, delta_from_revision, error))
return FALSE;
}
}
else if (ref != NULL)
{
/* Are we doing a delta via a ref? In that case we can fall back to the older
@ -3590,6 +3589,20 @@ initiate_request (OtPullData *pull_data,
return TRUE;
}
static gboolean
all_requested_refs_have_commit (GHashTable *requested_refs /* (element-type OstreeCollectionRef utf8) */)
{
GLNX_HASH_TABLE_FOREACH_KV (requested_refs, const OstreeCollectionRef*, ref,
const char*, override_commitid)
{
/* Note: "" override means whatever is latest */
if (override_commitid == NULL || *override_commitid == 0)
return FALSE;
}
return TRUE;
}
/* ------------------------------------------------------------------------------------------
* Below is the libsoup-invariant API; these should match
* the stub functions in the #else clause
@ -3695,9 +3708,11 @@ ostree_repo_pull_with_options (OstreeRepo *self,
gboolean opt_ref_keyring_map_set = FALSE;
gboolean disable_sign_verify = FALSE;
gboolean disable_sign_verify_summary = FALSE;
gboolean need_summary = FALSE;
const char *main_collection_id = NULL;
const char *url_override = NULL;
gboolean inherit_transaction = FALSE;
gboolean require_summary_for_mirror = FALSE;
g_autoptr(GHashTable) updated_requested_refs_to_fetch = NULL; /* (element-type OstreeCollectionRef utf8) */
gsize i;
g_autofree char **opt_localcache_repos = NULL;
@ -3709,6 +3724,7 @@ ostree_repo_pull_with_options (OstreeRepo *self,
*/
const char *the_ref_to_fetch = NULL;
OstreeRepoTransactionStats tstats = { 0, };
gboolean remote_mode_loaded = FALSE;
/* Default */
pull_data->max_metadata_size = OSTREE_MAX_METADATA_SIZE;
@ -4132,8 +4148,98 @@ ostree_repo_pull_with_options (OstreeRepo *self,
if (pull_data->is_commit_only)
pull_data->disable_static_deltas = TRUE;
/* Compute the set of collection-refs (and optional commit id) to fetch */
if (pull_data->is_mirror && !refs_to_fetch && !opt_collection_refs_set && !configured_branches)
{
require_summary_for_mirror = TRUE;
}
else if (opt_collection_refs_set)
{
const gchar *collection_id, *ref_name, *checksum;
while (g_variant_iter_loop (collection_refs_iter, "(&s&s&s)", &collection_id, &ref_name, &checksum))
{
if (!ostree_validate_rev (ref_name, error))
goto out;
g_hash_table_insert (requested_refs_to_fetch,
ostree_collection_ref_new (collection_id, ref_name),
(*checksum != '\0') ? g_strdup (checksum) : NULL);
}
}
else if (refs_to_fetch != NULL)
{
char **strviter = refs_to_fetch;
char **commitid_strviter = override_commit_ids ?: NULL;
while (*strviter)
{
const char *branch = *strviter;
if (ostree_validate_checksum_string (branch, NULL))
{
char *key = g_strdup (branch);
g_hash_table_add (commits_to_fetch, key);
}
else
{
if (!ostree_validate_rev (branch, error))
goto out;
char *commitid = commitid_strviter ? g_strdup (*commitid_strviter) : NULL;
g_hash_table_insert (requested_refs_to_fetch,
ostree_collection_ref_new (NULL, branch), commitid);
}
strviter++;
if (commitid_strviter)
commitid_strviter++;
}
}
else
{
char **branches_iter;
branches_iter = configured_branches;
if (!(branches_iter && *branches_iter))
{
g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
"No configured branches for remote %s", remote_name_or_baseurl);
goto out;
}
for (;branches_iter && *branches_iter; branches_iter++)
{
const char *branch = *branches_iter;
g_hash_table_insert (requested_refs_to_fetch,
ostree_collection_ref_new (NULL, branch), NULL);
}
}
/* Deltas are necessary when mirroring or resolving a requested ref to a commit.
* We try to avoid loading the potentially large summary if it is not needed. */
need_summary = require_summary_for_mirror || !all_requested_refs_have_commit (requested_refs_to_fetch) || summary_sig_bytes_v != NULL;
/* If we don't have indexed deltas, we need the summary for deltas, so check
* the config file for support.
* NOTE: Avoid download if we don't need deltas */
if (!need_summary && !pull_data->disable_static_deltas)
{
if (!load_remote_repo_config (pull_data, &remote_config, cancellable, error))
goto out;
/* Check if remote has delta indexes outside summary */
if (!ot_keyfile_get_boolean_with_default (remote_config, "core", "indexed-deltas", FALSE,
&pull_data->has_indexed_deltas, error))
goto out;
if (!pull_data->has_indexed_deltas)
need_summary = TRUE;
}
pull_data->static_delta_superblocks = g_ptr_array_new_with_free_func ((GDestroyNotify)g_variant_unref);
if (need_summary)
{
g_autoptr(GBytes) bytes_sig = NULL;
gboolean summary_sig_not_modified = FALSE;
@ -4144,7 +4250,6 @@ ostree_repo_pull_with_options (OstreeRepo *self,
g_autoptr(GVariant) deltas = NULL;
g_autoptr(GVariant) additional_metadata = NULL;
gboolean summary_from_cache = FALSE;
gboolean remote_mode_loaded = FALSE;
gboolean tombstone_commits = FALSE;
if (summary_sig_bytes_v)
@ -4237,6 +4342,7 @@ ostree_repo_pull_with_options (OstreeRepo *self,
g_clear_pointer (&summary_etag, g_free);
summary_last_modified = 0;
if (!_ostree_fetcher_mirrored_request_to_membuf (pull_data->fetcher,
pull_data->meta_mirrorlist,
"summary", OSTREE_FETCHER_REQUEST_OPTIONAL_CONTENT,
@ -4269,10 +4375,10 @@ ostree_repo_pull_with_options (OstreeRepo *self,
}
#endif /* OSTREE_DISABLE_GPGME */
if (!bytes_summary && pull_data->require_static_deltas)
if (!bytes_summary && require_summary_for_mirror)
{
g_set_error (error, G_IO_ERROR, G_IO_ERROR_NOT_FOUND,
"Fetch configured to require static deltas, but no summary found");
g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
"Fetching all refs was requested in mirror mode, but remote repository does not have a summary");
goto out;
}
@ -4359,7 +4465,6 @@ ostree_repo_pull_with_options (OstreeRepo *self,
signatures = g_variant_new_from_bytes (OSTREE_SUMMARY_SIG_GVARIANT_FORMAT,
bytes_sig, FALSE);
g_assert (pull_data->signapi_summary_verifiers);
if (!_sign_verify_for_remote (pull_data->signapi_summary_verifiers, bytes_summary, signatures, NULL, &temp_error))
{
@ -4516,6 +4621,8 @@ ostree_repo_pull_with_options (OstreeRepo *self,
pull_data->summary_has_deltas = deltas != NULL && g_variant_n_children (deltas) > 0;
if (!collect_available_deltas_for_pull (pull_data, deltas, error))
goto out;
g_variant_lookup (additional_metadata, OSTREE_SUMMARY_INDEXED_DELTAS, "b", &pull_data->has_indexed_deltas);
}
if (pull_data->summary &&
@ -4527,13 +4634,23 @@ ostree_repo_pull_with_options (OstreeRepo *self,
pull_data->has_tombstone_commits = tombstone_commits;
remote_mode_loaded = TRUE;
}
else if (pull_data->remote_repo_local == NULL)
}
if (pull_data->require_static_deltas && !pull_data->has_indexed_deltas && !pull_data->summary_has_deltas)
{
g_set_error (error, G_IO_ERROR, G_IO_ERROR_NOT_FOUND,
"Fetch configured to require static deltas, but no summary deltas or delta index found");
goto out;
}
if (remote_mode_loaded && pull_data->remote_repo_local == NULL)
{
/* Fall-back path which loads the necessary config from the remotes
* `config` file. Doing so is deprecated since it means an
* `config` file (unless we already read it above). Doing so is deprecated since it means an
* additional round trip to the remote for each pull. No need to do
* it for local pulls. */
if (!load_remote_repo_config (pull_data, &remote_config, cancellable, error))
if (remote_config == NULL &&
!load_remote_repo_config (pull_data, &remote_config, cancellable, error))
goto out;
if (!ot_keyfile_get_value_with_default (remote_config, "core", "mode", "bare",
@ -4557,79 +4674,6 @@ ostree_repo_pull_with_options (OstreeRepo *self,
remote_mode_str);
goto out;
}
}
if (pull_data->is_mirror && !refs_to_fetch && !opt_collection_refs_set && !configured_branches)
{
if (!bytes_summary)
{
g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
"Fetching all refs was requested in mirror mode, but remote repository does not have a summary");
goto out;
}
}
else if (opt_collection_refs_set)
{
const gchar *collection_id, *ref_name, *checksum;
while (g_variant_iter_loop (collection_refs_iter, "(&s&s&s)", &collection_id, &ref_name, &checksum))
{
if (!ostree_validate_rev (ref_name, error))
goto out;
g_hash_table_insert (requested_refs_to_fetch,
ostree_collection_ref_new (collection_id, ref_name),
(*checksum != '\0') ? g_strdup (checksum) : NULL);
}
}
else if (refs_to_fetch != NULL)
{
char **strviter = refs_to_fetch;
char **commitid_strviter = override_commit_ids ?: NULL;
while (*strviter)
{
const char *branch = *strviter;
if (ostree_validate_checksum_string (branch, NULL))
{
char *key = g_strdup (branch);
g_hash_table_add (commits_to_fetch, key);
}
else
{
if (!ostree_validate_rev (branch, error))
goto out;
char *commitid = commitid_strviter ? g_strdup (*commitid_strviter) : NULL;
g_hash_table_insert (requested_refs_to_fetch,
ostree_collection_ref_new (NULL, branch), commitid);
}
strviter++;
if (commitid_strviter)
commitid_strviter++;
}
}
else
{
char **branches_iter;
branches_iter = configured_branches;
if (!(branches_iter && *branches_iter))
{
g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
"No configured branches for remote %s", remote_name_or_baseurl);
goto out;
}
for (;branches_iter && *branches_iter; branches_iter++)
{
const char *branch = *branches_iter;
g_hash_table_insert (requested_refs_to_fetch,
ostree_collection_ref_new (NULL, branch), NULL);
}
}
/* Resolve the checksum for each ref. This has to be done into a new hash table,
* since we cant modify the keys of @requested_refs_to_fetch while iterating