core: Add dynamic uncompressed object cache for archive-z

This gives us something closer to the advantages of archive and
archive-z when using the latter.  Concretely we get deduplication
among multiple checkouts, along with the "devino" hash table trick
during commits to avoid checksumming content again.

This is enabled by default.
This commit is contained in:
Colin Walters 2012-10-11 18:33:03 -04:00
parent 3c0fe01152
commit ee73c0fa34
4 changed files with 220 additions and 15 deletions

View File

@ -50,6 +50,7 @@ struct OstreeRepo {
GFile *local_heads_dir; GFile *local_heads_dir;
GFile *remote_heads_dir; GFile *remote_heads_dir;
GFile *objects_dir; GFile *objects_dir;
GFile *uncompressed_objects_dir;
GFile *remote_cache_dir; GFile *remote_cache_dir;
GFile *config_file; GFile *config_file;
@ -64,9 +65,11 @@ struct OstreeRepo {
gboolean inited; gboolean inited;
gboolean in_transaction; gboolean in_transaction;
GHashTable *loose_object_devino_hash; GHashTable *loose_object_devino_hash;
GHashTable *updated_uncompressed_dirs;
GKeyFile *config; GKeyFile *config;
OstreeRepoMode mode; OstreeRepoMode mode;
gboolean enable_uncompressed_cache;
OstreeRepo *parent_repo; OstreeRepo *parent_repo;
}; };
@ -104,10 +107,13 @@ ostree_repo_finalize (GObject *object)
g_clear_object (&self->local_heads_dir); g_clear_object (&self->local_heads_dir);
g_clear_object (&self->remote_heads_dir); g_clear_object (&self->remote_heads_dir);
g_clear_object (&self->objects_dir); g_clear_object (&self->objects_dir);
g_clear_object (&self->uncompressed_objects_dir);
g_clear_object (&self->remote_cache_dir); g_clear_object (&self->remote_cache_dir);
g_clear_object (&self->config_file); g_clear_object (&self->config_file);
if (self->loose_object_devino_hash) if (self->loose_object_devino_hash)
g_hash_table_destroy (self->loose_object_devino_hash); g_hash_table_destroy (self->loose_object_devino_hash);
if (self->updated_uncompressed_dirs)
g_hash_table_destroy (self->updated_uncompressed_dirs);
if (self->config) if (self->config)
g_key_file_free (self->config); g_key_file_free (self->config);
g_clear_pointer (&self->cached_meta_indexes, (GDestroyNotify) g_ptr_array_unref); g_clear_pointer (&self->cached_meta_indexes, (GDestroyNotify) g_ptr_array_unref);
@ -177,6 +183,7 @@ ostree_repo_constructor (GType gtype,
self->remote_heads_dir = g_file_resolve_relative_path (self->repodir, "refs/remotes"); self->remote_heads_dir = g_file_resolve_relative_path (self->repodir, "refs/remotes");
self->objects_dir = g_file_get_child (self->repodir, "objects"); self->objects_dir = g_file_get_child (self->repodir, "objects");
self->uncompressed_objects_dir = g_file_get_child (self->repodir, "uncompressed-objects-cache");
self->remote_cache_dir = g_file_get_child (self->repodir, "remote-cache"); self->remote_cache_dir = g_file_get_child (self->repodir, "remote-cache");
self->config_file = g_file_get_child (self->repodir, "config"); self->config_file = g_file_get_child (self->repodir, "config");
@ -674,6 +681,10 @@ ostree_repo_check (OstreeRepo *self, GError **error)
} }
} }
if (!ot_keyfile_get_boolean_with_default (self->config, "core", "enable-uncompressed-cache",
TRUE, &self->enable_uncompressed_cache, error))
goto out;
self->inited = TRUE; self->inited = TRUE;
ret = TRUE; ret = TRUE;
@ -1094,18 +1105,35 @@ get_loose_object_dirs (OstreeRepo *self,
{ {
gboolean ret = FALSE; gboolean ret = FALSE;
GError *temp_error = NULL; GError *temp_error = NULL;
GFile *object_dir_to_scan;
ot_lptrarray GPtrArray *ret_object_dirs = NULL; ot_lptrarray GPtrArray *ret_object_dirs = NULL;
ot_lobj GFileEnumerator *enumerator = NULL; ot_lobj GFileEnumerator *enumerator = NULL;
ot_lobj GFileInfo *file_info = NULL; ot_lobj GFileInfo *file_info = NULL;
ret_object_dirs = g_ptr_array_new_with_free_func ((GDestroyNotify)g_object_unref); ret_object_dirs = g_ptr_array_new_with_free_func ((GDestroyNotify)g_object_unref);
enumerator = g_file_enumerate_children (self->objects_dir, OSTREE_GIO_FAST_QUERYINFO, if (ostree_repo_get_mode (self) == OSTREE_REPO_MODE_ARCHIVE_Z)
object_dir_to_scan = self->uncompressed_objects_dir;
else
object_dir_to_scan = self->objects_dir;
enumerator = g_file_enumerate_children (object_dir_to_scan, OSTREE_GIO_FAST_QUERYINFO,
G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS, G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
cancellable, cancellable,
error); &temp_error);
if (!enumerator) if (!enumerator)
goto out; {
if (g_error_matches (temp_error, G_IO_ERROR, G_IO_ERROR_NOT_FOUND))
{
g_clear_error (&temp_error);
ret = TRUE;
ot_transfer_out_value (out_object_dirs, &ret_object_dirs);
}
else
g_propagate_error (error, temp_error);
goto out;
}
while ((file_info = g_file_enumerator_next_file (enumerator, cancellable, &temp_error)) != NULL) while ((file_info = g_file_enumerator_next_file (enumerator, cancellable, &temp_error)) != NULL)
{ {
@ -1178,8 +1206,6 @@ scan_loose_devino (OstreeRepo *self,
} }
repo_mode = ostree_repo_get_mode (self); repo_mode = ostree_repo_get_mode (self);
if (repo_mode == OSTREE_REPO_MODE_ARCHIVE_Z)
return TRUE;
if (!get_loose_object_dirs (self, &object_dirs, cancellable, error)) if (!get_loose_object_dirs (self, &object_dirs, cancellable, error))
goto out; goto out;
@ -1207,6 +1233,7 @@ scan_loose_devino (OstreeRepo *self,
guint32 type; guint32 type;
OstreeDevIno *key; OstreeDevIno *key;
GString *checksum; GString *checksum;
gboolean skip;
name = g_file_info_get_attribute_byte_string (file_info, "standard::name"); name = g_file_info_get_attribute_byte_string (file_info, "standard::name");
type = g_file_info_get_attribute_uint32 (file_info, "standard::type"); type = g_file_info_get_attribute_uint32 (file_info, "standard::type");
@ -1217,14 +1244,20 @@ scan_loose_devino (OstreeRepo *self,
continue; continue;
} }
if (!((repo_mode == OSTREE_REPO_MODE_ARCHIVE switch (repo_mode)
&& g_str_has_suffix (name, ".filecontent"))
|| (repo_mode == OSTREE_REPO_MODE_BARE
&& g_str_has_suffix (name, ".file"))))
{ {
g_clear_object (&file_info); case OSTREE_REPO_MODE_ARCHIVE:
continue; skip = !g_str_has_suffix (name, ".filecontent");
break;
case OSTREE_REPO_MODE_ARCHIVE_Z:
skip = !g_str_has_suffix (name, ".filez");
break;
case OSTREE_REPO_MODE_BARE:
skip = !g_str_has_suffix (name, ".file");
break;
} }
if (skip)
continue;
dot = strrchr (name, '.'); dot = strrchr (name, '.');
g_assert (dot); g_assert (dot);
@ -1517,6 +1550,20 @@ ostree_repo_get_object_path (OstreeRepo *self,
return ret; return ret;
} }
static GFile *
get_uncompressed_object_cache_path (OstreeRepo *self,
const char *checksum)
{
char *relpath;
GFile *ret;
relpath = ostree_get_relative_object_path (checksum, OSTREE_OBJECT_TYPE_FILE, FALSE);
ret = g_file_resolve_relative_path (self->uncompressed_objects_dir, relpath);
g_free (relpath);
return ret;
}
/** /**
* ostree_repo_stage_content_trusted: * ostree_repo_stage_content_trusted:
* *
@ -3208,10 +3255,29 @@ find_loose_for_checkout (OstreeRepo *self,
do do
{ {
if (self->mode == OSTREE_REPO_MODE_BARE) switch (self->mode)
path = ostree_repo_get_object_path (self, checksum, OSTREE_OBJECT_TYPE_FILE); {
else case OSTREE_REPO_MODE_BARE:
path = ostree_repo_get_archive_content_path (self, checksum); path = ostree_repo_get_object_path (self, checksum, OSTREE_OBJECT_TYPE_FILE);
break;
case OSTREE_REPO_MODE_ARCHIVE:
path = ostree_repo_get_archive_content_path (self, checksum);
break;
case OSTREE_REPO_MODE_ARCHIVE_Z:
{
if (self->enable_uncompressed_cache)
path = get_uncompressed_object_cache_path (self, checksum);
else
path = NULL;
}
break;
}
if (!path)
{
self = self->parent_repo;
continue;
}
if (lstat (ot_gfile_get_path_cached (path), &stbuf) < 0) if (lstat (ot_gfile_get_path_cached (path), &stbuf) < 0)
{ {
@ -3279,6 +3345,7 @@ checkout_file_thread (GSimpleAsyncResult *result,
GCancellable *cancellable) GCancellable *cancellable)
{ {
const char *checksum; const char *checksum;
OstreeRepo *repo;
gboolean hardlink_supported; gboolean hardlink_supported;
GError *local_error = NULL; GError *local_error = NULL;
GError **error = &local_error; GError **error = &local_error;
@ -3288,6 +3355,7 @@ checkout_file_thread (GSimpleAsyncResult *result,
CheckoutOneFileAsyncData *checkout_data; CheckoutOneFileAsyncData *checkout_data;
checkout_data = g_simple_async_result_get_op_res_gpointer (result); checkout_data = g_simple_async_result_get_op_res_gpointer (result);
repo = checkout_data->repo;
/* Hack to avoid trying to create device files as a user */ /* Hack to avoid trying to create device files as a user */
if (checkout_data->mode == OSTREE_REPO_CHECKOUT_MODE_USER if (checkout_data->mode == OSTREE_REPO_CHECKOUT_MODE_USER
@ -3296,15 +3364,72 @@ checkout_file_thread (GSimpleAsyncResult *result,
checksum = ostree_repo_file_get_checksum ((OstreeRepoFile*)checkout_data->source); checksum = ostree_repo_file_get_checksum ((OstreeRepoFile*)checkout_data->source);
/* We can only do hardlinks in these scenarios */
if ((checkout_data->repo->mode == OSTREE_REPO_MODE_BARE if ((checkout_data->repo->mode == OSTREE_REPO_MODE_BARE
&& checkout_data->mode == OSTREE_REPO_CHECKOUT_MODE_NONE) && checkout_data->mode == OSTREE_REPO_CHECKOUT_MODE_NONE)
|| (checkout_data->repo->mode == OSTREE_REPO_MODE_ARCHIVE || (checkout_data->repo->mode == OSTREE_REPO_MODE_ARCHIVE
&& checkout_data->mode == OSTREE_REPO_CHECKOUT_MODE_USER)
|| (checkout_data->repo->mode == OSTREE_REPO_MODE_ARCHIVE_Z
&& checkout_data->mode == OSTREE_REPO_CHECKOUT_MODE_USER)) && checkout_data->mode == OSTREE_REPO_CHECKOUT_MODE_USER))
{ {
if (!find_loose_for_checkout (checkout_data->repo, checksum, &loose_path, if (!find_loose_for_checkout (checkout_data->repo, checksum, &loose_path,
cancellable, error)) cancellable, error))
goto out; goto out;
} }
/* Also, if we're archive-z and we didn't find an object, uncompress it now,
* stick it in the cache, and then hardlink to that.
*/
if (loose_path == NULL
&& repo->mode == OSTREE_REPO_MODE_ARCHIVE_Z
&& checkout_data->mode == OSTREE_REPO_CHECKOUT_MODE_USER
&& repo->enable_uncompressed_cache)
{
ot_lobj GFile *objdir = NULL;
loose_path = get_uncompressed_object_cache_path (repo, checksum);
if (!ostree_repo_load_file (repo, checksum, &input, NULL, &xattrs,
cancellable, error))
goto out;
objdir = g_file_get_parent (loose_path);
if (!ot_gfile_ensure_directory (objdir, TRUE, error))
goto out;
/* Use UNION_FILES to make this last-one-wins thread behavior
* for now; we lose deduplication potentially, but oh well
*/
if (!checkout_file_from_input (loose_path,
OSTREE_REPO_CHECKOUT_MODE_USER,
OSTREE_REPO_CHECKOUT_OVERWRITE_UNION_FILES,
checkout_data->source_info, xattrs,
input, cancellable, error))
goto out;
/* Store the 2-byte objdir prefix (e.g. e3) in a set. The basic
* idea here is that if we had to unpack an object, it's very
* likely we're replacing some other object, so we may need a GC.
*
* This model ensures that we do work roughly proportional to
* the size of the changes. For example, we don't scan any
* directories if we didn't modify anything, meaning you can
* checkout the same tree multiple times very quickly.
*
* This is also scale independent; we don't hardcode e.g. looking
* at 1000 objects.
*
* The downside is that if we're unlucky, we may not free
* an object for quite some time.
*/
g_mutex_lock (&repo->cache_lock);
{
gpointer key = GUINT_TO_POINTER ((g_ascii_xdigit_value (checksum[0]) << 4) +
g_ascii_xdigit_value (checksum[1]));
if (repo->updated_uncompressed_dirs == NULL)
repo->updated_uncompressed_dirs = g_hash_table_new (NULL, NULL);
g_hash_table_insert (repo->updated_uncompressed_dirs, key, key);
}
g_mutex_unlock (&repo->cache_lock);
}
if (loose_path) if (loose_path)
{ {
@ -3615,6 +3740,73 @@ ostree_repo_checkout_tree_finish (OstreeRepo *self,
return TRUE; return TRUE;
} }
/**
* ostree_repo_checkout_gc:
*
* Call this after finishing a succession of checkout operations; it
* will delete any currently-unused uncompressed objects from the
* cache.
*/
gboolean
ostree_repo_checkout_gc (OstreeRepo *self,
GCancellable *cancellable,
GError **error)
{
gboolean ret = FALSE;
ot_lhash GHashTable *to_clean_dirs = NULL;
GHashTableIter iter;
gpointer key, value;
g_mutex_lock (&self->cache_lock);
to_clean_dirs = self->updated_uncompressed_dirs;
self->updated_uncompressed_dirs = g_hash_table_new (NULL, NULL);
g_mutex_unlock (&self->cache_lock);
if (to_clean_dirs)
g_hash_table_iter_init (&iter, to_clean_dirs);
while (to_clean_dirs && g_hash_table_iter_next (&iter, &key, &value))
{
GError *temp_error = NULL;
ot_lobj GFile *objdir = NULL;
ot_lobj GFileInfo *file_info = NULL;
ot_lobj GFileEnumerator *enumerator = NULL;
ot_lfree char *objdir_name = NULL;
objdir_name = g_strdup_printf ("%02x", GPOINTER_TO_UINT (key));
objdir = ot_gfile_get_child_build_path (self->uncompressed_objects_dir, "objects",
objdir_name, NULL);
enumerator = g_file_enumerate_children (objdir, "standard::name,standard::type,unix::inode,unix::nlink",
G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
cancellable,
error);
if (!enumerator)
goto out;
while ((file_info = g_file_enumerator_next_file (enumerator, cancellable, &temp_error)) != NULL)
{
guint32 nlinks = g_file_info_get_attribute_uint32 (file_info, "unix::nlink");
if (nlinks == 1)
{
ot_lobj GFile *objpath = NULL;
objpath = ot_gfile_get_child_build_path (objdir, g_file_info_get_name (file_info), NULL);
if (!ot_gfile_unlink (objpath, cancellable, error))
goto out;
}
g_object_unref (file_info);
}
if (temp_error != NULL)
{
g_propagate_error (error, temp_error);
goto out;
}
}
ret = TRUE;
out:
return ret;
}
gboolean gboolean
ostree_repo_read_commit (OstreeRepo *self, ostree_repo_read_commit (OstreeRepo *self,
const char *rev, const char *rev,

View File

@ -281,6 +281,10 @@ ostree_repo_checkout_tree_finish (OstreeRepo *self,
GAsyncResult *result, GAsyncResult *result,
GError **error); GError **error);
gboolean ostree_repo_checkout_gc (OstreeRepo *self,
GCancellable *cancellable,
GError **error);
gboolean ostree_repo_read_commit (OstreeRepo *self, gboolean ostree_repo_read_commit (OstreeRepo *self,
const char *rev, const char *rev,
GFile **out_root, GFile **out_root,

View File

@ -57,6 +57,7 @@ ot_gfile_ensure_directory (GFile *dir,
gboolean ret = FALSE; gboolean ret = FALSE;
GError *temp_error = NULL; GError *temp_error = NULL;
again:
if (with_parents) if (with_parents)
ret = g_file_make_directory_with_parents (dir, NULL, &temp_error); ret = g_file_make_directory_with_parents (dir, NULL, &temp_error);
else else
@ -71,6 +72,11 @@ ot_gfile_ensure_directory (GFile *dir,
else else
g_clear_error (&temp_error); g_clear_error (&temp_error);
} }
/* Work around glib bug where if multiple threads/processes race in
* _with_parents, it can error out early
*/
if (with_parents && !g_file_query_exists (dir, NULL))
goto again;
ret = TRUE; ret = TRUE;
out: out:

View File

@ -280,6 +280,9 @@ ostree_builtin_checkout (int argc, char **argv, GFile *repo_path, GError **error
} }
} }
if (!ostree_repo_checkout_gc (repo, cancellable, error))
goto out;
ret = TRUE; ret = TRUE;
out: out:
if (context) if (context)