repo: Add APIs for devino optimization between checkout -> commit

A fast way to generate new OSTree content using an existing
tree is to checkout (as hard links), add/replace files, then
call `ostree_repo_scan_hardlinks()`, then commit.

But `ostree_repo_scan_hardlinks()` scans the entire repo, which
can be slow if you have a lot of content.

All we really need is a mapping of (device,inode) -> checksum
just for the objects we checked out, then use that mapping
for commits.

This patch adds an API so that callers can create a mapping via
`ostree_repo_devino_cache_new()`, then pass it to
`ostree_repo_checkout_tree_at()` which will populate it, and then
`ostree_repo_write_directory_to_mtree()` can consume it.

I plan to use this in rpm-ostree for package layering work.

Notes:
 - The old `ostree_repo_scan_hardlinks()` API still works.
 - I tweaked the cache to be a set with the checksum colocated with
   the key, to avoid a separate malloc block per entry.

https://github.com/GNOME/ostree/pull/167
This commit is contained in:
Colin Walters 2016-01-05 09:13:54 -05:00
parent 21fbc16bc3
commit 5929ce9e0e
6 changed files with 157 additions and 51 deletions

View File

@ -276,8 +276,12 @@ ostree_repo_commit_modifier_new
OstreeRepoCommitModifierXattrCallback OstreeRepoCommitModifierXattrCallback
ostree_repo_commit_modifier_set_xattr_callback ostree_repo_commit_modifier_set_xattr_callback
ostree_repo_commit_modifier_set_sepolicy ostree_repo_commit_modifier_set_sepolicy
ostree_repo_commit_modifier_set_devino_cache
ostree_repo_commit_modifier_ref ostree_repo_commit_modifier_ref
ostree_repo_commit_modifier_unref ostree_repo_commit_modifier_unref
ostree_repo_devino_cache_new
ostree_repo_devino_cache_ref
ostree_repo_devino_cache_unref
ostree_repo_write_directory_to_mtree ostree_repo_write_directory_to_mtree
ostree_repo_write_dfd_to_mtree ostree_repo_write_dfd_to_mtree
ostree_repo_write_archive_to_mtree ostree_repo_write_archive_to_mtree

View File

@ -429,6 +429,26 @@ checkout_one_file_at (OstreeRepo *repo,
TRUE, &did_hardlink, TRUE, &did_hardlink,
cancellable, error)) cancellable, error))
goto out; goto out;
if (did_hardlink && options->devino_to_csum_cache)
{
struct stat stbuf;
OstreeDevIno *key;
if (TEMP_FAILURE_RETRY (fstatat (destination_dfd, destination_name, &stbuf, AT_SYMLINK_NOFOLLOW)) != 0)
{
glnx_set_error_from_errno (error);
goto out;
}
key = g_new (OstreeDevIno, 1);
key->dev = stbuf.st_dev;
key->ino = stbuf.st_ino;
memcpy (key->checksum, checksum, 65);
g_hash_table_add ((GHashTable*)options->devino_to_csum_cache, key);
}
if (did_hardlink) if (did_hardlink)
break; break;
} }
@ -834,6 +854,42 @@ ostree_repo_checkout_tree_at (OstreeRepo *self,
return ret; return ret;
} }
/* GHashFunc for OstreeDevIno keys.
 *
 * Only the device and inode numbers contribute to the hash; the checksum
 * stored alongside them is payload and is deliberately left out so that it
 * stays consistent with devino_equal().
 */
static guint
devino_hash (gconstpointer a)
{
  const OstreeDevIno *entry = a;

  return (guint) (entry->dev + entry->ino);
}
/* GEqualFunc for OstreeDevIno keys.
 *
 * Two entries are equal when both their device and inode numbers match.
 * The checksum member is not compared, so a lookup key only needs `dev`
 * and `ino` filled in.
 */
static int
devino_equal (gconstpointer a,
              gconstpointer b)
{
  const OstreeDevIno *lhs = a;
  const OstreeDevIno *rhs = b;

  if (lhs->dev != rhs->dev)
    return FALSE;

  return lhs->ino == rhs->ino;
}
/**
 * ostree_repo_devino_cache_new:
 *
 * OSTree has support for pairing ostree_repo_checkout_tree_at() using
 * hardlinks in combination with a later
 * ostree_repo_write_directory_to_mtree() using a (normally modified)
 * directory.  In order for OSTree to optimally detect just the new
 * files, use this function and fill in the `devino_to_csum_cache`
 * member of `OstreeRepoCheckoutOptions`, then call
 * ostree_repo_commit_modifier_set_devino_cache() on the commit
 * modifier that is used for the later write.
 *
 * Release the returned cache with ostree_repo_devino_cache_unref().
 *
 * Returns: (transfer full): Newly allocated cache
 */
OstreeRepoDevInoCache *
ostree_repo_devino_cache_new (void)
{
  /* The cache is a GHashTable used as a set: every OstreeDevIno key carries
   * its own checksum, so only a key destructor is needed and there is no
   * separate value to free.
   */
  return (OstreeRepoDevInoCache*) g_hash_table_new_full (devino_hash, devino_equal, g_free, NULL);
}
/** /**
* ostree_repo_checkout_gc: * ostree_repo_checkout_gc:
* @self: Repo * @self: Repo

View File

@ -36,6 +36,22 @@
#include <sys/xattr.h> #include <sys/xattr.h>
#include <glib/gprintf.h> #include <glib/gprintf.h>
/* Private layout of the boxed, reference-counted commit modifier that is
 * handed to the tree-writing functions.
 */
struct OstreeRepoCommitModifier {
/* Reference count; the structure is freed by
 * ostree_repo_commit_modifier_unref(). */
volatile gint refcount;
/* NOTE(review): flags, filter, user_data and destroy_notify are presumably
 * the values given at construction time - confirm against
 * ostree_repo_commit_modifier_new(), which is not visible here. */
OstreeRepoCommitModifierFlags flags;
OstreeRepoCommitFilter filter;
gpointer user_data;
GDestroyNotify destroy_notify;
/* Extended-attribute callback; xattr_destroy is invoked on
 * xattr_user_data when the modifier is freed. */
OstreeRepoCommitModifierXattrCallback xattr_callback;
GDestroyNotify xattr_destroy;
gpointer xattr_user_data;
/* Owned reference (may be NULL), set by
 * ostree_repo_commit_modifier_set_sepolicy(). */
OstreeSePolicy *sepolicy;
/* Owned reference to a (device, inode) -> checksum cache, or NULL.
 * Consulted by devino_cache_lookup() only when the repo itself has no
 * loose_object_devino_hash. */
GHashTable *devino_cache;
};
gboolean gboolean
_ostree_repo_ensure_loose_objdir_at (int dfd, _ostree_repo_ensure_loose_objdir_at (int dfd,
const char *loose_path, const char *loose_path,
@ -936,28 +952,6 @@ write_object (OstreeRepo *self,
return ret; return ret;
} }
typedef struct {
dev_t dev;
ino_t ino;
} OstreeDevIno;
static guint
devino_hash (gconstpointer a)
{
OstreeDevIno *a_i = (gpointer)a;
return (guint) (a_i->dev + a_i->ino);
}
static int
devino_equal (gconstpointer a,
gconstpointer b)
{
OstreeDevIno *a_i = (gpointer)a;
OstreeDevIno *b_i = (gpointer)b;
return a_i->dev == b_i->dev
&& a_i->ino == b_i->ino;
}
static gboolean static gboolean
scan_one_loose_devino (OstreeRepo *self, scan_one_loose_devino (OstreeRepo *self,
int object_dir_fd, int object_dir_fd,
@ -998,7 +992,6 @@ scan_one_loose_devino (OstreeRepo *self,
OstreeDevIno *key; OstreeDevIno *key;
struct dirent *child_dent; struct dirent *child_dent;
const char *dot; const char *dot;
GString *checksum;
gboolean skip; gboolean skip;
const char *name; const char *name;
@ -1039,14 +1032,14 @@ scan_one_loose_devino (OstreeRepo *self,
goto out; goto out;
} }
checksum = g_string_new (dent->d_name);
g_string_append_len (checksum, name, 62);
key = g_new (OstreeDevIno, 1); key = g_new (OstreeDevIno, 1);
key->dev = stbuf.st_dev; key->dev = stbuf.st_dev;
key->ino = stbuf.st_ino; key->ino = stbuf.st_ino;
memcpy (key->checksum, dent->d_name, 2);
memcpy (key->checksum + 2, name, 62);
key->checksum[sizeof(key->checksum)-1] = '\0';
g_hash_table_replace (devino_cache, key, g_string_free (checksum, FALSE)); g_hash_table_add (devino_cache, key);
} }
} }
@ -1087,17 +1080,27 @@ scan_loose_devino (OstreeRepo *self,
static const char * static const char *
devino_cache_lookup (OstreeRepo *self, devino_cache_lookup (OstreeRepo *self,
OstreeRepoCommitModifier *modifier,
guint32 device, guint32 device,
guint32 inode) guint32 inode)
{ {
OstreeDevIno dev_ino; OstreeDevIno dev_ino_key;
OstreeDevIno *dev_ino_val;
GHashTable *cache;
if (!self->loose_object_devino_hash) if (self->loose_object_devino_hash)
cache = self->loose_object_devino_hash;
else if (modifier && modifier->devino_cache)
cache = modifier->devino_cache;
else
return NULL; return NULL;
dev_ino.dev = device; dev_ino_key.dev = device;
dev_ino.ino = inode; dev_ino_key.ino = inode;
return g_hash_table_lookup (self->loose_object_devino_hash, &dev_ino); dev_ino_val = g_hash_table_lookup (cache, &dev_ino_key);
if (!dev_ino_val)
return NULL;
return dev_ino_val->checksum;
} }
/** /**
@ -1127,7 +1130,7 @@ ostree_repo_scan_hardlinks (OstreeRepo *self,
g_return_val_if_fail (self->in_transaction == TRUE, FALSE); g_return_val_if_fail (self->in_transaction == TRUE, FALSE);
if (!self->loose_object_devino_hash) if (!self->loose_object_devino_hash)
self->loose_object_devino_hash = g_hash_table_new_full (devino_hash, devino_equal, g_free, g_free); self->loose_object_devino_hash = (GHashTable*)ostree_repo_devino_cache_new ();
g_hash_table_remove_all (self->loose_object_devino_hash); g_hash_table_remove_all (self->loose_object_devino_hash);
if (!scan_loose_devino (self, self->loose_object_devino_hash, cancellable, error)) if (!scan_loose_devino (self, self->loose_object_devino_hash, cancellable, error))
goto out; goto out;
@ -2231,21 +2234,6 @@ create_tree_variant_from_hashes (GHashTable *file_checksums,
return serialized_tree; return serialized_tree;
} }
struct OstreeRepoCommitModifier {
volatile gint refcount;
OstreeRepoCommitModifierFlags flags;
OstreeRepoCommitFilter filter;
gpointer user_data;
GDestroyNotify destroy_notify;
OstreeRepoCommitModifierXattrCallback xattr_callback;
GDestroyNotify xattr_destroy;
gpointer xattr_user_data;
OstreeSePolicy *sepolicy;
};
OstreeRepoCommitFilterResult OstreeRepoCommitFilterResult
_ostree_repo_commit_modifier_apply (OstreeRepo *self, _ostree_repo_commit_modifier_apply (OstreeRepo *self,
OstreeRepoCommitModifier *modifier, OstreeRepoCommitModifier *modifier,
@ -2503,7 +2491,7 @@ write_directory_content_to_mtree_internal (OstreeRepo *self,
g_autofree guchar *child_file_csum = NULL; g_autofree guchar *child_file_csum = NULL;
g_autofree char *tmp_checksum = NULL; g_autofree char *tmp_checksum = NULL;
loose_checksum = devino_cache_lookup (self, loose_checksum = devino_cache_lookup (self, modifier,
g_file_info_get_attribute_uint32 (child_info, "unix::device"), g_file_info_get_attribute_uint32 (child_info, "unix::device"),
g_file_info_get_attribute_uint64 (child_info, "unix::inode")); g_file_info_get_attribute_uint64 (child_info, "unix::inode"));
@ -2757,7 +2745,7 @@ write_dfd_iter_to_mtree_internal (OstreeRepo *self,
goto out; goto out;
} }
loose_checksum = devino_cache_lookup (self, stbuf.st_dev, stbuf.st_ino); loose_checksum = devino_cache_lookup (self, modifier, stbuf.st_dev, stbuf.st_ino);
if (loose_checksum) if (loose_checksum)
{ {
if (!ostree_mutable_tree_replace_file (mtree, dent->d_name, loose_checksum, if (!ostree_mutable_tree_replace_file (mtree, dent->d_name, loose_checksum,
@ -3030,6 +3018,7 @@ ostree_repo_commit_modifier_unref (OstreeRepoCommitModifier *modifier)
modifier->xattr_destroy (modifier->xattr_user_data); modifier->xattr_destroy (modifier->xattr_user_data);
g_clear_object (&modifier->sepolicy); g_clear_object (&modifier->sepolicy);
g_clear_pointer (&modifier->devino_cache, (GDestroyNotify)g_hash_table_unref);
g_free (modifier); g_free (modifier);
return; return;
@ -3080,6 +3069,46 @@ ostree_repo_commit_modifier_set_sepolicy (OstreeRepoCommitModifier
modifier->sepolicy = sepolicy ? g_object_ref (sepolicy) : NULL; modifier->sepolicy = sepolicy ? g_object_ref (sepolicy) : NULL;
} }
/**
 * ostree_repo_commit_modifier_set_devino_cache:
 * @modifier: Modifier
 * @cache: A hash table caching device,inode to checksums, or %NULL to
 *   drop any cache previously set on @modifier
 *
 * See the documentation for
 * `ostree_repo_devino_cache_new()`.  This function can
 * then be used for later calls to
 * `ostree_repo_write_directory_to_mtree()` to optimize commits.
 *
 * Note if your process has multiple writers, you should use separate
 * `OstreeRepo` instances if you want to also use this API.
 *
 * This function will add a reference to @cache without copying - you
 * should avoid further mutation of the cache.  A cache set by an
 * earlier call is unreferenced, so calling this function repeatedly
 * does not leak.
 */
void
ostree_repo_commit_modifier_set_devino_cache (OstreeRepoCommitModifier *modifier,
                                              OstreeRepoDevInoCache    *cache)
{
  /* Take the new reference before dropping the old one so that passing the
   * cache that is already set cannot free it out from under us.
   */
  GHashTable *new_cache = cache ? g_hash_table_ref ((GHashTable*)cache) : NULL;

  g_clear_pointer (&modifier->devino_cache, (GDestroyNotify)g_hash_table_unref);
  modifier->devino_cache = new_cache;
}
/**
 * ostree_repo_devino_cache_ref:
 * @cache: A cache created with ostree_repo_devino_cache_new()
 *
 * Takes an additional reference on @cache.
 *
 * Returns: @cache
 */
OstreeRepoDevInoCache *
ostree_repo_devino_cache_ref (OstreeRepoDevInoCache *cache)
{
  /* The cache is a GHashTable underneath; g_hash_table_ref() hands back
   * the table it was given.
   */
  return (OstreeRepoDevInoCache*) g_hash_table_ref ((GHashTable*)cache);
}
/**
 * ostree_repo_devino_cache_unref:
 * @cache: A cache created with ostree_repo_devino_cache_new()
 *
 * Drops one reference on @cache.  The cache is a GHashTable underneath,
 * so this simply forwards to g_hash_table_unref().
 */
void
ostree_repo_devino_cache_unref (OstreeRepoDevInoCache *cache)
{
  GHashTable *table = (GHashTable*)cache;

  g_hash_table_unref (table);
}
/* Register OstreeRepoDevInoCache as a boxed type whose copy operation takes
 * a reference and whose free operation drops one.  This provides the
 * ostree_repo_devino_cache_get_type() declared in the public header.
 */
G_DEFINE_BOXED_TYPE(OstreeRepoDevInoCache, ostree_repo_devino_cache,
ostree_repo_devino_cache_ref,
ostree_repo_devino_cache_unref);
G_DEFINE_BOXED_TYPE(OstreeRepoCommitModifier, ostree_repo_commit_modifier, G_DEFINE_BOXED_TYPE(OstreeRepoCommitModifier, ostree_repo_commit_modifier,
ostree_repo_commit_modifier_ref, ostree_repo_commit_modifier_ref,
ostree_repo_commit_modifier_unref); ostree_repo_commit_modifier_unref);

View File

@ -92,6 +92,12 @@ struct OstreeRepo {
OstreeRepo *parent_repo; OstreeRepo *parent_repo;
}; };
/* One entry of a (device, inode) -> checksum cache.
 *
 * Entries are stored in a GHashTable used as a set: `dev` and `ino` form the
 * lookup key (they are the only members hashed and compared), while
 * `checksum` is the payload kept in the same allocation.
 */
typedef struct {
dev_t dev;
ino_t ino;
/* 64 checksum characters followed by a terminating NUL. */
char checksum[65];
} OstreeDevIno;
gboolean gboolean
_ostree_repo_allocate_tmpdir (int tmpdir_dfd, _ostree_repo_allocate_tmpdir (int tmpdir_dfd,
const char *tmpdir_prefix, const char *tmpdir_prefix,

View File

@ -418,6 +418,9 @@ void ostree_repo_commit_modifier_set_xattr_callback (OstreeRepoCommitModifier
void ostree_repo_commit_modifier_set_sepolicy (OstreeRepoCommitModifier *modifier, void ostree_repo_commit_modifier_set_sepolicy (OstreeRepoCommitModifier *modifier,
OstreeSePolicy *sepolicy); OstreeSePolicy *sepolicy);
void ostree_repo_commit_modifier_set_devino_cache (OstreeRepoCommitModifier *modifier,
OstreeRepoDevInoCache *cache);
OstreeRepoCommitModifier *ostree_repo_commit_modifier_ref (OstreeRepoCommitModifier *modifier); OstreeRepoCommitModifier *ostree_repo_commit_modifier_ref (OstreeRepoCommitModifier *modifier);
void ostree_repo_commit_modifier_unref (OstreeRepoCommitModifier *modifier); void ostree_repo_commit_modifier_unref (OstreeRepoCommitModifier *modifier);
@ -531,10 +534,17 @@ typedef struct {
const char *subpath; const char *subpath;
OstreeRepoDevInoCache *devino_to_csum_cache;
guint unused_uints[6]; guint unused_uints[6];
gpointer unused_ptrs[8]; gpointer unused_ptrs[7];
} OstreeRepoCheckoutOptions; } OstreeRepoCheckoutOptions;
/* Reference-counted (device, inode) -> checksum cache.  Create one with
 * ostree_repo_devino_cache_new() and store it in the devino_to_csum_cache
 * member of OstreeRepoCheckoutOptions so that a checkout can populate it.
 */
GType ostree_repo_devino_cache_get_type (void);
OstreeRepoDevInoCache *ostree_repo_devino_cache_new (void);
OstreeRepoDevInoCache * ostree_repo_devino_cache_ref (OstreeRepoDevInoCache *cache);
void ostree_repo_devino_cache_unref (OstreeRepoDevInoCache *cache);
gboolean ostree_repo_checkout_tree_at (OstreeRepo *self, gboolean ostree_repo_checkout_tree_at (OstreeRepo *self,
OstreeRepoCheckoutOptions *options, OstreeRepoCheckoutOptions *options,
int destination_dfd, int destination_dfd,

View File

@ -27,6 +27,7 @@
G_BEGIN_DECLS G_BEGIN_DECLS
typedef struct OstreeRepo OstreeRepo; typedef struct OstreeRepo OstreeRepo;
typedef struct OstreeRepoDevInoCache OstreeRepoDevInoCache;
typedef struct OstreeSePolicy OstreeSePolicy; typedef struct OstreeSePolicy OstreeSePolicy;
typedef struct OstreeSysroot OstreeSysroot; typedef struct OstreeSysroot OstreeSysroot;
typedef struct OstreeSysrootUpgrader OstreeSysrootUpgrader; typedef struct OstreeSysrootUpgrader OstreeSysrootUpgrader;