lib/repo: Add a DEVINO_CANONICAL commit modifier flag

I was seeing the `Writing OSTree commit...` phase of rpm-ostree
being very slow lately.  This turns out to be more fallout from
https://github.com/ostreedev/ostree/pull/1170
AKA commit: 8fe4536

Loading the xattrs is slow on my system (F27AW, XFS+LVM, NVMe). I haven't fully
traced through why, but AIUI at least on XFS the xattrs are often stored outside
of the inode so it's a little bit like doing an `open()+read()`. Plus there's
the LSM overhead, etc.

The thing is that for rpm-ostree's package layering use case, we
basically always want to treat the on-disk state as canonical.  (There's
a subtle case here if one does overrides for something that contains
policy but we'll fix that).

Anyways, so we're in a state now where we do the slow but correct thing by
default, which seems sane. But let's allow the app to opt-in to telling us
"really trust devino". The difference between a `stat()` + hash table lookup
versus the full xattr load on my test case of `rpm-ostree install
./tree-1.7.0-10.fc27.x86_64.rpm` is absolutely dramatic; consistently on the
order of 10s without this support, and <1s with (800ms).

Closes: #1357
Approved by: jlebon
This commit is contained in:
Colin Walters 2017-11-30 21:43:17 -05:00 committed by Atomic Bot
parent 5ef8faff9a
commit 7c8ea25306
4 changed files with 85 additions and 33 deletions

View File

@ -2793,47 +2793,70 @@ write_directory_content_to_mtree_internal (OstreeRepo *self,
g_assert (dir_enum != NULL || dfd_iter != NULL);
GFileType file_type = g_file_info_get_file_type (child_info);
const char *name = g_file_info_get_name (child_info);
g_ptr_array_add (path, (char*)name);
g_autofree char *child_relpath = ptrarray_path_join (path);
/* See if we have a devino hit; this is used below. Further, for bare-user
* repos we'll reload our file info from the object (specifically the
* ostreemeta xattr). The on-disk state is not normally what we want to
* commit. Basically we're making sure that we pick up "real" uid/gid and any
* xattrs there.
/* Load flags into boolean constants for ease of readability (we also need to
* NULL-check modifier)
*/
const char *loose_checksum = NULL;
g_autoptr(GVariant) source_xattrs = NULL;
if (dfd_iter != NULL && (file_type != G_FILE_TYPE_DIRECTORY))
{
guint32 dev = g_file_info_get_attribute_uint32 (child_info, "unix::device");
guint64 inode = g_file_info_get_attribute_uint64 (child_info, "unix::inode");
loose_checksum = devino_cache_lookup (self, modifier, dev, inode);
if (loose_checksum && self->mode == OSTREE_REPO_MODE_BARE_USER)
{
child_info = NULL;
if (!ostree_repo_load_file (self, loose_checksum, NULL, &child_info, &source_xattrs,
cancellable, error))
return FALSE;
}
}
g_autoptr(GFileInfo) modified_info = NULL;
OstreeRepoCommitFilterResult filter_result =
_ostree_repo_commit_modifier_apply (self, modifier, child_relpath, child_info, &modified_info);
const gboolean child_info_was_modified = !_ostree_gfileinfo_equal (child_info, modified_info);
const gboolean canonical_permissions = modifier &&
(modifier->flags & OSTREE_REPO_COMMIT_MODIFIER_FLAGS_CANONICAL_PERMISSIONS);
const gboolean devino_canonical = modifier &&
(modifier->flags & OSTREE_REPO_COMMIT_MODIFIER_FLAGS_DEVINO_CANONICAL);
/* We currently only honor the CONSUME flag in the dfd_iter case to avoid even
* more complexity in this function, and it'd mostly only be useful when
* operating on local filesystems anyways.
*/
const gboolean delete_after_commit = dfd_iter && modifier &&
(modifier->flags & OSTREE_REPO_COMMIT_MODIFIER_FLAGS_CONSUME);
const gboolean canonical_permissions = modifier &&
(modifier->flags & OSTREE_REPO_COMMIT_MODIFIER_FLAGS_CANONICAL_PERMISSIONS);
/* See if we have a devino hit; this is used below in a few places. */
const char *loose_checksum = NULL;
if (dfd_iter != NULL && (file_type != G_FILE_TYPE_DIRECTORY))
{
guint32 dev = g_file_info_get_attribute_uint32 (child_info, "unix::device");
guint64 inode = g_file_info_get_attribute_uint64 (child_info, "unix::inode");
loose_checksum = devino_cache_lookup (self, modifier, dev, inode);
if (loose_checksum && devino_canonical)
{
/* Go directly to checksum, do not pass Go, do not collect $200.
* In this mode the app is required to break hardlinks for any
* files it wants to modify.
*/
if (!ostree_mutable_tree_replace_file (mtree, name, loose_checksum, error))
return FALSE;
if (delete_after_commit)
{
if (!glnx_shutil_rm_rf_at (dfd_iter->fd, name, cancellable, error))
return FALSE;
}
return TRUE; /* Early return */
}
}
/* Build the full path which we need for callbacks */
g_ptr_array_add (path, (char*)name);
g_autofree char *child_relpath = ptrarray_path_join (path);
/* For bare-user repos we'll reload our file info from the object
* (specifically the ostreemeta xattr), if it was checked out that way (via
* hardlink). The on-disk state is not normally what we want to commit.
* Basically we're making sure that we pick up "real" uid/gid and any xattrs
* there.
*/
g_autoptr(GVariant) source_xattrs = NULL;
if (loose_checksum && self->mode == OSTREE_REPO_MODE_BARE_USER)
{
child_info = NULL;
if (!ostree_repo_load_file (self, loose_checksum, NULL, &child_info, &source_xattrs,
cancellable, error))
return FALSE;
}
/* Call the filter */
g_autoptr(GFileInfo) modified_info = NULL;
OstreeRepoCommitFilterResult filter_result =
_ostree_repo_commit_modifier_apply (self, modifier, child_relpath, child_info, &modified_info);
const gboolean child_info_was_modified = !_ostree_gfileinfo_equal (child_info, modified_info);
if (filter_result != OSTREE_REPO_COMMIT_FILTER_ALLOW)
{

View File

@ -644,6 +644,7 @@ typedef OstreeRepoCommitFilterResult (*OstreeRepoCommitFilter) (OstreeRepo *r
* @OSTREE_REPO_COMMIT_MODIFIER_FLAGS_CANONICAL_PERMISSIONS: Canonicalize permissions for bare-user-only mode.
* @OSTREE_REPO_COMMIT_MODIFIER_FLAGS_ERROR_ON_UNLABELED: Emit an error if configured SELinux policy does not provide a label
* @OSTREE_REPO_COMMIT_MODIFIER_FLAGS_CONSUME: Delete added files/directories after commit; Since: 2017.13
* @OSTREE_REPO_COMMIT_MODIFIER_FLAGS_DEVINO_CANONICAL: If a devino cache hit is found, skip modifier filters (non-directories only); Since: 2017.14
*/
typedef enum {
OSTREE_REPO_COMMIT_MODIFIER_FLAGS_NONE = 0,
@ -652,6 +653,7 @@ typedef enum {
OSTREE_REPO_COMMIT_MODIFIER_FLAGS_CANONICAL_PERMISSIONS = (1 << 2),
OSTREE_REPO_COMMIT_MODIFIER_FLAGS_ERROR_ON_UNLABELED = (1 << 3),
OSTREE_REPO_COMMIT_MODIFIER_FLAGS_CONSUME = (1 << 4),
OSTREE_REPO_COMMIT_MODIFIER_FLAGS_DEVINO_CANONICAL = (1 << 5),
} OstreeRepoCommitModifierFlags;
/**

View File

@ -51,6 +51,7 @@ static gboolean opt_no_xattrs;
static char *opt_selinux_policy;
static gboolean opt_canonical_permissions;
static gboolean opt_consume;
static gboolean opt_devino_canonical;
static char **opt_trees;
static gint opt_owner_uid = -1;
static gint opt_owner_gid = -1;
@ -98,6 +99,7 @@ static GOptionEntry options[] = {
{ "no-xattrs", 0, 0, G_OPTION_ARG_NONE, &opt_no_xattrs, "Do not import extended attributes", NULL },
{ "selinux-policy", 0, 0, G_OPTION_ARG_FILENAME, &opt_selinux_policy, "Set SELinux labels based on policy in root filesystem PATH (may be /)", "PATH" },
{ "link-checkout-speedup", 0, 0, G_OPTION_ARG_NONE, &opt_link_checkout_speedup, "Optimize for commits of trees composed of hardlinks into the repository", NULL },
{ "devino-canonical", 'I', 0, G_OPTION_ARG_NONE, &opt_devino_canonical, "Assume hardlinked objects are unmodified. Implies --link-checkout-speedup", NULL },
{ "tar-autocreate-parents", 0, 0, G_OPTION_ARG_NONE, &opt_tar_autocreate_parents, "When loading tar archives, automatically create parent directories as needed", NULL },
{ "tar-pathname-filter", 0, 0, G_OPTION_ARG_STRING, &opt_tar_pathname_filter, "When loading tar archives, use REGEX,REPLACEMENT against path names", "REGEX,REPLACEMENT" },
{ "skip-if-unchanged", 0, 0, G_OPTION_ARG_NONE, &opt_skip_if_unchanged, "If the contents are unchanged from previous commit, do nothing", NULL },
@ -480,6 +482,11 @@ ostree_builtin_commit (int argc, char **argv, OstreeCommandInvocation *invocatio
flags |= OSTREE_REPO_COMMIT_MODIFIER_FLAGS_SKIP_XATTRS;
if (opt_consume)
flags |= OSTREE_REPO_COMMIT_MODIFIER_FLAGS_CONSUME;
if (opt_devino_canonical)
{
opt_link_checkout_speedup = TRUE; /* Imply this */
flags |= OSTREE_REPO_COMMIT_MODIFIER_FLAGS_DEVINO_CANONICAL;
}
if (opt_canonical_permissions)
flags |= OSTREE_REPO_COMMIT_MODIFIER_FLAGS_CANONICAL_PERMISSIONS;
if (opt_generate_sizes)

View File

@ -25,7 +25,7 @@ skip_without_user_xattrs
setup_test_repository "bare-user"
extra_basic_tests=5
extra_basic_tests=6
. $(dirname $0)/basic-test.sh
# Reset things so we don't inherit a lot of state from earlier tests
@ -99,3 +99,23 @@ assert_file_has_content ls.txt '^-007.. 0 0 .*/usr/bin/systemd'
$OSTREE ls rootfs /usr/lib/dbus-daemon-helper >ls.txt
assert_file_has_content ls.txt '^-007.. 0 81 .*/usr/lib/dbus-daemon-helper'
echo "ok bare-user link-checkout-speedup maintains uids"
cd ${test_tmpdir}
rm -rf test2-checkout
$OSTREE checkout -H -U test2 test2-checkout
# With --link-checkout-speedup, specifying --owner-uid should "win" by default.
myid=$(id -u)
newid=$((${myid} + 1))
$OSTREE commit ${COMMIT_ARGS} --owner-uid ${newid} --owner-gid ${newid} \
--link-checkout-speedup -b test2-linkcheckout-test --tree=dir=test2-checkout
$OSTREE ls test2-linkcheckout-test /baz/cow > ls.txt
assert_file_has_content ls.txt "^-006.. ${newid} ${newid} .*/baz/cow"
# But --devino-canonical should override that
$OSTREE commit ${COMMIT_ARGS} --owner-uid ${newid} --owner-gid ${newid} \
-I -b test2-devino-test --tree=dir=test2-checkout
$OSTREE ls test2-devino-test /baz/cow > ls.txt
assert_file_has_content ls.txt "^-006.. ${myid} ${myid} .*/baz/cow"
$OSTREE refs --delete test2-{linkcheckout,devino}-test
echo "ok commit with -I"