From a56ba6081a465a3b97bd689459d6260c447d21c0 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Wed, 13 Jan 2016 11:33:54 -0500 Subject: [PATCH] repo: Clean up staging directory for previous boot IDs We had a policy of cleaning up all files in `$repo/tmp` older than one day, but we should really clean up previous bootid staging directories too, as they can potentially take up a lot of disk space. https://bugzilla.gnome.org/show_bug.cgi?id=760531 Closes: #170 Approved by: jlebon --- Makefile-tests.am | 1 + src/libostree/ostree-fetcher.c | 2 +- src/libostree/ostree-repo-commit.c | 97 +++++++++++++++------- src/libostree/ostree-repo-private.h | 15 +++- src/libostree/ostree-repo.c | 118 ++++++++++++++++++--------- tests/test-admin-deploy-bootid-gc.sh | 58 +++++++++++++ 6 files changed, 220 insertions(+), 71 deletions(-) create mode 100755 tests/test-admin-deploy-bootid-gc.sh diff --git a/Makefile-tests.am b/Makefile-tests.am index 9f24ea1d..4986b427 100644 --- a/Makefile-tests.am +++ b/Makefile-tests.am @@ -63,6 +63,7 @@ test_scripts = \ tests/test-admin-deploy-etcmerge-cornercases.sh \ tests/test-admin-deploy-uboot.sh \ tests/test-admin-deploy-grub2.sh \ + tests/test-admin-deploy-bootid-gc.sh \ tests/test-admin-instutil-set-kargs.sh \ tests/test-admin-upgrade-not-backwards.sh \ tests/test-admin-pull-deploy-commit.sh \ diff --git a/src/libostree/ostree-fetcher.c b/src/libostree/ostree-fetcher.c index d7915ba6..3606e8fb 100644 --- a/src/libostree/ostree-fetcher.c +++ b/src/libostree/ostree-fetcher.c @@ -383,7 +383,7 @@ session_thread_request_uri (ThreadClosure *thread_closure, if (thread_closure->tmpdir_name == NULL) { if (!_ostree_repo_allocate_tmpdir (thread_closure->base_tmpdir_dfd, - "fetcher-", + OSTREE_REPO_TMPDIR_FETCHER, &thread_closure->tmpdir_name, &thread_closure->tmpdir_dfd, &thread_closure->tmpdir_lock, diff --git a/src/libostree/ostree-repo-commit.c b/src/libostree/ostree-repo-commit.c index 8e00646c..0d7f9bee 100644 --- a/src/libostree/ostree-repo-commit.c +++ b/src/libostree/ostree-repo-commit.c @@ -1161,7 +1161,6 @@ ostree_repo_prepare_transaction (OstreeRepo *self, { gboolean ret = FALSE; gboolean ret_transaction_resume = FALSE; - g_autofree char *stagedir_boot_id_prefix = NULL; g_autofree char *stagedir_name = NULL; glnx_fd_close int stagedir_fd = -1; g_auto(GLnxDirFdIterator) dfd_iter = { 0, }; @@ -1172,10 +1171,8 @@ ostree_repo_prepare_transaction (OstreeRepo *self, self->in_transaction = TRUE; - stagedir_boot_id_prefix = g_strconcat ("staging-", self->boot_id, "-", NULL); - if (!_ostree_repo_allocate_tmpdir (self->tmp_dir_fd, - stagedir_boot_id_prefix, + self->stagedir_prefix, &self->commit_stagedir_name, &self->commit_stagedir_fd, &self->commit_stagedir_lock, @@ -1281,44 +1278,86 @@ cleanup_tmpdir (OstreeRepo *self, GError **error) { gboolean ret = FALSE; - g_autoptr(GFileEnumerator) enumerator = NULL; + g_auto(GLnxDirFdIterator) dfd_iter = { 0, }; guint64 curtime_secs; - enumerator = g_file_enumerate_children (self->tmp_dir, "standard::name,time::modified", - G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS, - cancellable, - error); - if (!enumerator) - goto out; - curtime_secs = g_get_real_time () / 1000000; + if (!glnx_dirfd_iterator_init_at (self->tmp_dir_fd, ".", TRUE, &dfd_iter, error)) + goto out; + while (TRUE) { - GFileInfo *file_info; - GFile *path; - guint64 mtime; guint64 delta; + struct dirent *dent; + struct stat stbuf; + g_auto(GLnxLockFile) lockfile = GLNX_LOCK_FILE_INIT; + gboolean did_lock; - if (!gs_file_enumerator_iterate (enumerator, &file_info, &path, - cancellable, error)) + if (!glnx_dirfd_iterator_next_dent (&dfd_iter, &dent, cancellable, error)) goto out; - if (file_info == NULL) + + if (dent == NULL) break; - mtime = g_file_info_get_attribute_uint64 (file_info, "time::modified"); - if (mtime > curtime_secs) - continue; - /* Only delete files older than a day. To do better, we would - * need to coordinate between multiple processes in a reliable - * fashion. See - * https://bugzilla.gnome.org/show_bug.cgi?id=709115 - */ - delta = curtime_secs - mtime; - if (delta > 60*60*24) + if (TEMP_FAILURE_RETRY (fstatat (dfd_iter.fd, dent->d_name, &stbuf, AT_SYMLINK_NOFOLLOW)) < 0) { - if (!glnx_shutil_rm_rf_at (AT_FDCWD, gs_file_get_path_cached (path), cancellable, error)) + if (errno == ENOENT) /* Did another cleanup win? */ + continue; + glnx_set_error_from_errno (error); + goto out; + } + + /* First, if it's a directory which needs locking, but it's + * busy, skip it. + */ + if (_ostree_repo_is_locked_tmpdir (dent->d_name)) + { + if (!_ostree_repo_try_lock_tmpdir (dfd_iter.fd, dent->d_name, + &lockfile, &did_lock, error)) goto out; + if (!did_lock) + continue; + } + + /* If however this is the staging directory for the *current* + * boot, then don't delete it now - we may end up reusing it, as + * is the point. + */ + if (g_str_has_prefix (dent->d_name, self->stagedir_prefix)) + continue; + else if (g_str_has_prefix (dent->d_name, OSTREE_REPO_TMPDIR_STAGING)) + { + /* But, crucially we can now clean up staging directories + * from *other* boots + */ + if (!glnx_shutil_rm_rf_at (dfd_iter.fd, dent->d_name, cancellable, error)) + goto out; + } + /* FIXME - move OSTREE_REPO_TMPDIR_FETCHER underneath the + * staging/boot-id scheme as well, since all of the "did it get + * fsync'd" concerns apply to that as well. Then we can skip + * this special case. + */ + else if (g_str_has_prefix (dent->d_name, OSTREE_REPO_TMPDIR_FETCHER)) + continue; + else + { + /* Now we do time-based cleanup. Ignore it if it's somehow + * in the future... + */ + if (stbuf.st_mtime > curtime_secs) + continue; + + /* Now, we arbitrarily delete files/directories older than a + * day, since that's what we were doing before we had locking. + */ + delta = curtime_secs - stbuf.st_mtime; + if (delta > 60*60*24) + { + if (!glnx_shutil_rm_rf_at (dfd_iter.fd, dent->d_name, cancellable, error)) + goto out; + } } } diff --git a/src/libostree/ostree-repo-private.h b/src/libostree/ostree-repo-private.h index 70600f4e..273edd1a 100644 --- a/src/libostree/ostree-repo-private.h +++ b/src/libostree/ostree-repo-private.h @@ -48,7 +48,7 @@ typedef enum { struct OstreeRepo { GObject parent; - char *boot_id; + char *stagedir_prefix; int commit_stagedir_fd; char *commit_stagedir_name; GLnxLockFile commit_stagedir_lock; @@ -108,6 +108,9 @@ typedef struct { char checksum[65]; } OstreeDevIno; +#define OSTREE_REPO_TMPDIR_STAGING "staging-" +#define OSTREE_REPO_TMPDIR_FETCHER "fetcher-" + gboolean _ostree_repo_allocate_tmpdir (int tmpdir_dfd, const char *tmpdir_prefix, @@ -118,6 +121,16 @@ _ostree_repo_allocate_tmpdir (int tmpdir_dfd, GCancellable *cancellable, GError **error); +gboolean +_ostree_repo_is_locked_tmpdir (const char *filename); + +gboolean +_ostree_repo_try_lock_tmpdir (int tmpdir_dfd, + const char *tmpdir_name, + GLnxLockFile *file_lock_out, + gboolean *out_did_lock, + GError **error); + gboolean _ostree_repo_ensure_loose_objdir_at (int dfd, const char *loose_path, diff --git a/src/libostree/ostree-repo.c b/src/libostree/ostree-repo.c index a34e2ec1..2c9a7fd3 100644 --- a/src/libostree/ostree-repo.c +++ b/src/libostree/ostree-repo.c @@ -617,7 +617,7 @@ ostree_repo_finalize (GObject *object) g_clear_object (&self->parent_repo); - g_free (self->boot_id); + g_free (self->stagedir_prefix); g_clear_object (&self->repodir); if (self->repo_dir_fd != -1) (void) close (self->repo_dir_fd); @@ -2428,22 +2428,26 @@ ostree_repo_open (OstreeRepo *self, if (self->inited) return TRUE; - /* We use a per-boot identifier to keep track of which file contents - * possibly haven't been sync'd to disk. + /* We use a directory of the form `staging-${BOOT_ID}-${RANDOM}` + * where if the ${BOOT_ID} doesn't match, we know file contents + * possibly haven't been sync'd to disk and need to be discarded. */ { const char *env_bootid = getenv ("OSTREE_BOOTID"); + g_autofree char *boot_id = NULL; if (env_bootid != NULL) - self->boot_id = g_strdup (env_bootid); + boot_id = g_strdup (env_bootid); else { if (!g_file_get_contents ("/proc/sys/kernel/random/boot_id", - &self->boot_id, + &boot_id, NULL, error)) goto out; - g_strdelimit (self->boot_id, "\n", '\0'); + g_strdelimit (boot_id, "\n", '\0'); } + + self->stagedir_prefix = g_strconcat (OSTREE_REPO_TMPDIR_STAGING, boot_id, "-", NULL); } if (!glnx_opendirat (AT_FDCWD, gs_file_get_path_cached (self->repodir), TRUE, @@ -5003,6 +5007,50 @@ ostree_repo_regenerate_summary (OstreeRepo *self, return ret; } +gboolean +_ostree_repo_is_locked_tmpdir (const char *filename) +{ + return g_str_has_prefix (filename, OSTREE_REPO_TMPDIR_STAGING) || + g_str_has_prefix (filename, OSTREE_REPO_TMPDIR_FETCHER); +} + +gboolean +_ostree_repo_try_lock_tmpdir (int tmpdir_dfd, + const char *tmpdir_name, + GLnxLockFile *file_lock_out, + gboolean *out_did_lock, + GError **error) +{ + gboolean ret = FALSE; + g_autofree char *lock_name = g_strconcat (tmpdir_name, "-lock", NULL); + gboolean did_lock = FALSE; + g_autoptr(GError) local_error = NULL; + + /* We put the lock outside the dir, so we can hold the lock + * until the directory is fully removed */ + if (!glnx_make_lock_file (tmpdir_dfd, lock_name, LOCK_EX | LOCK_NB, + file_lock_out, &local_error)) + { + if (g_error_matches (local_error, G_IO_ERROR, G_IO_ERROR_WOULD_BLOCK)) + { + did_lock = FALSE; + } + else + { + g_propagate_error (error, g_steal_pointer (&local_error)); + goto out; + } + } + else + { + did_lock = TRUE; + } + + ret = TRUE; + *out_did_lock = did_lock; + out: + return ret; +} /* This allocates and locks a subdir of the repo tmp dir, using an existing * one with the same prefix if it is not in use already. */ @@ -5016,14 +5064,18 @@ _ostree_repo_allocate_tmpdir (int tmpdir_dfd, GCancellable *cancellable, GError **error) { + gboolean ret = FALSE; gboolean reusing_dir = FALSE; + gboolean did_lock; g_autofree char *tmpdir_name = NULL; glnx_fd_close int tmpdir_fd = -1; g_auto(GLnxDirFdIterator) dfd_iter = { 0, }; + g_return_val_if_fail (_ostree_repo_is_locked_tmpdir (tmpdir_prefix), FALSE); + /* Look for existing tmpdir (with same prefix) to reuse */ if (!glnx_dirfd_iterator_init_at (tmpdir_dfd, ".", FALSE, &dfd_iter, error)) - return FALSE; + goto out; while (tmpdir_name == NULL) { @@ -5031,10 +5083,9 @@ _ostree_repo_allocate_tmpdir (int tmpdir_dfd, struct dirent *dent; glnx_fd_close int existing_tmpdir_fd = -1; g_autoptr(GError) local_error = NULL; - g_autofree char *lock_name = NULL; if (!glnx_dirfd_iterator_next_dent (&dfd_iter, &dent, cancellable, error)) - return FALSE; + goto out; if (dent == NULL) break; @@ -5055,25 +5106,18 @@ _ostree_repo_allocate_tmpdir (int tmpdir_dfd, else { g_propagate_error (error, g_steal_pointer (&local_error)); - return FALSE; + goto out; } } - lock_name = g_strconcat (dent->d_name, "-lock", NULL); - /* We put the lock outside the dir, so we can hold the lock * until the directory is fully removed */ - if (!glnx_make_lock_file (dfd_iter.fd, lock_name, LOCK_EX | LOCK_NB, - file_lock_out, &local_error)) - { - if (g_error_matches (local_error, G_IO_ERROR, G_IO_ERROR_WOULD_BLOCK)) - continue; - else - { - g_propagate_error (error, g_steal_pointer (&local_error)); - return FALSE; - } - } + if (!_ostree_repo_try_lock_tmpdir (dfd_iter.fd, dent->d_name, + file_lock_out, &did_lock, + error)) + goto out; + if (!did_lock) + continue; /* Touch the reused directory so that we don't accidentally * remove it due to being old when cleaning up the tmpdir @@ -5091,32 +5135,24 @@ _ostree_repo_allocate_tmpdir (int tmpdir_dfd, g_autofree char *tmpdir_name_template = g_strconcat (tmpdir_prefix, "XXXXXX", NULL); glnx_fd_close int new_tmpdir_fd = -1; g_autoptr(GError) local_error = NULL; - g_autofree char *lock_name = NULL; /* No existing tmpdir found, create a new */ if (!glnx_mkdtempat (tmpdir_dfd, tmpdir_name_template, 0777, error)) - return FALSE; + goto out; if (!glnx_opendirat (tmpdir_dfd, tmpdir_name_template, FALSE, &new_tmpdir_fd, error)) - return FALSE; - - lock_name = g_strconcat (tmpdir_name_template, "-lock", NULL); + goto out; /* Note, at this point we can race with another process that picks up this * new directory. If that happens we need to retry, making a new directory. */ - if (!glnx_make_lock_file (tmpdir_dfd, lock_name, LOCK_EX | LOCK_NB, - file_lock_out, &local_error)) - { - if (g_error_matches (local_error, G_IO_ERROR, G_IO_ERROR_WOULD_BLOCK)) - continue; - else - { - g_propagate_error (error, g_steal_pointer (&local_error)); - return FALSE; - } - } + if (!_ostree_repo_try_lock_tmpdir (tmpdir_dfd, tmpdir_name_template, + file_lock_out, &did_lock, + error)) + goto out; + if (!did_lock) + continue; tmpdir_name = g_steal_pointer (&tmpdir_name_template); tmpdir_fd = glnx_steal_fd (&new_tmpdir_fd); @@ -5131,5 +5167,7 @@ _ostree_repo_allocate_tmpdir (int tmpdir_dfd, if (reusing_dir_out) *reusing_dir_out = reusing_dir; - return TRUE; + ret = TRUE; + out: + return ret; } diff --git a/tests/test-admin-deploy-bootid-gc.sh b/tests/test-admin-deploy-bootid-gc.sh new file mode 100755 index 00000000..ba16f336 --- /dev/null +++ b/tests/test-admin-deploy-bootid-gc.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# +# Copyright (C) 2016 Colin Walters +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. + +set -euo pipefail + +. $(dirname $0)/libtest.sh + +# Exports OSTREE_SYSROOT so --sysroot not needed. +setup_os_repository "archive-z2" "syslinux" + +echo "1..1" + +${CMD_PREFIX} ostree --repo=sysroot/ostree/repo pull-local --remote=testos testos-repo testos/buildmaster/x86_64-runtime +rev=$(${CMD_PREFIX} ostree --repo=sysroot/ostree/repo rev-parse testos/buildmaster/x86_64-runtime) +export rev +${CMD_PREFIX} ostree admin deploy --karg=root=LABEL=MOO --karg=quiet --os=testos testos:testos/buildmaster/x86_64-runtime +os_repository_new_commit + +rm sysroot/ostree/repo/tmp/* -rf +export TEST_BOOTID=4072029c-8b10-60d1-d31b-8422eeff9b42 +if env OSTREE_REPO_TEST_ERROR=pre-commit OSTREE_BOOTID=${TEST_BOOTID} \ + ${CMD_PREFIX} ostree admin deploy --karg=root=LABEL=MOO --karg=quiet --os=testos testos:testos/buildmaster/x86_64-runtime 2>err.txt; then + assert_not_reached "Should have hit OSTREE_REPO_TEST_ERROR_PRE_COMMIT" +fi +stagepath=$(ls -d sysroot/ostree/repo/tmp/staging-${TEST_BOOTID}-*) +assert_has_dir "${stagepath}" + +# We have an older failed stage, now use a new boot id + +export NEW_TEST_BOOTID=5072029c-8b10-60d1-d31b-8422eeff9b42 +if env OSTREE_REPO_TEST_ERROR=pre-commit OSTREE_BOOTID=${NEW_TEST_BOOTID} \ + ${CMD_PREFIX} ostree admin deploy --karg=root=LABEL=FOO --karg=quiet --os=testos testos:testos/buildmaster/x86_64-runtime 2>err.txt; then + assert_not_reached "Should have hit OSTREE_REPO_TEST_ERROR_PRE_COMMIT" +fi +newstagepath=$(ls -d sysroot/ostree/repo/tmp/staging-${NEW_TEST_BOOTID}-*) +assert_has_dir "${newstagepath}" +env OSTREE_BOOTID=${NEW_TEST_BOOTID} ${CMD_PREFIX} ostree admin deploy --karg=root=LABEL=MOO --karg=quiet --os=testos testos:testos/buildmaster/x86_64-runtime +newstagepath=$(ls -d sysroot/ostree/repo/tmp/staging-${NEW_TEST_BOOTID}-*) +assert_not_has_dir "${stagepath}" +assert_not_has_dir "${newstagepath}" + +echo "ok admin bootid GC"