diff --git a/.cci.jenkinsfile b/.cci.jenkinsfile index add8c35a..ac65b9c8 100644 --- a/.cci.jenkinsfile +++ b/.cci.jenkinsfile @@ -81,6 +81,8 @@ parallel fcos: { coreos-assembler buildextend-metal4k coreos-assembler buildextend-live --fast # Install the tests + # Build and install the tests + make -C tests/kolainst make -C tests/kolainst install """) } diff --git a/Makefile.am b/Makefile.am index cd04a055..87a705cc 100644 --- a/Makefile.am +++ b/Makefile.am @@ -43,6 +43,9 @@ AM_DISTCHECK_CONFIGURE_FLAGS += \ GITIGNOREFILES = aclocal.m4 build-aux/ buildutil/*.m4 config.h.in gtk-doc.make +# Generated by coreos-assembler build-fast and kola +GITIGNOREFILES += fastbuild-*.qcow2 _kola_temp/ + SUBDIRS += . if ENABLE_GTK_DOC diff --git a/tests/inst/Cargo.toml b/tests/inst/Cargo.toml index a3838922..d47db53c 100644 --- a/tests/inst/Cargo.toml +++ b/tests/inst/Cargo.toml @@ -6,17 +6,21 @@ edition = "2018" [[bin]] name = "ostree-test" -path = "src/insttest.rs" +path = "src/insttestmain.rs" [dependencies] clap = "2.32.0" -structopt = "0.2" +structopt = "0.3" +serde = "1.0.111" +serde_derive = "1.0.111" +serde_json = "1.0" commandspec = "0.12.2" anyhow = "1.0" tempfile = "3.1.0" +glib = "0.9.1" gio = "0.8" ostree = { version = "0.7.1", features = ["v2020_1"] } -libtest-mimic = "0.2.0" +libtest-mimic = "0.3.0" twoway = "0.2.1" hyper = "0.13" futures = "0.3.4" @@ -26,17 +30,21 @@ tokio = { version = "0.2", features = ["full"] } futures-util = "0.3.1" base64 = "0.12.0" procspawn = "0.8" -proc-macro2 = "0.4" -quote = "0.6" -syn = "0.15" +rand = "0.7.3" linkme = "0.2" +strum = "0.18.0" +strum_macros = "0.18.0" +openat = "0.1.19" +openat-ext = "0.1.4" +nix = "0.17.0" +# This one I might publish to crates.io, not sure yet +with-procspawn-tempdir = { git = "https://github.com/cgwalters/with-procspawn-tempdir" } + +# Internal crate for the test macro itest-macro = { path = "itest-macro" } -with-procspawn-tempdir = { git = "https://github.com/cgwalters/with-procspawn-tempdir" } 
-#with-procspawn-tempdir = { path = "/var/srv/walters/src/github/cgwalters/with-procspawn-tempdir" }
-
-# See https://github.com/tcr/commandspec/pulls?q=is%3Apr+author%3Acgwalters+
 [patch.crates-io]
+# See https://github.com/tcr/commandspec/pulls?q=is%3Apr+author%3Acgwalters+
+# If patches don't get reviewed I'll probably fork it.
 commandspec = { git = "https://github.com/cgwalters/commandspec", branch = 'walters-master' }
-#commandspec = { path = "/var/srv/walters/src/github/tcr/commandspec" }
diff --git a/tests/inst/itest-macro/src/itest-macro.rs b/tests/inst/itest-macro/src/itest-macro.rs
index 42b99581..34d35a1a 100644
--- a/tests/inst/itest-macro/src/itest-macro.rs
+++ b/tests/inst/itest-macro/src/itest-macro.rs
@@ -9,18 +9,59 @@ use quote::quote;
 #[proc_macro_attribute]
 pub fn itest(attrs: TokenStream, input: TokenStream) -> TokenStream {
     let attrs = syn::parse_macro_input!(attrs as syn::AttributeArgs);
-    if attrs.len() > 0 {
-        return syn::Error::new_spanned(&attrs[0], "itest takes no attributes")
+    if attrs.len() > 1 {
+        return syn::Error::new_spanned(&attrs[1], "itest takes 0 or 1 attributes")
             .to_compile_error()
             .into();
     }
+    // Parse the optional `destructive = <bool>` argument.  At most one
+    // argument remains at this point, so errors are spanned on `attrs[0]`.
+    let destructive = match attrs.get(0) {
+        Some(syn::NestedMeta::Meta(syn::Meta::NameValue(namevalue))) => {
+            if let Some(name) = namevalue.path.get_ident().map(|i| i.to_string()) {
+                if name == "destructive" {
+                    match &namevalue.lit {
+                        syn::Lit::Bool(v) => v.value,
+                        _ => {
+                            return syn::Error::new_spanned(
+                                &attrs[0],
+                                format!("destructive must be bool {}", name),
+                            )
+                            .to_compile_error()
+                            .into();
+                        }
+                    }
+                } else {
+                    return syn::Error::new_spanned(
+                        &attrs[0],
+                        format!("Unknown argument {}", name),
+                    )
+                    .to_compile_error()
+                    .into();
+                }
+            } else {
+                false
+            }
+        }
+        Some(v) => {
+            return syn::Error::new_spanned(&v, "Unexpected argument")
+                .to_compile_error()
+                .into()
+        }
+        None => false,
+    };
     let func = syn::parse_macro_input!(input as syn::ItemFn);
     let fident = func.sig.ident.clone();
     let varident = quote::format_ident!("ITEST_{}", fident);
     let fidentstrbuf = format!(r#"{}"#, fident);
     let fidentstr = syn::LitStr::new(&fidentstrbuf, Span::call_site());
+    let testident = if destructive {
+        quote::format_ident!("{}", "DESTRUCTIVE_TESTS")
+    } else {
+        quote::format_ident!("{}", "NONDESTRUCTIVE_TESTS")
+    };
     let output = quote! {
-        #[linkme::distributed_slice(TESTS)]
+        #[linkme::distributed_slice(#testident)]
         #[allow(non_upper_case_globals)]
         static #varident : Test = Test {
             name: #fidentstr,
diff --git a/tests/inst/src/destructive.rs b/tests/inst/src/destructive.rs
new file mode 100644
index 00000000..4d22ea83
--- /dev/null
+++ b/tests/inst/src/destructive.rs
@@ -0,0 +1,624 @@
+//! Test that interrupting an upgrade is safe.
+//!
+//! This test builds on coreos-assembler's "external tests":
+//! https://github.com/coreos/coreos-assembler/blob/master/mantle/kola/README-kola-ext.md
+//! Key to this in particular is coreos-assembler implementing the Debian autopkgtest reboot API.
+//!
+//! The basic model of this test is:
+//!
+//! Copy the OS content in to an archive repository, and generate a "synthetic"
+//! update for it by randomly mutating ELF files. Time how long upgrading
+//! to that takes, to use as a baseline in a range of time we will target
+//! for interrupt.
+//!
+//! Start a webserver, pointing rpm-ostree at the updated content. We
+//! alternate between a few "interrupt strategies", from `kill -9` on
+//! rpm-ostreed, or rebooting normally, or an immediate forced reboot
+//! (with no filesystem sync).
+//!
+//! The state of the tests is passed by serializing JSON into the
+//! AUTOPKGTEST_REBOOT_MARK.
+ +use anyhow::{Context, Result}; +use commandspec::sh_execute; +use rand::seq::SliceRandom; +use rand::Rng; +use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; +use std::io::Write; +use std::path::Path; +use std::time; +use strum::IntoEnumIterator; +use strum_macros::EnumIter; + +use crate::rpmostree; +use crate::test::*; + +const ORIGREF: &'static str = "orig-booted"; +const TESTREF: &'static str = "testcontent"; +const TDATAPATH: &'static str = "/var/tmp/ostree-test-transaction-data.json"; +const SRVREPO: &'static str = "/var/tmp/ostree-test-srv"; +// Percentage of ELF files to change per update +const TREEGEN_PERCENTAGE: u32 = 15; +/// Total number of reboots +const ITERATIONS: u32 = 10; +/// Try at most this number of times per iteration to interrupt +const ITERATION_RETRIES: u32 = 15; +// We mostly want to test forced interrupts since those are +// most likely to break. +const FORCE_INTERRUPT_PERCENTAGE: u32 = 85; +/// Multiply the average cycle time by this to ensure we sometimes +/// fail to interrupt too. +const FORCE_REBOOT_AFTER_MUL: f64 = 1.1f64; +/// Amount of time in seconds we will delay each web request. 
+/// FIXME: this should be a function of total number of objects or so +const WEBSERVER_DELAY_SECS: f64 = 0.005; + +/// We choose between these at random +#[derive(EnumIter, Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +enum PoliteInterruptStrategy { + None, + Stop, + Reboot, +} + +/// We choose between these at random +#[derive(EnumIter, Debug, PartialEq, Eq, Clone, PartialOrd, Ord, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +enum ForceInterruptStrategy { + Kill9, + Reboot, +} + +#[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +enum InterruptStrategy { + Polite(PoliteInterruptStrategy), + Force(ForceInterruptStrategy), +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +enum UpdateResult { + NotCompleted, + Staged, + Completed, +} + +/// The data passed across reboots by serializing +/// into the AUTOPKGTEST_REBOOT_MARK +#[derive(Serialize, Deserialize, Debug, Default)] +#[serde(rename_all = "kebab-case")] +struct RebootMark { + /// Reboot strategy that was used for this last reboot + reboot_strategy: Option, + /// Counts attempts to interrupt an upgrade + iter: u32, + /// Counts times upgrade completed before we tried to interrupt + before: u32, + /// Results for "polite" interrupt attempts + polite: BTreeMap>, + /// Results for "forced" interrupt attempts + force: BTreeMap>, +} + +impl RebootMark { + fn get_results_map( + &mut self, + strategy: &InterruptStrategy, + ) -> &mut BTreeMap { + match strategy { + InterruptStrategy::Polite(t) => self + .polite + .entry(t.clone()) + .or_insert_with(|| BTreeMap::new()), + InterruptStrategy::Force(t) => self + .force + .entry(t.clone()) + .or_insert_with(|| BTreeMap::new()), + } + } +} + +impl InterruptStrategy { + pub(crate) fn is_noop(&self) -> bool { + match self { + 
InterruptStrategy::Polite(PoliteInterruptStrategy::None) => true, + _ => false, + } + } +} + +/// TODO add readonly sysroot handling into base ostree +fn testinit() -> Result<()> { + assert!(std::path::Path::new("/run/ostree-booted").exists()); + sh_execute!( + r"if ! test -w /sysroot; then + mount -o remount,rw /sysroot +fi" + )?; + Ok(()) +} + +/// Given a booted ostree, generate a modified version and write it +/// into our srvrepo. This is fairly hacky; it'd be better if we +/// reworked the tree mutation to operate on an ostree repo +/// rather than a filesystem. +fn generate_update(commit: &str) -> Result<()> { + println!("Generating update from {}", commit); + crate::treegen::update_os_tree(SRVREPO, TESTREF, TREEGEN_PERCENTAGE) + .context("Failed to generate new content")?; + // Amortize the prune across multiple runs; we don't want to leak space, + // but traversing all the objects is expensive. So here we only prune 1/5 of the time. + if rand::thread_rng().gen_ratio(1, 5) { + sh_execute!( + "ostree --repo={srvrepo} prune --refs-only --depth=1", + srvrepo = SRVREPO + )?; + } + Ok(()) +} + +/// Create an archive repository of current OS content. This is a bit expensive; +/// in the future we should try a trick using the `parent` property on this repo, +/// and then teach our webserver to redirect to the system for objects it doesn't +/// have. 
+fn generate_srv_repo(commit: &str) -> Result<()> { + sh_execute!( + r#" + ostree --repo={srvrepo} init --mode=archive + ostree --repo={srvrepo} config set archive.zlib-level 1 + ostree --repo={srvrepo} pull-local /sysroot/ostree/repo {commit} + ostree --repo={srvrepo} refs --create={testref} {commit} + "#, + srvrepo = SRVREPO, + commit = commit, + testref = TESTREF + ) + .context("Failed to generate srv repo")?; + generate_update(commit)?; + Ok(()) +} + +#[derive(Serialize, Deserialize, Debug)] +struct TransactionalTestInfo { + cycle_time: time::Duration, +} + +#[derive(Serialize, Deserialize, Debug, Default)] +struct Kill9Stats { + interrupted: u32, + staged: u32, + success: u32, +} + +#[derive(Serialize, Deserialize, Debug, Default)] +struct RebootStats { + interrupted: u32, + success: u32, +} + +fn upgrade_and_finalize() -> Result<()> { + sh_execute!( + "rpm-ostree upgrade + systemctl start ostree-finalize-staged + systemctl stop ostree-finalize-staged" + ) + .context("Upgrade and finalize failed")?; + Ok(()) +} + +async fn run_upgrade_or_timeout(timeout: time::Duration) -> Result { + let upgrade = tokio::task::spawn_blocking(upgrade_and_finalize); + Ok(tokio::select! { + res = upgrade => { + let _res = res?; + true + }, + _ = tokio::time::delay_for(timeout) => { + false + } + }) +} + +/// The set of commits that we should see +#[derive(Debug)] +struct CommitStates { + booted: String, + orig: String, + prev: String, + target: String, +} + +impl CommitStates { + pub(crate) fn describe(&self, commit: &str) -> Option<&'static str> { + if commit == self.booted { + Some("booted") + } else if commit == self.orig { + Some("orig") + } else if commit == self.prev { + Some("prev") + } else if commit == self.target { + Some("target") + } else { + None + } + } +} + +/// In the case where we've entered via a reboot, this function +/// checks the state of things, and also generates a new update +/// if everything was successful. 
+fn parse_and_validate_reboot_mark>( + commitstates: &mut CommitStates, + mark: M, +) -> Result { + let markstr = mark.as_ref(); + let mut mark: RebootMark = serde_json::from_str(markstr) + .with_context(|| format!("Failed to parse reboot mark {:?}", markstr))?; + // The first failed reboot may be into the original booted commit + let status = rpmostree::query_status()?; + let firstdeploy = &status.deployments[0]; + // The first deployment should not be staged + assert!(!firstdeploy.staged.unwrap_or(false)); + assert!(firstdeploy.booted); + assert_eq!(firstdeploy.checksum, commitstates.booted); + let reboot_type = if let Some(t) = mark.reboot_strategy.as_ref() { + t.clone() + } else { + anyhow::bail!("No reboot strategy in mark"); + }; + if commitstates.booted == commitstates.target { + mark.get_results_map(&reboot_type) + .entry(UpdateResult::Completed) + .and_modify(|result_e| { + *result_e += 1; + }) + .or_insert(1); + println!("Successfully updated to {}", commitstates.target); + // Since we successfully updated, generate a new commit to target + generate_update(&firstdeploy.checksum)?; + // Update the target state + let srvrepo_obj = ostree::Repo::new(&gio::File::new_for_path(SRVREPO)); + srvrepo_obj.open(gio::NONE_CANCELLABLE)?; + commitstates.target = srvrepo_obj.resolve_rev(TESTREF, false)?.into(); + } else if commitstates.booted == commitstates.orig || commitstates.booted == commitstates.prev { + println!( + "Failed update to {} (booted={})", + commitstates.target, commitstates.booted + ); + mark.get_results_map(&reboot_type) + .entry(UpdateResult::NotCompleted) + .and_modify(|result_e| { + *result_e += 1; + }) + .or_insert(1); + } else { + anyhow::bail!("Unexpected target commit: {}", firstdeploy.checksum); + }; + // Empty this out + mark.reboot_strategy = None; + Ok(mark) +} + +fn validate_pending_commit(pending_commit: &str, commitstates: &CommitStates) -> Result<()> { + if pending_commit != commitstates.target { + sh_execute!("rpm-ostree status -v")?; 
+ sh_execute!( + "ostree show {pending_commit}", + pending_commit = pending_commit + )?; + anyhow::bail!( + "Expected target commit={} but pending={} ({:?})", + commitstates.target, + pending_commit, + commitstates.describe(pending_commit) + ); + } + Ok(()) +} + +/// In the case where we did a kill -9 of rpm-ostree, check the state +fn validate_live_interrupted_upgrade(commitstates: &CommitStates) -> Result { + let status = rpmostree::query_status()?; + let firstdeploy = &status.deployments[0]; + let pending_commit = firstdeploy.checksum.as_str(); + let res = if firstdeploy.staged.unwrap_or(false) { + assert!(!firstdeploy.booted); + validate_pending_commit(pending_commit, &commitstates)?; + UpdateResult::Staged + } else { + if pending_commit == commitstates.booted { + UpdateResult::NotCompleted + } else if pending_commit == commitstates.target { + UpdateResult::Completed + } else { + anyhow::bail!( + "Unexpected pending commit: {} ({:?})", + pending_commit, + commitstates.describe(pending_commit) + ); + } + }; + Ok(res) +} + +fn impl_transaction_test>( + booted_commit: &str, + tdata: &TransactionalTestInfo, + mark: Option, +) -> Result<()> { + let polite_strategies = PoliteInterruptStrategy::iter().collect::>(); + let force_strategies = ForceInterruptStrategy::iter().collect::>(); + + // Gather the expected possible commits + let mut commitstates = { + let srvrepo_obj = ostree::Repo::new(&gio::File::new_for_path(SRVREPO)); + srvrepo_obj.open(gio::NONE_CANCELLABLE)?; + let sysrepo_obj = ostree::Repo::new(&gio::File::new_for_path("/sysroot/ostree/repo")); + sysrepo_obj.open(gio::NONE_CANCELLABLE)?; + + CommitStates { + booted: booted_commit.to_string(), + orig: sysrepo_obj.resolve_rev(ORIGREF, false)?.into(), + prev: srvrepo_obj + .resolve_rev(&format!("{}^", TESTREF), false)? 
+ .into(), + target: srvrepo_obj.resolve_rev(TESTREF, false)?.into(), + } + }; + + let mut mark = if let Some(mark) = mark { + let markstr = mark.as_ref(); + // In the successful case, this generates a new target commit, + // so we pass via &mut. + parse_and_validate_reboot_mark(&mut commitstates, markstr) + .context("Failed to parse reboot mark")? + } else { + RebootMark { + ..Default::default() + } + }; + // Drop the &mut + let commitstates = commitstates; + + assert_ne!(commitstates.booted.as_str(), commitstates.target.as_str()); + + let mut rt = tokio::runtime::Runtime::new()?; + let cycle_time_ms = (tdata.cycle_time.as_secs_f64() * 1000f64 * FORCE_REBOOT_AFTER_MUL) as u64; + // Set when we're trying an interrupt strategy that isn't a reboot, so we will + // re-enter the loop below. + let mut live_strategy: Option = None; + let mut retries = 0; + // This loop is for the non-rebooting strategies - we might use kill -9 + // or not interrupt at all. But if we choose a reboot strategy + // then we'll exit implicitly via the reboot, and reenter the function + // above. + loop { + // Save the previous strategy as a string so we can use it in error + // messages below + let prev_strategy_str = format!("{:?}", live_strategy); + // Process the results of the previous run if any, and reset + // live_strategy to None + if let Some(last_strategy) = live_strategy.take() { + mark.iter += 1; + retries = 0; + let res = validate_live_interrupted_upgrade(&commitstates)?; + if last_strategy.is_noop() { + assert_eq!(res, UpdateResult::Completed) + } + mark.get_results_map(&last_strategy) + .entry(res) + .and_modify(|result_e| { + *result_e += 1; + }) + .or_insert(1); + } + // If we've reached our target iterations, exit the test successfully + if mark.iter == ITERATIONS { + // TODO also add ostree admin fsck to check the deployment directories + sh_execute!( + "echo Performing final validation... 
+ ostree fsck" + )?; + return Ok(()); + } + let mut rng = rand::thread_rng(); + // Pick a strategy for this attempt + let strategy: InterruptStrategy = if rand::thread_rng() + .gen_ratio(FORCE_INTERRUPT_PERCENTAGE, 100) + { + InterruptStrategy::Force(force_strategies.choose(&mut rng).expect("strategy").clone()) + } else { + InterruptStrategy::Polite( + polite_strategies + .choose(&mut rng) + .expect("strategy") + .clone(), + ) + }; + println!("Using interrupt strategy: {:?}", strategy); + // Interrupt usually before the upgrade would + // complete, but also a percentage of the time after. + // The no-op case is special in that we want to wait for it to complete + let sleeptime = if strategy.is_noop() { + // In the no-op case, sleep for minimum of 20x the cycle time, or one day + let ms = std::cmp::min(cycle_time_ms.saturating_mul(20), 24 * 60 * 60 * 1000); + time::Duration::from_millis(ms) + } else { + time::Duration::from_millis(rng.gen_range(0, cycle_time_ms)) + }; + println!( + "force-reboot-time={:?} cycle={:?} status:{:?}", + sleeptime, tdata.cycle_time, &mark + ); + // Reset the target ref to booted, and perform a cleanup + // to ensure we're re-downloading objects each time + sh_execute!( + " + systemctl stop rpm-ostreed + systemctl stop ostree-finalize-staged + ostree reset testrepo:{testref} {booted_commit} + rpm-ostree cleanup -pbrm + ", + testref = TESTREF, + booted_commit = booted_commit + ) + .with_context(|| { + format!( + "Failed pre-upgrade cleanup (prev strategy: {})", + prev_strategy_str.as_str() + ) + })?; + + // The heart of the test - start an upgrade and wait a random amount + // of time to interrupt. If the result is true, then the upgrade completed + // successfully before the timeout. 
+ let res: Result = rt.block_on(async move { run_upgrade_or_timeout(sleeptime).await }); + let res = res.context("Failed during upgrade")?; + if res { + if !strategy.is_noop() { + println!( + "Failed to interrupt upgrade, attempt {}/{}", + retries, ITERATION_RETRIES + ); + retries += 1; + mark.before += 1; + } else { + live_strategy = Some(strategy); + } + let status = rpmostree::query_status()?; + let firstdeploy = &status.deployments[0]; + let pending_commit = firstdeploy.checksum.as_str(); + validate_pending_commit(pending_commit, &commitstates) + .context("Failed to validate pending commit")?; + } else { + // Our timeout fired before the upgrade completed; execute + // the interrupt strategy. + match strategy { + InterruptStrategy::Force(ForceInterruptStrategy::Kill9) => { + sh_execute!( + "systemctl kill -s KILL rpm-ostreed || true + systemctl kill -s KILL ostree-finalize-staged || true" + )?; + live_strategy = Some(strategy); + } + InterruptStrategy::Force(ForceInterruptStrategy::Reboot) => { + mark.reboot_strategy = Some(strategy.clone()); + prepare_reboot(serde_json::to_string(&mark)?)?; + // This is a forced reboot - no syncing of the filesystem. 
+ sh_execute!("reboot -ff")?; + std::thread::sleep(time::Duration::from_secs(60)); + // Shouldn't happen + anyhow::bail!("failed to reboot"); + } + InterruptStrategy::Polite(PoliteInterruptStrategy::None) => { + anyhow::bail!("Failed to wait for uninterrupted upgrade"); + } + InterruptStrategy::Polite(PoliteInterruptStrategy::Reboot) => { + mark.reboot_strategy = Some(strategy.clone()); + Err(reboot(serde_json::to_string(&mark)?))?; + // We either rebooted, or failed to reboot + } + InterruptStrategy::Polite(PoliteInterruptStrategy::Stop) => { + sh_execute!( + "systemctl stop rpm-ostreed || true + systemctl stop ostree-finalize-staged || true" + )?; + live_strategy = Some(strategy); + } + } + } + } +} + +#[itest(destructive = true)] +fn transactionality() -> Result<()> { + testinit()?; + let mark = get_reboot_mark()?; + let cancellable = Some(gio::Cancellable::new()); + let sysroot = ostree::Sysroot::new_default(); + sysroot.load(cancellable.as_ref())?; + assert!(sysroot.is_booted()); + let booted = sysroot.get_booted_deployment().expect("booted deployment"); + let commit: String = booted.get_csum().expect("booted csum").into(); + // We need this static across reboots + let srvrepo = Path::new(SRVREPO); + let firstrun = !srvrepo.exists(); + if let Some(_) = mark.as_ref() { + if firstrun { + anyhow::bail!("Missing {:?}", srvrepo); + } + } else { + if !firstrun { + anyhow::bail!("Unexpected {:?}", srvrepo); + } + generate_srv_repo(&commit)?; + } + + // Let's assume we're changing about 200 objects each time; + // that leads to probably 300 network requests, so we want + // a low average delay. 
+ let webserver_opts = TestHttpServerOpts { + random_delay: Some(time::Duration::from_secs_f64(WEBSERVER_DELAY_SECS)), + ..Default::default() + }; + with_webserver_in(&srvrepo, &webserver_opts, move |addr| { + let url = format!("http://{}", addr); + sh_execute!( + "ostree remote delete --if-exists testrepo + ostree remote add --set=gpg-verify=false testrepo {url}", + url = url + )?; + + if firstrun { + // Also disable some services (like zincati) because we don't want automatic updates + // in our reboots, and it currently fails to start. The less + // we have in each reboot, the faster reboots are. + sh_execute!("systemctl disable --now zincati fedora-coreos-pinger")?; + // And prepare for updates + sh_execute!("rpm-ostree cleanup -pr")?; + generate_update(&commit)?; + // Directly set the origin, so that we're not dependent on the pending deployment. + // FIXME: make this saner + sh_execute!( + " + ostree admin set-origin testrepo {url} {testref} + ostree refs --create testrepo:{testref} {commit} + ostree refs --create={origref} {commit} + ", + url = url, + origref = ORIGREF, + testref = TESTREF, + commit = commit + )?; + // We gather a single "cycle time" at start as a way of gauging how + // long an upgrade should take, so we know when to interrupt. This + // obviously has some pitfalls, mainly when there are e.g. other competing + // VMs when we start but not after (or vice versa) we can either + // interrupt almost always too early, or too late. 
+ let start = time::Instant::now(); + upgrade_and_finalize().context("Firstrun upgrade failed")?; + let end = time::Instant::now(); + let cycle_time = end.duration_since(start); + let tdata = TransactionalTestInfo { + cycle_time: cycle_time, + }; + let mut f = std::io::BufWriter::new(std::fs::File::create(&TDATAPATH)?); + serde_json::to_writer(&mut f, &tdata)?; + f.flush()?; + sh_execute!("rpm-ostree status")?; + } + + let tdata = { + let mut f = std::io::BufReader::new(std::fs::File::open(&TDATAPATH)?); + serde_json::from_reader(&mut f).context("Failed to parse test info JSON")? + }; + + impl_transaction_test(commit.as_str(), &tdata, mark.as_ref())?; + + Ok(()) + })?; + Ok(()) +} diff --git a/tests/inst/src/insttest.rs b/tests/inst/src/insttest.rs deleted file mode 100644 index 1c1fa379..00000000 --- a/tests/inst/src/insttest.rs +++ /dev/null @@ -1,46 +0,0 @@ -use anyhow::Result; -// use structopt::StructOpt; -// // https://github.com/clap-rs/clap/pull/1397 -// #[macro_use] -// extern crate clap; - -mod repobin; -mod sysroot; -mod test; - -fn gather_tests() -> Vec { - test::TESTS - .iter() - .map(|t| libtest_mimic::Test { - name: t.name.into(), - kind: "".into(), - is_ignored: false, - is_bench: false, - data: t, - }) - .collect() -} - -fn run_test(test: &test::TestImpl) -> libtest_mimic::Outcome { - if let Err(e) = (test.data.f)() { - libtest_mimic::Outcome::Failed { - msg: Some(e.to_string()), - } - } else { - libtest_mimic::Outcome::Passed - } -} - -fn main() -> Result<()> { - procspawn::init(); - - // Ensure we're always in tempdir so we can rely on it globally - let tmp_dir = tempfile::Builder::new() - .prefix("ostree-insttest-top") - .tempdir()?; - std::env::set_current_dir(tmp_dir.path())?; - - let args = libtest_mimic::Arguments::from_args(); - let tests = gather_tests(); - libtest_mimic::run_tests(&args, tests, run_test).exit(); -} diff --git a/tests/inst/src/insttestmain.rs b/tests/inst/src/insttestmain.rs new file mode 100644 index 00000000..3fdc1be1 
--- /dev/null
+++ b/tests/inst/src/insttestmain.rs
@@ -0,0 +1,123 @@
+use anyhow::{bail, Result};
+use structopt::StructOpt;
+
+mod destructive;
+mod repobin;
+mod rpmostree;
+mod sysroot;
+mod test;
+mod treegen;
+
+// Written by Ignition
+const DESTRUCTIVE_TEST_STAMP: &'static str = "/etc/ostree-destructive-test-ok";
+
+#[derive(Debug, StructOpt)]
+#[structopt(rename_all = "kebab-case")]
+/// Main options struct
+enum Opt {
+    /// List the destructive tests
+    ListDestructive,
+    /// Run a destructive test (requires ostree-based host, may break it!)
+    RunDestructive { name: String },
+    /// Run the non-destructive tests
+    NonDestructive(NonDestructiveOpts),
+}
+
+// Holds the raw arguments that follow `non-destructive`; see main().
+#[derive(Debug, StructOpt)]
+#[structopt(rename_all = "kebab-case")]
+enum NonDestructiveOpts {
+    #[structopt(external_subcommand)]
+    Args(Vec<String>),
+}
+
+/// Wrap a registered test in the libtest-mimic `Test` structure, keeping a
+/// reference to it as the payload.
+fn libtest_from_test(t: &'static test::Test) -> test::TestImpl {
+    libtest_mimic::Test {
+        name: t.name.into(),
+        kind: "".into(),
+        is_ignored: false,
+        is_bench: false,
+        data: t,
+    }
+}
+
+/// Call the test's function; an `Err` becomes `Outcome::Failed` carrying the
+/// error's message.
+fn run_test(test: &test::TestImpl) -> libtest_mimic::Outcome {
+    if let Err(e) = (test.data.f)() {
+        libtest_mimic::Outcome::Failed {
+            msg: Some(e.to_string()),
+        }
+    } else {
+        libtest_mimic::Outcome::Passed
+    }
+}
+
+/// Parse the command line and either list the destructive tests, run one
+/// destructive test by name, or run the non-destructive tests via
+/// libtest-mimic.  With no arguments the non-destructive tests are run.
+fn main() -> Result<()> {
+    // This must come before anything else.  Per the procspawn documentation,
+    // whatever precedes `init()` is also executed in every process spawned
+    // through it; creating the tempdir first would make each child create
+    // (and never remove) its own top-level directory in /var/tmp.
+    procspawn::init();
+
+    // Ensure we're always in tempdir so we can rely on it globally.
+    // We use /var/tmp to ensure we have storage space in the destructive
+    // case.
+    let tmp_dir = tempfile::Builder::new()
+        .prefix("ostree-insttest-top")
+        .tempdir_in("/var/tmp")?;
+    std::env::set_current_dir(tmp_dir.path())?;
+
+    let args: Vec<String> = std::env::args().collect();
+    let opt = {
+        if args.len() == 1 {
+            println!("No arguments provided, running non-destructive tests");
+            Opt::NonDestructive(NonDestructiveOpts::Args(Vec::new()))
+        } else {
+            Opt::from_iter(args.iter())
+        }
+    };
+
+    match opt {
+        Opt::ListDestructive => {
+            for t in test::DESTRUCTIVE_TESTS.iter() {
+                println!("{}", t.name);
+            }
+            return Ok(());
+        }
+        Opt::NonDestructive(subopt) => {
+            // FIXME add method to parse subargs
+            let iter = match subopt {
+                NonDestructiveOpts::Args(subargs) => subargs,
+            };
+            let libtestargs = libtest_mimic::Arguments::from_iter(iter);
+            let tests: Vec<_> = test::NONDESTRUCTIVE_TESTS
+                .iter()
+                .map(libtest_from_test)
+                .collect();
+            libtest_mimic::run_tests(&libtestargs, tests, run_test).exit();
+        }
+        Opt::RunDestructive { name } => {
+            if !std::path::Path::new(DESTRUCTIVE_TEST_STAMP).exists() {
+                bail!(
+                    "This is a destructive test; signal acceptance by creating {}",
+                    DESTRUCTIVE_TEST_STAMP
+                )
+            }
+            if !std::path::Path::new("/run/ostree-booted").exists() {
+                bail!("An ostree-based host is required")
+            }
+
+            for t in test::DESTRUCTIVE_TESTS.iter() {
+                if t.name == name {
+                    (t.f)()?;
+                    println!("ok destructive test: {}", t.name);
+                    return Ok(());
+                }
+            }
+            bail!("Unknown destructive test: {}", name);
+        }
+    }
+}
diff --git a/tests/inst/src/repobin.rs b/tests/inst/src/repobin.rs index 41fd1390..208eae40 100644 --- a/tests/inst/src/repobin.rs +++ b/tests/inst/src/repobin.rs @@ -77,7 +77,7 @@ fn test_pull_basicauth() -> Result<()> { format!("http://{}@{}/", TEST_HTTP_BASIC_AUTH, addr).into_bytes(), )?; let osroot = Path::new("osroot"); - mkroot(&osroot)?; + crate::treegen::mkroot(&osroot)?; sh_execute!( r#"ostree --repo={serverrepo} init --mode=archive ostree --repo={serverrepo} commit -b os --tree=dir={osroot} >/dev/null
diff --git a/tests/inst/src/rpmostree.rs b/tests/inst/src/rpmostree.rs
new file mode 100644
index 00000000..fee97355
--- /dev/null
+++ b/tests/inst/src/rpmostree.rs
@@ -0,0 +1,47 @@
+use anyhow::Result;
+use serde_derive::Deserialize;
+use serde_json;
+use std::process::{Command, Stdio};
+
+/// The parts of `rpm-ostree status --json` output that the tests use.
+#[derive(Deserialize)]
+#[serde(rename_all = "kebab-case")]
+#[allow(unused)]
+pub(crate) struct Status {
+    pub(crate) deployments: Vec<Deployment>,
+}
+
+/// A single entry of the `deployments` array.
+#[derive(Deserialize)]
+#[serde(rename_all = "kebab-case")]
+#[allow(unused)]
+pub(crate) struct Deployment {
+    // NOTE(review): payload type assumed to be `String` - confirm.
+    pub(crate) unlocked: Option<String>,
+    pub(crate) osname: String,
+    pub(crate) pinned: bool,
+    pub(crate) checksum: String,
+    pub(crate) staged: Option<bool>,
+    pub(crate) booted: bool,
+    pub(crate) serial: u32,
+    pub(crate) origin: String,
+}
+
+/// Run `rpm-ostree status --json` and parse its output.
+///
+/// # Errors
+///
+/// Fails if the command cannot be run, exits unsuccessfully, or prints
+/// JSON that does not match [`Status`].
+pub(crate) fn query_status() -> Result<Status> {
+    // `output()` waits for the child to exit, so it is always reaped and we
+    // can check its exit status; stderr is still passed through.
+    let out = Command::new("rpm-ostree")
+        .args(&["status", "--json"])
+        .stderr(Stdio::inherit())
+        .output()?;
+    if !out.status.success() {
+        anyhow::bail!("rpm-ostree status --json failed: {}", out.status);
+    }
+    Ok(serde_json::from_slice(&out.stdout)?)
+}
diff --git a/tests/inst/src/test.rs b/tests/inst/src/test.rs index 7178d7bb..24dc8194 100644 --- a/tests/inst/src/test.rs +++ b/tests/inst/src/test.rs @@ -3,9 +3,11 @@ use std::fs::File; use std::io::prelude::*; use std::path::Path; use std::process::Command; +use std::time; use anyhow::{bail, Context, Result}; use linkme::distributed_slice; +use rand::Rng; pub use itest_macro::itest; pub use with_procspawn_tempdir::with_procspawn_tempdir; @@ -28,7 +30,9 @@ pub(crate) struct Test { pub(crate) type TestImpl = libtest_mimic::Test<&'static Test>; #[distributed_slice] -pub(crate) static TESTS: [Test] = [..]; +pub(crate) static NONDESTRUCTIVE_TESTS: [Test] = [..]; +#[distributed_slice] +pub(crate) static DESTRUCTIVE_TESTS: [Test] = [..]; /// Run command and assert that its stderr contains pat pub(crate) fn cmd_fails_with>(mut c: C, pat: &str) -> Result<()> { @@ -53,30 +57,10 @@ pub(crate) fn write_file>(p: P, buf: &str) -> Result<()> { Ok(()) } -pub(crate) fn mkroot>(p: P) -> Result<()> { - let p =
p.as_ref(); - for v in &["usr/bin", "etc"] { - std::fs::create_dir_all(p.join(v))?; - } - let verpath = p.join("etc/version"); - let v: u32 = if verpath.exists() { - let s = std::fs::read_to_string(&verpath)?; - let v: u32 = s.trim_end().parse()?; - v + 1 - } else { - 0 - }; - write_file(&verpath, &format!("{}", v))?; - write_file(p.join("usr/bin/somebinary"), &format!("somebinary v{}", v))?; - write_file(p.join("etc/someconf"), &format!("someconf v{}", v))?; - write_file(p.join("usr/bin/vmod2"), &format!("somebinary v{}", v % 2))?; - write_file(p.join("usr/bin/vmod3"), &format!("somebinary v{}", v % 3))?; - Ok(()) -} - #[derive(Default, Debug, Copy, Clone)] pub(crate) struct TestHttpServerOpts { pub(crate) basicauth: bool, + pub(crate) random_delay: Option, } pub(crate) const TEST_HTTP_BASIC_AUTH: &'static str = "foouser:barpw"; @@ -105,6 +89,11 @@ pub(crate) async fn http_server>( sv: Static, opts: TestHttpServerOpts, ) -> Result> { + if let Some(random_delay) = opts.random_delay { + let slices = 100u32; + let n: u32 = rand::thread_rng().gen_range(0, slices); + std::thread::sleep((random_delay / slices) * n); + } if opts.basicauth { if let Some(ref authz) = req.headers().get(http::header::AUTHORIZATION) { match validate_authz(authz.as_ref()) { @@ -149,7 +138,8 @@ pub(crate) async fn http_server>( pub(crate) fn with_webserver_in, F>( path: P, opts: &TestHttpServerOpts, - f: F) -> Result<()> + f: F, +) -> Result<()> where F: FnOnce(&std::net::SocketAddr) -> Result<()>, F: Send + 'static, @@ -163,6 +153,48 @@ where Ok(()) } +/// Parse an environment variable as UTF-8 +pub(crate) fn getenv_utf8(n: &str) -> Result> { + if let Some(v) = std::env::var_os(n) { + Ok(Some( + v.to_str() + .ok_or_else(|| anyhow::anyhow!("{} is invalid UTF-8", n))? 
+ .to_string(), + )) + } else { + Ok(None) + } +} + +/// Defined by the autopkgtest specification +pub(crate) fn get_reboot_mark() -> Result> { + getenv_utf8("AUTOPKGTEST_REBOOT_MARK") +} + +/// Initiate a clean reboot; on next boot get_reboot_mark() will return `mark`. +#[allow(dead_code)] +pub(crate) fn reboot>(mark: M) -> std::io::Error { + let mark = mark.as_ref(); + use std::os::unix::process::CommandExt; + std::process::Command::new("/tmp/autopkgtest-reboot") + .arg(mark) + .exec() +} + +/// Prepare a reboot - you should then initiate a reboot however you like. +/// On next boot get_reboot_mark() will return `mark`. +#[allow(dead_code)] +pub(crate) fn prepare_reboot>(mark: M) -> Result<()> { + let mark = mark.as_ref(); + let s = std::process::Command::new("/tmp/autopkgtest-reboot-prepare") + .arg(mark) + .status()?; + if !s.success() { + anyhow::bail!("{:?}", s); + } + Ok(()) +} + // I put tests in your tests so you can test while you test #[cfg(test)] mod tests { diff --git a/tests/inst/src/treegen.rs b/tests/inst/src/treegen.rs new file mode 100644 index 00000000..7c28fb70 --- /dev/null +++ b/tests/inst/src/treegen.rs @@ -0,0 +1,148 @@ +use anyhow::{Context, Result}; +use commandspec::sh_execute; +use openat_ext::{FileExt, OpenatDirExt}; +use rand::Rng; +use std::fs::File; +use std::io::prelude::*; +use std::os::unix::fs::FileExt as UnixFileExt; +use std::path::Path; + +use crate::test::*; + +/// Each time this is invoked it changes file contents +/// in the target root, in a predictable way. 
+pub(crate) fn mkroot<P: AsRef<Path>>(p: P) -> Result<()> {
+    let p = p.as_ref();
+    let verpath = p.join("etc/.mkrootversion");
+    let v: u32 = if verpath.exists() {
+        let s = std::fs::read_to_string(&verpath)?;
+        let v: u32 = s.trim_end().parse()?;
+        v + 1
+    } else {
+        0
+    };
+    mkvroot(p, v)
+}
+
+/// Like `mkroot`, but writes the content for an explicit version `v`.
+pub(crate) fn mkvroot<P: AsRef<Path>>(p: P, v: u32) -> Result<()> {
+    let p = p.as_ref();
+    for d in &["usr/bin", "etc"] {
+        std::fs::create_dir_all(p.join(d))?;
+    }
+    let verpath = p.join("etc/.mkrootversion");
+    write_file(&verpath, &v.to_string())?;
+    write_file(p.join("usr/bin/somebinary"), &format!("somebinary v{}", v))?;
+    write_file(p.join("etc/someconf"), &format!("someconf v{}", v))?;
+    write_file(p.join("usr/bin/vmod2"), &format!("somebinary v{}", v % 2))?;
+    write_file(p.join("usr/bin/vmod3"), &format!("somebinary v{}", v % 3))?;
+    Ok(())
+}
+
+/// Returns `true` if a file is ELF; see https://en.wikipedia.org/wiki/Executable_and_Linkable_Format
+pub(crate) fn is_elf(f: &mut File) -> Result<bool> {
+    let mut buf = [0u8; 4];
+    let n = f.read_at(&mut buf, 0)?;
+    if n < buf.len() {
+        anyhow::bail!("Failed to read expected {} bytes", buf.len());
+    }
+    Ok(buf[0] == 0x7F && &buf[1..4] == b"ELF")
+}
+
+pub(crate) fn mutate_one_executable_to(
+    f: &mut File,
+    name: &std::ffi::OsStr,
+    dest: &openat::Dir,
+) -> Result<()> {
+    let mut destf = dest
+        .write_file(name, 0o755)
+        .context("Failed to open for write")?;
+    f.copy_to(&destf).context("Failed to copy")?;
+    // ELF is OK with us just appending some junk
+    let extra = rand::thread_rng()
+        .sample_iter(&rand::distributions::Alphanumeric)
+        .take(10)
+        .collect::<String>();
+    destf
+        .write_all(extra.as_bytes())
+        .context("Failed to append extra data")?;
+    Ok(())
+}
+
+/// Find ELF files in `src`; write mutated copies of roughly `percentage` percent of them to `dest`
+pub(crate) fn mutate_executables_to(
+    src: &openat::Dir,
+    dest: &openat::Dir,
+    percentage: u32,
+) -> Result<u32> {
+    use nix::sys::stat::Mode as NixMode;
+    assert!(percentage > 0 && percentage <= 100);
+    let mut mutated = 0;
+    for entry in src.list_dir(".")? {
+        let entry = entry?;
+        if src.get_file_type(&entry)? != openat::SimpleType::File {
+            continue;
+        }
+        let meta = src.metadata(entry.file_name())?;
+        let st = meta.stat();
+        let mode = NixMode::from_bits_truncate(st.st_mode);
+        // Must be executable
+        if !mode.intersects(NixMode::S_IXUSR | NixMode::S_IXGRP | NixMode::S_IXOTH) {
+            continue;
+        }
+        // Not suid
+        if mode.intersects(NixMode::S_ISUID | NixMode::S_ISGID) {
+            continue;
+        }
+        // At least 1k in size
+        if st.st_size < 1024 {
+            continue;
+        }
+        let mut f = src.open_file(entry.file_name())?;
+        if !is_elf(&mut f)? {
+            continue;
+        }
+        if !rand::thread_rng().gen_ratio(percentage, 100) {
+            continue;
+        }
+        mutate_one_executable_to(&mut f, entry.file_name(), dest)
+            .with_context(|| format!("Failed updating {:?}", entry.file_name()))?;
+        mutated += 1;
+    }
+    Ok(mutated)
+}
+
+/// Given an ostree ref, use the running root filesystem as a source, update
+/// `percentage` percent of binary (ELF) files
+pub(crate) fn update_os_tree<P: AsRef<Path>>(
+    repo_path: P,
+    ostref: &str,
+    percentage: u32,
+) -> Result<()> {
+    assert!(percentage > 0 && percentage <= 100);
+    let repo_path = repo_path.as_ref();
+    let tempdir = tempfile::tempdir_in(repo_path.join("tmp"))?;
+    let mut mutated = 0;
+    {
+        let tempdir = openat::Dir::open(tempdir.path())?;
+        let binary_dirs = &["usr/bin", "usr/sbin", "usr/lib", "usr/lib64"];
+        let rootfs = openat::Dir::open("/")?;
+        for v in binary_dirs {
+            let v = *v;
+            if let Some(src) = rootfs.sub_dir_optional(v)? {
+                tempdir.ensure_dir("usr", 0o755)?;
+                tempdir.ensure_dir(v, 0o755)?;
+                let dest = tempdir.sub_dir(v)?;
+                mutated += mutate_executables_to(&src, &dest, percentage)
+                    .with_context(|| format!("Replacing binaries in {}", v))?;
+            }
+        }
+    }
+    assert!(mutated > 0);
+    println!("Mutated ELF files: {}", mutated);
+    sh_execute!("ostree --repo={repo} commit --consume -b {ostref} --base={ostref} --tree=dir={tempdir} --owner-uid 0 --owner-gid 0 --selinux-policy-from-base --link-checkout-speedup --no-bindings --no-xattrs",
+        repo = repo_path.to_str().unwrap(),
+        ostref = ostref,
+        tempdir = tempdir.path().to_str().unwrap()).context("Failed to commit updated content")?;
+    Ok(())
+}
diff --git a/tests/kolainst/.gitignore b/tests/kolainst/.gitignore
new file mode 100644
index 00000000..97b5dac6
--- /dev/null
+++ b/tests/kolainst/.gitignore
@@ -0,0 +1 @@
+destructive-list.txt
diff --git a/tests/kolainst/Makefile b/tests/kolainst/Makefile
index 6416217e..acfdc3b7 100644
--- a/tests/kolainst/Makefile
+++ b/tests/kolainst/Makefile
@@ -7,9 +7,11 @@ KOLA_TESTDIR = $(DESTDIR)/usr/lib/coreos-assembler/tests/kola/ostree/
 
 all:
 	for x in $(LIBSCRIPTS); do bash -n "$${x}"; done
-	(cd ../inst && cargo build --release)
+	(cd ../inst && cargo run --release -- list-destructive) > destructive-list.txt
 
-install: all
+install:
 	install -D -m 0644 -t $(KOLA_TESTDIR) $(LIBSCRIPTS)
 	for x in $(TESTDIRS); do rsync -rlv ./$${x} $(KOLA_TESTDIR)/; done
 	install -D -m 0755 -t $(KOLA_TESTDIR)/nondestructive-rs ../inst/target/release/ostree-test
+	install -D -m 0644 destructive-stamp.ign $(KOLA_TESTDIR)/destructive-rs/config.ign
+	./install-wrappers.sh destructive-list.txt $(KOLA_TESTDIR)/destructive-rs
diff --git a/tests/kolainst/destructive-stamp.ign b/tests/kolainst/destructive-stamp.ign
new file mode 100644
index 00000000..7a4552fe
--- /dev/null
+++ b/tests/kolainst/destructive-stamp.ign
@@ -0,0 +1,16 @@
+{
+  "ignition": {
+    "version": "3.0.0"
+  },
+  "storage": {
+    "files": [
+      {
+        "path": "/etc/ostree-destructive-test-ok",
+        "mode": 420,
+        "contents": {
+          "source": "data:text/plain;base64,"
+        }
+      }
+    ]
+  }
+}
diff --git a/tests/kolainst/install-wrappers.sh b/tests/kolainst/install-wrappers.sh
new file mode 100755
index 00000000..4fbb05c6
--- /dev/null
+++ b/tests/kolainst/install-wrappers.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+set -xeuo pipefail
+# Generates a kola test for each destructive test in the binary
+list=$1
+shift
+testdir=$1
+shift
+ln -Tsf ../nondestructive-rs "${testdir}/data"
+while read -r line; do
+    cat >"${testdir}/${line}" << EOF
+#!/bin/bash
+set -xeuo pipefail
+dn=\$(dirname "\$0")
+exec "\${KOLA_EXT_DATA}/ostree-test" run-destructive ${line}
+EOF
+    chmod a+x "${testdir}/${line}"
+done < "${list}"