From d523a4941208b677c9f04150258f302f19a69319 Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Thu, 28 Sep 2023 06:09:42 +0200 Subject: [PATCH 01/30] test: Factorize cockpit-ws startup in TestClient Cherry-picked from 61c14cbeafb592c87f --- test/verify/check-client | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/verify/check-client b/test/verify/check-client index 9699334695f..7f3aa04ab7b 100755 --- a/test/verify/check-client +++ b/test/verify/check-client @@ -83,17 +83,16 @@ Command = {self.libexecdir}/cockpit-beiboot timeout=30) def testBeibootNoBridge(self): - self.m_client.spawn(f"runuser -u admin -- {self.libexecdir}/cockpit-ws --no-tls", "ws.log") # set up target machine: no cockpit self.m_target.execute("rm /usr/bin/cockpit-bridge; rm -r /usr/share/cockpit") - self.checkLoginScenarios(local_bridge=False) def testBeibootWithBridge(self): - self.m_client.spawn(f"runuser -u admin -- {self.libexecdir}/cockpit-ws --no-tls", "ws.log") self.checkLoginScenarios(local_bridge=True) def checkLoginScenarios(self, *, local_bridge=True): + self.m_client.spawn(f"runuser -u admin -- {self.libexecdir}/cockpit-ws --no-tls", "ws.log") + b = self.browser b.open("/") From 2904ed663aaabf8e9e6ad6822c725df8c9b9ca9b Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Thu, 28 Sep 2023 10:01:31 +0200 Subject: [PATCH 02/30] test: Use standard logout test API in check-client Cherry-picked from 7b8bfe10c2ab8e --- test/verify/check-client | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/verify/check-client b/test/verify/check-client index 7f3aa04ab7b..ab8d2179e52 100755 --- a/test/verify/check-client +++ b/test/verify/check-client @@ -67,9 +67,7 @@ Command = {self.libexecdir}/cockpit-beiboot def logout(self, check_last_host=None): b = self.browser - b.assert_no_oops() - b.open_session_menu() - b.click('#logout') + b.logout() # FIXME: This is broken, nothing appears # b.wait_text("#brand", "Connect to:") if check_last_host: From 
e68c73696e10b750ec0c7b33192c6e8dd8150835 Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Thu, 28 Sep 2023 10:34:48 +0200 Subject: [PATCH 03/30] test: Accept destroyed execution context when clicking logout This is quite literally what it is defined to do. This races with the CDP driver finishing the command, so sometimes it would fail the test on throwing that RuntimeError. Cherry-picked from a324b3fa3bb6fd80 --- test/common/testlib.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/common/testlib.py b/test/common/testlib.py index 7a31afd9f71..6606d4e8074 100644 --- a/test/common/testlib.py +++ b/test/common/testlib.py @@ -818,7 +818,12 @@ def logout(self): self.click('#go-logout') else: self.open_session_menu() - self.click('#logout') + try: + self.click('#logout') + except RuntimeError as e: + # logging out does destroy the current frame context, it races with the CDP driver finishing the command + if "Execution context was destroyed" not in str(e): + raise self.wait_visible('#login') self.machine.allow_restart_journal_messages() From 3f35d129813649bf15213f0d9e087815b9f756ad Mon Sep 17 00:00:00 2001 From: Marius Vollmer Date: Thu, 28 Sep 2023 16:51:24 +0300 Subject: [PATCH 04/30] test: Cleanup volume group in one non-destructive test Just yanking out the disk will leave debris behind in /dev. 
Cherry-picked from 5e1bafc3a8 --- test/verify/check-storage-mounting | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/verify/check-storage-mounting b/test/verify/check-storage-mounting index 5cf374d032b..abbc4c7e614 100755 --- a/test/verify/check-storage-mounting +++ b/test/verify/check-storage-mounting @@ -428,6 +428,11 @@ class TestStorageMountingLUKS(storagelib.StorageCase): self.login_and_go("/storage") + self.addCleanup(m.execute, + "umount /run/data || true;" + "cryptsetup close $(lsblk -lno NAME /dev/test/one | tail -1) || true;" + "vgremove --force test 2>/dev/null || true") + # Quickly make two logical volumes disk = self.add_ram_disk() b.wait_in_text("#drives", disk) From 08d0c448b91879947e62a9cc9b043dedfd89dd2d Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Tue, 24 Oct 2023 12:57:28 +0200 Subject: [PATCH 05/30] test: Relax boot timer check in TestServices.testTimerSession Stop pretending that we can accurately predict the next `OnBoot` timer in TestServices.testTimerSession. It is very much *not* "now + 200 minutes", but "200 minutes after the current VM booted" (which may be long-running in Testing Farm or our CI machinery). As this is a neverending race condition in evenings, and we don't test the accuracy of systemd here, relax the check to just ensure that it happens today or tomorrow. Cherry-picked from 2f149fd766c42 --- test/verify/check-system-services | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test/verify/check-system-services b/test/verify/check-system-services index 2a8f99f8da2..44701618d49 100755 --- a/test/verify/check-system-services +++ b/test/verify/check-system-services @@ -590,11 +590,13 @@ WantedBy=default.target b.wait_visible(self.svc_sel('test-onboot.timer')) b.wait_text(self.svc_sel('test-onboot.timer') + ' .service-unit-triggers', '') self.run_systemctl(user, "start test-onboot.timer") - # Check the next run. 
Since it triggers 200mins after the boot, it might be today or tomorrow (after 20:40) - today_stamp = int(m.execute("date +%s").strip()) - time_zone = b.eval_js("Intl.DateTimeFormat().resolvedOptions().timeZone") # get browser's time zone - today_plus_200min = m.execute(f"TZ='{time_zone}' date --date=@{today_stamp + 200 * 60} '+%b %-d, %Y'").strip() - b.wait_in_text(self.svc_sel('test-onboot.timer') + ' .service-unit-next-trigger', today_plus_200min) + # Check the next run. Since it triggers 200mins after the boot, it might be today or tomorrow + # this is too racy to predict accurately + today = m.execute("date '+%b %-d, %Y'").strip() + tomorrow = m.execute("date --date tomorrow '+%b %-d, %Y'").strip() + sel_next = self.svc_sel('test-onboot.timer') + ' .service-unit-next-trigger' + b.wait_in_text(sel_next, ", ") + self.assertRegex(b.text(sel_next), f"{today}|{tomorrow}") b.wait_in_text(self.svc_sel('test-onboot.timer') + ' .service-unit-last-trigger', "unknown") # last trigger self.run_systemctl(user, "stop test-onboot.timer") From 3385f872f25322fb22a3156409eb569de1dcd726 Mon Sep 17 00:00:00 2001 From: Marius Vollmer Date: Mon, 6 Nov 2023 15:58:24 +0200 Subject: [PATCH 06/30] test: Retry after UDisks2 timeout in TestStorageUsed.testUsed It is not clear what exactly keeps /dev/sda1 busy when the kernel tries to read the new partition table. It can't be the artificial processes and services started by the test itself since unmounting and locking have already succeeded at that point. This bug happens only in quite specific conditions, and can't be expected to ever get fixed. So let's do what every user would do as well: Retry the dialog. 
Cherry-picked from 530c70d843cc1bd --- test/verify/check-storage-used | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/test/verify/check-storage-used b/test/verify/check-storage-used index c15a88fd520..4a0fb295b94 100755 --- a/test/verify/check-storage-used +++ b/test/verify/check-storage-used @@ -89,7 +89,21 @@ ExecStart=/usr/bin/sleep infinity b.wait_visible("#dialog tr:first-child button:contains(Currently in use)") b.assert_pixels('#dialog', "format-disk") self.dialog_apply() - self.dialog_wait_close() + try: + self.dialog_wait_close() + except testlib.Error: + if "Timed out waiting for object" in b.text("#dialog"): + # Sometimes /dev/sda1 is still held open by something + # immediately after locking it. This prevents the + # kernel from reading the new partition table. Let's + # just try again. + print("WARNING: Retrying partition table creation") + self.dialog_cancel() + self.dialog_wait_close() + b.click('button:contains(Create partition table)') + self.confirm() + else: + raise m.execute("! systemctl --quiet is-active keep-mnt-busy") From ce3b5ab5eba0d1f0edddddce270230a651d61c24 Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Wed, 8 Nov 2023 12:13:06 +0100 Subject: [PATCH 07/30] test: Fix wait timeout adjustment in TestIPA.testClientCertAuthentication Since commit 49ee017f26ba, the step that takes long is already the `Browser.open()`, as that loads the packages and frame from the remote machine, and the timeout now happens in `waitPageLoad()`. Still, loading the frame also takes a while, so keep the long timeout for enter-page() as well. Cherry-picked from b28914fd0dc. 
--- test/verify/check-system-realms | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/verify/check-system-realms b/test/verify/check-system-realms index 3821c5f46fe..ea989d2f42a 100755 --- a/test/verify/check-system-realms +++ b/test/verify/check-system-realms @@ -820,8 +820,8 @@ ipa-advise enable-admins-sudo | sh -ex # ssh -K is supposed to forward the credentials cache, but doesn't; klist in the ssh session is empty # and there is no ccache; so, emulate what cockpit-ssh could eventually do and check that *if* the # session had the ticket forwarded, it *could* do sudo. See https://issues.redhat.com/browse/COCKPIT-643 - b.open("/@x0.cockpit.lan/system/terminal") with b.wait_timeout(60): + b.open("/@x0.cockpit.lan/system/terminal") b.enter_page("/system/terminal", host="x0.cockpit.lan") b.wait_in_text(".terminal .xterm-accessibility-tree", "alice") b.key_press(f"{ccache_env} sudo whoami\r") From d4fd7894e5f74390b7d0b618ac5eb46c2f232b02 Mon Sep 17 00:00:00 2001 From: Jelle van der Waa Date: Wed, 8 Nov 2023 14:46:12 +0100 Subject: [PATCH 08/30] test: wait for journal to rotate in testLogs In our CI the testLogs sometimes does not get a `START` and then fails. From the failed log it looks like this is due to journal being rotated. The user test case already sleeps to let journalctl settle so now we unify this approach. Nov 08 13:13:09 ubuntu systemd[1]: Started test.service - Test Service. Nov 08 13:13:09 ubuntu test-service[2427]: START Nov 08 13:13:09 ubuntu systemd-journald[271]: Received client request to rotate journal. Nov 08 13:19:51 ubuntu systemd-journald[271]: Received client request to rotate journal. 
Nov 08 13:19:52 ubuntu test-service[2480]: START Nov 08 13:19:57 ubuntu test-service[2480]: WORKING Cherry-picked from 107c8557516d38f --- test/verify/check-system-services | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/verify/check-system-services b/test/verify/check-system-services index 44701618d49..6514bd10313 100755 --- a/test/verify/check-system-services +++ b/test/verify/check-system-services @@ -162,10 +162,11 @@ trap "echo STOP" 0 if [ $(id -u) -eq 0 ]; then journalctl --sync -else - # increase the chance for journal to catch up - sleep 5 fi + +# increase the chance for journal to catch up +sleep 5 + echo START while true; do sleep 5 From 458ea56182296b5b22f25b8a580a114851955483 Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Thu, 9 Nov 2023 13:51:46 +0100 Subject: [PATCH 09/30] test: Close modal in TestSystemInfo.testCryptoPolicies Close the crypto policy dialog after checking the default value. Leaving it open and clicking around on the main page is cheating and prone to race conditions, and will fail with the next commit. 
Cherry-picked from 8a933082b518b0772 --- test/verify/check-system-info | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/verify/check-system-info b/test/verify/check-system-info index c90d981240e..f27f56d91ff 100755 --- a/test/verify/check-system-info +++ b/test/verify/check-system-info @@ -1041,6 +1041,8 @@ password=foobar b.click("#crypto-policy-button") func = b.wait_not_present if m.image.startswith('rhel-8') or m.image.startswith('centos-8') else b.wait_visible func(".pf-v5-c-menu__item-main .pf-v5-c-menu__item-text:contains('DEFAULT:SHA1')") + b.click("#crypto-policy-dialog button:contains('Cancel')") + b.wait_not_present("#crypto-policy-dialog") # Test if a new subpolicy can be set new_profile = "LEGACY:AD-SUPPORT" From c58e2fc5acd671ab3c456673a45fa408e62547d9 Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Tue, 14 Nov 2023 09:32:41 +0100 Subject: [PATCH 10/30] test: Kill processes that keep scsi_debug mounts busy Tests like TestStorageUsed.testTeardownRetry run processes that keep a scsi_debug block device mount busy. If they fail on some assertion in the middle, the generic storage cleanup (umount, rmmod scsi_debug) fails, and the following tests get broken. Add an `fuser` kill loop to prevent that. Also show all stdout output from these commands. We don't need it returned in the code, it's more useful for developers in the test output. 
Cherry-picked from a45210a26f3e8fb260c --- test/common/testlib.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/common/testlib.py b/test/common/testlib.py index 6606d4e8074..32b74dc9b4e 100644 --- a/test/common/testlib.py +++ b/test/common/testlib.py @@ -1535,10 +1535,12 @@ def cleanup_home_dirs(): "for dev in $(ls /sys/bus/pseudo/drivers/scsi_debug/adapter*/host*/target*/*:*/block); do " " for s in /sys/block/*/slaves/${dev}*; do [ -e $s ] || break; " " d=/dev/$(dirname $(dirname ${s#/sys/block/})); " + " while fuser --mount $d --kill; do sleep 0.1; done; " " umount $d || true; dmsetup remove --force $d || true; " " done; " - " umount /dev/$dev 2>/dev/null || true; " - "done; until rmmod scsi_debug; do sleep 0.2; done") + " while fuser --mount /dev/$dev --kill; do sleep 0.1; done; " + " umount /dev/$dev || true; " + "done; until rmmod scsi_debug; do sleep 0.2; done", stdout=None) def terminate_sessions(): # on OSTree we don't get "web console" sessions with the cockpit/ws container; just SSH; but also, some tests start From 4b1aa2ac0f77cbbbd1f9b4ca01391efdb93c1ff0 Mon Sep 17 00:00:00 2001 From: Tomas Matus Date: Wed, 15 Nov 2023 16:10:27 +0100 Subject: [PATCH 11/30] tests: increase timeout on testCPUSecurityMitigationsEnable This test barely makes it within the default 10 minutes timeout. From what I see most of the time is spent by waiting for multiple reboots of the machine. Locally this took almost 7 minutes to run so for CI we can bump this timeout to 20 minutes. 
Cherry-picked from d2ccfc01123ecbac952 --- test/verify/check-system-info | 1 + 1 file changed, 1 insertion(+) diff --git a/test/verify/check-system-info b/test/verify/check-system-info index f27f56d91ff..4433c62218c 100755 --- a/test/verify/check-system-info +++ b/test/verify/check-system-info @@ -753,6 +753,7 @@ machine : 8561 spoof_threads(2, expect_link_present=True, expect_smt_state=self.expect_smt_default, cmdline=None) @testlib.skipImage("TODO: add Arch Linux grub entry support", "arch") + @testlib.timeout(1200) def testCPUSecurityMitigationsEnable(self): b = self.browser m = self.machine From 3ddec5fcbd328295cd1a14ae9e2276286f05c5fc Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Tue, 14 Nov 2023 14:52:18 +0100 Subject: [PATCH 12/30] test: Allow samba user creation to take some time With the impending services image refresh [1] and the new Samba container, user creation is not instantaneous any more. Add a retry loop. [1] https://github.com/cockpit-project/bots/pull/4885 Cherry-picked from 7f12811ffe0ee59 --- test/verify/check-system-realms | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/verify/check-system-realms b/test/verify/check-system-realms index ea989d2f42a..62541d9af32 100755 --- a/test/verify/check-system-realms +++ b/test/verify/check-system-realms @@ -890,7 +890,7 @@ class TestAD(TestRealms, CommonTests): # create another AD user self.machines['services'].execute(f"podman exec -i samba samba-tool user add alice {self.alice_password}") # ensure it works - m.execute('id alice') + m.execute('while ! 
id alice; do sleep 5; done', timeout=300) b.login_and_go('/system', user='alice', password=self.alice_password) b.wait_visible("#overview") b.logout() From 0d0056623e0cf51ef3cf6df0bcfefc4bcdbe87b6 Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Wed, 15 Nov 2023 07:10:19 +0100 Subject: [PATCH 13/30] test: Replace ldapmodify with samba-tool user edit ldapmodify is not available in the quay.io/samba.org/samba-ad-server container, and it has serious trouble to authenticate. But the newer Samba now supports `samba-tool user edit`. Use that with a non-interactive edit script instead. Cherry-picked from b88436ba7f101063 --- test/verify/check-system-realms | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/test/verify/check-system-realms b/test/verify/check-system-realms index 62541d9af32..df29937fa07 100755 --- a/test/verify/check-system-realms +++ b/test/verify/check-system-realms @@ -906,15 +906,19 @@ class TestAD(TestRealms, CommonTests): alice_cert = f.read().strip() # mangle into form palatable for LDAP alice_cert = ''.join([line for line in alice_cert.splitlines() if not line.startswith("----")]) - # set up an AD user and import their TLS certificate; avoid using the common "userCertificate;binary", - # as that does not work with Samba - services_machine.execute(r"""podman exec -i samba sh -exc ' -samba-tool user add alice %(alice_pass)s -printf "version: 1\ndn: cn=alice,cn=users,dc=cockpit,dc=lan\nchangetype: modify\nadd: userCertificate\nuserCertificate: %(alice_cert)s\n" | \ - ldapmodify -v -U Administrator -w '%(admin_pass)s' + # set up an AD user and import their TLS certificate + services_machine.write("/tmp/alice_edit", f'''#!/bin/sh -eu +sed -i "/^$/d" "$1" +echo "userCertificate: {alice_cert}" >> "$1" +''', perm="755") + services_machine.execute(f""" +podman cp /tmp/alice_edit samba:/tmp/ +podman exec -i samba sh -exc ' +samba-tool user add alice {self.alice_password} +samba-tool user edit --editor=/tmp/alice_edit alice 
# for debugging: -ldapsearch -v -U Administrator -w '%(admin_pass)s' -b 'cn=alice,cn=users,dc=cockpit,dc=lan' -' """ % {"alice_pass": self.alice_password, "admin_pass": self.admin_password, "alice_cert": alice_cert}) +samba-tool user show alice +' """, stdout=None) # set up sssd for certificate mapping to AD # see sssd.conf(5) "CERTIFICATE MAPPING SECTION" and sss-certmap(5) From 002489a1ff3c13032f00c7ce09326480f8fa3e17 Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Wed, 15 Nov 2023 07:28:52 +0100 Subject: [PATCH 14/30] test: Adjust TestGrafanaClient to Grafana 10.2 Adjust the data host CSS selector. The new services image auto-enables the PCP plugin, so that hack can go. Unfortunately the new version now tries to download the plugin catalog in the background, and there is no working way to disable that. This breaks the test at a random place. Anticipate, wait for, and ignore that error. Cherry-picked from 7c0420509a3898c9 --- test/verify/check-metrics | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/test/verify/check-metrics b/test/verify/check-metrics index dbc0f7761d1..b647ba14535 100755 --- a/test/verify/check-metrics +++ b/test/verify/check-metrics @@ -1486,15 +1486,6 @@ class TestGrafanaClient(testlib.MachineCase): bg.click("button:contains('Log in')") bg.wait_in_text("body", "Add your first data source") - # HACK Unsigned plugin needs to be enabled manually - # See https://github.com/performancecopilot/grafana-pcp/issues/94 - bg.open("/plugins/performancecopilot-pcp-app") - with bg.wait_timeout(30): - bg.wait_visible(".gf-form-button-row button") - if bg.text(".gf-form-button-row button") == "Enable": - bg.click(".gf-form-button-row button") - bg.wait_text(".gf-form-button-row button", "Disable") - # Add the PCP redis data source for our client machine # Cog (Configuration) menu → Data Sources → Add # Select PCP redis, HTTP URL http://10.111.112.1:44322 @@ -1506,6 +1497,17 @@ class 
TestGrafanaClient(testlib.MachineCase): bg.click("button:contains('Save &')") # Save & [tT]est bg.wait_in_text("body", "Data source is working") + # HACK: There is no way to disable the plugin update check; it happens in the background + # and kills a random CDP wait with this RuntimeError; `check_for_plugin_updates = false` + # is supposed to avoid that, but it doesn't work; so wait for that error to happen and + # ignore it + try: + with bg.wait_timeout(60): + bg.wait_js_cond("false") + except RuntimeError as e: + if "Failed to fetch plugins from catalog" not in str(e): + raise + # Grafana auto-discovers "host" variable for incoming metrics; it takes a while to receive the first # measurement; that event is not observable directly in Grafana, and the dashboard does not auto-update to # new variables; so probe the API until it appears @@ -1523,7 +1525,7 @@ class TestGrafanaClient(testlib.MachineCase): # .. and the dashboard name becomes clickable bg.click("a:contains('PCP Redis: Host Overview')") - bg.wait_in_text(".submenu-controls", "grafana-client") + bg.wait_in_text("#var-host", "grafana-client") # expect a "Load average" panel with a sensible number max_load = bg.text("div:contains('Load average') .graph-legend-series:contains('1 minute') .max") From 57d050b97fcddddaa9f15839908417f2862afc89 Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Wed, 15 Nov 2023 13:44:25 +0100 Subject: [PATCH 15/30] test: Robustify waiting for candlepin Use the officially recommended /status route, which we expect to actually succeed (unlike /candlepin, which is just a redirect). Add curl `--fail` to ensure a non-zero exit code while it fails. 
Cherry-picked from b2f0b4f9dc7729f0d9f --- test/verify/check-packagekit | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/verify/check-packagekit b/test/verify/check-packagekit index 2d5b27e9f0b..13ed7f21889 100755 --- a/test/verify/check-packagekit +++ b/test/verify/check-packagekit @@ -26,7 +26,7 @@ import testlib WAIT_SCRIPT = """ for x in $(seq 1 200); do - if curl --insecure -s https://%(addr)s:8443/candlepin; then + if curl --fail --insecure --silent --show-error https://%(addr)s:8443/candlepin/status; then break else sleep 1 From 735ce1dd357678153de97a89d10f2a3cac084abe Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Wed, 15 Nov 2023 14:42:52 +0100 Subject: [PATCH 16/30] test: Increase timeout for contacting AD domain Apparently recent Samba/AD is a bit slower now. Cherry-picked from ff0c22999e9802 --- test/verify/check-system-realms | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/verify/check-system-realms b/test/verify/check-system-realms index df29937fa07..576eb0bfe80 100755 --- a/test/verify/check-system-realms +++ b/test/verify/check-system-realms @@ -117,7 +117,8 @@ class CommonTests: b.set_input_text(self.op_address, "cockpit.lan") else: # on current OSes, domain and suggested admin get auto-detected - b.wait_val(self.op_address, "cockpit.lan") + with b.wait_timeout(60): + b.wait_val(self.op_address, "cockpit.lan") # Join cockpit.lan b.click(self.domain_sel) @@ -273,7 +274,8 @@ class CommonTests: b.wait_popup("realms-join-dialog") b.wait_attr("#realms-op-address", "data-discover", "done") b.set_input_text(self.op_address, "f0.cockpit.lan") - b.wait_text("#realms-op-address-helper", "Contacted domain") + with b.wait_timeout(60): + b.wait_text("#realms-op-address-helper", "Contacted domain") # admin gets auto-detected b.wait_val(self.op_admin, self.admin_user) b.set_input_text(self.op_admin_password, self.admin_password) From 775e3633cb6c09183aa7b3f1a19fc55a836ed7f9 Mon Sep 17 00:00:00 2001 From: Martin 
Pitt Date: Wed, 15 Nov 2023 14:52:05 +0100 Subject: [PATCH 17/30] test: Fix waiting for IdM user First wait for the realm user to exist before using it in chown. D'oh! Cherry-picked from fdca31b3a3d720e553 --- test/verify/check-system-realms | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/verify/check-system-realms b/test/verify/check-system-realms index 576eb0bfe80..1d5ec73a057 100755 --- a/test/verify/check-system-realms +++ b/test/verify/check-system-realms @@ -151,13 +151,13 @@ class CommonTests: m.execute("! su -c klist " + self.admin_user) b.logout() - # change existing local "admin" home dir to domain "admin" user - m.execute(f"chown -R {self.admin_user}@cockpit.lan /home/admin") - # wait until IPA user works m.execute('while ! su - -c "echo %s | sudo -S true" %s@cockpit.lan; do sleep 5; sss_cache -E || true; systemctl try-restart sssd; done' % ( self.admin_password, self.admin_user), timeout=300) + # change existing local "admin" home dir to domain "admin" user + m.execute(f"chown -R {self.admin_user}@cockpit.lan /home/admin") + # log in as domain admin and check that we can do privileged operations b.login_and_go('/system/services#/systemd-tmpfiles-clean.timer', user=f'{self.admin_user}@cockpit.lan', password=self.admin_password) b.wait_in_text("#statuses", "Running") From 5d6e0b0ed37e87c48df614f713d54b28713dde31 Mon Sep 17 00:00:00 2001 From: Jelle van der Waa Date: Thu, 9 Nov 2023 10:18:15 +0100 Subject: [PATCH 18/30] test: use become_superuser helper for switching access Cherry-picked from b76a50fccafbf --- test/verify/check-system-realms | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/test/verify/check-system-realms b/test/verify/check-system-realms index 1d5ec73a057..7b41633619c 100755 --- a/test/verify/check-system-realms +++ b/test/verify/check-system-realms @@ -94,10 +94,8 @@ class CommonTests: # Test that we reconnect on privileges change self.login_and_go("/system", superuser=False) - 
b.click("button:contains('Turn on administrative access')") - b.set_input_text("#switch-to-admin-access-password", "foobar") - b.click("button:contains('Authenticate')") - b.wait_not_present("#switch-to-admin-access-password") + b.wait_visible(f"{self.domain_sel}:disabled") + b.become_superuser() def wait_number_domains(n): if n == 0: From 6056a198403bec41506b0fa666c591447fd24ca9 Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Wed, 15 Nov 2023 14:53:47 +0100 Subject: [PATCH 19/30] test: Drop obsolete RHEL 8.7 special case Cherry-picked from 9da922928c1b1e --- test/verify/check-system-realms | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/test/verify/check-system-realms b/test/verify/check-system-realms index 7b41633619c..31da6557802 100755 --- a/test/verify/check-system-realms +++ b/test/verify/check-system-realms @@ -106,22 +106,14 @@ class CommonTests: wait_number_domains(0) - def set_address(): - # old realmd/IPA don't support realmd auto-detection yet - if m.image == "rhel-8-7": - b.wait_attr("#realms-op-address", "data-discover", "done") - b.wait_val(self.op_address, "") - b.wait_not_present("#realms-op-address-helper") - b.set_input_text(self.op_address, "cockpit.lan") - else: - # on current OSes, domain and suggested admin get auto-detected - with b.wait_timeout(60): - b.wait_val(self.op_address, "cockpit.lan") + def wait_domain_detected(): + with b.wait_timeout(60): + b.wait_val(self.op_address, "cockpit.lan") # Join cockpit.lan b.click(self.domain_sel) b.wait_popup("realms-join-dialog") - set_address() + wait_domain_detected() b.wait_text("#realms-op-address-helper", "Contacted domain") # admin gets auto-detected b.wait_val(self.op_admin, self.admin_user) @@ -223,7 +215,7 @@ class CommonTests: # Send a wrong password b.click(self.domain_sel) b.wait_popup("realms-join-dialog") - set_address() + wait_domain_detected() b.wait_val(self.op_admin, self.admin_user) b.set_input_text(self.op_admin_password, "foo") 
b.click(f"#realms-join-dialog button{self.primary_btn_class}") @@ -259,7 +251,7 @@ class CommonTests: b.click(self.domain_sel) b.wait_popup("realms-join-dialog") # wait for auto-detection - set_address() + wait_domain_detected() b.set_input_text(self.op_address, "NOPE") with b.wait_timeout(30): b.wait_text("#realms-op-address-helper", "Domain could not be contacted") From 6aca4be6210cc5f9390be2d7377403ea2a2a09c8 Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Thu, 16 Nov 2023 07:24:01 +0100 Subject: [PATCH 20/30] test: Factorize and fix timeout for contacting domain In most cases this is fast, but quite often Samba takes annoyingly long to answer. Make the timeout consistent and enforce this with helper functions, except for the instance in TestPackageInstall as that doesn't derive from CommonTests. Cherry-picked from 9da922928c1b1ea848 --- test/verify/check-system-realms | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/test/verify/check-system-realms b/test/verify/check-system-realms index 31da6557802..6297fbdb0bf 100755 --- a/test/verify/check-system-realms +++ b/test/verify/check-system-realms @@ -84,6 +84,15 @@ ExecStart=/bin/true @testlib.skipDistroPackage() class CommonTests: + + def wait_discover(self): + with self.browser.wait_timeout(60): + self.browser.wait_attr("#realms-op-address", "data-discover", "done") + + def wait_address_helper(self, expected=None): + with self.browser.wait_timeout(60): + self.browser.wait_text("#realms-op-address-helper", expected or "Contacted domain") + @testlib.timeout(900) def testQualifiedUsers(self): m = self.machine @@ -114,7 +123,7 @@ class CommonTests: b.click(self.domain_sel) b.wait_popup("realms-join-dialog") wait_domain_detected() - b.wait_text("#realms-op-address-helper", "Contacted domain") + self.wait_address_helper() # admin gets auto-detected b.wait_val(self.op_admin, self.admin_user) b.set_input_text(self.op_admin_password, self.admin_password) @@ -253,8 +262,7 
@@ class CommonTests: # wait for auto-detection wait_domain_detected() b.set_input_text(self.op_address, "NOPE") - with b.wait_timeout(30): - b.wait_text("#realms-op-address-helper", "Domain could not be contacted") + self.wait_address_helper("Domain could not be contacted") b.wait_visible(f"#realms-join-dialog button{self.primary_btn_class}:disabled") b.click("#realms-join-dialog button.pf-m-link") b.wait_not_present("#realms-join-dialog") @@ -262,10 +270,9 @@ class CommonTests: # Join a domain with the server as address (input differs from domain name) b.click(self.domain_sel) b.wait_popup("realms-join-dialog") - b.wait_attr("#realms-op-address", "data-discover", "done") + self.wait_discover() b.set_input_text(self.op_address, "f0.cockpit.lan") - with b.wait_timeout(60): - b.wait_text("#realms-op-address-helper", "Contacted domain") + self.wait_address_helper() # admin gets auto-detected b.wait_val(self.op_admin, self.admin_user) b.set_input_text(self.op_admin_password, self.admin_password) @@ -353,9 +360,10 @@ class CommonTests: self.login_and_go("/system") b.click("#system_information_domain_button") b.wait_popup("realms-join-dialog") - b.wait_attr("#realms-op-address", "data-discover", "done") + self.wait_discover() + b.set_input_text("#realms-op-address", "cockpit.lan") - b.wait_text("#realms-op-address-helper", "Contacted domain") + self.wait_address_helper() b.set_input_text("#realms-op-admin", self.admin_user) b.set_input_text("#realms-op-admin-password", self.admin_password) b.click(f"#realms-join-dialog button{self.primary_btn_class}") @@ -673,9 +681,9 @@ class TestIPA(TestRealms, CommonTests): # Join cockpit.lan b.click(self.domain_sel) b.wait_popup("realms-join-dialog") - b.wait_attr("#realms-op-address", "data-discover", "done") + self.wait_discover() b.set_input_text(self.op_address, "cockpit.lan") - b.wait_in_text("#realms-op-address-helper", "Domain is not supported") + self.wait_address_helper("Domain is not supported") # no admin name 
auto-detection for unsupported domains b.wait_val(self.op_admin, "") b.set_input_text(self.op_admin, self.admin_user) @@ -1213,7 +1221,8 @@ class TestPackageInstall(packagelib.PackageCase): b.wait_visible("#realms-join-dialog") # no auto-detected domain/admin - b.wait_attr("#realms-op-address", "data-discover", "done") + with b.wait_timeout(60): + b.wait_attr("#realms-op-address", "data-discover", "done") self.assertEqual(b.val("#realms-op-address"), "") self.assertEqual(b.val("#realms-op-admin"), "") From 1828ba401361b730d847558010811d14871ef1a7 Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Thu, 16 Nov 2023 14:49:32 +0100 Subject: [PATCH 21/30] test: Avoid sssd.service restart limit failure in check-system-realms Restarting sssd in a loop is prone to run into > systemd[1]: sssd.service: Start request repeated too quickly. > systemd[1]: sssd.service: Failed with result 'start-limit-hit'. Cherry-picked from 68d2eb70163179 --- test/verify/check-system-realms | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/verify/check-system-realms b/test/verify/check-system-realms index 6297fbdb0bf..75917343f7f 100755 --- a/test/verify/check-system-realms +++ b/test/verify/check-system-realms @@ -151,8 +151,9 @@ class CommonTests: b.logout() # wait until IPA user works - m.execute('while ! su - -c "echo %s | sudo -S true" %s@cockpit.lan; do sleep 5; sss_cache -E || true; systemctl try-restart sssd; done' % ( - self.admin_password, self.admin_user), timeout=300) + m.execute(f'while ! su - -c "echo {self.admin_password} | sudo -S true" {self.admin_user}@cockpit.lan; do ' + ' sleep 5; sss_cache -E || true; systemctl reset-failed sssd; systemctl try-restart sssd; done', + timeout=300) # change existing local "admin" home dir to domain "admin" user m.execute(f"chown -R {self.admin_user}@cockpit.lan /home/admin") @@ -370,7 +371,7 @@ class CommonTests: with b.wait_timeout(300): b.wait_not_present("#realms-join-dialog") b.logout() - m.execute('while ! 
id alice; do sleep 5; systemctl restart sssd; done', timeout=300) + m.execute('while ! id alice; do sleep 5; systemctl reset-failed sssd; systemctl restart sssd; done', timeout=300) # alice's certificate was written by testClientCertAuthentication() alice_cert_key = ['--cert', "/var/tmp/alice.pem", '--key', "/var/tmp/alice.key"] From b9005d8abb035c5fa391cf40cb4148f35898e69a Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Thu, 16 Nov 2023 14:51:43 +0100 Subject: [PATCH 22/30] test: Increase IPA leave timeout With 30 seconds we are running into occasional timeout failures. Cherry-picked from 6ef43c630a778fb036 --- test/verify/check-system-realms | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/verify/check-system-realms b/test/verify/check-system-realms index 75917343f7f..4b50f470825 100755 --- a/test/verify/check-system-realms +++ b/test/verify/check-system-realms @@ -207,7 +207,7 @@ class CommonTests: # b.assert_pixels("#realms-leave-dialog", "realm-leave", [".pf-v5-c-expandable-section__toggle-icon"]) b.click("#realms-op-leave") - with b.wait_timeout(30): + with b.wait_timeout(60): b.wait_not_present("#realms-leave-dialog") wait_number_domains(0) # re-enables hostname changing From 4ee88ade9ff6ab16dde9ff3eec0cf57408728c6a Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Fri, 17 Nov 2023 07:38:01 +0100 Subject: [PATCH 23/30] test: Drop sssd fiddling in testClientCertAuthentication Restarting sssd that often causes state corruption, as it often cannot initialize in 5s. It's also too much fiddling with the OS -- joining a domain should make the users available automatically, otherwise this is a bug. This works fine with IPA, and doesn't regress AD either. testUnqualifiedUsers() already does it that way, too.
Cherry-picked from c055b47e66ffc036 --- test/verify/check-system-realms | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/verify/check-system-realms b/test/verify/check-system-realms index 4b50f470825..7fd5f8a210d 100755 --- a/test/verify/check-system-realms +++ b/test/verify/check-system-realms @@ -371,7 +371,7 @@ class CommonTests: with b.wait_timeout(300): b.wait_not_present("#realms-join-dialog") b.logout() - m.execute('while ! id alice; do sleep 5; systemctl reset-failed sssd; systemctl restart sssd; done', timeout=300) + m.execute('while ! id alice; do sleep 5; done', timeout=300) # alice's certificate was written by testClientCertAuthentication() alice_cert_key = ['--cert', "/var/tmp/alice.pem", '--key', "/var/tmp/alice.key"] From d4a29aef78f7a80336463cb02ca80710131676d3 Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Fri, 17 Nov 2023 10:22:20 +0100 Subject: [PATCH 24/30] test: Drop obsolete INSECURELDAP hack The current service image's samba container does not look at that any more, and we also stopped using `ldapmodify`. 
Cherry-picked from 4727d487b9cde8aa9 --- test/verify/check-system-realms | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/verify/check-system-realms b/test/verify/check-system-realms index 7fd5f8a210d..09c7b4ac785 100755 --- a/test/verify/check-system-realms +++ b/test/verify/check-system-realms @@ -844,8 +844,6 @@ class TestAD(TestRealms, CommonTests): self.admin_password = "foobarFoo123" self.alice_password = 'WonderLand123' self.expected_server_software = "active-directory" - # necessary to run ldapmodify; FIXME: change this on the services image itself - self.machines['services'].execute("sed -i 's/-e/-e INSECURELDAP=true &/' /root/run-samba-domain") self.machines['services'].execute("/root/run-samba-domain") m = self.machine From b342c7b19290c6e19e4cda8ddb0abb3c77149ded Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Fri, 17 Nov 2023 10:23:52 +0100 Subject: [PATCH 25/30] test: Drop obsolete sssd hack https://bugzilla.redhat.com/show_bug.cgi?id=1839805 got fixed long ago. Cherry-picked from 77a329c56c90c72 --- test/verify/check-system-realms | 3 --- 1 file changed, 3 deletions(-) diff --git a/test/verify/check-system-realms b/test/verify/check-system-realms index 09c7b4ac785..3594ed72c97 100755 --- a/test/verify/check-system-realms +++ b/test/verify/check-system-realms @@ -859,9 +859,6 @@ class TestAD(TestRealms, CommonTests): # similar to "ipa-advise enable-admins-sudo"? 
m.write("/etc/sudoers.d/domain-admins", r"%domain\ admins@COCKPIT.LAN ALL=(ALL) ALL") - # HACK: work around https://bugzilla.redhat.com/show_bug.cgi?id=1839805 - m.write("/etc/sssd/conf.d/rhbz1839805.conf", "[domain/cockpit.lan]\nad_gpo_access_control=disabled\n", perm="0600") - # HACK: Figure out why this happens self.allow_journal_messages(""".*didn't receive expected "authorize" message""", 'cockpit-session:$') From 2f13d9e2bbf19fd2c7eeda040deb0ad24e47ec03 Mon Sep 17 00:00:00 2001 From: Marius Vollmer Date: Fri, 8 Dec 2023 17:22:55 +0200 Subject: [PATCH 26/30] test: Retry auth in checkClientCertAuthentication Password authentication sometimes fails on the first try. Cherry-picked from a61bb417db688 --- test/verify/check-system-realms | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/test/verify/check-system-realms b/test/verify/check-system-realms index 3594ed72c97..bbdf787a7b8 100755 --- a/test/verify/check-system-realms +++ b/test/verify/check-system-realms @@ -395,13 +395,21 @@ class CommonTests: # certificates; it just rejects cert requests. For interactive tests, grab src/tls/ca/alice.p12 and import # it into the browser. 
- def do_test(authopts, expected, not_expected=None, session_leader=None): + def do_test(authopts, expected, not_expected=None, session_leader=None, retry=False): m.start_cockpit(tls=True) - output = m.execute(['curl', '-ksS', '-D-', *authopts, 'https://localhost:9090/cockpit/login']) - for s in expected: - self.assertIn(s, output) - for s in (not_expected or []): - self.assertNotIn(s, output) + + def try_auth(): + output = m.execute(['curl', '-ksS', '-D-', *authopts, 'https://localhost:9090/cockpit/login']) + for s in expected: + self.assertIn(s, output) + for s in (not_expected or []): + self.assertNotIn(s, output) + return True + + if retry: + testlib.wait(try_auth, delay=5, tries=10) + else: + try_auth() # sessions/users often hang around in State=closing for a long time, ignore these if session_leader: @@ -427,11 +435,13 @@ class CommonTests: # from sssd self.allow_journal_messages("alice is not allowed to run sudo on x0. This incident will be reported.") + # occasional intermediate error during password auth + self.allow_journal_messages("cockpit-session: user account access failed: 4 alice: System error") # cert auth should not be enabled by default do_test(alice_cert_key, ["HTTP/1.1 401 Authentication required", '"authorize"']) - # password auth should work - do_test(alice_user_pass, ['HTTP/1.1 200 OK', '"csrf-token"'], session_leader='cockpit-session') + # password auth should work (but might need to be retried) + do_test(alice_user_pass, ['HTTP/1.1 200 OK', '"csrf-token"'], session_leader='cockpit-session', retry=True) # enable cert based auth m.write("/etc/cockpit/cockpit.conf", '[WebService]\nClientCertAuthentication = true\n', append=True) From 38094ba0ca40843760ed55c50281256b2e2970ff Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Mon, 27 Nov 2023 13:00:03 +0100 Subject: [PATCH 27/30] test: Use proper TLS CA for subscription-manager Grab the candlepin server's CA and install it both into rhsm and the general system (for `curl`). 
This tests subscription-manager more realistically, without having to yell "insecure" all the time. Also simplify and robustify the waiting loop. Previously, the loop could just end with 200 failures, and the test would go on. Now it will timeout. Also lower the 6 minute timeout to the default 2 minute -- starting up candlepin only takes a few seconds on our current image. Cherry-picked from 564717f61abf1 --- test/verify/check-packagekit | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/test/verify/check-packagekit b/test/verify/check-packagekit index 13ed7f21889..e254b78c5a7 100755 --- a/test/verify/check-packagekit +++ b/test/verify/check-packagekit @@ -24,16 +24,6 @@ import time import packagelib import testlib -WAIT_SCRIPT = """ -for x in $(seq 1 200); do - if curl --fail --insecure --silent --show-error https://%(addr)s:8443/candlepin/status; then - break - else - sleep 1 - fi -done -""" - OSesWithoutTracer = ["debian-stable", "debian-testing", "ubuntu-2204", "ubuntu-stable", "fedora-coreos", "rhel4edge"] OSesWithoutKpatch = ["debian-*", "ubuntu-*", "arch", "fedora-*", "rhel4edge", "centos-*"] @@ -1275,7 +1265,7 @@ class TestUpdatesSubscriptions(packagelib.PackageCase): def register(self): # this fails with "Unable to find available subscriptions for all your installed products", but works anyway self.machine.execute( - "LC_ALL=C.UTF-8 subscription-manager register --insecure --serverurl https://10.111.112.100:8443/candlepin --org=admin --activationkey=awesome_os_pool || true") + "LC_ALL=C.UTF-8 subscription-manager register --serverurl https://services.cockpit.lan:8443/candlepin --org=admin --activationkey=awesome_os_pool || true") self.machine.execute("LC_ALL=C.UTF-8 subscription-manager attach --auto") def setUp(self): @@ -1297,11 +1287,15 @@ class TestUpdatesSubscriptions(packagelib.PackageCase): m.execute("mkdir -p /etc/pki/product") m.upload([product_file], "/etc/pki/product") - # make sure that rhsm skips 
certificate checks for the server - self.sed_file("s/insecure = 0/insecure = 1/g", "/etc/rhsm/rhsm.conf") + # set up CA + ca = self.candlepin.execute("cat /home/admin/candlepin/certs/candlepin-ca.crt") + m.write("/etc/pki/ca-trust/source/anchors/candlepin-ca.crt", ca) + m.write("/etc/hosts", "10.111.112.100 services.cockpit.lan\n", append=True) + m.execute("cp /etc/pki/ca-trust/source/anchors/candlepin-ca.crt /etc/rhsm/ca/candlepin-ca.pem") + m.execute("update-ca-trust") # Wait for the web service to be accessible - m.execute(WAIT_SCRIPT % {"addr": "10.111.112.100"}, timeout=360) + m.execute("until curl --fail --silent --show-error https://services.cockpit.lan:8443/candlepin/status; do sleep 1; done") self.update_icon = "#page_status_notification_updates svg" self.update_text = "#page_status_notification_updates" self.update_text_action = "#page_status_notification_updates a" From e53e28f60fc463f38f3de199aad32e3b3543ac89 Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Wed, 29 Nov 2023 18:29:03 +0100 Subject: [PATCH 28/30] test: Adjust hack in TestGrafanaClient for non-crashing page (#19667) Later Grafana versions [1] fixed the page crash on "Failed to fetch plugins from catalog", and just log it to the console now. That will make the "wait for false" loop timeout and eventually fail. If that happens, then all is actually well. [1] https://github.com/cockpit-project/bots/pull/5601 Cherry-picked from de7ab98dc9aee629f --- test/verify/check-metrics | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/verify/check-metrics b/test/verify/check-metrics index b647ba14535..0d54c41aa1e 100755 --- a/test/verify/check-metrics +++ b/test/verify/check-metrics @@ -1507,6 +1507,10 @@ class TestGrafanaClient(testlib.MachineCase): except RuntimeError as e: if "Failed to fetch plugins from catalog" not in str(e): raise + except testlib.Error as e: + if not e.msg.startswith("timeout"): + raise + # no plugin check error? great! 
# Grafana auto-discovers "host" variable for incoming metrics; it takes a while to receive the first # measurement; that event is not observable directly in Grafana, and the dashboard does not auto-update to From 27ef8003b15ca1786151f1123d760eb4f52d0057 Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Wed, 29 Nov 2023 22:06:47 +0100 Subject: [PATCH 29/30] test: Drop plugin update check crash hack With the latest service refresh [1] Grafana now handles being offline correctly. [1] https://github.com/cockpit-project/bots/pull/5601 Cherry-picked from e8e4bdaff94db8 --- test/verify/check-metrics | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/test/verify/check-metrics b/test/verify/check-metrics index 0d54c41aa1e..872d6edbd48 100755 --- a/test/verify/check-metrics +++ b/test/verify/check-metrics @@ -1497,21 +1497,6 @@ class TestGrafanaClient(testlib.MachineCase): bg.click("button:contains('Save &')") # Save & [tT]est bg.wait_in_text("body", "Data source is working") - # HACK: There is no way to disable the plugin update check; it happens in the background - # and kills a random CDP wait with this RuntimeError; `check_for_plugin_updates = false` - # is supposed to avoid that, but it doesn't work; so wait for that error to happen and - # ignore it - try: - with bg.wait_timeout(60): - bg.wait_js_cond("false") - except RuntimeError as e: - if "Failed to fetch plugins from catalog" not in str(e): - raise - except testlib.Error as e: - if not e.msg.startswith("timeout"): - raise - # no plugin check error? great! 
- # Grafana auto-discovers "host" variable for incoming metrics; it takes a while to receive the first # measurement; that event is not observable directly in Grafana, and the dashboard does not auto-update to # new variables; so probe the API until it appears From c17189f9f4893eb481d74e03bce6b40622a5e7ee Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Mon, 4 Dec 2023 13:16:07 +0100 Subject: [PATCH 30/30] test: Disable busy swap on scsi_debug When e.g. TestStorageswap.test fails in the middle, the active swap partition on the scsi_debug driver will prevent the module removal, and break all subsequent tests. Helps with #19683 Cherry-picked from 6c3986d7b18221 --- test/common/testlib.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/common/testlib.py b/test/common/testlib.py index 32b74dc9b4e..00f4428074b 100644 --- a/test/common/testlib.py +++ b/test/common/testlib.py @@ -1540,6 +1540,7 @@ def cleanup_home_dirs(): " done; " " while fuser --mount /dev/$dev --kill; do sleep 0.1; done; " " umount /dev/$dev || true; " + " swapon --show=NAME --noheadings | grep $dev | xargs -r swapoff; " "done; until rmmod scsi_debug; do sleep 0.2; done", stdout=None) def terminate_sessions():