diff --git a/crmsh/bootstrap.py b/crmsh/bootstrap.py index b240916a7..4647e402a 100644 --- a/crmsh/bootstrap.py +++ b/crmsh/bootstrap.py @@ -26,7 +26,6 @@ from lxml import etree from . import config, constants, ssh_key, sh -from . import upgradeutil from . import utils from . import xmlutil from .cibconfig import cib_factory @@ -1403,10 +1402,6 @@ def init_sbd(): _context.sbd_manager.sbd_init() -def init_upgradeutil(): - upgradeutil.force_set_local_upgrade_seq() - - def init_ocfs2(): """ OCFS2 configure process @@ -2155,7 +2150,6 @@ def corosync_stage_finished(): "corosync": (corosync_stage_finished, False), "remote_auth": (init_remote_auth, True), "sbd": (lambda: True, False), - "upgradeutil": (init_upgradeutil, True), "cluster": (is_online, False) } @@ -2227,7 +2221,6 @@ def bootstrap_init(context): init_corosync() init_remote_auth() init_sbd() - init_upgradeutil() lock_inst = lock.Lock() try: @@ -2298,7 +2291,6 @@ def bootstrap_join(context): if _context.stage != "": remote_user, cluster_node = _parse_user_at_host(_context.cluster_node, _context.current_user) - init_upgradeutil() check_stage_dependency(_context.stage) globals()["join_" + _context.stage](cluster_node, remote_user) else: @@ -2314,7 +2306,6 @@ def bootstrap_join(context): _context.cluster_node = cluster_user_at_node _context.initialize_user() - init_upgradeutil() remote_user, cluster_node = _parse_user_at_host(_context.cluster_node, _context.current_user) utils.ping_node(cluster_node) join_ssh(cluster_node, remote_user) diff --git a/crmsh/healthcheck.py b/crmsh/healthcheck.py index 23a841c78..e242fd78f 100644 --- a/crmsh/healthcheck.py +++ b/crmsh/healthcheck.py @@ -8,6 +8,7 @@ import crmsh.constants import crmsh.parallax +import crmsh.user_of_host import crmsh.utils @@ -164,6 +165,35 @@ def fix_cluster(self, nodes: typing.Iterable[str], ask: typing.Callable[[str], N crmsh.bootstrap.change_user_shell('hacluster', node) +class PasswordlessPrimaryUserAuthenticationFeature(Feature): + def 
check_quick(self) -> bool: + local_node = crmsh.utils.this_node() + try: + crmsh.utils.user_of(local_node) + return True + except crmsh.user_of_host.UserNotFoundError: + return False + + def check_local(self, nodes: typing.Iterable[str]) -> bool: + try: + for node in nodes: + crmsh.utils.user_of(node) + except crmsh.user_of_host.UserNotFoundError: + return False + try: + crmsh.parallax.parallax_call(nodes, 'true') + return True + except ValueError: + return False + + def fix_local(self, nodes: typing.Iterable[str], ask: typing.Callable[[str], None]) -> None: + logger.warning('Passwordless ssh is not initialized. Use `crm cluster init ssh` and `crm cluster join ssh -c <init_node>` to set it up.') + raise FixFailure + + def fix_cluster(self, nodes: typing.Iterable[str], ask: typing.Callable[[str], None]) -> None: + return self.fix_local(nodes, ask) + + def main_check_local(args) -> int: try: feature = Feature.get_feature_by_name(args.feature)() diff --git a/crmsh/main.py b/crmsh/main.py index a3a929779..b99d9fe1b 100644 --- a/crmsh/main.py +++ b/crmsh/main.py @@ -11,7 +11,6 @@ from . import constants from . import clidisplay from . import term -from . import upgradeutil from . import utils from . 
import userdir @@ -367,7 +366,6 @@ def run(): if options.profile: return profile_run(context, user_args) else: - upgradeutil.upgrade_if_needed() return main_input_loop(context, user_args) except utils.NoSSHError as msg: logger.error('%s', msg) diff --git a/crmsh/report/core.py b/crmsh/report/core.py index 1d4b7ec3d..2f3ca6661 100644 --- a/crmsh/report/core.py +++ b/crmsh/report/core.py @@ -19,7 +19,7 @@ import crmsh.sh import crmsh.report.sh import crmsh.user_of_host -from crmsh import utils as crmutils +from crmsh import utils as crmutils, userdir from crmsh import constants as crmconstants from crmsh import config, log, tmpfiles, ui_cluster from crmsh.sh import ShellUtils @@ -409,6 +409,11 @@ def find_ssh_user(context: Context) -> None: logger.warning('%s', buf.getvalue()) else: logger.warning("passwordless ssh to node %s does not work", n) + if not crmutils.can_ask(): + logger.error('Cannot create a report non-interactively. Interactive authentication is required.') + if userdir.getuser() == 'hacluster': + logger.warning('Passwordless ssh does not work. Run "crm cluster health hawk2 --fix" to set it up.') + raise ValueError('Cannot create a report.') def load_from_crmsh_config(context: Context) -> None: diff --git a/crmsh/scripts.py b/crmsh/scripts.py index 2b500d711..1a7e2cede 100644 --- a/crmsh/scripts.py +++ b/crmsh/scripts.py @@ -20,7 +20,7 @@ except ImportError: import simplejson as json -from . import config, constants +from . import config, constants, user_of_host from . import handles from . import options from . import userdir @@ -1639,6 +1639,16 @@ def _copy_utils(dst): raise ValueError(e) +def _check_parallax_remote_available(printer, hosts): + try: + _parallax_call(printer, hosts, 'true', timeout_seconds=15) + except user_of_host.UserNotFoundError: + if userdir.getuser() == 'hacluster': + raise ValueError('Passwordless ssh does not work. 
Run "crm cluster health hawk2 --fix" to set it up.') from None + else: + raise ValueError('Passwordless ssh does not work.') from None + + def _create_remote_workdirs(printer, hosts, path, timeout_seconds): "Create workdirs on remote hosts" ok = True @@ -1779,6 +1789,7 @@ def prepare(self, has_remote_actions): json.dump(self.data, open(self.statefile, 'w')) _copy_utils(self.workdir) if has_remote_actions: + _check_parallax_remote_available(self.printer, self.hosts) _create_remote_workdirs(self.printer, self.hosts, self.workdir, self.timeout_seconds) _copy_to_remote_dirs(self.printer, self.hosts, self.workdir, self.timeout_seconds) # make sure all path references are relative to the script directory @@ -2106,7 +2117,10 @@ def run(script, params, printer): finally: if not dry_run: if not config.core.debug: - _run_cleanup(printer, has_remote_actions, local_node, hosts, workdir, int(params['timeout'])) + try: + _run_cleanup(printer, has_remote_actions, local_node, hosts, workdir, int(params['timeout'])) + except user_of_host.UserNotFoundError: + pass elif has_remote_actions: _print_debug(printer, local_node, hosts, workdir, int(params['timeout'])) else: diff --git a/crmsh/ui_cluster.py b/crmsh/ui_cluster.py index 7b50c3a58..5867eea2d 100644 --- a/crmsh/ui_cluster.py +++ b/crmsh/ui_cluster.py @@ -9,7 +9,7 @@ from argparse import ArgumentParser, RawDescriptionHelpFormatter import crmsh.parallax -from . import command, sh +from . import command, sh, healthcheck from . import utils from . import scripts from . import completers as compl @@ -751,6 +751,42 @@ def do_health(self, context, *args): ''' Extensive health check. 
''' + if not args: + return self._do_health_legacy(context, *args) + parser = argparse.ArgumentParser() + parser.add_argument('component', choices=['hawk2']) + parser.add_argument('-f', '--fix', action='store_true') + parsed_args = parser.parse_args(args) + if parsed_args.component == 'hawk2': + nodes = utils.list_cluster_nodes() + if parsed_args.fix: + if not healthcheck.feature_full_check(healthcheck.PasswordlessPrimaryUserAuthenticationFeature(), nodes): + try: + healthcheck.feature_fix( + healthcheck.PasswordlessPrimaryUserAuthenticationFeature(), + nodes, + utils.ask, + ) + except healthcheck.FixFailure: + logger.error('Cannot fix automatically.') + return False + try: + healthcheck.feature_fix(healthcheck.PasswordlessHaclusterAuthenticationFeature(), nodes, utils.ask) + logger.info("hawk2: passwordless ssh authentication: OK.") + return True + except healthcheck.FixFailure: + logger.error("hawk2: passwordless ssh authentication: FAIL.") + return False + else: + if healthcheck.feature_full_check(healthcheck.PasswordlessHaclusterAuthenticationFeature(), nodes): + logger.info("hawk2: passwordless ssh authentication: OK.") + return True + else: + logger.error("hawk2: passwordless ssh authentication: FAIL.") + logger.warning('Please run "crm cluster health hawk2 --fix"') + return False + + def _do_health_legacy(self, context, *args): params = self._args_implicit(context, args, 'nodes') script = scripts.load_script('health') if script is None: diff --git a/crmsh/user_of_host.py b/crmsh/user_of_host.py index 3067a0f81..041263f13 100644 --- a/crmsh/user_of_host.py +++ b/crmsh/user_of_host.py @@ -64,7 +64,7 @@ def user_pair_for_ssh(self, host: str) -> typing.Tuple[str, str]: else: ret = self._guess_user_for_ssh(host) if ret is None: - raise UserNotFoundError + raise UserNotFoundError from None else: self._user_pair_cache[host] = ret return ret diff --git a/test/features/bootstrap_bugs.feature b/test/features/bootstrap_bugs.feature index 448722dff..ee2178dd9 100644 --- 
a/test/features/bootstrap_bugs.feature +++ b/test/features/bootstrap_bugs.feature @@ -231,20 +231,10 @@ Feature: Regression test for bootstrap bugs Given Cluster service is "stopped" on "hanode1" And Cluster service is "stopped" on "hanode2" When Run "crm cluster init -y" on "hanode1" + And Run "rm -f /root/.ssh/id_rsa.pub" on "hanode1" Then Cluster service is "started" on "hanode1" When Run "crm cluster join -c hanode1 -y" on "hanode2" Then Cluster service is "started" on "hanode2" - When Run "rm -f /root/.ssh/id_rsa.pub" on "hanode1" - When Run "rm -f /root/.ssh/id_rsa.pub" on "hanode2" - When Run "rm -f /var/lib/crmsh/upgrade_seq" on "hanode1" - When Run "rm -f /var/lib/crmsh/upgrade_seq" on "hanode2" - When Run "rm -rf /var/lib/heartbeat/cores/hacluster/.ssh" on "hanode1" - And Run "rm -rf /var/lib/heartbeat/cores/hacluster/.ssh" on "hanode2" - And Run "usermod -s /usr/sbin/nologin hacluster" on "hanode1" - And Run "usermod -s /usr/sbin/nologin hacluster" on "hanode2" - And Run "crm status" on "hanode1" - Then Check user shell for hacluster between "hanode1 hanode2" - Then Check passwordless for hacluster between "hanode1 hanode2" @skip_non_root @clean @@ -277,26 +267,6 @@ Feature: Regression test for bootstrap bugs And Expected "hacluster:haclient" in stdout And Run "stat -c '%U:%G' ~hacluster/.ssh/authorized_keys" OK on "hanode2" And Expected "hacluster:haclient" in stdout - # in an upgraded cluster in which ~hacluster/.ssh/authorized_keys exists - When Run "chown root:root ~hacluster/.ssh/authorized_keys && chmod 0600 ~hacluster/.ssh/authorized_keys" on "hanode1" - And Run "chown root:root ~hacluster/.ssh/authorized_keys && chmod 0600 ~hacluster/.ssh/authorized_keys" on "hanode2" - And Run "rm -f /var/lib/crmsh/upgrade_seq" on "hanode1" - And Run "rm -f /var/lib/crmsh/upgrade_seq" on "hanode2" - And Run "crm status" on "hanode1" - Then Run "stat -c '%U:%G' ~hacluster/.ssh/authorized_keys" OK on "hanode1" - And Expected "hacluster:haclient" in stdout - Then 
Run "stat -c '%U:%G' ~hacluster/.ssh/authorized_keys" OK on "hanode2" - And Expected "hacluster:haclient" in stdout - # in an upgraded cluster in which ~hacluster/.ssh/authorized_keys does not exist - When Run "rm -rf /var/lib/heartbeat/cores/hacluster/.ssh/" on "hanode1" - And Run "rm -rf /var/lib/heartbeat/cores/hacluster/.ssh/" on "hanode2" - And Run "rm -f /var/lib/crmsh/upgrade_seq" on "hanode1" - And Run "rm -f /var/lib/crmsh/upgrade_seq" on "hanode2" - And Run "crm status" on "hanode1" - Then Run "stat -c '%U:%G' ~hacluster/.ssh/authorized_keys" OK on "hanode1" - And Expected "hacluster:haclient" in stdout - Then Run "stat -c '%U:%G' ~hacluster/.ssh/authorized_keys" OK on "hanode2" - And Expected "hacluster:haclient" in stdout @clean Scenario: Ditch no-quorum-policy=ignore when joining diff --git a/test/features/bootstrap_init_join_remove.feature b/test/features/bootstrap_init_join_remove.feature index 9c6238dd4..19e65fbe1 100644 --- a/test/features/bootstrap_init_join_remove.feature +++ b/test/features/bootstrap_init_join_remove.feature @@ -184,21 +184,3 @@ Feature: crmsh bootstrap process - init, join and remove Then Cluster service is "started" on "hanode3" And Online nodes are "hanode1 hanode2 hanode3" And Check passwordless for hacluster between "hanode1 hanode2 hanode3" - - Scenario: Check hacluster's user shell - Given Cluster service is "stopped" on "hanode3" - When Run "crm cluster join -c hanode1 -y" on "hanode3" - Then Cluster service is "started" on "hanode3" - And Online nodes are "hanode1 hanode2 hanode3" - When Run "rm -rf /var/lib/heartbeat/cores/hacluster/.ssh" on "hanode1" - And Run "rm -rf /var/lib/heartbeat/cores/hacluster/.ssh" on "hanode2" - And Run "rm -rf /var/lib/heartbeat/cores/hacluster/.ssh" on "hanode3" - And Run "usermod -s /usr/sbin/nologin hacluster" on "hanode1" - And Run "usermod -s /usr/sbin/nologin hacluster" on "hanode2" - And Run "usermod -s /usr/sbin/nologin hacluster" on "hanode3" - And Run "rm -f 
/var/lib/crmsh/upgrade_seq" on "hanode1" - And Run "rm -f /var/lib/crmsh/upgrade_seq" on "hanode2" - And Run "rm -f /var/lib/crmsh/upgrade_seq" on "hanode3" - And Run "crm status" on "hanode1" - Then Check user shell for hacluster between "hanode1 hanode2 hanode3" - Then Check passwordless for hacluster between "hanode1 hanode2 hanode3" diff --git a/test/features/healthcheck.feature b/test/features/healthcheck.feature index da7f78ac3..d3508297c 100644 --- a/test/features/healthcheck.feature +++ b/test/features/healthcheck.feature @@ -16,22 +16,15 @@ Feature: healthcheck detect and fix problems in a crmsh deployment And Online nodes are "hanode1 hanode2" And Show cluster status on "hanode1" - @clean - Scenario: a new node joins when directory ~hacluster/.ssh is removed from cluster - When Run "rm -rf ~hacluster/.ssh" on "hanode1" - And Run "rm -rf ~hacluster/.ssh" on "hanode2" - And Run "crm cluster join -c hanode1 -y" on "hanode3" - Then Cluster service is "started" on "hanode3" - # FIXME: new join implement does not trigger a exception any longer, and the auto fix is not applied - # And File "~hacluster/.ssh/id_rsa" exists on "hanode1" - # And File "~hacluster/.ssh/id_rsa" exists on "hanode2" - # And File "~hacluster/.ssh/id_rsa" exists on "hanode3" - - # skip non-root as behave_agent is not able to run commands interactively with non-root sudoer - @skip_non_root @clean Scenario: An upgrade_seq file in ~hacluster/crmsh/ will be migrated to /var/lib/crmsh (bsc#1213050) - When Run "mv /var/lib/crmsh ~hacluster/" on "hanode1" - Then File "~hacluster/crmsh/upgrade_seq" exists on "hanode1" - When Run "crm cluster status" on "hanode1" - Then File "/var/lib/crmsh/upgrade_seq" exists on "hanode1" + When Run "rm -rf ~hacluster/.ssh" on "hanode1" + And Try "crm cluster health hawk2" on "hanode1" + Then Expected "hawk2: passwordless ssh authentication: FAIL." 
in stderr + When Run "crm cluster health hawk2 --fix" on "hanode1" + Then Expected "hawk2: passwordless ssh authentication: OK." in stdout + When Run "rm -rf ~hacluster/.ssh /root/.config/crm" on "hanode1" + And Try "crm cluster health hawk2" on "hanode1" + Then Expected "hawk2: passwordless ssh authentication: FAIL." in stderr + When Try "crm cluster health hawk2 --fix" on "hanode1" + Then Expected "Cannot fix automatically" in stderr