Skip to content

Commit

Permalink
Improve cluster remove (#1545)
Browse files Browse the repository at this point in the history
Changes include:
- Dev: bootstrap: Enhance log clarity during crm cluster remove process
- Dev: completers: Reuse node completer for cluster remove and health
  • Loading branch information
liangxin1300 authored Sep 20, 2024
2 parents 28abc7c + 5638894 commit 77b6645
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 39 deletions.
22 changes: 13 additions & 9 deletions crmsh/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -2074,7 +2074,7 @@ def stop_services(stop_list, remote_addr=None):
service_manager = ServiceManager()
for service in stop_list:
if service_manager.service_is_active(service, remote_addr=remote_addr):
logger.info("Stopping the %s%s", service, " on {}".format(remote_addr) if remote_addr else "")
logger.info("Stopping the %s on %s", service, remote_addr if remote_addr else utils.this_node())
service_manager.stop_service(service, disable=True, remote_addr=remote_addr)


Expand All @@ -2101,7 +2101,7 @@ def remove_node_from_cluster(node):
rm_configuration_files(node)

# execute the command : crm node delete $HOSTNAME
logger.info("Removing the node {}".format(node))
logger.info("Removing node %s from CIB", node)
if not NodeMgmt.call_delnode(node):
utils.fatal("Failed to remove {}.".format(node))

Expand Down Expand Up @@ -2341,7 +2341,7 @@ def bootstrap_join(context):


def bootstrap_finished():
logger.info("Done (log saved to %s)" % (log.CRMSH_LOG_FILE))
logger.info("Done (log saved to %s on %s)", log.CRMSH_LOG_FILE, utils.this_node())


def join_ocfs2(peer_host, peer_user):
Expand Down Expand Up @@ -2398,13 +2398,15 @@ def bootstrap_remove(context):

init()

if _context.qdevice_rm_flag and _context.cluster_node:
utils.fatal("Either remove node or qdevice")
if _context.cluster_node:
logger.info("Removing node %s from cluster", _context.cluster_node)

service_manager = ServiceManager()
if not service_manager.service_is_active("corosync.service"):
utils.fatal("Cluster is not active - can't execute removing action")

if _context.qdevice_rm_flag and _context.cluster_node:
utils.fatal("Either remove node or qdevice")

_context.skip_csync2 = not service_manager.service_is_active(CSYNC2_SERVICE)
if _context.skip_csync2:
_context.node_list_in_cluster = utils.fetch_cluster_node_list_from_node(utils.this_node())
Expand Down Expand Up @@ -2451,11 +2453,13 @@ def remove_self(force_flag=False):
nodes = xmlutil.listnodes(include_remote_nodes=False)
othernode = next((x for x in nodes if x != me), None)
if othernode is not None:
# remove from other node
logger.info("Removing node %s from cluster on %s", me, othernode)
cmd = "crm{} cluster remove{} -c {}".format(" -F" if force_flag else "", " -y" if yes_to_all else "", me)
rc, _, _ = sh.cluster_shell().get_rc_stdout_stderr_without_input(othernode, cmd)
rc, stdout, stderr = sh.cluster_shell().get_rc_stdout_stderr_without_input(othernode, cmd)
if rc != 0:
utils.fatal("Failed to remove this node from {}".format(othernode))
utils.fatal(f"Failed to remove this node from {othernode}: {stderr}")
elif stdout:
print(stdout)
else:
# disable and stop cluster
stop_services(SERVICES_STOP_LIST)
Expand Down
20 changes: 3 additions & 17 deletions crmsh/ui_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,6 @@ def parse_options(parser, args):
return options, args


def _remove_completer(args):
try:
n = utils.list_cluster_nodes()
except:
n = []
for node in args[1:]:
if node in n:
n.remove(node)
return scripts.param_completion_list('remove') + n


def script_printer():
from .ui_script import ConsolePrinter
return ConsolePrinter()
Expand Down Expand Up @@ -319,7 +308,6 @@ def _args_implicit(self, context, args, name):
return args + ['%s=%s' % (name, ','.join(vals))]
return args

# @command.completers_repeating(compl.call(scripts.param_completion_list, 'init'))
@command.skill_level('administrator')
def do_init(self, context, *args):
'''
Expand Down Expand Up @@ -532,7 +520,7 @@ def do_join(self, context, *args):
return True

@command.alias("delete")
@command.completers_repeating(_remove_completer)
@command.completers_repeating(compl.nodes)
@command.skill_level('administrator')
def do_remove(self, context, *args):
'''
Expand Down Expand Up @@ -568,6 +556,7 @@ def do_remove(self, context, *args):
for node in args:
rm_context.cluster_node = node
bootstrap.bootstrap_remove(rm_context)
print()
return True

@command.skill_level('administrator')
Expand Down Expand Up @@ -757,7 +746,7 @@ def do_geo_init_arbitrator(self, context, *args):
bootstrap.bootstrap_arbitrator(geo_context)
return True

@command.completers_repeating(compl.call(scripts.param_completion_list, 'health'))
@command.completers_repeating(compl.nodes)
def do_health(self, context, *args):
'''
Extensive health check.
Expand All @@ -768,9 +757,6 @@ def do_health(self, context, *args):
raise ValueError("health script failed to load")
return scripts.run(script, script_args(params), script_printer())

def _node_in_cluster(self, node):
return node in utils.list_cluster_nodes()

def do_status(self, context):
'''
Quick cluster health status. Corosync status, DRBD status...
Expand Down
28 changes: 15 additions & 13 deletions test/unittests/test_bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -1518,7 +1518,7 @@ def test_valid_admin_ip_in_use(self, mock_ipv6, mock_invoke):
@mock.patch('crmsh.bootstrap.Context')
def test_bootstrap_remove_cluster_is_active(self, mock_context, mock_init, mock_active,
mock_error):
mock_context_inst = mock.Mock()
mock_context_inst = mock.Mock(qdevice=False, cluster_node=None)
mock_context.return_value = mock_context_inst
mock_active.return_value = False
mock_error.side_effect = SystemExit
Expand Down Expand Up @@ -1558,14 +1558,13 @@ def test_bootstrap_remove_qdevice(self, mock_context, mock_init, mock_active,
def test_bootstrap_remove_qdevice_cluster_node(self, mock_context, mock_init, mock_active, mock_error):
mock_context_inst = mock.Mock(qdevice=True, cluster_node="node1")
mock_context.return_value = mock_context_inst
mock_active.return_value = True
mock_error.side_effect = SystemExit

with self.assertRaises(SystemExit):
bootstrap.bootstrap_remove(mock_context_inst)

mock_init.assert_called_once_with()
mock_active.assert_called_once_with("corosync.service")
mock_active.assert_not_called()
mock_error.assert_called_once_with("Either remove node or qdevice")

@mock.patch('crmsh.bootstrap.prompt_for_string')
Expand Down Expand Up @@ -1655,6 +1654,7 @@ def test_bootstrap_remove_self_need_force(self, mock_context, mock_init, mock_ac
mock_this_node.assert_called_once_with()
mock_error.assert_called_once_with("Removing self requires --force")

@mock.patch('crmsh.bootstrap.bootstrap_finished')
@mock.patch('crmsh.sh.ClusterShell.get_stdout_or_raise_error')
@mock.patch('crmsh.bootstrap.remove_self')
@mock.patch('crmsh.utils.this_node')
Expand All @@ -1666,7 +1666,7 @@ def test_bootstrap_remove_self_need_force(self, mock_context, mock_init, mock_ac
@mock.patch('crmsh.bootstrap.init')
@mock.patch('crmsh.bootstrap.Context')
def test_bootstrap_remove_self(self, mock_context, mock_init, mock_active,
mock_error, mock_qdevice, mock_hostname, mock_confirm, mock_this_node, mock_self, mock_run):
mock_error, mock_qdevice, mock_hostname, mock_confirm, mock_this_node, mock_self, mock_run, mock_finished):
mock_context_inst = mock.Mock(cluster_node="node1", force=True, qdevice_rm_flag=None)
mock_context.return_value = mock_context_inst
mock_active.return_value = [True, True]
Expand Down Expand Up @@ -1774,7 +1774,7 @@ def test_remove_self_other_nodes(self, mock_this_node, mock_list, mock_run, mock

mock_list.assert_called_once_with(include_remote_nodes=False)
mock_run.assert_called_once_with("node2", "crm cluster remove -y -c node1")
mock_error.assert_called_once_with("Failed to remove this node from node2")
mock_error.assert_called_once_with("Failed to remove this node from node2: err")

@mock.patch('crmsh.utils.package_is_installed')
@mock.patch('crmsh.sh.ClusterShell.get_stdout_or_raise_error')
Expand Down Expand Up @@ -1804,11 +1804,13 @@ def test_get_cluster_node_ip(self, mock_get_values, mock_get_iplist):
mock_get_values.assert_called_once_with("nodelist.node.ring0_addr")
mock_get_iplist.assert_called_once_with('node1')

@mock.patch('crmsh.utils.this_node')
@mock.patch('crmsh.service_manager.ServiceManager.stop_service')
@mock.patch('logging.Logger.info')
@mock.patch('crmsh.service_manager.ServiceManager.service_is_active')
def test_stop_services(self, mock_active, mock_status, mock_stop):
def test_stop_services(self, mock_active, mock_status, mock_stop, mock_this_node):
mock_active.side_effect = [True, True, True, True]
mock_this_node.side_effect = ['node1', 'node1', 'node1', 'node1']
bootstrap.stop_services(bootstrap.SERVICES_STOP_LIST)
mock_active.assert_has_calls([
mock.call("corosync-qdevice.service", remote_addr=None),
Expand All @@ -1817,10 +1819,10 @@ def test_stop_services(self, mock_active, mock_status, mock_stop):
mock.call("csync2.socket", remote_addr=None)
])
mock_status.assert_has_calls([
mock.call('Stopping the %s%s', 'corosync-qdevice.service', ''),
mock.call('Stopping the %s%s', 'corosync.service', ''),
mock.call('Stopping the %s%s', 'hawk.service', ''),
mock.call('Stopping the %s%s', 'csync2.socket', '')
mock.call('Stopping the %s on %s', 'corosync-qdevice.service', 'node1'),
mock.call('Stopping the %s on %s', 'corosync.service', 'node1'),
mock.call('Stopping the %s on %s', 'hawk.service', 'node1'),
mock.call('Stopping the %s on %s', 'csync2.socket', 'node1')
])
mock_stop.assert_has_calls([
mock.call("corosync-qdevice.service", disable=True, remote_addr=None),
Expand All @@ -1846,7 +1848,7 @@ def test_remove_node_from_cluster_rm_node_failed(self, mock_get_ip, mock_stop, m
bootstrap.remove_node_from_cluster('node1')

mock_get_ip.assert_called_once_with('node1')
mock_status.assert_called_once_with("Removing the node node1")
mock_status.assert_called_once_with("Removing node %s from CIB", "node1")
mock_stop.assert_called_once_with(bootstrap.SERVICES_STOP_LIST, remote_addr="node1")
mock_invoke.assert_not_called()
mock_call_delnode.assert_called_once_with("node1")
Expand All @@ -1871,7 +1873,7 @@ def test_remove_node_from_cluster_rm_csync_failed(self, mock_get_ip, mock_stop,
bootstrap.remove_node_from_cluster('node1')

mock_get_ip.assert_called_once_with('node1')
mock_status.assert_called_once_with("Removing the node node1")
mock_status.assert_called_once_with("Removing node %s from CIB", "node1")
mock_stop.assert_called_once_with(bootstrap.SERVICES_STOP_LIST, remote_addr="node1")
mock_invoke.assert_not_called()
mock_call_delnode.assert_called_once_with("node1")
Expand Down Expand Up @@ -1909,7 +1911,7 @@ def test_remove_node_from_cluster_hostname(self, mock_get_ip, mock_stop, mock_st

mock_get_ip.assert_called_once_with('node1')
mock_status.assert_has_calls([
mock.call("Removing the node node1"),
mock.call("Removing node %s from CIB", "node1"),
mock.call("Propagating configuration changes across the remaining nodes")
])
mock_stop.assert_called_once_with(bootstrap.SERVICES_STOP_LIST, remote_addr="node1")
Expand Down

0 comments on commit 77b6645

Please sign in to comment.