diff --git a/docs/netlab/validate.md b/docs/netlab/validate.md
index 92259ffdc..8e3658de1 100644
--- a/docs/netlab/validate.md
+++ b/docs/netlab/validate.md
@@ -25,6 +25,15 @@ options:
   -e, --error-only      Display only validation errors (on stderr)
 ```
 
+The **netlab validate** command returns the overall test results in its exit code:
+
+| Exit code | Meaning |
+|----------:|---------|
+| 0 | All tests passed |
+| 1 | At least one test failed |
+| 2 | `netlab validate` did not find a single usable test to execute |
+| 3 | Some of the tests generated warnings |
+
 ## Example
 
 **netlab validate** command was executed on the [Advertise IPv4 Prefixes to BGP Neighbors](https://bgplabs.net/basic/3-originate/) lab before the user configured BGP prefix origination:
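The exit codes are easy to act on from automation. Below is a minimal sketch of a CI wrapper — not part of this patch, and the wrapper and its warning policy are assumptions — that maps the documented codes to an outcome and treats a warnings-only run as a soft failure:

```python
# Hypothetical CI wrapper around 'netlab validate'; only the exit codes
# documented in the table above come from the patch, the rest is an assumption.
import subprocess
import sys

OUTCOMES = {
  0: 'all tests passed',
  1: 'at least one test failed',
  2: 'no usable tests found',
  3: 'tests completed, but some generated warnings',
}

result = subprocess.run(['netlab', 'validate'])
print(f'netlab validate: {OUTCOMES.get(result.returncode, "unexpected exit code")}')

# Report warnings (exit code 3) but do not break the pipeline;
# propagate hard failures (1) and missing tests (2) unchanged.
sys.exit(0 if result.returncode in (0, 3) else result.returncode)
```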
diff --git a/docs/topology/validate.md b/docs/topology/validate.md
index 40cbb9245..687e3590d 100644
--- a/docs/topology/validate.md
+++ b/docs/topology/validate.md
@@ -28,6 +28,7 @@ Each test has a name (dictionary key) and description (dictionary value) -- anot
 * **wait** (integer, optional) -- Time to wait (when specified as the only action in the test) or retry (when used together with other actions). The first wait/retry timeout is measured from when the lab was started; subsequent times are measured from the previous test containing the **wait** parameter.
 * **wait_str** (string, optional) -- Message to print before starting the wait.
 * **stop_on_error** (bool, optional) -- When set to `True`, the validation tests stop if the current test fails on at least one of the devices.
+* **level** (string, optional) -- When set to `warning`, a failure of this test generates a warning message instead of marking the whole testing sequence as failed.
 
 [^TEX]: Objects, non-empty strings, lists, or dictionaries, integers not equal to zero or `True`.
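To see what the new **level** attribute changes in practice, here is a self-contained sketch that mirrors the counter logic added to `netsim/cli/validate.py` later in this patch. It assumes the `python-box` package netlab already uses; the sample test entries are made up. A run whose only failure comes from a `level: warning` test ends with exit code 3 instead of 1:

```python
# Minimal sketch mirroring the TEST_COUNT logic added in netsim/cli/validate.py;
# the 'session' and 'prefix' test entries are hypothetical.
from box import Box

TEST_COUNT = Box({'passed': 0, 'failed': 0, 'warning': 0, 'count': 0, 'skip': 0})

def record(v_entry: Box, passed: bool) -> None:
  TEST_COUNT.count += 1
  if passed:
    TEST_COUNT.passed += 1
  elif v_entry.get('level') == 'warning':   # failed, but the test is only a warning
    TEST_COUNT.warning += 1
  else:                                     # failed for real
    TEST_COUNT.failed += 1

record(Box({'name': 'session', 'level': 'warning'}), passed=False)
record(Box({'name': 'prefix'}), passed=True)

# Same expression as the final sys.exit call in the patch:
# warnings alone yield exit code 3, hard failures 1, success 0.
exit_code = 0 if not (TEST_COUNT.failed or TEST_COUNT.warning) else 1 if TEST_COUNT.failed else 3
print(exit_code)                            # prints 3
```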
diff --git a/netsim/cli/validate.py b/netsim/cli/validate.py
index a93808433..d4063f817 100644
--- a/netsim/cli/validate.py
+++ b/netsim/cli/validate.py
@@ -16,7 +16,7 @@
 from . import load_snapshot, parser_add_debug, parser_add_verbose, external_commands
 from ..utils import log, templates, strings, status as _status, files as _files
-from ..data import global_vars
+from ..data import global_vars,get_box
 from ..augment import devices
 from .. import data
 from .connect import connect_to_node, connect_to_tool, LogLevel
@@ -77,6 +77,23 @@ def validate_parse(args: typing.List[str]) -> argparse.Namespace:
 #
 ERROR_ONLY: bool = False
 TEST_HEADER: dict = {}
+TEST_COUNT: Box = get_box({'passed': 0, 'failed': 0, 'warning': 0, 'count': 0, 'skip': 0})
+
+'''
+increase_fail_count, increase_pass_count: Increase the counters based on test severity level
+'''
+def increase_fail_count(v_entry: Box) -> None:
+  global TEST_COUNT
+  TEST_COUNT.count += 1
+  if v_entry.get('level',None) == 'warning':
+    TEST_COUNT.warning += 1
+  else:
+    TEST_COUNT.failed += 1
+
+def increase_pass_count(v_entry: Box) -> None:
+  global TEST_COUNT
+  TEST_COUNT.count += 1
+  TEST_COUNT.passed += 1
 
 '''
 list_tests: display validation tests defined for the current lab topology
@@ -121,6 +138,7 @@ def log_failure(
       msg: str,
       topology: Box,
       f_status: str = 'FAIL',
+      f_color: str = 'bright_red',
       more_data: typing.Optional[str] = None) -> None:
 
   global TEST_HEADER,ERROR_ONLY
@@ -131,7 +149,7 @@
     TEST_HEADER = {}
 
   o_file = sys.stderr if ERROR_ONLY else sys.stdout
-  p_status(f_status,'bright_red',topology,stderr=ERROR_ONLY)
+  p_status(f_status,f_color,topology,stderr=ERROR_ONLY)
   print(msg,file=o_file)
   if more_data:
     p_status('MORE','bright_black',topology,stderr=ERROR_ONLY)
@@ -149,12 +167,12 @@
 
 # Print generic "ambivalent info" message
 #
-def log_info(msg: str, topology: Box, f_status: str = 'INFO') -> None:
+def log_info(msg: str, topology: Box, f_status: str = 'INFO', f_color: str = 'bright_cyan') -> None:
   global ERROR_ONLY
 
   if ERROR_ONLY:
     return
-  p_status(f_status,'yellow',topology)
+  p_status(f_status,f_color,topology)
   print(msg)
 
 # Print "test failed on node"
 #
@@ -162,7 +180,10 @@ def log_info(msg: str, topology: Box, f_status: str = 'INFO', f_color: str = 'bright_cyan') -> None:
 def p_test_fail(n_name: str, v_entry: Box, topology: Box, fail_msg: str = '') -> None:
   err = v_entry.get('fail',fail_msg or f'Test failed for node {n_name}')
   err = f'Node {n_name}: '+err
-  log_failure(err,topology)
+  if v_entry.get('level') == 'warning':
+    log_failure(err,topology,f_status='WARNING',f_color='bright_yellow')
+  else:
+    log_failure(err,topology)
 
 # Print "test passed"
 #
@@ -172,6 +193,12 @@ def p_test_pass(v_entry: Box, topology: Box) -> None:
 
 _validation_plugins: dict = {}
 
+'''
+test_plural: get "one test" or "x tests"
+'''
+def test_plural(cnt: int) -> str:
+  return 'one test' if cnt == 1 else f'{cnt} tests'
+
 '''
 extend_first_wait_time: some devices need extra time to start working, even
 when the configuration process has completed. Assuming we can't (or don't want to)
@@ -322,7 +349,7 @@ class PluginEvalError(Exception):   # Exception class used to raise plugin eva
   PluginEvalError exceptions, resulting in custom error messages.
 '''
 def exec_plugin_function(action: str, v_entry: Box, node: Box, result: typing.Optional[Box] = None) -> typing.Any:
-  global test_skip_count
+  global TEST_COUNT
 
   p_name = f'validate_{node.device}'
   exec = f'{p_name}.{action}_{v_entry.plugin}'
@@ -347,7 +374,7 @@
         msg=str(wn),
         f_status='SKIPPED',
         topology=topology)
-      test_skip_count += 1
+      TEST_COUNT.skip += 1
       return None
   except AttributeError as ex:
     if p_name in str(ex):
@@ -632,10 +659,6 @@ def wait_before_testing(
     time.sleep(wait_time if wait_time < 5 else 5)   # Wait no more than five seconds
     wait_time = wait_time - 5
 
-test_skip_count: int
-test_result_count: int
-test_pass_count: int
-
 """
 execute_validation_expression: execute the v_entry.valid string in a safe environment
 with error/success logging.
@@ -666,8 +689,7 @@
       report_error: bool,
       report_success: bool = True) -> typing.Optional[bool]:
 
-  global test_skip_count,test_result_count,test_pass_count
-  global BUILTINS
+  global BUILTINS,TEST_COUNT
 
   v_test = get_entry_value(v_entry,'valid',node,topology)
   if not v_test:                        # Do we have a validation expression for the current device?
@@ -675,7 +697,7 @@
       f'Test results validation not defined for device {node.device} / node {node.name}',
       f_status = 'SKIPPED',
       topology=topology)
-    test_skip_count += 1
+    TEST_COUNT.skip += 1
     return None
   else:
     try:                                # Otherwise try to evaluate the validation expression
@@ -701,13 +723,12 @@
   if OK is not None and not OK:         # We have a real result (not skipped) that is not OK
     if report_error:
       p_test_fail(node.name,v_entry,topology)
-      test_result_count += 1
+      increase_fail_count(v_entry)
     return bool(OK)
   elif OK:                              # ... or we might have a positive result
     if report_success:
       log_progress(f'Validation succeeded on {node.name}',topology)
-      test_result_count += 1
-      test_pass_count += 1
+      increase_pass_count(v_entry)
     return bool(OK)
 
   return OK
@@ -724,37 +745,34 @@ def execute_suzieq_validation(
       verbosity: int,
       report_error: bool) -> typing.Optional[bool]:
 
-  global test_skip_count,test_result_count,test_pass_count
-
   C_OK = None
   if isinstance(result,Box):
-    return execute_validation_expression(v_entry,node,topology,result,verbosity,report_error)
-
-  for record in result:
-    for kw in ['assert']:
-      if kw in record:
-        record[f'_{kw}'] = record[kw]
-
-    R_OK = execute_validation_expression(v_entry,node,topology,record,verbosity,report_error,report_success=False)
-    if C_OK is None:
-      C_OK = bool(R_OK)
-    elif R_OK is not None:
-      if v_entry.suzieq.get('valid','any') == 'any':
-        C_OK = C_OK or bool(R_OK)
-      else:
-        C_OK = C_OK and bool(R_OK)
+    C_OK = execute_validation_expression(v_entry,node,topology,result,verbosity,report_error)
+  else:
+    for record in result:
+      for kw in ['assert']:
+        if kw in record:
+          record[f'_{kw}'] = record[kw]
+
+      R_OK = execute_validation_expression(v_entry,node,topology,record,verbosity,report_error,report_success=False)
+      if C_OK is None:
+        C_OK = bool(R_OK)
+      elif R_OK is not None:
+        if v_entry.suzieq.get('valid','any') == 'any':
+          C_OK = C_OK or bool(R_OK)
+        else:
+          C_OK = C_OK and bool(R_OK)
 
   if C_OK is None:
     return C_OK
 
   if not C_OK:
     if report_error:
       p_test_fail(node.name,v_entry,topology)
-    test_result_count += 1
+    increase_fail_count(v_entry)
     return C_OK
 
   log_progress(f'Validation succeeded on {node.name}',topology)
-  test_result_count += 1
-  test_pass_count += 1
+  increase_pass_count(v_entry)
   return True
 
 """
@@ -775,8 +793,6 @@ def execute_validation_plugin(
       verbosity: int,
       report_error: bool) -> typing.Optional[bool]:
 
-  global test_skip_count,test_result_count,test_pass_count
-
   try:
     OK = exec_plugin_function('valid',v_entry,node,result)
     if OK is not None and not OK:
@@ -784,7 +800,7 @@
         p_test_fail(node.name,v_entry,topology)
   except Exception as ex:
     if report_error:
-      log_failure(f'{node.name}: {str(ex)}',topology)
+      p_test_fail(node.name,v_entry,topology,str(ex))
     OK = False
 
   if (not OK and verbosity) or verbosity >= 2:
@@ -793,12 +809,12 @@
     print(f'Evaluated result {OK}')
 
   if OK is not None:
-    if report_error or OK:
-      test_result_count += 1
     if OK:
       msg = f'{node.name}: {OK}' if isinstance(OK,str) else f'Validation succeeded on {node.name}'
       log_progress(msg,topology)
-      test_pass_count += 1
+      increase_pass_count(v_entry)
+    elif report_error:
+      increase_fail_count(v_entry)
 
   return OK if OK is None else bool(OK)
@@ -812,7 +828,7 @@ def execute_node_validation(
       report_error: bool,
       args: argparse.Namespace) -> typing.Tuple[typing.Optional[bool],typing.Optional[bool]]:
 
-  global test_skip_count,test_result_count,test_pass_count
+  global TEST_COUNT
 
   node = topology.nodes[n_name]
   result = data.get_empty_box()
@@ -826,7 +842,7 @@
       f'Test action not defined for device {node.device} / node {n_name}',
       f_status='SKIPPED',
       topology=topology)
-    test_skip_count += 1                          # Increment skip count for test results summary
+    TEST_COUNT.skip += 1                          # Increment skip count for test results summary
     return (True,None)                            # Processed, unknown result
 
   if args.verbose >= 2:                           # Print out what will be executed
@@ -837,19 +853,21 @@
   if action == 'show':                            # We got a 'show' action, try to get parsed results
     result = get_parsed_result(v_entry,n_name,topology,args.verbose)
     if '_error' in result:                        # OOPS, we failed (unrecoverable)
-      test_result_count += 1                      # Increase result count
+      increase_fail_count(v_entry)
       return (True, False)                        # ... and return (processed, failed)
   elif action == 'exec':                          # We got an 'exec' action, try to get something out of the device
     result.stdout = get_result_string(v_entry,n_name,topology,report_error)
     if result.stdout is False:                    # Did the exec command fail?
       if report_error:
-        test_result_count += 1
+        increase_fail_count(v_entry)
       return (True, False)                        # Return (processed, failed)
   elif action == 'suzieq':
     result = get_suzieq_result(v_entry,n_name,topology,args.verbose)
     OK = bool(result) != (v_entry.suzieq.get('expect','data') == 'empty')
     if not OK and report_error:
       p_test_fail(n_name,v_entry,topology,'suzieq did not return the expected data')
+      increase_fail_count(v_entry)
+      return(True,False)
 
   if OK != False and 'valid' in v_entry:          # Do we have a validation expression in the test entry?
     if action == 'suzieq':
@@ -884,8 +902,6 @@
 * 'nodes' list that is used to build the '--limit' argument
 '''
 def execute_netlab_config(v_entry: Box, topology: Box) -> bool:
-  global test_skip_count,test_result_count,test_pass_count
-
   node_str = ",".join(v_entry.nodes)
   cmd = f'netlab config {v_entry.config.template} --limit {node_str}'
   for k,v in v_entry.config.variable.items():
@@ -894,7 +910,7 @@
     print(f'Executing {cmd}')
   log_info(f'Executing configuration snippet {v_entry.config.template}',topology)
   if external_commands.run_command(cmd,check_result=True,ignore_errors=True,return_stdout=True):
-    test_pass_count += 1
+    increase_pass_count(v_entry)
     msg = v_entry.get('pass','Devices configured')
     log_progress(msg,topology)
     return True
@@ -903,6 +919,7 @@
       f'"{cmd}" failed',
       topology,
       more_data='Execute the command manually to figure out what went wrong')
+    increase_fail_count(v_entry)
     return False
 
 '''
@@ -913,7 +930,7 @@ def execute_validation_test(
       topology: Box,
       start_time: typing.Optional[typing.Union[int,float]],
       args: argparse.Namespace) -> typing.Optional[bool]:
-  global test_skip_count,test_result_count,test_pass_count
+  global TEST_COUNT
 
   # Return value uses ternary logic: OK (True), Fail(False), Skipped (None)
   ret_value = None
@@ -921,7 +938,7 @@
   p_test_header(v_entry,topology)                 # Print test header
   if 'wait' in v_entry and not v_entry.nodes:     # Handle pure wait case
     if v_entry.get('stop_on_error',False):
-      if test_result_count and test_pass_count != test_result_count:
+      if TEST_COUNT.failed:
         log_failure('Validation failed due to previous errors',topology)
         sys.exit(1)
     else:
@@ -936,11 +953,10 @@
       f'There are no nodes specified for this test, skipping...',
       f_status='SKIPPED',
      topology=topology)
-    test_skip_count += 1
+    TEST_COUNT.skip += 1
     return None
 
   if 'config' in v_entry:
-    test_result_count = test_result_count + 1
     return execute_netlab_config(v_entry,topology)
 
   wait_time = 0 if args.nowait else v_entry.get('wait',0)
@@ -982,6 +998,7 @@
     log_info(
       f'Succeeded in { round(time.time() - start_time,1) } seconds',
      f_status = 'PASS',
+      f_color= 'light_green',
       topology=topology)
   p_test_pass(v_entry,topology)                   # ... declare Mission Accomplished
@@ -1042,7 +1059,7 @@ def filter_by_nodes(args: argparse.Namespace, topology: Box) -> None:
 Main routine: run all tests, handle validation errors, print summary results
 '''
 def run(cli_args: typing.List[str]) -> None:
-  global test_skip_count,test_result_count,test_pass_count,ERROR_ONLY
+  global TEST_COUNT,ERROR_ONLY
   args = validate_parse(cli_args)
   log.set_logging_flags(args)
   topology = load_snapshot(args)
@@ -1066,9 +1083,6 @@
   ERROR_ONLY = args.error_only
   status = True
   cnt = 0
-  test_skip_count = 0
-  test_result_count = 0
-  test_pass_count = 0
   topology._v_len = max([ len(v_entry.name) for v_entry in topology.validate ] + [ 7 ])
   start_time = _status.lock_timestamp() or time.time()
   log.init_log_system(header=False)
@@ -1103,12 +1117,14 @@
   if not ERROR_ONLY:
     print()
 
-  if test_pass_count and test_pass_count == test_result_count:
-    log_progress(f'Tests passed: {test_pass_count}',topology,f_status='SUCCESS')
-  elif test_result_count:
-    log_failure('Tests completed, validation failed',topology)
-
-  if test_skip_count:
-    log_info('Some tests were skipped, the results are not reliable',topology)
-
-  sys.exit(0 if status else 1)
+  if TEST_COUNT.passed == TEST_COUNT.count:
+    log_progress(f'Tests passed: {TEST_COUNT.passed}',topology,f_status='SUCCESS')
+  elif TEST_COUNT.count:
+    if TEST_COUNT.failed != 0:
+      log_failure(f'{test_plural(TEST_COUNT.count)} completed, {test_plural(TEST_COUNT.failed)} failed',topology)
+    elif TEST_COUNT.warning != 0:
+      log_info(f'{test_plural(TEST_COUNT.warning)} out of {test_plural(TEST_COUNT.count)} generated a warning',topology)
+  if TEST_COUNT.skip:
+    log_info(f'{test_plural(TEST_COUNT.skip)} out of {test_plural(TEST_COUNT.count)} were skipped, the results are not reliable',topology)
+
+  sys.exit(0 if not (TEST_COUNT.failed or TEST_COUNT.warning) else 1 if TEST_COUNT.failed else 3)
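As a quick illustration of the new summary messages (not part of the patch; the counter values are made up), this dry run uses the same `test_plural()` helper and message formats as `run()` above:

```python
# Dry run of the summary wording built from test_plural(); values are hypothetical.
def test_plural(cnt: int) -> str:
  return 'one test' if cnt == 1 else f'{cnt} tests'

def summary(passed: int, failed: int, warning: int, count: int) -> str:
  if passed == count:                              # everything passed
    return f'Tests passed: {passed}'
  if failed:                                       # at least one hard failure
    return f'{test_plural(count)} completed, {test_plural(failed)} failed'
  return f'{test_plural(warning)} out of {test_plural(count)} generated a warning'

print(summary(passed=4, failed=0, warning=1, count=5))
# -> one test out of 5 tests generated a warning
```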
diff --git a/netsim/defaults/attributes.yml b/netsim/defaults/attributes.yml
index b6fdb07ce..12794fe56 100644
--- a/netsim/defaults/attributes.yml
+++ b/netsim/defaults/attributes.yml
@@ -158,6 +158,7 @@ _v_entry:                       # Validation entry
   valid: { type: str, valid_values: [ all, any ]}
   plugin: str
   stop_on_error: bool
+  level: { type: str, valid_values: [ warning ] }
 _v_option:
   _description: |
     A "show", "exec" or "valid" parameter in a network validation test. It's a
diff --git a/tests/integration/device-module-test b/tests/integration/device-module-test
index 0a1ec5f94..5cc206e49 100755
--- a/tests/integration/device-module-test
+++ b/tests/integration/device-module-test
@@ -156,7 +156,10 @@ def run_cleanup(args: argparse.Namespace,final: bool = True, abort: bool = True)
 
 RESULTS: Box = get_empty_box()
 
-def log_result(test: str, stage: typing.Optional[str] = None, result: bool = False) -> None:
+def log_result(
+      test: str,
+      stage: typing.Optional[str] = None,
+      result: typing.Union[bool,str] = False) -> None:
   global RESULTS
 
   test = os.path.basename(test).split('.')[0]
@@ -165,8 +168,8 @@ def log_result(test: str, stage: typing.Optional[str] = None, result: bool = Fal
   else:
     RESULTS[test][stage] = result
 
-def print_fail(step: str, fail_color: str = "bright_red") -> None:
-  _strings.print_colored_text(f"{step}(FAIL)",color=fail_color)
+def print_fail(step: str, fail_status: str = "FAIL", fail_color: str = "bright_red") -> None:
+  _strings.print_colored_text(f"{step}({fail_status})",color=fail_color)
   print(" ",end="",flush=True)
 
 def print_ok(step: str) -> None:
@@ -227,6 +230,9 @@ def run_test(test: str, args: argparse.Namespace,first: bool) -> None:
       print_ok("validate")
     elif valid.returncode == 2:
       print("validate(SKIPPED)",end=" ",flush=True)
+    elif valid.returncode == 3:
+      log_result(test,'validate','warning')
+      print_fail("validate",fail_status='WARNING',fail_color="bright_yellow")
     else:
       if args.quiet or args.batch:
         print_fail("validate")