diff --git a/scripts/check_test_cases.py b/scripts/check_test_cases.py
new file mode 100755
index 0000000000..2576dc7d10
--- /dev/null
+++ b/scripts/check_test_cases.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+
+"""Sanity checks for test data."""
+
+# Copyright The Mbed TLS Contributors
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+
+import argparse
+import re
+import sys
+
+from mbedtls_framework import collect_test_cases
+
+
+class DescriptionChecker(collect_test_cases.TestDescriptionExplorer):
+    """Check all test case descriptions.
+
+* Check that each description is valid (length, allowed character set, etc.).
+* Check that there is no duplicated description inside of one test suite.
+"""
+
+    def __init__(self, results):
+        self.results = results
+
+    def new_per_file_state(self):
+        """Dictionary mapping descriptions to their line number."""
+        return {}
+
+    def process_test_case(self, per_file_state,
+                          file_name, line_number, description):
+        """Check test case descriptions for errors."""
+        results = self.results
+        seen = per_file_state
+        if description in seen:
+            results.error(file_name, line_number,
+                          'Duplicate description (also line {})',
+                          seen[description])
+            return
+        if re.search(br'[\t;]', description):
+            results.error(file_name, line_number,
+                          'Forbidden character \'{}\' in description',
+                          re.search(br'[\t;]', description).group(0).decode('ascii'))
+        if re.search(br'[^ -~]', description):
+            results.error(file_name, line_number,
+                          'Non-ASCII character in description')
+        if len(description) > 66:
+            results.warning(file_name, line_number,
+                            'Test description too long ({} > 66)',
+                            len(description))
+        seen[description] = line_number
+
+def main():
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument('--list-all',
+                        action='store_true',
+                        help='List all test cases, without doing checks')
+    parser.add_argument('--quiet', '-q',
+                        action='store_true',
+                        help='Hide warnings')
+    parser.add_argument('--verbose', '-v',
+                        action='store_false', dest='quiet',
+                        help='Show warnings (default: on; undoes --quiet)')
+    options = parser.parse_args()
+    if options.list_all:
+        descriptions = collect_test_cases.collect_available_test_cases()
+        sys.stdout.write('\n'.join(descriptions + ['']))
+        return
+    results = collect_test_cases.Results(options)
+    checker = DescriptionChecker(results)
+    try:
+        checker.walk_all()
+    except collect_test_cases.ScriptOutputError as e:
+        results.error(e.script_name, e.idx,
+                      '"{}" should be listed as "<suite_name>;<description>"',
+                      e.line)
+    if (results.warnings or results.errors) and not options.quiet:
+        sys.stderr.write('{}: {} errors, {} warnings\n'
+                         .format(sys.argv[0], results.errors, results.warnings))
+    sys.exit(1 if results.errors else 0)
+
+if __name__ == '__main__':
+    main()
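Note: the sketch below shows how the DescriptionChecker above can be driven outside of its own main(); it mirrors the wiring in main(), but the SimpleNamespace stand-in for the parsed options and the assumption that check_test_cases.py is importable as a module are illustration-only, not part of this patch.

    # Hypothetical programmatic driver, mirroring check_test_cases.py's main().
    from types import SimpleNamespace

    from mbedtls_framework import collect_test_cases
    from check_test_cases import DescriptionChecker  # assumes the script is on sys.path

    options = SimpleNamespace(quiet=False)   # Results only reads options.quiet
    results = collect_test_cases.Results(options)
    checker = DescriptionChecker(results)
    checker.walk_all()                       # *.data suites, ssl-opt.sh and compat.sh
    print('{} errors, {} warnings'.format(results.errors, results.warnings))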
diff --git a/scripts/mbedtls_framework/collect_test_cases.py b/scripts/mbedtls_framework/collect_test_cases.py
new file mode 100644
index 0000000000..2567b57a33
--- /dev/null
+++ b/scripts/mbedtls_framework/collect_test_cases.py
@@ -0,0 +1,166 @@
+"""Discover all the test cases (unit tests and SSL tests)."""
+
+# Copyright The Mbed TLS Contributors
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+
+import glob
+import os
+import re
+import subprocess
+import sys
+
+from . import build_tree
+
+
+class ScriptOutputError(ValueError):
+    """A kind of ValueError that indicates we found
+    the script doesn't list test cases in an expected
+    pattern.
+    """
+
+    @property
+    def script_name(self):
+        return super().args[0]
+
+    @property
+    def idx(self):
+        return super().args[1]
+
+    @property
+    def line(self):
+        return super().args[2]
+
+class Results:
+    """Store file and line information about errors or warnings in test suites."""
+
+    def __init__(self, options):
+        self.errors = 0
+        self.warnings = 0
+        self.ignore_warnings = options.quiet
+
+    def error(self, file_name, line_number, fmt, *args):
+        sys.stderr.write(('{}:{}:ERROR:' + fmt + '\n').
+                         format(file_name, line_number, *args))
+        self.errors += 1
+
+    def warning(self, file_name, line_number, fmt, *args):
+        if not self.ignore_warnings:
+            sys.stderr.write(('{}:{}:Warning:' + fmt + '\n')
+                             .format(file_name, line_number, *args))
+            self.warnings += 1
+
+class TestDescriptionExplorer:
+    """An iterator over test cases with descriptions.
+
+The test cases that have descriptions are:
+* Individual unit tests (entries in a .data file) in test suites.
+* Individual test cases in ssl-opt.sh.
+
+This is an abstract class. To use it, derive a class that implements
+the process_test_case method, and call walk_all().
+"""
+
+    def process_test_case(self, per_file_state,
+                          file_name, line_number, description):
+        """Process a test case.
+
+per_file_state: an object created by new_per_file_state() at the beginning
+                of each file.
+file_name: a relative path to the file containing the test case.
+line_number: the line number in the given file.
+description: the test case description as a byte string.
+"""
+        raise NotImplementedError
+
+    def new_per_file_state(self):
+        """Return a new per-file state object.
+
+The default per-file state object is None. Child classes that require per-file
+state may override this method.
+"""
+        #pylint: disable=no-self-use
+        return None
+
+    def walk_test_suite(self, data_file_name):
+        """Iterate over the test cases in the given unit test data file."""
+        in_paragraph = False
+        descriptions = self.new_per_file_state() # pylint: disable=assignment-from-none
+        with open(data_file_name, 'rb') as data_file:
+            for line_number, line in enumerate(data_file, 1):
+                line = line.rstrip(b'\r\n')
+                if not line:
+                    in_paragraph = False
+                    continue
+                if line.startswith(b'#'):
+                    continue
+                if not in_paragraph:
+                    # This is a test case description line.
+                    self.process_test_case(descriptions,
+                                           data_file_name, line_number, line)
+                in_paragraph = True
+
+    def collect_from_script(self, script_name):
+        """Collect the test cases in a script by calling its listing test cases
+option"""
+        descriptions = self.new_per_file_state() # pylint: disable=assignment-from-none
+        listed = subprocess.check_output(['sh', script_name, '--list-test-cases'])
+        # Assume test file is responsible for printing identical format of
+        # test case description between --list-test-cases and its OUTCOME.CSV
+        #
+        # idx indicates the number of test case since there is no line number
+        # in the script for each test case.
+        for idx, line in enumerate(listed.splitlines()):
+            # We are expecting the script to list the test cases in
+            # `<suite_name>;<description>` pattern.
+            script_outputs = line.split(b';', 1)
+            if len(script_outputs) == 2:
+                suite_name, description = script_outputs
+            else:
+                raise ScriptOutputError(script_name, idx, line.decode("utf-8"))
+
+            self.process_test_case(descriptions,
+                                   suite_name.decode('utf-8'),
+                                   idx,
+                                   description.rstrip())
+
+    @staticmethod
+    def collect_test_directories():
+        """Get the relative path for the TLS and Crypto test directories."""
+        mbedtls_root = build_tree.guess_mbedtls_root()
+        directories = [os.path.join(mbedtls_root, 'tests'),
+                       os.path.join(mbedtls_root, 'tf-psa-crypto', 'tests')]
+        directories = [os.path.relpath(p) for p in directories]
+        return directories
+
+    def walk_all(self):
+        """Iterate over all named test cases."""
+        test_directories = self.collect_test_directories()
+        for directory in test_directories:
+            for data_file_name in glob.glob(os.path.join(directory, 'suites',
+                                                         '*.data')):
+                self.walk_test_suite(data_file_name)
+
+            for sh_file in ['ssl-opt.sh', 'compat.sh']:
+                sh_file = os.path.join(directory, sh_file)
+                if os.path.isfile(sh_file):
+                    self.collect_from_script(sh_file)
+
+class TestDescriptions(TestDescriptionExplorer):
+    """Collect the available test cases."""
+
+    def __init__(self):
+        super().__init__()
+        self.descriptions = set()
+
+    def process_test_case(self, _per_file_state,
+                          file_name, _line_number, description):
+        """Record an available test case."""
+        base_name = re.sub(r'\.[^.]*$', '', re.sub(r'.*/', '', file_name))
+        key = ';'.join([base_name, description.decode('utf-8')])
+        self.descriptions.add(key)
+
+def collect_available_test_cases():
+    """Collect the available test cases."""
+    explorer = TestDescriptions()
+    explorer.walk_all()
+    return sorted(explorer.descriptions)
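TestDescriptionExplorer is meant to be subclassed: implement process_test_case() (and optionally new_per_file_state()), then call walk_all(), as its docstring says. A minimal sketch of such a subclass follows; the TestCounter name and the per-file counting are illustrative only, not part of this patch.

    # Hypothetical explorer that counts test cases per file.
    from mbedtls_framework import collect_test_cases

    class TestCounter(collect_test_cases.TestDescriptionExplorer):
        def __init__(self):
            super().__init__()
            self.counts = {}  # file name -> number of test cases seen

        def process_test_case(self, _per_file_state,
                              file_name, _line_number, _description):
            self.counts[file_name] = self.counts.get(file_name, 0) + 1

    counter = TestCounter()
    counter.walk_all()  # .data suites plus the ssl-opt.sh/compat.sh listings
    for file_name, count in sorted(counter.counts.items()):
        print('{:6} {}'.format(count, file_name))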
+ """ + self.error_count = 0 + self.warning_count = 0 + self.stderr = stderr + self.log_file = None + if log_file: + self.log_file = open(log_file, 'w', encoding='utf-8') + + def new_section(self, fmt, *args, **kwargs): + self._print_line('\n*** ' + fmt + ' ***\n', *args, **kwargs) + + def info(self, fmt, *args, **kwargs): + self._print_line('Info: ' + fmt, *args, **kwargs) + + def error(self, fmt, *args, **kwargs): + self.error_count += 1 + self._print_line('Error: ' + fmt, *args, **kwargs) + + def warning(self, fmt, *args, **kwargs): + self.warning_count += 1 + self._print_line('Warning: ' + fmt, *args, **kwargs) + + def _print_line(self, fmt, *args, **kwargs): + line = (fmt + '\n').format(*args, **kwargs) + if self.stderr: + sys.stderr.write(line) + if self.log_file: + self.log_file.write(line) + +def execute_reference_driver_tests(results: Results, ref_component: str, driver_component: str, \ + outcome_file: str) -> None: + """Run the tests specified in ref_component and driver_component. Results + are stored in the output_file and they will be used for the following + coverage analysis""" + results.new_section("Test {} and {}", ref_component, driver_component) + + shell_command = "tests/scripts/all.sh --outcome-file " + outcome_file + \ + " " + ref_component + " " + driver_component + results.info("Running: {}", shell_command) + ret_val = subprocess.run(shell_command.split(), check=False).returncode + + if ret_val != 0: + results.error("failed to run reference/driver components") + +IgnoreEntry = typing.Union[str, typing.Pattern] + +def name_matches_pattern(name: str, str_or_re: IgnoreEntry) -> bool: + """Check if name matches a pattern, that may be a string or regex. + - If the pattern is a string, name must be equal to match. + - If the pattern is a regex, name must fully match. + """ + # The CI's python is too old for re.Pattern + #if isinstance(str_or_re, re.Pattern): + if not isinstance(str_or_re, str): + return str_or_re.fullmatch(name) is not None + else: + return str_or_re == name + +def open_outcome_file(outcome_file: str) -> typing.TextIO: + if outcome_file.endswith('.gz'): + return gzip.open(outcome_file, 'rt', encoding='utf-8') + elif outcome_file.endswith('.xz'): + return lzma.open(outcome_file, 'rt', encoding='utf-8') + else: + return open(outcome_file, 'rt', encoding='utf-8') + +def read_outcome_file(outcome_file: str) -> Outcomes: + """Parse an outcome file and return an outcome collection. + """ + outcomes = {} + with open_outcome_file(outcome_file) as input_file: + for line in input_file: + (_platform, component, suite, case, result, _cause) = line.split(';') + # Note that `component` is not unique. If a test case passes on Linux + # and fails on FreeBSD, it'll end up in both the successes set and + # the failures set. + suite_case = ';'.join([suite, case]) + if component not in outcomes: + outcomes[component] = ComponentOutcomes(set(), set()) + if result == 'PASS': + outcomes[component].successes.add(suite_case) + elif result == 'FAIL': + outcomes[component].failures.add(suite_case) + + return outcomes + + +class Task: + """Base class for outcome analysis tasks.""" + + # Override the following in child classes. + # Map test suite names (with the test_suite_prefix) to a list of ignored + # test cases. Each element in the list can be either a string or a regex; + # see the `name_matches_pattern` function. + IGNORED_TESTS = {} #type: typing.Dict[str, typing.List[IgnoreEntry]] + + def __init__(self, options) -> None: + """Pass command line options to the tasks. 
+
+        Each task decides which command line options it cares about.
+        """
+        pass
+
+    def section_name(self) -> str:
+        """The section name to use in results."""
+        raise NotImplementedError
+
+    def ignored_tests(self, test_suite: str) -> typing.Iterator[IgnoreEntry]:
+        """Generate the ignore list for the specified test suite."""
+        if test_suite in self.IGNORED_TESTS:
+            yield from self.IGNORED_TESTS[test_suite]
+        pos = test_suite.find('.')
+        if pos != -1:
+            base_test_suite = test_suite[:pos]
+            if base_test_suite in self.IGNORED_TESTS:
+                yield from self.IGNORED_TESTS[base_test_suite]
+
+    def is_test_case_ignored(self, test_suite: str, test_string: str) -> bool:
+        """Check if the specified test case is ignored."""
+        for str_or_re in self.ignored_tests(test_suite):
+            if name_matches_pattern(test_string, str_or_re):
+                return True
+        return False
+
+    def run(self, results: Results, outcomes: Outcomes):
+        """Run the analysis on the specified outcomes.
+
+        Signal errors via the results object.
+        """
+        raise NotImplementedError
+
+
+class CoverageTask(Task):
+    """Analyze test coverage."""
+
+    # Test cases whose suite and description are matched by an entry in
+    # IGNORED_TESTS are expected to be never executed.
+    # All other test cases are expected to be executed at least once.
+
+    def __init__(self, options) -> None:
+        super().__init__(options)
+        self.full_coverage = options.full_coverage #type: bool
+
+    @staticmethod
+    def section_name() -> str:
+        return "Analyze coverage"
+
+    def run(self, results: Results, outcomes: Outcomes) -> None:
+        """Check that all available test cases are executed at least once."""
+        # Make sure that the generated data files are present (and up-to-date).
+        # This allows analyze_outcomes.py to run correctly on a fresh Git
+        # checkout.
+        cp = subprocess.run(['make', 'generated_files'],
+                            cwd='tests',
+                            stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+                            check=False)
+        if cp.returncode != 0:
+            sys.stderr.write(cp.stdout.decode('utf-8'))
+            results.error("Failed \"make generated_files\" in tests. "
+                          "Coverage analysis may be incorrect.")
+        available = collect_test_cases.collect_available_test_cases()
+        for suite_case in available:
+            hit = any(suite_case in comp_outcomes.successes or
+                      suite_case in comp_outcomes.failures
+                      for comp_outcomes in outcomes.values())
+            (test_suite, test_description) = suite_case.split(';')
+            ignored = self.is_test_case_ignored(test_suite, test_description)
+
+            if not hit and not ignored:
+                if self.full_coverage:
+                    results.error('Test case not executed: {}', suite_case)
+                else:
+                    results.warning('Test case not executed: {}', suite_case)
+            elif hit and ignored:
+                # If a test case is no longer always skipped, we should remove
+                # it from the ignore list.
+                if self.full_coverage:
+                    results.error('Test case was executed but marked as ignored for coverage: {}',
+                                  suite_case)
+                else:
+                    results.warning('Test case was executed but marked as ignored for coverage: {}',
+                                    suite_case)
+
+
+class DriverVSReference(Task):
+    """Compare outcomes from testing with and without a driver.
+
+    There are 2 options to use analyze_driver_vs_reference_xxx locally:
+    1. Run tests and then analysis:
+      - tests/scripts/all.sh --outcome-file "$PWD/out.csv" <component_ref> <component_driver>
+      - tests/scripts/analyze_outcomes.py out.csv analyze_driver_vs_reference_xxx
+    2. Let this script run both automatically:
+      - tests/scripts/analyze_outcomes.py out.csv analyze_driver_vs_reference_xxx
+    """
+
+    # Override the following in child classes.
+    # Configuration name (all.sh component) used as the reference.
+    REFERENCE = ''
+    # Configuration name (all.sh component) used as the driver.
+    DRIVER = ''
+    # Ignored test suites (without the test_suite_ prefix).
+    IGNORED_SUITES = [] #type: typing.List[str]
+
+    def __init__(self, options) -> None:
+        super().__init__(options)
+        self.ignored_suites = frozenset('test_suite_' + x
+                                        for x in self.IGNORED_SUITES)
+
+    def section_name(self) -> str:
+        return f"Analyze driver {self.DRIVER} vs reference {self.REFERENCE}"
+
+    def run(self, results: Results, outcomes: Outcomes) -> None:
+        """Check that all tests passing in the driver component are also
+        passing in the corresponding reference component.
+        Skip:
+        - full test suites provided in ignored_suites list
+        - only some specific test inside a test suite, for which the corresponding
+          output string is provided
+        """
+        ref_outcomes = outcomes.get("component_" + self.REFERENCE)
+        driver_outcomes = outcomes.get("component_" + self.DRIVER)
+
+        if ref_outcomes is None or driver_outcomes is None:
+            results.error("required components are missing: bad outcome file?")
+            return
+
+        if not ref_outcomes.successes:
+            results.error("no passing test in reference component: bad outcome file?")
+            return
+
+        for suite_case in ref_outcomes.successes:
+            # suite_case is like "test_suite_foo.bar;Description of test case"
+            (full_test_suite, test_string) = suite_case.split(';')
+            test_suite = full_test_suite.split('.')[0] # retrieve main part of test suite name
+
+            # Immediately skip fully-ignored test suites
+            if test_suite in self.ignored_suites or \
+               full_test_suite in self.ignored_suites:
+                continue
+
+            # For ignored test cases inside test suites, just remember and:
+            # don't issue an error if they're skipped with drivers,
+            # but issue an error if they're not (means we have a bad entry).
+            ignored = self.is_test_case_ignored(full_test_suite, test_string)
+
+            if not ignored and not suite_case in driver_outcomes.successes:
+                results.error("SKIP/FAIL -> PASS: {}", suite_case)
+            if ignored and suite_case in driver_outcomes.successes:
+                results.error("uselessly ignored: {}", suite_case)
+
+
+# Set this to False if a consuming branch can't achieve full test coverage
+# in its default CI run.
+FULL_COVERAGE_BY_DEFAULT = True
+
+def main(known_tasks: typing.Dict[str, typing.Type[Task]]) -> None:
+    try:
+        parser = argparse.ArgumentParser(description=__doc__)
+        parser.add_argument('outcomes', metavar='OUTCOMES.CSV',
+                            help='Outcome file to analyze (can be .gz or .xz)')
+        parser.add_argument('specified_tasks', default='all', nargs='?',
+                            help='Analysis to be done. By default, run all tasks. '
+                                 'With one or more TASK, run only those. '
+                                 'TASK can be the name of a single task or '
+                                 'comma/space-separated list of tasks. ')
+        parser.add_argument('--allow-partial-coverage', action='store_false',
+                            dest='full_coverage', default=FULL_COVERAGE_BY_DEFAULT,
+                            help=("Only warn if a test case is skipped in all components" +
+                                  (" (default)" if not FULL_COVERAGE_BY_DEFAULT else "") +
+                                  ". Only used by the 'analyze_coverage' task."))
+        parser.add_argument('--list', action='store_true',
+                            help='List all available tasks and exit.')
+        parser.add_argument('--log-file',
+                            default='tests/analyze_outcomes.log',
+                            help='Log file (default: tests/analyze_outcomes.log;'
+                                 ' empty means no log file)')
+        parser.add_argument('--require-full-coverage', action='store_true',
+                            dest='full_coverage', default=FULL_COVERAGE_BY_DEFAULT,
+                            help=("Require all available test cases to be executed" +
+                                  (" (default)" if FULL_COVERAGE_BY_DEFAULT else "") +
+                                  ". Only used by the 'analyze_coverage' task."))
+        options = parser.parse_args()
+
+        if options.list:
+            for task_name in known_tasks:
+                print(task_name)
+            sys.exit(0)
+
+        main_results = Results(log_file=options.log_file)
+
+        if options.specified_tasks == 'all':
+            tasks_list = list(known_tasks.keys())
+        else:
+            tasks_list = re.split(r'[, ]+', options.specified_tasks)
+            for task_name in tasks_list:
+                if task_name not in known_tasks:
+                    sys.stderr.write('invalid task: {}\n'.format(task_name))
+                    sys.exit(2)
+
+        # If the outcome file exists, parse it once and share the result
+        # among tasks to improve performance.
+        # Otherwise, it will be generated by execute_reference_driver_tests.
+        if not os.path.exists(options.outcomes):
+            if len(tasks_list) > 1:
+                sys.stderr.write("multiple tasks found, please provide a valid outcomes file.\n")
+                sys.exit(2)
+
+            task_name = tasks_list[0]
+            task_class = known_tasks[task_name]
+            if not issubclass(task_class, DriverVSReference):
+                sys.stderr.write("please provide valid outcomes file for {}.\n".format(task_name))
+                sys.exit(2)
+            # mypy isn't smart enough to know that REFERENCE and DRIVER
+            # are *class* attributes of all classes derived from
+            # DriverVSReference. (It would be smart enough if we had an
+            # instance of task_class, but we can't construct an instance
+            # until we have the outcome data, so at this point we only
+            # have the class.) So we use indirection to access the class
+            # attributes.
+            execute_reference_driver_tests(main_results,
+                                           getattr(task_class, 'REFERENCE'),
+                                           getattr(task_class, 'DRIVER'),
+                                           options.outcomes)
+
+        outcomes = read_outcome_file(options.outcomes)
+
+        for task_name in tasks_list:
+            task_constructor = known_tasks[task_name]
+            task_instance = task_constructor(options)
+            main_results.new_section(task_instance.section_name())
+            task_instance.run(main_results, outcomes)
+
+        main_results.info("Overall results: {} warnings and {} errors",
+                          main_results.warning_count, main_results.error_count)
+
+        sys.exit(0 if (main_results.error_count == 0) else 1)
+
+    except Exception: # pylint: disable=broad-except
+        # Print the backtrace and exit explicitly with our chosen status.
+        traceback.print_exc()
+        sys.exit(120)
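As the outcome_analysis.py module docstring notes, each consuming branch keeps a thin tests/scripts/analyze_outcomes.py that derives these classes and hands a task dictionary to main(). A rough sketch of such a consumer is shown below; the component, suite and test-case names are placeholders for illustration, not values taken from this patch.

    #!/usr/bin/env python3
    """Hypothetical consuming script (tests/scripts/analyze_outcomes.py)."""

    import re

    from mbedtls_framework import outcome_analysis


    class CoverageTask(outcome_analysis.CoverageTask):
        # Suite name (with the test_suite_ prefix) -> test cases that are
        # expected never to be executed on this branch.
        IGNORED_TESTS = {
            'test_suite_example': [
                'Example test that is always skipped',  # exact-match string
                re.compile(r'Example group: .*'),       # or a compiled regex
            ],
        }


    class DriverVSReferenceExample(outcome_analysis.DriverVSReference):
        # all.sh component names, without the leading "component_".
        REFERENCE = 'test_example_reference'
        DRIVER = 'test_example_driver'
        # Test suites to skip entirely, without the test_suite_ prefix.
        IGNORED_SUITES = ['example_suite']
        IGNORED_TESTS = {}


    KNOWN_TASKS = {
        'analyze_coverage': CoverageTask,
        'analyze_driver_vs_reference_example': DriverVSReferenceExample,
    }

    if __name__ == '__main__':
        outcome_analysis.main(KNOWN_TASKS)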