diff --git a/tools/repo_parser/requirements.txt b/tools/repo_parser/requirements.txt new file mode 100644 index 00000000..537c1ec3 --- /dev/null +++ b/tools/repo_parser/requirements.txt @@ -0,0 +1,3 @@ +mypy +pytest +pytest-cov diff --git a/tools/repo_parser/spec_finder.py b/tools/repo_parser/spec_finder.py index 0901c454..0627f4b0 100755 --- a/tools/repo_parser/spec_finder.py +++ b/tools/repo_parser/spec_finder.py @@ -6,22 +6,38 @@ import os import sys import configparser +from typing import TypedDict, Optional, cast + +class Config(TypedDict): + file_extension: str + multiline_regex: Optional[str] + number_subregex: Optional[str] + text_subregex: Optional[str] + inline_comment_prefix: Optional[str] def _demarkdown(t): return t.replace('**', '').replace('`', '').replace('"', '') -def get_spec_parser(code_dir): +def get_spec_parser(code_dir) -> Config: with open(os.path.join(code_dir, '.specrc')) as f: data = '\n'.join(f.readlines()) - typical = configparser.ConfigParser() + typical = configparser.ConfigParser(comment_prefixes=None) typical.read_string(data) retval = typical['spec'] - assert 'file_extension' in retval - assert 'multiline_regex' in retval - assert 'number_subregex' in retval - assert 'text_subregex' in retval - return retval + + if 'inline_comment_prefix' in retval: + # If an `inline_comment_prefix` is set, then we're using the inline + # comment approach, which should obviate artisnal regexes. + retval['multiline_regex'] = r'spec:(.*?):end' + retval['number_subregex'] = r'(?P[\d.]+):' + retval['text_subregex'] = r'[\d.]+:(.*)' + else: + assert 'file_extension' in retval + assert 'multiline_regex' in retval + assert 'number_subregex' in retval + assert 'text_subregex' in retval + return cast(Config, retval) @@ -43,6 +59,67 @@ def get_spec(force_refresh=False, path_prefix="./"): f.write(data) return json.loads(data) +def specmap_from_file(actual_spec): + spec_map = {} + for entry in actual_spec['rules']: + number = re.search(r'[\d.]+', entry['id']).group() + if 'requirement' in entry['machine_id']: + spec_map[number] = _demarkdown(entry['content']) + + if len(entry['children']) > 0: + for ch in entry['children']: + number = re.search(r'[\d.]+', ch['id']).group() + if 'requirement' in ch['machine_id']: + spec_map[number] = _demarkdown(ch['content']) + return spec_map + +def find_covered_specs(config, data): + repo_specs = {} + for match in re.findall(config['multiline_regex'], data, re.MULTILINE | re.DOTALL): + match = match.replace('\n', '').replace(config['inline_comment_prefix'], '') + # normalize whitespace + match = re.sub(" {2,}", " ", match.strip()) + number = re.findall(config['number_subregex'], match)[0] + + text_with_concat_chars = re.findall(config['text_subregex'], match, re.MULTILINE | re.DOTALL) + try: + text = ''.join(text_with_concat_chars).strip() + # We have to match for ") to capture text with parens inside, so we add the trailing " back in. + text = _demarkdown(eval('"%s"' % text)) + entry = repo_specs[number] = { + 'number': number, + 'text': text, + } + except Exception as e: + print(f"Skipping {match} b/c we couldn't parse it") + return repo_specs + +def gen_report(from_spec, from_repo): + extra = set() + missing = set() + different_text = set() + good = set() + + missing = set(from_spec.keys()) # assume they're all missing + + for number, text in from_repo.items(): + if number in missing: + missing.remove(number) + if number not in from_spec: + extra.add(number) + continue + if text == from_spec[number]: + good.add(number) + else: + different_text.add(number) + + return { + 'extra': extra, + 'missing': missing, + 'different-text': different_text, + 'good': good, + } + def main(refresh_spec=False, diff_output=False, limit_numbers=None, code_directory=None, json_report=False): report = { @@ -55,20 +132,12 @@ def main(refresh_spec=False, diff_output=False, limit_numbers=None, code_directo actual_spec = get_spec(refresh_spec, path_prefix=code_directory) config = get_spec_parser(code_directory) - spec_map = {} - for entry in actual_spec['rules']: - number = re.search(r'[\d.]+', entry['id']).group() - if 'requirement' in entry['machine_id']: - spec_map[number] = _demarkdown(entry['content']) + spec_map = specmap_from_file(actual_spec) - if len(entry['children']) > 0: - for ch in entry['children']: - number = re.search(r'[\d.]+', ch['id']).group() - if 'requirement' in ch['machine_id']: - spec_map[number] = _demarkdown(ch['content']) repo_specs = {} missing = set(spec_map.keys()) + bad_num = 0 for root, dirs, files in os.walk(".", topdown=False): for name in files: @@ -78,51 +147,28 @@ def main(refresh_spec=False, diff_output=False, limit_numbers=None, code_directo with open(F) as f: data = ''.join(f.readlines()) - for match in re.findall(config['multiline_regex'], data, re.MULTILINE | re.DOTALL): - match = match.replace('\n', '') - number = re.findall(config['number_subregex'], match)[0] - - if number in missing: - missing.remove(number) - text_with_concat_chars = re.findall(config['text_subregex'], match, re.MULTILINE | re.DOTALL) - try: - text = ''.join(text_with_concat_chars).strip() - # We have to match for ") to capture text with parens inside, so we add the trailing " back in. - text = _demarkdown(eval('"%s"' % text)) - entry = repo_specs[number] = { - 'number': number, - 'text': text, - } - except Exception as e: - print(f"Skipping {match} b/c we couldn't parse it") - - bad_num = len(missing) - for number, entry in sorted(repo_specs.items(), key=lambda x: x[0]): - if limit_numbers is not None and len(limit_numbers) > 0 and number not in limit_numbers: - continue - if number in spec_map: - txt = entry['text'] - if txt == spec_map[number]: - report['good'].add(number) - continue - else: - print(f"{number} is bad.") - report['different-text'].add(number) - bad_num += 1 - if diff_output: - print("Official:") - print("\t%s" % spec_map[number]) - print("") - print("Ours:") - print("\t%s" % txt) - continue + repo_specs |= find_covered_specs(config, data) + + report = gen_report(from_spec=spec_map, from_repo=repo_specs) - report['extra'].add(number) + for number in report['different-text']: + bad_num += 1 + print(f"{number} is bad.") + if diff_output: + print("Official:") + print("\t%s" % spec_map[number]) + print("") + print("Ours:") + print("\t%s" % repo_specs[number]) + + bad_num += len(report['extra']) + for number in report['extra']: print(f"{number} is defined in our tests, but couldn't find it in the spec") - print("") + + missing = report['missing'] + bad_num += len(missing) if len(missing) > 0: - report['missing'] = missing print('In the spec, but not in our tests: ') for m in sorted(missing): print(f" {m}: {spec_map[m]}") @@ -131,7 +177,7 @@ def main(refresh_spec=False, diff_output=False, limit_numbers=None, code_directo for k in report.keys(): report[k] = sorted(list(report[k])) report_txt = json.dumps(report, indent=4) - loc = '/appdir/%s-report.json' % config['file_extension'] + loc = os.path.join(code_directory, '%s-report.json' % config['file_extension']) with open(loc, 'w') as f: f.write(report_txt) sys.exit(bad_num) diff --git a/tools/repo_parser/test_spec_finder.py b/tools/repo_parser/test_spec_finder.py new file mode 100644 index 00000000..23cde980 --- /dev/null +++ b/tools/repo_parser/test_spec_finder.py @@ -0,0 +1,59 @@ +import re +from spec_finder import find_covered_specs, gen_report + +def test_simple_singleline(): + text = """ + // spec:4.3.6:The after stage MUST run after flag resolution occurs. It accepts a hook context (required), flag evaluation details (required) and hook hints (optional). It has no return value.:end + """ + cfg = { + 'multiline_regex': r'spec:(.*):end', + 'number_subregex': r'(?P[\d.]+):', + 'text_subregex': r'[\d.]+:(.*)', + 'inline_comment_prefix': '//', + } + output = find_covered_specs(cfg, text) + assert '4.3.6' in output + assert output['4.3.6']['text'] == "The after stage MUST run after flag resolution occurs. It accepts a hook context (required), flag evaluation details (required) and hook hints (optional). It has no return value." + + +def test_multiline_comment(): + text = """ + // spec:4.3.7:The error hook MUST run when errors are encountered in the + // before stage, the after stage or during flag resolution. It accepts hook + // context (required), exception representing what went wrong (required), and + // hook hints (optional). It has no return value.:end + """ + cfg = { + 'multiline_regex': r'spec:(.*):end', + 'number_subregex': r'(?P[\d.]+):', + 'text_subregex': r'[\d.]+:(.*)', + 'inline_comment_prefix': '//', + } + output = find_covered_specs(cfg, text) + assert '4.3.7' in output + assert output['4.3.7']['text'] == """The error hook MUST run when errors are encountered in the before stage, the after stage or during flag resolution. It accepts hook context (required), exception representing what went wrong (required), and hook hints (optional). It has no return value.""" + + +def test_report(): + spec = { + '1.2.3': "good text", + '2.3.4': 'different text', + '3.4.5': 'missing' + } + + repo = { + '1.2.3': 'good text', + '2.3.4': 'it is different', + '4.5.6': 'extra' + } + + report = gen_report(spec, repo) + assert len(report['good']) == 1 + assert len(report['different-text']) == 1 + assert len(report['missing']) == 1 + assert len(report['extra']) == 1 + + assert report['good'] == set(['1.2.3']) + assert report['different-text'] == set(['2.3.4']) + assert report['missing'] == set(['3.4.5']) + assert report['extra'] == set(['4.5.6'])