Skip to content

Commit

Permalink
Support for inline comments to validate spec
Browse files Browse the repository at this point in the history
Signed-off-by: Justin Abrahms <[email protected]>
  • Loading branch information
justinabrahms committed May 26, 2024
1 parent c0739a1 commit b3e85a8
Show file tree
Hide file tree
Showing 3 changed files with 167 additions and 59 deletions.
3 changes: 3 additions & 0 deletions tools/repo_parser/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
mypy
pytest
pytest-cov
164 changes: 105 additions & 59 deletions tools/repo_parser/spec_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,38 @@
import os
import sys
import configparser
from typing import TypedDict, Optional, cast

class Config(TypedDict):
    """Typed view of the ``[spec]`` section loaded from a ``.specrc`` file."""

    # Used when building the name of the JSON report file.
    file_extension: str
    # Pattern run over a whole file; each match is one raw spec annotation.
    multiline_regex: Optional[str]
    # Pattern that pulls the spec number out of a raw annotation.
    number_subregex: Optional[str]
    # Pattern that pulls the spec text out of a raw annotation.
    text_subregex: Optional[str]
    # Comment marker stripped from annotations; when configured, the three
    # patterns above are replaced by built-in ones.
    inline_comment_prefix: Optional[str]

def _demarkdown(t):
return t.replace('**', '').replace('`', '').replace('"', '')

def get_spec_parser(code_dir) -> Config:
    """Load the ``[spec]`` section of ``<code_dir>/.specrc``.

    Two configuration styles are accepted:

    * inline-comment style: ``inline_comment_prefix`` is present, and the
      three regex settings are overwritten with the built-in
      ``spec:<number>:<text>:end`` patterns;
    * regex style: ``multiline_regex``, ``number_subregex`` and
      ``text_subregex`` must all be supplied by the file.

    ``file_extension`` is required in both styles.

    Args:
        code_dir: Directory that contains the ``.specrc`` file.

    Returns:
        The ``[spec]`` section, typed as ``Config``.

    Raises:
        OSError: If the ``.specrc`` file cannot be opened.
        KeyError: If the file has no ``[spec]`` section.
        AssertionError: If a required setting is missing.
    """
    with open(os.path.join(code_dir, '.specrc')) as f:
        # read() keeps the file's own line breaks; joining readlines() with
        # '\n' would double every newline.
        data = f.read()

    # comment_prefixes=None switches off the parser's full-line comment handling.
    typical = configparser.ConfigParser(comment_prefixes=None)
    typical.read_string(data)
    retval = typical['spec']

    if 'inline_comment_prefix' in retval:
        # If an `inline_comment_prefix` is set, then we're using the inline
        # comment approach, which should obviate artisanal regexes.
        retval['multiline_regex'] = r'spec:(.*?):end'
        retval['number_subregex'] = r'(?P<number>[\d.]+):'
        retval['text_subregex'] = r'[\d.]+:(.*)'

    # The inline branch has just filled in the regex settings, so one check
    # covers both styles and also catches a missing file_extension.
    for required in ('file_extension', 'multiline_regex', 'number_subregex', 'text_subregex'):
        assert required in retval, f"'{required}' is missing from the [spec] section of .specrc"
    return cast(Config, retval)



Expand All @@ -43,6 +59,67 @@ def get_spec(force_refresh=False, path_prefix="./"):
f.write(data)
return json.loads(data)

def specmap_from_file(actual_spec):
    """Map requirement numbers to their cleaned-up text.

    Walks every entry of ``actual_spec['rules']`` and each entry's
    ``children``. Nodes whose ``machine_id`` contains ``'requirement'`` are
    recorded under the first run of digits and dots found in their ``id``,
    with the ``content`` passed through ``_demarkdown``.
    """
    requirements = {}

    def record(node):
        # The number is extracted before the requirement check, so a node
        # without any digits in its id fails even if it is not a requirement.
        spec_number = re.search(r'[\d.]+', node['id']).group()
        if 'requirement' in node['machine_id']:
            requirements[spec_number] = _demarkdown(node['content'])

    for rule in actual_spec['rules']:
        record(rule)
        for child in rule['children']:
            record(child)
    return requirements

def find_covered_specs(config, data):
    """Collect the spec annotations present in one file's text.

    Args:
        config: Mapping that provides ``multiline_regex``, ``number_subregex``
            and ``text_subregex``. ``inline_comment_prefix`` is optional;
            when present it is stripped from every match.
        data: Full text of the file to scan.

    Returns:
        A dict keyed by spec number whose values are
        ``{'number': <number>, 'text': <text>}``. Annotations that cannot be
        parsed are reported on stdout and left out.
    """
    # Regex-style configs carry no prefix, so look it up without assuming it exists.
    comment_prefix = config.get('inline_comment_prefix')

    repo_specs = {}
    for match in re.findall(config['multiline_regex'], data, re.MULTILINE | re.DOTALL):
        match = match.replace('\n', '')
        if comment_prefix:
            match = match.replace(comment_prefix, '')
        # normalize whitespace
        match = re.sub(" {2,}", " ", match.strip())

        numbers = re.findall(config['number_subregex'], match)
        if not numbers:
            # Without a number the annotation cannot be keyed; skip it like
            # any other unparseable annotation.
            print(f"Skipping {match} b/c we couldn't parse it")
            continue
        number = numbers[0]

        text_with_concat_chars = re.findall(config['text_subregex'], match, re.MULTILINE | re.DOTALL)
        try:
            text = ''.join(text_with_concat_chars).strip()
            # We have to match for ") to capture text with parens inside, so we add the trailing " back in.
            # NOTE(security): eval() runs whatever the scanned file places
            # between the quotes; only use this on trusted repositories.
            text = _demarkdown(eval('"%s"' % text))
        except Exception:
            print(f"Skipping {match} b/c we couldn't parse it")
            continue

        repo_specs[number] = {
            'number': number,
            'text': text,
        }
    return repo_specs

def gen_report(from_spec, from_repo):
    """Classify spec numbers by comparing the official spec with the repo.

    Args:
        from_spec: Dict mapping spec number to the official text.
        from_repo: Dict mapping spec number to what the repo declares. Each
            value may be the text itself or an entry dict with a ``'text'``
            key (the shape returned by ``find_covered_specs``).

    Returns:
        A dict of four sets of spec numbers:
        ``'extra'`` (only in the repo), ``'missing'`` (only in the spec),
        ``'different-text'`` (in both, texts differ) and ``'good'`` (in both,
        texts equal).
    """
    spec_numbers = set(from_spec)
    repo_numbers = set(from_repo)

    good = set()
    different_text = set()
    for number in spec_numbers & repo_numbers:
        entry = from_repo[number]
        # Compare the text, not the whole entry dict; a dict never equals
        # the spec string and would always be reported as different.
        text = entry['text'] if isinstance(entry, dict) else entry
        if text == from_spec[number]:
            good.add(number)
        else:
            different_text.add(number)

    return {
        'extra': repo_numbers - spec_numbers,
        'missing': spec_numbers - repo_numbers,
        'different-text': different_text,
        'good': good,
    }


def main(refresh_spec=False, diff_output=False, limit_numbers=None, code_directory=None, json_report=False):
report = {
Expand All @@ -55,20 +132,12 @@ def main(refresh_spec=False, diff_output=False, limit_numbers=None, code_directo
actual_spec = get_spec(refresh_spec, path_prefix=code_directory)
config = get_spec_parser(code_directory)

spec_map = {}
for entry in actual_spec['rules']:
number = re.search(r'[\d.]+', entry['id']).group()
if 'requirement' in entry['machine_id']:
spec_map[number] = _demarkdown(entry['content'])
spec_map = specmap_from_file(actual_spec)

if len(entry['children']) > 0:
for ch in entry['children']:
number = re.search(r'[\d.]+', ch['id']).group()
if 'requirement' in ch['machine_id']:
spec_map[number] = _demarkdown(ch['content'])

repo_specs = {}
missing = set(spec_map.keys())
bad_num = 0

for root, dirs, files in os.walk(".", topdown=False):
for name in files:
Expand All @@ -78,51 +147,28 @@ def main(refresh_spec=False, diff_output=False, limit_numbers=None, code_directo
with open(F) as f:
data = ''.join(f.readlines())

for match in re.findall(config['multiline_regex'], data, re.MULTILINE | re.DOTALL):
match = match.replace('\n', '')
number = re.findall(config['number_subregex'], match)[0]

if number in missing:
missing.remove(number)
text_with_concat_chars = re.findall(config['text_subregex'], match, re.MULTILINE | re.DOTALL)
try:
text = ''.join(text_with_concat_chars).strip()
# We have to match for ") to capture text with parens inside, so we add the trailing " back in.
text = _demarkdown(eval('"%s"' % text))
entry = repo_specs[number] = {
'number': number,
'text': text,
}
except Exception as e:
print(f"Skipping {match} b/c we couldn't parse it")

bad_num = len(missing)
for number, entry in sorted(repo_specs.items(), key=lambda x: x[0]):
if limit_numbers is not None and len(limit_numbers) > 0 and number not in limit_numbers:
continue
if number in spec_map:
txt = entry['text']
if txt == spec_map[number]:
report['good'].add(number)
continue
else:
print(f"{number} is bad.")
report['different-text'].add(number)
bad_num += 1
if diff_output:
print("Official:")
print("\t%s" % spec_map[number])
print("")
print("Ours:")
print("\t%s" % txt)
continue
repo_specs |= find_covered_specs(config, data)

report = gen_report(from_spec=spec_map, from_repo=repo_specs)

report['extra'].add(number)
for number in report['different-text']:
bad_num += 1
print(f"{number} is bad.")
if diff_output:
print("Official:")
print("\t%s" % spec_map[number])
print("")
print("Ours:")
print("\t%s" % repo_specs[number])

bad_num += len(report['extra'])
for number in report['extra']:
print(f"{number} is defined in our tests, but couldn't find it in the spec")
print("")


missing = report['missing']
bad_num += len(missing)
if len(missing) > 0:
report['missing'] = missing
print('In the spec, but not in our tests: ')
for m in sorted(missing):
print(f" {m}: {spec_map[m]}")
Expand All @@ -131,7 +177,7 @@ def main(refresh_spec=False, diff_output=False, limit_numbers=None, code_directo
for k in report.keys():
report[k] = sorted(list(report[k]))
report_txt = json.dumps(report, indent=4)
loc = '/appdir/%s-report.json' % config['file_extension']
loc = os.path.join(code_directory, '%s-report.json' % config['file_extension'])
with open(loc, 'w') as f:
f.write(report_txt)
sys.exit(bad_num)
Expand Down
59 changes: 59 additions & 0 deletions tools/repo_parser/test_spec_finder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import re
from spec_finder import find_covered_specs, gen_report

def test_simple_singleline():
    """A one-line ``// spec:...:end`` comment yields its number and text."""
    expected_text = "The after stage MUST run after flag resolution occurs. It accepts a hook context (required), flag evaluation details (required) and hook hints (optional). It has no return value."
    source = """
// spec:4.3.6:The after stage MUST run after flag resolution occurs. It accepts a hook context (required), flag evaluation details (required) and hook hints (optional). It has no return value.:end
"""
    parser_config = dict(
        multiline_regex=r'spec:(.*):end',
        number_subregex=r'(?P<number>[\d.]+):',
        text_subregex=r'[\d.]+:(.*)',
        inline_comment_prefix='//',
    )

    found = find_covered_specs(parser_config, source)

    assert '4.3.6' in found
    assert found['4.3.6']['text'] == expected_text


def test_multiline_comment():
    """An annotation spread over several ``//`` lines is joined into one text."""
    expected_text = """The error hook MUST run when errors are encountered in the before stage, the after stage or during flag resolution. It accepts hook context (required), exception representing what went wrong (required), and hook hints (optional). It has no return value."""
    source = """
// spec:4.3.7:The error hook MUST run when errors are encountered in the
// before stage, the after stage or during flag resolution. It accepts hook
// context (required), exception representing what went wrong (required), and
// hook hints (optional). It has no return value.:end
"""
    parser_config = dict(
        multiline_regex=r'spec:(.*):end',
        number_subregex=r'(?P<number>[\d.]+):',
        text_subregex=r'[\d.]+:(.*)',
        inline_comment_prefix='//',
    )

    found = find_covered_specs(parser_config, source)

    assert '4.3.7' in found
    assert found['4.3.7']['text'] == expected_text


def test_report():
    """Each of the four report buckets receives exactly the expected number."""
    official = dict([
        ('1.2.3', "good text"),
        ('2.3.4', 'different text'),
        ('3.4.5', 'missing'),
    ])
    declared = dict([
        ('1.2.3', 'good text'),
        ('2.3.4', 'it is different'),
        ('4.5.6', 'extra'),
    ])

    outcome = gen_report(official, declared)

    # Bucket sizes first, then the exact members.
    for bucket in ('good', 'different-text', 'missing', 'extra'):
        assert len(outcome[bucket]) == 1

    assert outcome['good'] == {'1.2.3'}
    assert outcome['different-text'] == {'2.3.4'}
    assert outcome['missing'] == {'3.4.5'}
    assert outcome['extra'] == {'4.5.6'}

0 comments on commit b3e85a8

Please sign in to comment.