Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Show stdout on failure #45

Merged
merged 2 commits into from
Jul 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 49 additions & 4 deletions test_framework/basic.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Basic tests for Parts I & II"""
from __future__ import annotations

import difflib
import json
import platform
import subprocess
Expand Down Expand Up @@ -144,6 +145,49 @@ def replace_stem(path: Path, new_stem: str) -> Path:
return path.with_name(new_stem).with_suffix(path.suffix)


def build_error_message(
expected_retcode: int,
expected_stdout: str,
actual: subprocess.CompletedProcess[str],
exe_name: str,
) -> str:
"""Build the error message for when a compiled test program behaves incorrectly
Called when a unittest assert* message fails
Args:
expected_retcode: expected return code from EXPECTED_RESULTS
expected_stdout: expected stdout from EXPECTED_RESULTS (often empty)
actual: result from calling subprocess.run() on compiled test program
exe_name: full path to compiled test program
Returns:
an error message
"""

msg_lines = [f"Incorrect behavior in {exe_name}"]

# report on incorrect return code
if expected_retcode != actual.returncode:
msg_lines += [
f"* Bad return code: expected {expected_retcode} and got {actual.returncode}"
]

# report on incorrect stdout
if actual.stdout != expected_stdout:
msg_lines.append(
f"* Bad stdout: expected {repr(expected_stdout)} and got {repr(actual.stdout)}"
)
diff = list(
difflib.ndiff(expected_stdout.splitlines(), actual.stdout.splitlines())
)
msg_lines.extend(diff)

# report on incorrect stderr (note: we always expect stderr to be empty)
if actual.stderr:
msg_lines.append("* Expected no output to stderr, but found:\n")
msg_lines.extend(actual.stderr)

return "\n".join(msg_lines)


class TestChapter(unittest.TestCase):
"""Base per-chapter test class - should be subclassed, not instantiated directly.

Expand Down Expand Up @@ -269,17 +313,18 @@ def validate_runs(
self.assertEqual(
expected_retcode,
actual.returncode,
msg=f"Expected return code {expected_retcode}, found {actual.returncode} in {exe}",
msg=build_error_message(expected_retcode, expected_stdout, actual, exe),
)
self.assertEqual(
expected_stdout,
actual.stdout,
msg=f"Expected output {expected_stdout}, found {actual.stdout} in {exe}",
msg=build_error_message(expected_retcode, expected_stdout, actual, exe),
)

# none of our test programs write to stderr
self.assertFalse(
actual.stderr, msg=f"Unexpected error output {actual.stderr} in {exe}"
actual.stderr,
msg=build_error_message(expected_retcode, expected_stdout, actual, exe),
)

def compile_failure(self, source_file: Path) -> None:
Expand Down Expand Up @@ -437,7 +482,7 @@ class TestDirs:
INVALID_LABELS = "invalid_labels"
INVALID_STRUCT_TAGS = "invalid_struct_tags"
# valid test programs for parts I & II
# (we'll handle part III test sdifferently)
# (we'll handle part III tests differently)
VALID = "valid"


Expand Down
8 changes: 4 additions & 4 deletions test_framework/regalloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,8 @@ def spill_test(
len(spill_instructions),
max_spilled_instructions,
msg=common.build_msg(
f"Should only need {max_spilled_instructions} instructions \
involving spilled pseudo but found {len(spill_instructions)}",
f"Should only need {max_spilled_instructions} instructions "
"involving spilled pseudo but found {len(spill_instructions)}",
bad_instructions=spill_instructions,
full_prog=parsed_asm,
program_path=program_path,
Expand All @@ -203,8 +203,8 @@ def spill_test(
len(spilled_operands),
max_spilled_pseudos,
msg=common.build_msg(
f"At most {max_spilled_pseudos} pseudoregs should have been spilled, \
looks like {len(spilled_operands)} were",
f"At most {max_spilled_pseudos} pseudoregs should have been spilled, "
"looks like {len(spilled_operands)} were",
bad_instructions=spill_instructions,
full_prog=parsed_asm,
program_path=program_path,
Expand Down
6 changes: 5 additions & 1 deletion test_framework/tacky/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ def build_msg(
bad_instructions: Optional[Sequence[asm.AsmItem]] = None,
full_prog: Optional[asm.AssemblyFunction] = None,
program_path: Optional[Path] = None,
max_prog_disp_length: int = 20,
) -> str:
"""Utility function for validators to report invalid assembly code"""
msg_lines = [msg]
Expand All @@ -167,7 +168,10 @@ def build_msg(
msg_lines.append("Bad instructions:")
msg_lines.extend(printed_instructions)
if full_prog:
msg_lines.extend(["Complete program:", str(full_prog)])
if len(full_prog.instructions) > max_prog_disp_length:
msg_lines.append("Complete assembly function: <too long, not shown>")
else:
msg_lines.extend(["Complete assembly function:", str(full_prog)])
if program_path:
msg_lines.append(f"Program: {program_path}")
return "\n".join(msg_lines)
Expand Down
4 changes: 2 additions & 2 deletions test_framework/tacky/copy_prop.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,8 +275,8 @@ def same_arg_test(self, program: Path) -> None:
)
self.assertTrue(
same_value,
msg=f"Bad arguments {actual_args[0]} and {actual_args[1]} to callee: \
both args should have same value",
msg=f"Bad arguments {actual_args[0]} and {actual_args[1]} to callee: "
"both args should have same value",
)

def redundant_copies_test(self, program: Path) -> None:
Expand Down
94 changes: 90 additions & 4 deletions test_framework/test_tests/test_toplevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from __future__ import annotations

import os
import re
import shutil
import subprocess
Expand All @@ -24,9 +25,14 @@
TEST_DIR,
excluded_extra_credit,
ExtraCredit,
build_test_class,
)
from ..tacky.dead_store_elim import STORE_ELIMINATED

NQCC = os.getenv("NQCC")
assert NQCC # make sure this environment variable is actually set
NQCC_PATH: Path = Path(NQCC)

TEST_PATTERN = re.compile("^Ran ([0-9]+) tests", flags=re.MULTILINE)
FAILURE_PATTERN = re.compile("failures=([0-9]+)")

Expand Down Expand Up @@ -212,15 +218,15 @@ def test_expected_error_code(self) -> None:
# NQCC throws error code 125 in all cases
# This should succeed b/c it specifies the error code we'll actually throw
try:
testrun = run_test_script(
run_test_script(
"./test_compiler $NQCC --chapter 1 --expected-error-codes 125"
)
except subprocess.CalledProcessError as err:
self.fail(f"Test command failed with message {err.stderr}")

# Specify multiple error codes including the one we'll actually throw; this should also succeed
try:
testrun = run_test_script(
run_test_script(
"./test_compiler $NQCC --chapter 1 --expected-error-codes 125 127"
)
except subprocess.CalledProcessError as err:
Expand All @@ -243,6 +249,8 @@ class BadSourceTest(unittest.TestCase):
ret2 = TEST_DIR / "chapter_1/valid/return_2.c"
ret0 = TEST_DIR / "chapter_1/valid/return_0.c"
hello_world = TEST_DIR / "chapter_9/valid/arguments_in_registers/hello_world.c"
# like hello_world, this writes to stdout
static_recursive_call = TEST_DIR / "chapter_10/valid/static_recursive_call.c"
dse_relative = Path("chapter_19/dead_store_elimination/int_only/fig_19_11.c")
dse = TEST_DIR / dse_relative

Expand All @@ -254,9 +262,14 @@ def setUpClass(cls) -> None:
# save these to a temporary directory before overwriting them
cls.tmpdir = tempfile.TemporaryDirectory()
shutil.copy(cls.hello_world, cls.tmpdir.name)
shutil.copy(cls.static_recursive_call, cls.tmpdir.name)
shutil.copy(cls.ret0, cls.tmpdir.name)
shutil.copy(cls.dse, cls.tmpdir.name)

# overwrite static_recursive_call with another file that has
# a different retcode and different stdout
shutil.copy(cls.ret2, cls.static_recursive_call)

# overwrite hello_world with another file that has same retcode but different stdout
shutil.copy(cls.ret0, cls.hello_world)

Expand Down Expand Up @@ -292,18 +305,42 @@ def tearDownClass(cls) -> None:
tmp_path = Path(cls.tmpdir.name)
tmp_ret0 = tmp_path / cls.ret0.name
tmp_helloworld = tmp_path / cls.hello_world.name
tmp_static_recursive_call = tmp_path / cls.static_recursive_call.name
tmp_dse = tmp_path / cls.dse.name

shutil.copy(tmp_ret0, cls.ret0)
shutil.copy(tmp_helloworld, cls.hello_world)
shutil.copy(tmp_dse, cls.dse)
shutil.copy(tmp_static_recursive_call, cls.static_recursive_call)
cls.tmpdir.cleanup()

# remove intermediate files produced by --keep-asm-on-failure
for f in (TEST_DIR / f"chapter_1").rglob("*.s"):
if f.name not in ASSEMBLY_LIBS:
f.unlink(missing_ok=True)

@classmethod
def run_chapter_tests(cls, chapter: int) -> unittest.TestResult:
"""Utility function to run test suite for a given chapter and return TestResult.
Use this instead of run_test_script so we can inspect individual test results.
"""
test_class = build_test_class(
NQCC_PATH,
chapter,
options=[],
stage="run",
extra_credit_flags=ExtraCredit.NONE,
skip_invalid=True,
error_codes=[],
)
test_suite = unittest.defaultTestLoader.loadTestsFromTestCase(test_class)

# test failure is expected and shouldn't be displayed to stdout,
# so direct test output to /dev/null
with open(os.devnull, "w") as devnull:
result = unittest.TextTestRunner(stream=devnull).run(test_suite)
return result

def assert_no_intermediate_files(self, chapter: int) -> None:
# Executables, *.i files, etc should have been cleaned up
intermediate_files = [
Expand Down Expand Up @@ -378,7 +415,7 @@ def test_intermediate(self) -> None:

def test_keep_asm(self) -> None:
"""Use --keep-asm-on-failure option to generate assembly for failures"""
with self.assertRaises(subprocess.CalledProcessError) as cpe:
with self.assertRaises(subprocess.CalledProcessError):
run_test_script("./test_compiler $NQCC --chapter 1")
# make sure we preserved .s file for ret0, which should fail
expected_asm = self.__class__.ret0.with_suffix(".s")
Expand All @@ -389,11 +426,60 @@ def test_keep_asm(self) -> None:

def test_keep_asm_optimize(self) -> None:
"""Make sure --keep-asm-on-failure works for chapter 19 tests"""
with self.assertRaises(subprocess.CalledProcessError) as cpe:
with self.assertRaises(subprocess.CalledProcessError):
run_test_script("./test_compiler $NQCC --chapter 19")
# make sure we preserved .s file for ret0, which should fail
expected_asm = self.__class__.dse.with_suffix(".s")
self.assertTrue(
expected_asm.exists,
msg=f"{expected_asm} should be preserved on failure but wasn't found",
)

def test_bad_retcode_output(self) -> None:
"""If return code is incorrect, only report that, not stdout or stderr."""

chapter1_results = self.run_chapter_tests(chapter=1)

# Expected failure in return_0.c, which we replaced with return_2.c
self.assertEqual(len(chapter1_results.failures), 1)
# chapter1_results.failures is a list of (TestCase instance, error message) tuples
err_output: str = chapter1_results.failures[0][1]
expected_err = (
"/tests/chapter_1/valid/return_0"
"\n* Bad return code: expected 0 and got 2\n"
)
err_output_end = err_output[-len(expected_err) :]
self.assertEqual(err_output_end, expected_err)

def test_bad_stdout_output(self) -> None:
"""If only stdout is incorrect, report that but not return code."""
chapter9_results = self.run_chapter_tests(chapter=9)

# Expected failure in hello_world.c, which we replaced with return_0.c
self.assertEqual(len(chapter9_results.failures), 1)
# chapter9_results.failures is a list of (TestCase instance, error message) tuples
err_output: str = chapter9_results.failures[0][1]
expected_err = (
"/tests/chapter_9/valid/arguments_in_registers/hello_world"
"\n* Bad stdout: expected 'Hello, World!\\n' and got ''"
"\n- Hello, World!\n"
)
err_output_end = err_output[-len(expected_err) :]
self.assertEqual(err_output_end, expected_err)

def test_bad_stdout_and_retcode(self) -> None:
"""If stdout and return code are both incorrect, report both of them."""
chapter10_results = self.run_chapter_tests(chapter=10)

# Expected failure in static_recursive_call.c, which we replaced with return_2.c
self.assertEqual(len(chapter10_results.failures), 1)
# chapter10_results.failures is a list of (TestCase instance, error message) tuples
err_output: str = chapter10_results.failures[0][1]
expected_err = (
"/tests/chapter_10/valid/static_recursive_call"
"\n* Bad return code: expected 0 and got 2"
"\n* Bad stdout: expected 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' and got ''"
"\n- ABCDEFGHIJKLMNOPQRSTUVWXYZ\n"
)
err_output_end = err_output[-len(expected_err) :]
self.assertEqual(err_output_end, expected_err)
Loading