From 602435e710990796bfa6b8f7330f2d776957c572 Mon Sep 17 00:00:00 2001 From: %magics Date: Sat, 16 Nov 2024 20:48:30 +0000 Subject: [PATCH] clean up prompt tests format --- .../core/benchmarks/runbugrun/runbugrun.py | 17 ++++++++++------- .../sample/strategies/instruct_python.py | 5 ++++- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/elleelleaime/core/benchmarks/runbugrun/runbugrun.py b/elleelleaime/core/benchmarks/runbugrun/runbugrun.py index 33417cde..c4c8080b 100644 --- a/elleelleaime/core/benchmarks/runbugrun/runbugrun.py +++ b/elleelleaime/core/benchmarks/runbugrun/runbugrun.py @@ -86,12 +86,15 @@ def get_failing_tests(self, buggy_file, errors, test_rows): with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor: futures = [] futures_to_tests = {} - + for test_id, (test_input, test_output) in test_rows.iterrows(): + test_input = test_input.strip() + test_output = test_output.strip() + if isinstance(errors, list): result = errors[0]['exception'] + '\n' + errors[0]['output'] - cause = f"""Function with input {test_input.replace('"', "'")} failed with error: {result}""" - failing_tests[f"""{test_input} -> {test_output}"""] = cause + cause = f"""Function with input: \n{test_input} \nexpected to output: \n{test_output} \nfailed with error: \n{result.strip()}""" + failing_tests[f"""test_{test_id}"""] = cause else: # if there isn't a runtime exception, need to execute to get the cause of test failure return failing_tests # TODO: checkout first? @@ -102,11 +105,11 @@ def get_failing_tests(self, buggy_file, errors, test_rows): returncode, result = future.result() test_input, test_output = futures_to_tests[future] if returncode: - cause = f"""Function with input {test_input.replace('"', "'")} failed with error: {result}""" - failing_tests[f"""{test_input} -> {test_output}"""] = cause + cause = f"""Function with input: \n{test_input} \nexpected to output: \n{test_output} \nfailed with error: \n{result.strip()}""" + failing_tests[f"""test_{test_id}"""] = cause elif result != test_output.strip(): - cause = f"""Expected function with input {test_input.replace('"', "'")} to output {test_output.replace('"', "'").replace("'", r"\'")} but got {result}""" - failing_tests[f"""{test_input} -> {test_output}"""] = cause + cause = f"""Function with input: \n{test_input} \nexpected to output: \n{test_output} \nbut got: \n{result}""" + failing_tests[f"""test_{test_id}"""] = cause else: continue diff --git a/elleelleaime/sample/strategies/instruct_python.py b/elleelleaime/sample/strategies/instruct_python.py index e801ccfb..6ae7b6df 100644 --- a/elleelleaime/sample/strategies/instruct_python.py +++ b/elleelleaime/sample/strategies/instruct_python.py @@ -1,5 +1,6 @@ from typing import Optional, Tuple from unidiff import PatchSet +import re from elleelleaime.sample.strategy import PromptingStrategy from elleelleaime.core.benchmarks.bug import RichBug @@ -38,9 +39,11 @@ def instruct( failing_tests_string = "" for test_case, cause in failing_test_causes.items(): + expected = re.search('expected to output: \n(.*)\n(?:failed|but got)', cause) + expected = f"\"{expected.group(1)}\"" if expected else 'N/A' failing_tests_string += f"""Test `{test_case}`: ```python -assert result == {test_case.split(' -> ')[-1]} +assert result == {expected} ``` Test `{test_case}` error: ```