Skip to content

Commit

Permalink
clean up prompt tests format
Browse files (browse the repository at this point in the history)
  • Loading branch information
cadddr committed Nov 16, 2024
1 parent 57b2c05 commit 602435e
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 8 deletions.
17 changes: 10 additions & 7 deletions elleelleaime/core/benchmarks/runbugrun/runbugrun.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,15 @@ def get_failing_tests(self, buggy_file, errors, test_rows):
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
futures = []
futures_to_tests = {}

for test_id, (test_input, test_output) in test_rows.iterrows():
test_input = test_input.strip()
test_output = test_output.strip()

if isinstance(errors, list):
result = errors[0]['exception'] + '\n' + errors[0]['output']
cause = f"""Function with input {test_input.replace('"', "'")} failed with error: {result}"""
failing_tests[f"""{test_input} -> {test_output}"""] = cause
cause = f"""Function with input: \n{test_input} \nexpected to output: \n{test_output} \nfailed with error: \n{result.strip()}"""
failing_tests[f"""test_{test_id}"""] = cause
else: # if there isn't a runtime exception, need to execute to get the cause of test failure
return failing_tests
# TODO: checkout first?
Expand All @@ -102,11 +105,11 @@ def get_failing_tests(self, buggy_file, errors, test_rows):
returncode, result = future.result()
test_input, test_output = futures_to_tests[future]
if returncode:
cause = f"""Function with input {test_input.replace('"', "'")} failed with error: {result}"""
failing_tests[f"""{test_input} -> {test_output}"""] = cause
cause = f"""Function with input: \n{test_input} \nexpected to output: \n{test_output} \nfailed with error: \n{result.strip()}"""
failing_tests[f"""test_{test_id}"""] = cause
elif result != test_output.strip():
cause = f"""Expected function with input {test_input.replace('"', "'")} to output {test_output.replace('"', "'").replace("'", r"\'")} but got {result}"""
failing_tests[f"""{test_input} -> {test_output}"""] = cause
cause = f"""Function with input: \n{test_input} \nexpected to output: \n{test_output} \nbut got: \n{result}"""
failing_tests[f"""test_{test_id}"""] = cause
else:
continue

Expand Down
5 changes: 4 additions & 1 deletion elleelleaime/sample/strategies/instruct_python.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Optional, Tuple
from unidiff import PatchSet
import re

from elleelleaime.sample.strategy import PromptingStrategy
from elleelleaime.core.benchmarks.bug import RichBug
Expand Down Expand Up @@ -38,9 +39,11 @@ def instruct(

failing_tests_string = ""
for test_case, cause in failing_test_causes.items():
expected = re.search('expected to output: \n(.*)\n(?:failed|but got)', cause)
expected = f"\"{expected.group(1)}\"" if expected else 'N/A'
failing_tests_string += f"""Test `{test_case}`:
```python
assert result == {test_case.split(' -> ')[-1]}
assert result == {expected}
```
Test `{test_case}` error:
```
Expand Down

0 comments on commit 602435e

Please sign in to comment.