forked from princeton-nlp/SWE-agent
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_replay.py
142 lines (122 loc) · 4.85 KB
/
run_replay.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import json
import os
import subprocess
import yaml
from argparse import ArgumentParser
def process_synthetic_trajs(action_trajs_path: str, config_file: str, suffix: str):
    """Replay a JSONL file of synthetic action trajectories through ``run.py``.

    Every non-blank line of ``action_trajs_path`` must be a JSON object with a
    ``"task_instance"`` entry (itself containing ``"instance_id"``) and an
    ``"actions"`` entry. Two scratch files are created in the current working
    directory, handed to ``run.py`` with ``--model_name replay``, and removed
    again afterwards:

    * a task-instance file named after the input file's base name, and
    * ``temp_actions.jsonl``, one ``{instance_id: actions}`` object per line.

    Args:
        action_trajs_path: Path to the JSONL file of trajectories to replay.
        config_file: Value forwarded to ``run.py`` as ``--config_file``.
        suffix: Value forwarded as ``--suffix``; omitted when ``None``.

    Raises:
        ValueError: If a scratch file would be the input file itself (for
            example when the input lives in the working directory). Writing
            the scratch file would overwrite the input and the cleanup step
            would then delete it.
    """
    # Load action trajectories, task instances. Blank lines (e.g. a trailing
    # newline at the end of the file) are skipped instead of failing to parse.
    with open(action_trajs_path, "r") as f:
        action_trajs = [json.loads(line) for line in f if line.strip()]
    task_instances = [x["task_instance"] for x in action_trajs]

    # Temporary file names. The task-instance file keeps the input's base name.
    # NOTE(review): presumably run.py derives its output naming from the
    # data_path file name - confirm before changing this.
    replay_action_trajs_path = "temp_actions.jsonl"
    replay_task_instances_path = os.path.basename(action_trajs_path)
    scratch_paths = (replay_action_trajs_path, replay_task_instances_path)

    # Never clobber (and later delete) the caller's own input file.
    for scratch in scratch_paths:
        if os.path.exists(scratch) and os.path.samefile(scratch, action_trajs_path):
            raise ValueError(
                f"Refusing to overwrite input file {action_trajs_path!r} with "
                f"scratch file {scratch!r}; run from a different working "
                "directory or move the input file."
            )

    try:
        # Write task_instances to file for data_path
        with open(replay_task_instances_path, "w") as f:
            for t in task_instances:
                f.write(json.dumps(t) + "\n")

        # Write action trajectories to a file
        with open(replay_action_trajs_path, "w") as f:
            for t in action_trajs:
                f.write(
                    json.dumps({t["task_instance"]["instance_id"]: t["actions"]})
                    + "\n"
                )

        # Call run.py via subprocess
        command = [
            "python",
            "run.py",
            "--config_file", config_file,
            "--data_path", replay_task_instances_path,
            "--install_environment", "True",
            "--model_name", "replay",
            "--replay_path", replay_action_trajs_path,
        ]
        if suffix is not None:
            command.extend(["--suffix", suffix])
        subprocess.run(command)
    finally:
        # Remove the scratch files even when writing or the replay run fails.
        for scratch in scratch_paths:
            if os.path.exists(scratch):
                os.remove(scratch)
def process_single_traj(traj_path: str, config_file: str, data_path: str, suffix: str):
    """Replay one recorded trajectory file through ``run.py``.

    The assistant messages of the trajectory's history become the actions to
    replay. The instance id is the trajectory's file name up to its first
    ``"."``. The matching task instance(s) are looked up in ``data_path`` and
    written, together with the actions, to two scratch files in the current
    working directory (``<instance_id>.jsonl`` and ``temp_replay.jsonl``).
    These are passed to ``run.py`` with ``--model_name replay`` and removed
    afterwards.

    Args:
        traj_path: Trajectory file. A path ending in ``.yaml`` is parsed as
            YAML and used directly as the history; anything else is parsed as
            JSON and must contain a ``"history"`` entry.
        config_file: Value forwarded to ``run.py`` as ``--config_file``.
        data_path: ``.json`` or ``.jsonl`` file of task instances. When
            ``None`` it is read from ``environment.data_path`` in the
            ``args.yaml`` located next to ``traj_path``.
        suffix: Value forwarded as ``--suffix``; omitted when ``None``.

    Raises:
        ValueError: If ``data_path`` is neither ``.json`` nor ``.jsonl``, if no
            task instance in it matches the instance id, or if a scratch file
            would overwrite one of the input files.
    """
    replay_action_trajs_path = "temp_replay.jsonl"

    # Open trajectory file, extract responses as actions
    if traj_path.endswith(".yaml"):
        with open(traj_path, "r") as f:
            traj_data = {"history": yaml.safe_load(f)}
    else:
        with open(traj_path, "r") as f:
            traj_data = json.load(f)
    actions = [x["content"] for x in traj_data["history"] if x["role"] == "assistant"]
    instance_id = traj_path.split("/")[-1].split(".")[0]
    replay_task_instances_path = instance_id + ".jsonl"
    scratch_paths = (replay_action_trajs_path, replay_task_instances_path)

    # Get data_path from args.yaml
    if data_path is None:
        args_path = os.path.join(os.path.dirname(traj_path), "args.yaml")
        with open(args_path) as f:
            args = yaml.safe_load(f)
        data_path = args["environment"]["data_path"]

    # Identify the relevant task instance. All validation happens before any
    # scratch file is written so a bad input leaves nothing behind.
    if data_path.endswith(".jsonl"):
        with open(data_path, "r") as f:
            data = [json.loads(line) for line in f if line.strip()]
    elif data_path.endswith(".json"):
        with open(data_path) as f:
            data = json.load(f)
    else:
        raise ValueError("--data_path must be a .json or .jsonl")
    data = [d for d in data if d["instance_id"] == instance_id]
    if not data:
        raise ValueError(
            f"No task instance with instance_id {instance_id!r} found in {data_path!r}"
        )

    # Never clobber (and later delete) one of the caller's own input files.
    for scratch in scratch_paths:
        for source in (traj_path, data_path):
            if os.path.exists(scratch) and os.path.samefile(scratch, source):
                raise ValueError(
                    f"Refusing to overwrite input file {source!r} with scratch "
                    f"file {scratch!r}; run from a different working directory "
                    "or move the input file."
                )

    try:
        with open(replay_action_trajs_path, "w") as f:
            f.write(json.dumps({instance_id: actions}) + "\n")
        with open(replay_task_instances_path, "w") as f:
            for d in data:
                f.write(json.dumps(d) + "\n")

        # Call run.py via subprocess
        command = [
            "python",
            "run.py",
            "--config_file", config_file,
            "--data_path", replay_task_instances_path,
            "--install_environment", "True",
            "--model_name", "replay",
            "--replay_path", replay_action_trajs_path,
        ]
        if suffix is not None:
            command.extend(["--suffix", suffix])
        subprocess.run(command)
    finally:
        # Remove the scratch files even when writing or the replay run fails.
        for scratch in scratch_paths:
            if os.path.exists(scratch):
                os.remove(scratch)
def main(
    action_trajs_path: str,
    traj_path: str,
    config_file: str,
    data_path: str,
    suffix: str,
):
    """Dispatch to the replay routine that matches the supplied arguments.

    ``action_trajs_path`` takes precedence: when it is not ``None`` the
    synthetic trajectories are replayed and ``traj_path`` is ignored.
    Otherwise a non-``None`` ``traj_path`` replays that single trajectory.
    When both are ``None`` nothing is replayed and a usage hint is printed.
    """
    # Synthetic trajectories win when both inputs are given.
    if action_trajs_path is not None:
        process_synthetic_trajs(action_trajs_path, config_file, suffix)
        return

    if traj_path is not None:
        process_single_traj(traj_path, config_file, data_path, suffix)
        return

    # Neither input was given: explain what is expected.
    usage_hint = (
        "No replays generated.\n"
        "You must either provide one of the following. Either...\n"
        "\t* --action_trajs_path for replaying synthetic trajectories\n"
        "\t* --traj_path for replaying SWE-agent style trajectories (from ./trajectories folder)\n"
    )
    print(usage_hint)
if __name__ == "__main__":
    # Command-line entry point: declare the options in a table, parse them,
    # and hand each parsed value to main() under its own keyword.
    option_table = (
        ("--action_trajs_path", {"help": "Path to action trajectories to replay", "default": None}),
        ("--traj_path", {"help": "Path to trajectory to replay", "default": None}),
        ("--config_file", {"help": "Path to template", "required": True}),
        ("--data_path", {"help": "(Optional) Path to data file containing task instances ref'ed by replay trajectories", "default": None}),
        ("--suffix", {"help": "(Optional) Suffix argument appended to end of traj path", "default": None}),
    )
    cli = ArgumentParser()
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    parsed = cli.parse_args()
    main(
        action_trajs_path=parsed.action_trajs_path,
        traj_path=parsed.traj_path,
        config_file=parsed.config_file,
        data_path=parsed.data_path,
        suffix=parsed.suffix,
    )