# generate_completions.py
import os
import sys
from datetime import datetime
from functools import partial

import pandas as pd
import torch
import yaml
from accelerate.utils import find_executable_batch_size
from huggingface_hub import login as hf_login
from tqdm import tqdm

from baselines.model_utils import get_template, load_model_and_tokenizer

hf_login(token=os.environ.get("HUGGINGFACE_API_KEY"))
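# Note: this script reads HUGGINGFACE_API_KEY (used above) and WORK_DIR (used in
# generate_completions() below) from the environment; both should be set before running.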

def _hf_generate_with_batching(model, tokenizer, test_cases, template, **generation_kwargs):
    """Generate completions for test_cases with model.generate, batching via accelerate's
    find_executable_batch_size."""

    @find_executable_batch_size(starting_batch_size=1)
    def inner_generation_loop(batch_size):
        nonlocal model, tokenizer, test_cases, template, generation_kwargs
        generations = []
        for i in tqdm(range(0, len(test_cases), batch_size)):
            batched_test_cases = test_cases[i : i + batch_size]
            inputs = [template["prompt"].format(instruction=s) for s in batched_test_cases]
            print("inputs", inputs)
            inputs = tokenizer(inputs, return_tensors="pt", padding=True)
            inputs = inputs.to(model.device)
            with torch.no_grad():
                outputs = model.generate(
                    inputs=inputs["input_ids"],
                    attention_mask=inputs["attention_mask"],
                    **generation_kwargs,
                ).cpu()
            # Keep only the newly generated tokens (drop the prompt portion).
            generated_tokens = outputs[:, inputs["input_ids"].shape[1] :]
            batch_generations = [
                tokenizer.decode(o, skip_special_tokens=True).strip() for o in generated_tokens
            ]
            print("batch_generations", batch_generations)
            generations.extend(batch_generations)
        return generations

    return inner_generation_loop()
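
# Illustrative sketch (assumption, not defined in this file): get_template() is expected to
# return a dict whose "prompt" entry contains an {instruction} placeholder, roughly like:
#   TEMPLATE = {"prompt": "[INST] {instruction} [/INST]"}
#   _hf_generate_with_batching(model, tokenizer, ["Say hello"], TEMPLATE, max_new_tokens=32)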

def load_generation_function(config_file, model_name, max_new_tokens):
    """Load the model and tokenizer described in config_file for model_name and return a
    ready-to-call generation function (greedy decoding, up to max_new_tokens)."""
    with open(config_file) as file:
        model_configs = yaml.full_load(file)

    # Reconcile the configured GPU count with the number actually available.
    num_gpus = model_configs[model_name].get("num_gpus", 0)
    num_gpus_available = torch.cuda.device_count()
    if num_gpus_available != num_gpus:
        print(
            f"Warning: num_gpus in config ({num_gpus}) does not match num_gpus available ({num_gpus_available}). Using {num_gpus_available} GPUs."
        )
        num_gpus = num_gpus_available

    model_config = model_configs[model_name]["model"]
    model_config["num_gpus"] = num_gpus
    model_config["dtype"] = "float16"
    model_name_or_path = model_config["model_name_or_path"]

    print("Using HF generation")
    model, tokenizer = load_model_and_tokenizer(**model_config)
    generation_kwargs = dict(max_new_tokens=max_new_tokens, do_sample=False)
    TEMPLATE = get_template(
        model_name_or_path, chat_template=model_config.get("chat_template", None)
    )
    return partial(
        _hf_generate_with_batching,
        model=model,
        tokenizer=tokenizer,
        template=TEMPLATE,
        **generation_kwargs,
    )
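
# Illustrative usage (hypothetical model name and config path):
#   generate = load_generation_function(
#       config_file="configs/model_configs/models.yaml",
#       model_name="llama2_7b",
#       max_new_tokens=512,
#   )
#   completions = generate(test_cases=["Write a haiku about the sea."])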

def generate_main(test_cases, generation_function):
    # ==== Generate ====
    print("Generating completions...")
    generations = generation_function(test_cases=test_cases)
    print("Done")
    return generations

def generate_completions(df, model_name):
    generation_function = load_generation_function(
        config_file=os.environ.get("WORK_DIR") + "/configs/model_configs/models.yaml",
        model_name=model_name,
        max_new_tokens=512,
    )
    msg_list = df.best_msg.values.tolist()
    # Generate a completion for each best_msg and attach them to the DataFrame
    temp = generate_main(msg_list, generation_function)
    assert len(df) == len(temp), "Length of generated completions does not match the length of the DataFrame"
    df['final_completion'] = temp
    return df

def get_df(results_dir, df_name):
    """Load the results CSV for df_name (its parent folder is df_name minus the last two
    underscore-separated tokens) and return the DataFrame together with its path."""
    folder_name = '_'.join(df_name.split("_")[:-2])
    if not df_name.endswith('.csv'):
        df_name = df_name + '.csv'
    path = os.path.join(results_dir, folder_name, df_name)
    df = pd.read_csv(path)
    return df, path
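
# e.g. (illustrative names): get_df("/results", "promptbench_run_3_final") reads
#   /results/promptbench_run/promptbench_run_3_final.csv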

def save_concatenated_csv(concatenated_df, wandb_run_name, base_results_dir):
    """Save the concatenated DataFrame to a CSV file with a timestamped name."""
    output_dir = os.path.join(base_results_dir, wandb_run_name)
    os.makedirs(output_dir, exist_ok=True)
    current_time = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_file = os.path.join(output_dir, f"{wandb_run_name}_{current_time}.csv")
    concatenated_df.to_csv(output_file, index=False)
    print(f"Concatenated CSV saved to {output_file}")

if __name__ == "__main__":
    # Get command-line arguments
    df_name = sys.argv[1]
    results_dir = sys.argv[2]
    print("Generating completions for", df_name)
    concatenated_df, df_path = get_df(results_dir, df_name)

    # Generate completions; all rows are expected to share the same experiment/model name.
    models_all = concatenated_df.experiment_name.values.tolist()
    model_name = models_all[0]
    assert all(models_all[0] == x for x in models_all), "All rows must have the same experiment_name"
    concatenated_df = generate_completions(concatenated_df, model_name)

    concatenated_df.to_csv(df_path, index=False)
    print("The file is saved to", df_path)