From c2ab4da9b6309b66e915ddf66341c9eb4bf568fa Mon Sep 17 00:00:00 2001 From: Jiacheng Liu Date: Mon, 25 Nov 2024 00:10:07 +0000 Subject: [PATCH] Fix lint --- olmo/eval/downstream.py | 8 ++------ olmo/scaling/scaling_laws/utils.py | 4 +++- scripts/eval.py | 8 ++++++-- scripts/eval_hf.py | 25 ++++++++++++++++--------- scripts/scaling/predict.py | 11 +++++++---- scripts/scaling/step1.py | 2 +- scripts/scaling/step2.py | 11 +++++++---- 7 files changed, 42 insertions(+), 27 deletions(-) diff --git a/olmo/eval/downstream.py b/olmo/eval/downstream.py index b5cb90c00..286ed9fd0 100644 --- a/olmo/eval/downstream.py +++ b/olmo/eval/downstream.py @@ -106,12 +106,8 @@ def update(self, batch: Dict[str, Any], lm_logits: torch.Tensor, dc_lm_logits=No self.loglikelihoods.append( torch.Tensor((doc_id, cont_id, log_likelihood)).to(batch["continuation"][idx].device) ) - self.celosses.append( - torch.Tensor((doc_id, cont_id, celoss)).to(batch["continuation"][idx].device) - ) - self.bpbs.append( - torch.Tensor((doc_id, cont_id, bpb)).to(batch["continuation"][idx].device) - ) + self.celosses.append(torch.Tensor((doc_id, cont_id, celoss)).to(batch["continuation"][idx].device)) + self.bpbs.append(torch.Tensor((doc_id, cont_id, bpb)).to(batch["continuation"][idx].device)) self.labels.append( torch.LongTensor((doc_id, cont_id, batch["label_id"][idx])).to(batch["label_id"][idx].device) ) diff --git a/olmo/scaling/scaling_laws/utils.py b/olmo/scaling/scaling_laws/utils.py index 21934174c..25e36b4a6 100644 --- a/olmo/scaling/scaling_laws/utils.py +++ b/olmo/scaling/scaling_laws/utils.py @@ -754,7 +754,9 @@ def get_length(path): return "" -def get_step2_data_by_name(configs, task_name, x_metric="rc_bpb", y_metric="rc_acc", moving_avg=1, skip_perc=0.0, last_n_points=-1): +def get_step2_data_by_name( + configs, task_name, x_metric="rc_bpb", y_metric="rc_acc", moving_avg=1, skip_perc=0.0, last_n_points=-1 +): task = tasks[task_name] if x_metric == "rc_bpb": loss_keys = task.get_loss_keys() diff --git a/scripts/eval.py b/scripts/eval.py index cb8e2830e..0ec8956cd 100644 --- a/scripts/eval.py +++ b/scripts/eval.py @@ -125,8 +125,12 @@ def main(cfg: TrainConfig) -> None: else: # This globbing only works with local paths load_paths = list(glob.glob(f"{cfg.load_path}/step*")) - load_paths = [x for x in load_paths if x.split("/")[-1].replace("-unsharded", "").split("step")[-1].isdigit()] - load_paths = [x for x in load_paths if int(x.split("/")[-1].replace("-unsharded", "").split("step")[-1]) % 5000 == 0] + load_paths = [ + x for x in load_paths if x.split("/")[-1].replace("-unsharded", "").split("step")[-1].isdigit() + ] + load_paths = [ + x for x in load_paths if int(x.split("/")[-1].replace("-unsharded", "").split("step")[-1]) % 5000 == 0 + ] load_paths = list( sorted(load_paths, key=lambda x: int(x.split("/")[-1].replace("-unsharded", "").split("step")[-1])) ) diff --git a/scripts/eval_hf.py b/scripts/eval_hf.py index c16e3850d..99314f870 100644 --- a/scripts/eval_hf.py +++ b/scripts/eval_hf.py @@ -1,17 +1,19 @@ -from itertools import islice import json import os import sys -from tqdm import tqdm +from itertools import islice from typing import Any, Dict + import torch import torch.nn.functional as F import transformers -from olmo.config import TrainConfig, EvaluatorConfig, EvaluatorType +from tqdm import tqdm + +from olmo.config import EvaluatorConfig, EvaluatorType, TrainConfig from olmo.eval import build_evaluator -from olmo.torch_util import move_to_device from olmo.eval.downstream import label_to_task_map_new from olmo.exceptions import OLMoCliError +from olmo.torch_util import move_to_device def get_labels(batch: Dict[str, Any]) -> torch.Tensor: @@ -30,14 +32,19 @@ def get_labels(batch: Dict[str, Any]) -> torch.Tensor: labels.masked_fill_(~instance_mask.unsqueeze(-1), value=-100) return labels[..., 1:].contiguous() + def main(cfg: TrainConfig, model_name: str): device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - tokenizer = transformers.AutoTokenizer.from_pretrained(model_name, token=os.environ.get("HF_TOKEN_DOWNLOAD", None)) + tokenizer = transformers.AutoTokenizer.from_pretrained( + model_name, token=os.environ.get("HF_TOKEN_DOWNLOAD", None) + ) if tokenizer.pad_token_id is None: # This is to prevent the NoneType error in collate_fn() tokenizer.pad_token_id = 0 - model = transformers.AutoModelForCausalLM.from_pretrained(model_name, token=os.environ.get("HF_TOKEN_DOWNLOAD", None)) + model = transformers.AutoModelForCausalLM.from_pretrained( + model_name, token=os.environ.get("HF_TOKEN_DOWNLOAD", None) + ) model.to(device) model.eval() @@ -100,10 +107,10 @@ def main(cfg: TrainConfig, model_name: str): print(eval_metrics) - save_folder = f'/weka/oe-training-default/jiachengl/hc-law/eval_bpb_mc_v2' + save_folder = "/weka/oe-training-default/jiachengl/hc-law/eval_bpb_mc_v2" if not os.path.exists(save_folder): os.makedirs(save_folder) - with open(f'{save_folder}/{model_name.replace("/", "_")}.json', 'w') as f: + with open(f'{save_folder}/{model_name.replace("/", "_")}.json', "w") as f: json.dump(eval_metrics, f) @@ -115,4 +122,4 @@ def main(cfg: TrainConfig, model_name: str): raise OLMoCliError(f"Usage: {sys.argv[0]} [CONFIG_PATH] [MODEL_NAME]") cfg = TrainConfig.load(yaml_path) - main(cfg, model_name) \ No newline at end of file + main(cfg, model_name) diff --git a/scripts/scaling/predict.py b/scripts/scaling/predict.py index b9dc8b1c5..1229a5a7a 100644 --- a/scripts/scaling/predict.py +++ b/scripts/scaling/predict.py @@ -25,9 +25,7 @@ def parse_args(): parser.add_argument( "-k", "--keys", nargs="+", default=[], help="For avg metrics. Use one of [all-val-lm, all-bpb]" ) - parser.add_argument( - "-x", "--x_metric", default="rc_bpb", choices=["rc_bpb", "c4"], help="Metric as input" - ) + parser.add_argument("-x", "--x_metric", default="rc_bpb", choices=["rc_bpb", "c4"], help="Metric as input") parser.add_argument( "-y", "--y_metric", default="rc_acc", choices=["rc_acc", "mc_acc"], help="Metric to predict" ) @@ -71,7 +69,12 @@ def main(): # Step 2 step2_data_by_name = get_step2_data_by_name( - step2_configs, task_name, x_metric=args.x_metric, y_metric=args.y_metric, moving_avg=args.moving_avg, skip_perc=args.skip_perc + step2_configs, + task_name, + x_metric=args.x_metric, + y_metric=args.y_metric, + moving_avg=args.moving_avg, + skip_perc=args.skip_perc, ) step2_coefficients, _ = fit_step2(step2_data_by_name, task_name, args.y_metric) diff --git a/scripts/scaling/step1.py b/scripts/scaling/step1.py index 23c4d5858..c2329de00 100644 --- a/scripts/scaling/step1.py +++ b/scripts/scaling/step1.py @@ -118,7 +118,7 @@ def predict_step1(configs, data_by_name, coefficients, y_metric): else: raise ValueError(f"Unknown y_metric: {y_metric}") - y, y_pred, rel_error = 0, 0, 0 + y, y_pred, rel_error = 0.0, 0.0, 0.0 for name, data in data_by_name.items(): predicted_data_by_name[name] = { diff --git a/scripts/scaling/step2.py b/scripts/scaling/step2.py index 94dcb053d..054c8de70 100644 --- a/scripts/scaling/step2.py +++ b/scripts/scaling/step2.py @@ -27,9 +27,7 @@ def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("-k", "--keys", nargs="+", default=[], help="Key(s) for tasks") - parser.add_argument( - "-x", "--x_metric", default="rc_bpb", choices=["rc_bpb", "c4"], help="Metric as input" - ) + parser.add_argument("-x", "--x_metric", default="rc_bpb", choices=["rc_bpb", "c4"], help="Metric as input") parser.add_argument( "-y", "--y_metric", default="rc_acc", choices=["rc_acc", "mc_acc"], help="Metric to predict" ) @@ -222,7 +220,12 @@ def main(): for i, task_name in enumerate(args.keys): data_by_name = get_step2_data_by_name( - configs, task_name, x_metric=args.x_metric, y_metric=args.y_metric, moving_avg=args.moving_avg, skip_perc=args.skip_perc + configs, + task_name, + x_metric=args.x_metric, + y_metric=args.y_metric, + moving_avg=args.moving_avg, + skip_perc=args.skip_perc, ) coefficients, cov = fit_step2(data_by_name, task_name, args.y_metric)