[MiniLLM] problem in evaluate() function #300

Open · pipilia opened this issue Jan 13, 2025 · 0 comments

pipilia commented Jan 13, 2025
I attempted to apply the evaluate() method from your finetune.py to the training process of MiniLLM, but I noticed that model.eval() causes anomalies in the rl_loss. Specifically, at certain fixed steps the pg_loss becomes extremely large and the reg_loss also increases slightly, which eventually triggers: "Current loss scale already at minimum - cannot decrease scale anymore. Exiting run." However, after removing model.eval(), training proceeds normally. Do you know the reason behind this? Is model.eval() necessary here?
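
For context, my understanding is that torch.no_grad() only disables gradient tracking, while model.eval() additionally switches modules such as Dropout into inference mode; since the whole generation loop below already runs under torch.no_grad(), the eval/train toggling should only be changing the dropout behavior during generation. A small standalone PyTorch illustration of the difference (toy model, not the MiniLLM code):

import torch
import torch.nn as nn

toy = nn.Sequential(nn.Linear(4, 4), nn.Dropout(p=0.5))
x = torch.randn(1, 4)

with torch.no_grad():       # no autograd graph, but dropout is still active
    print(toy.training)     # True
    print(toy(x))           # stochastic output

toy.eval()                  # dropout disabled -> deterministic output
with torch.no_grad():
    print(toy(x))
toy.train()                 # back to training-mode behavior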

Here is my evaluate() method:

# Standard imports for the snippet; print_rank, all_gather, compute_metrics and
# LMTrainDataset are the helpers from the MiniLLM codebase.
import torch
import torch.distributed as dist
import torch.nn.functional as F
from torch.utils.data import DataLoader, DistributedSampler
from tqdm import tqdm
from transformers import GenerationConfig


def evaluate_vd(args, tokenizer, model, dataset: LMTrainDataset, split, device):
    collate_fn = dataset.collate

    dp_world_size = dist.get_world_size()
    dp_rank = dist.get_rank()
    dp_group = None

    print_rank("dp size", dp_world_size)

    generation_config = GenerationConfig(
        do_sample=args.do_sample,
        top_p=args.top_p,
        top_k=args.top_k,
        temperature=args.temperature,
        repetition_penalty=args.repetition_penalty,
        max_length=args.max_length,
        min_length=None,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
        return_dict_in_generate=True,
        output_scores=False
    )

    sampler = DistributedSampler(dataset, shuffle=False, drop_last=False, rank=dp_rank, num_replicas=dp_world_size)
    dataloader = DataLoader(
        dataset, sampler=sampler, batch_size=args.eval_batch_size, num_workers=args.num_workers, collate_fn=collate_fn)

    model.eval()  # the switch in question: with it, pg_loss blows up; without it, training is fine

    all_response_ids = []
    
    with torch.no_grad():
        for it, (model_batch, no_model_batch, gen_data) in enumerate(tqdm(dataloader, desc="Evaluating", disable=(dist.get_rank() != 0))):
            print_rank(f"{it}/{len(dataloader)}")
            dataset.move_to_device(model_batch, no_model_batch, gen_data, device)            
            max_new_tokens = args.max_length - gen_data["input_ids"].size(1)
            
            gen_out = model.generate(
                **gen_data,
                generation_config=generation_config,
                max_new_tokens=max_new_tokens)
            
            full_ids = gen_out.sequences
            
            # right-pad every sequence to args.max_length so batches from all ranks have the same width
            full_ids = F.pad(
                full_ids,
                (0, args.max_length - full_ids.shape[1]),
                value=tokenizer.pad_token_id,
            )
            
            response_ids = full_ids[:, gen_data["input_ids"].size(1):]
            all_response_ids.append(response_ids)

    
    # gather the generations from every data-parallel rank, then flatten to (num_samples, response_len)
    all_response_ids = torch.cat(all_response_ids, dim=0)
    all_response_ids = all_gather(all_response_ids, dim=1, world_size=dp_world_size, group=dp_group, op="stack")
    all_response_ids = all_response_ids.view(-1, all_response_ids.size(-1))
    
    responses = tokenizer.batch_decode(all_response_ids, skip_special_tokens=True)
    
    references = dataset.answers
    # DistributedSampler (drop_last=False) pads the dataset to a multiple of the world size, so trim the extras
    responses = responses[:len(references)]
    
    res = compute_metrics(responses, references)
    # writer.add_scalar("eval/rougel", res["rougeL"], global_step)
    # writer.add_scalar("eval/exact_match", res["exact_match"], global_step)
    # eval_dir = os.path.join(args.save, "eval", str(epoch))
    # print_rank(eval_dir)
    # os.makedirs(eval_dir, exist_ok=True)
    # with open(os.path.join(eval_dir, "answers.jsonl"), "w") as f:
    #     for resp in responses:
    #         f.write(json.dumps({"text": resp}) + "\n")
    
    log_str = f"{split} | {res}"
    print_rank(log_str)
    model.train()  # restore training mode before RL updates resume
    # save_rank(log_str, os.path.join(args.save, "log.txt"))
    return res['rougeL'], res['exact_match']
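
As far as I can tell, the error message itself comes from DeepSpeed's dynamic fp16 loss scaler: on an overflow it halves the loss scale, and once the scale is forced down to min_loss_scale it aborts the run with exactly that message. The relevant knobs sit in the fp16 section of the DeepSpeed config; the values below are only illustrative, not my actual settings:

# illustrative DeepSpeed fp16 section (example values, shown as a Python dict)
ds_config = {
    "fp16": {
        "enabled": True,
        "initial_scale_power": 16,   # starting loss scale = 2**16
        "loss_scale_window": 1000,   # overflow-free steps before the scale is raised again
        "hysteresis": 2,
        "min_loss_scale": 1,         # the floor hit in the error above
    },
}

That matches the behaviour described above: the pg_loss spikes first, then the scaler keeps halving until it hits the minimum and the run exits.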