[MiniLLM] problem in evaluate() function #300

Open · pipilia opened this issue Jan 13, 2025 · 0 comments

pipilia commented Jan 13, 2025
I attempted to apply the evaluate() method from your finetune.py to the training process of MiniLLM, but I noticed that model.eval() causes anomalies in the rl_loss. Specifically, at certain fixed steps the pg_loss becomes extremely large and the reg_loss also increases slightly, which eventually triggers: "Current loss scale already at minimum - cannot decrease scale anymore. Exiting run." However, after removing model.eval(), training proceeds normally. Do you know the reason behind this? Is model.eval() necessary here?
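
For context, my understanding is that torch.no_grad() only disables gradient tracking, while model.eval() additionally switches modules such as Dropout into inference mode; since the whole generation loop below already runs under torch.no_grad(), the eval/train toggling should only be changing the dropout behavior during generation. A small standalone PyTorch illustration of the difference (toy model, not the MiniLLM code):

import torch
import torch.nn as nn

toy = nn.Sequential(nn.Linear(4, 4), nn.Dropout(p=0.5))
x = torch.randn(1, 4)

with torch.no_grad():       # no autograd graph, but dropout is still active
    print(toy.training)     # True
    print(toy(x))           # stochastic output

toy.eval()                  # dropout disabled -> deterministic output
with torch.no_grad():
    print(toy(x))
toy.train()                 # back to training-mode behavior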

Here is my evaluate() method:

# Standard imports for the snippet; print_rank, all_gather, compute_metrics and
# LMTrainDataset are the helpers from the MiniLLM codebase.
import torch
import torch.distributed as dist
import torch.nn.functional as F
from torch.utils.data import DataLoader, DistributedSampler
from tqdm import tqdm
from transformers import GenerationConfig


def evaluate_vd(args, tokenizer, model, dataset: LMTrainDataset, split, device):
    collate_fn = dataset.collate

    dp_world_size = dist.get_world_size()
    dp_rank = dist.get_rank()
    dp_group = None

    print_rank("dp size", dp_world_size)

    generation_config = GenerationConfig(
        do_sample=args.do_sample,
        top_p=args.top_p,
        top_k=args.top_k,
        temperature=args.temperature,
        repetition_penalty=args.repetition_penalty,
        max_length=args.max_length,
        min_length=None,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
        return_dict_in_generate=True,
        output_scores=False
    )

    sampler = DistributedSampler(dataset, shuffle=False, drop_last=False, rank=dp_rank, num_replicas=dp_world_size)
    dataloader = DataLoader(
        dataset, sampler=sampler, batch_size=args.eval_batch_size, num_workers=args.num_workers, collate_fn=collate_fn)

    model.eval()  # the switch in question: with it, pg_loss blows up; without it, training is fine

    all_response_ids = []
    
    with torch.no_grad():
        for it, (model_batch, no_model_batch, gen_data) in enumerate(tqdm(dataloader, desc="Evaluating", disable=(dist.get_rank() != 0))):
            print_rank(f"{it}/{len(dataloader)}")
            dataset.move_to_device(model_batch, no_model_batch, gen_data, device)            
            max_new_tokens = args.max_length - gen_data["input_ids"].size(1)
            
            gen_out = model.generate(
                **gen_data,
                generation_config=generation_config,
                max_new_tokens=max_new_tokens)
            
            full_ids = gen_out.sequences
            
            # right-pad every sequence to args.max_length so batches from all ranks have the same width
            full_ids = F.pad(
                full_ids,
                (0, args.max_length - full_ids.shape[1]),
                value=tokenizer.pad_token_id,
            )
            
            response_ids = full_ids[:, gen_data["input_ids"].size(1):]
            all_response_ids.append(response_ids)

    
    # gather the generations from every data-parallel rank, then flatten to (num_samples, response_len)
    all_response_ids = torch.cat(all_response_ids, dim=0)
    all_response_ids = all_gather(all_response_ids, dim=1, world_size=dp_world_size, group=dp_group, op="stack")
    all_response_ids = all_response_ids.view(-1, all_response_ids.size(-1))
    
    responses = tokenizer.batch_decode(all_response_ids, skip_special_tokens=True)
    
    references = dataset.answers
    # DistributedSampler (drop_last=False) pads the dataset to a multiple of the world size, so trim the extras
    responses = responses[:len(references)]
    
    res = compute_metrics(responses, references)
    # writer.add_scalar("eval/rougel", res["rougeL"], global_step)
    # writer.add_scalar("eval/exact_match", res["exact_match"], global_step)
    # eval_dir = os.path.join(args.save, "eval", str(epoch))
    # print_rank(eval_dir)
    # os.makedirs(eval_dir, exist_ok=True)
    # with open(os.path.join(eval_dir, "answers.jsonl"), "w") as f:
    #     for resp in responses:
    #         f.write(json.dumps({"text": resp}) + "\n")
    
    log_str = f"{split} | {res}"
    print_rank(log_str)
    model.train()  # restore training mode before RL updates resume
    # save_rank(log_str, os.path.join(args.save, "log.txt"))
    return res['rougeL'], res['exact_match']
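
As far as I can tell, the error message itself comes from DeepSpeed's dynamic fp16 loss scaler: on an overflow it halves the loss scale, and once the scale is forced down to min_loss_scale it aborts the run with exactly that message. The relevant knobs sit in the fp16 section of the DeepSpeed config; the values below are only illustrative, not my actual settings:

# illustrative DeepSpeed fp16 section (example values, shown as a Python dict)
ds_config = {
    "fp16": {
        "enabled": True,
        "initial_scale_power": 16,   # starting loss scale = 2**16
        "loss_scale_window": 1000,   # overflow-free steps before the scale is raised again
        "hysteresis": 2,
        "min_loss_scale": 1,         # the floor hit in the error above
    },
}

That matches the behaviour described above: the pg_loss spikes first, then the scaler keeps halving until it hits the minimum and the run exits.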