Fix linter comments
Signed-off-by: Michal Bien <[email protected]>
Glorf committed Jan 20, 2025
1 parent 296d20e commit 2dbb364
Showing 2 changed files with 56 additions and 23 deletions.
17 changes: 9 additions & 8 deletions nemo/collections/llm/api.py
@@ -26,8 +26,8 @@
from typing_extensions import Annotated

import nemo.lightning as nl
from nemo.collections.llm.evaluation.api import ApiEndpoint, EvaluationConfig, EvaluationTarget
from nemo.collections.llm.quantization import ExportConfig, QuantizationConfig
from nemo.collections.llm.evaluation.api import EvaluationConfig, ApiEndpoint, EvaluationTarget
from nemo.lightning import (
AutoResume,
NeMoLogger,
@@ -427,22 +427,21 @@ def deploy(


def evaluate(
target_cfg: EvaluationTarget,
eval_cfg: EvaluationConfig = EvaluationConfig(type="gsm8k"),
):
target_cfg: EvaluationTarget,
eval_cfg: EvaluationConfig = EvaluationConfig(type="gsm8k"),
):
"""
Evaluates nemo model deployed on PyTriton server (via trtllm) using lm-evaluation-harness
(https://github.com/EleutherAI/lm-evaluation-harness/tree/main).
Args:
target_cfg (EvaluationTarget): target of the evaluation. Providing nemo_checkpoint_path, model_id and url in EvaluationTarget.api_endpoint is required to run evaluations.
eval_cfg (EvaluationConfig): configuration for evaluations. Default type (task): gsm8k.
eval_cfg (EvaluationConfig): configuration for evaluations. Default type (task): gsm8k.
"""

if target_cfg.api_endpoint.nemo_checkpoint_path is None:
raise ValueError("Please provide nemo_checkpoint_path in your target_cfg.")


try:
# lm-evaluation-harness import
from lm_eval import evaluator
@@ -461,7 +460,9 @@ def evaluate(
tokenizer = io.load_context(endpoint.nemo_checkpoint_path + "/context", subpath="model.tokenizer")

# Wait for server to be ready before starting evaluation
evaluation.wait_for_server_ready(url=endpoint.url, triton_http_port=endpoint.nemo_triton_http_port, model_name=endpoint.model_id)
evaluation.wait_for_server_ready(
url=endpoint.url, triton_http_port=endpoint.nemo_triton_http_port, model_name=endpoint.model_id
)
# Create an object of the NeMoFWLM which is passed as a model to evaluator.simple_evaluate
params = eval_cfg.params
model = evaluation.NeMoFWLMEval(
@@ -472,7 +473,7 @@ def evaluate(
params.temperature,
params.top_p,
params.top_k,
params.add_bos
params.add_bos,
)

eval_task = eval_cfg.type
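
The reformatted evaluate entry point above is driven entirely by the two pydantic configs imported at the top of the file. A minimal usage sketch, assuming evaluate is importable from nemo.collections.llm.api as defined in this file; the URL, model id, and checkpoint path are placeholders, not part of this commit:

# Hypothetical usage sketch; endpoint values are placeholders.
from nemo.collections.llm.api import evaluate
from nemo.collections.llm.evaluation.api import ApiEndpoint, EvaluationConfig, EvaluationTarget

endpoint = ApiEndpoint(
    url="http://0.0.0.0:8000",               # PyTriton server started by deploy()
    model_id="megatron_model",               # placeholder model name
    nemo_checkpoint_path="/workspace/ckpt",  # placeholder NeMo 2.0 checkpoint; supplies the tokenizer
    nemo_triton_http_port=8000,
)
target_cfg = EvaluationTarget(api_endpoint=endpoint)
eval_cfg = EvaluationConfig(type="gsm8k")    # default task, as in the signature above

evaluate(target_cfg=target_cfg, eval_cfg=eval_cfg)
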
62 changes: 47 additions & 15 deletions nemo/collections/llm/evaluation/api.py
@@ -1,37 +1,69 @@
from pydantic import BaseModel, Field
from typing import Optional

from pydantic import BaseModel, Field


class ApiEndpoint(BaseModel):
url: str = Field(description="Url of the model")
model_id: str = Field(description="Name of the model")
type: str = Field(description="The type of the target", default="chat")
api_key: str = Field(description="Name of the env variable that stores API key for the model", default="NVIDIA_API_KEY")
api_key: str = Field(
description="Name of the env variable that stores API key for the model", default="NVIDIA_API_KEY"
)
stream: bool = Field(description="Whether responses should be streamed", default=False)
nemo_checkpoint_path: Optional[str] = Field(description="Path for nemo 2.0 checkpoint. This is used to get the tokenizer from the ckpt which is required to tokenize the evaluation input and output prompts.", default=None)
nemo_triton_http_port: Optional[int] = Field(description="HTTP port that was used for the PyTriton server in the deploy method. Default: 8000.", default=8000)

nemo_checkpoint_path: Optional[str] = Field(
description="Path for nemo 2.0 checkpoint. This is used to get the tokenizer from the ckpt which is required to tokenize the evaluation input and output prompts.",
default=None,
)
nemo_triton_http_port: Optional[int] = Field(
description="HTTP port that was used for the PyTriton server in the deploy method. Default: 8000.",
default=8000,
)


class EvaluationTarget(BaseModel):
api_endpoint: ApiEndpoint = Field(description="Api endpoint to be used for evaluation")


class ConfigParams(BaseModel):
parallelism: int = Field(description="Parallelism to be used", default=1)
top_p: float = Field(description="float value between 0 and 1. limits to the top tokens within a certain probability. top_p=0 means the model will only consider the single most likely token for the next prediction. Default: 0.9999999", default=0.9999999)
temperature: float = Field(description="float value between 0 and 1. temp of 0 indicates greedy decoding, where the token with highest prob is chosen. Temperature can't be set to 0.0 currently. Default: 0.0000001", default=0.0000001)
tokenizer_path: str = Field(description="Name of the tokenizer used for evaluation", default="meta-llama/Llama-3.1-70B-Instruct")
limit: Optional[int] = Field(description="Limit evaluation to `limit` samples. Default: use all samples", default=None)
first_n: Optional[int] = Field(description="Evaluate only on the first first_n samples. Default: use all samples", default=None)
top_p: float = Field(
description="float value between 0 and 1. limits to the top tokens within a certain probability. top_p=0 means the model will only consider the single most likely token for the next prediction. Default: 0.9999999",
default=0.9999999,
)
temperature: float = Field(
description="float value between 0 and 1. temp of 0 indicates greedy decoding, where the token with highest prob is chosen. Temperature can't be set to 0.0 currently. Default: 0.0000001",
default=0.0000001,
)
tokenizer_path: str = Field(
description="Name of the tokenizer used for evaluation", default="meta-llama/Llama-3.1-70B-Instruct"
)
limit: Optional[int] = Field(
description="Limit evaluation to `limit` samples. Default: use all samples", default=None
)
first_n: Optional[int] = Field(
description="Evaluate only on the first first_n samples. Default: use all samples", default=None
)
n_samples: Optional[int] = Field(description="Number of samples to be generated", default=1)
num_samples: Optional[int] = Field(description="Maximum number of samples to test (in ruler)", default=10)
num_fewshot: Optional[int] = Field(description="Number of examples in few-shot context. Default: None.", default=None)
num_fewshot: Optional[int] = Field(
description="Number of examples in few-shot context. Default: None.", default=None
)
max_tokens_to_generate: Optional[int] = Field(description="max tokens to generate. Default: 256.", default=256)
top_k: Optional[int] = Field(description="limits to a certain number (K) of the top tokens to consider. top_k=1 means the model will only consider the single most likely token for the next prediction. Default: 1", default=1)
add_bos: Optional[bool] = Field(description="whether a special token representing the beginning of a sequence should be added when encoding a string. Default: False.", default=False)
bootstrap_iters: int = Field(description="Number of iterations for bootstrap statistics, used when calculating stderrs. Set to 0 for no stderr calculations to be performed. Default: 100000.", default=100000)
top_k: Optional[int] = Field(
description="limits to a certain number (K) of the top tokens to consider. top_k=1 means the model will only consider the single most likely token for the next prediction. Default: 1",
default=1,
)
add_bos: Optional[bool] = Field(
description="whether a special token representing the beginning of a sequence should be added when encoding a string. Default: False.",
default=False,
)
bootstrap_iters: int = Field(
description="Number of iterations for bootstrap statistics, used when calculating stderrs. Set to 0 for no stderr calculations to be performed. Default: 100000.",
default=100000,
)


class EvaluationConfig(BaseModel):
type: str = Field(description="Name/type of the task")
params: ConfigParams = Field(description="Parameters to be used for evaluation", default=ConfigParams())
params: ConfigParams = Field(description="Parameters to be used for evaluation", default=ConfigParams())
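
The models above define the full evaluation configuration surface; only their formatting changes in this commit. A short sketch of constructing them directly, with illustrative values and assuming pydantic v2 (where model_dump() is available; on v1 the equivalent is .dict()):

# Illustrative values only; field names come from the models defined above.
from nemo.collections.llm.evaluation.api import ApiEndpoint, ConfigParams, EvaluationConfig, EvaluationTarget

params = ConfigParams(limit=32, num_fewshot=5, max_tokens_to_generate=256, top_k=1)
eval_cfg = EvaluationConfig(type="gsm8k", params=params)
target = EvaluationTarget(
    api_endpoint=ApiEndpoint(url="http://0.0.0.0:8000", model_id="megatron_model")
)

print(eval_cfg.model_dump())   # assumes pydantic v2; use .dict() on v1
print(target.model_dump())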
