Fix linter comments
Signed-off-by: Michal Bien <[email protected]>
Glorf committed Jan 20, 2025
1 parent 296d20e commit 2dbb364
Showing 2 changed files with 56 additions and 23 deletions.
17 changes: 9 additions & 8 deletions nemo/collections/llm/api.py
@@ -26,8 +26,8 @@
from typing_extensions import Annotated

import nemo.lightning as nl
from nemo.collections.llm.evaluation.api import ApiEndpoint, EvaluationConfig, EvaluationTarget
from nemo.collections.llm.quantization import ExportConfig, QuantizationConfig
from nemo.collections.llm.evaluation.api import EvaluationConfig, ApiEndpoint, EvaluationTarget
from nemo.lightning import (
AutoResume,
NeMoLogger,
@@ -427,22 +427,21 @@ def deploy(


def evaluate(
target_cfg: EvaluationTarget,
eval_cfg: EvaluationConfig = EvaluationConfig(type="gsm8k"),
):
target_cfg: EvaluationTarget,
eval_cfg: EvaluationConfig = EvaluationConfig(type="gsm8k"),
):
"""
Evaluates nemo model deployed on PyTriton server (via trtllm) using lm-evaluation-harness
(https://github.com/EleutherAI/lm-evaluation-harness/tree/main).
Args:
target_cfg (EvaluationTarget): target of the evaluation. Providing nemo_checkpoint_path, model_id and url in EvaluationTarget.api_endpoint is required to run evaluations.
eval_cfg (EvaluationConfig): configuration for evaluations. Default type (task): gsm8k.
eval_cfg (EvaluationConfig): configuration for evaluations. Default type (task): gsm8k.
"""

if target_cfg.api_endpoint.nemo_checkpoint_path is None:
raise ValueError("Please provide nemo_checkpoint_path in your target_cfg.")


try:
# lm-evaluation-harness import
from lm_eval import evaluator
@@ -461,7 +460,9 @@ def evaluate(
tokenizer = io.load_context(endpoint.nemo_checkpoint_path + "/context", subpath="model.tokenizer")

# Wait for server to be ready before starting evaluation
evaluation.wait_for_server_ready(url=endpoint.url, triton_http_port=endpoint.nemo_triton_http_port, model_name=endpoint.model_id)
evaluation.wait_for_server_ready(
url=endpoint.url, triton_http_port=endpoint.nemo_triton_http_port, model_name=endpoint.model_id
)
# Create an object of the NeMoFWLM which is passed as a model to evaluator.simple_evaluate
params = eval_cfg.params
model = evaluation.NeMoFWLMEval(
@@ -472,7 +473,7 @@ def evaluate(
params.temperature,
params.top_p,
params.top_k,
params.add_bos
params.add_bos,
)

eval_task = eval_cfg.type
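
The reformatted evaluate entry point above is driven entirely by the two pydantic configs imported at the top of the file. A minimal usage sketch, assuming evaluate is importable from nemo.collections.llm.api as defined in this file; the URL, model id, and checkpoint path are placeholders, not part of this commit:

# Hypothetical usage sketch; endpoint values are placeholders.
from nemo.collections.llm.api import evaluate
from nemo.collections.llm.evaluation.api import ApiEndpoint, EvaluationConfig, EvaluationTarget

endpoint = ApiEndpoint(
    url="http://0.0.0.0:8000",               # PyTriton server started by deploy()
    model_id="megatron_model",               # placeholder model name
    nemo_checkpoint_path="/workspace/ckpt",  # placeholder NeMo 2.0 checkpoint; supplies the tokenizer
    nemo_triton_http_port=8000,
)
target_cfg = EvaluationTarget(api_endpoint=endpoint)
eval_cfg = EvaluationConfig(type="gsm8k")    # default task, as in the signature above

evaluate(target_cfg=target_cfg, eval_cfg=eval_cfg)
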
62 changes: 47 additions & 15 deletions nemo/collections/llm/evaluation/api.py
@@ -1,37 +1,69 @@
from pydantic import BaseModel, Field
from typing import Optional

from pydantic import BaseModel, Field


class ApiEndpoint(BaseModel):
url: str = Field(description="Url of the model")
model_id: str = Field(description="Name of the model")
type: str = Field(description="The type of the target", default="chat")
api_key: str = Field(description="Name of the env variable that stores API key for the model", default="NVIDIA_API_KEY")
api_key: str = Field(
description="Name of the env variable that stores API key for the model", default="NVIDIA_API_KEY"
)
stream: bool = Field(description="Whether responses should be streamed", default=False)
nemo_checkpoint_path: Optional[str] = Field(description="Path for nemo 2.0 checkpoint. This is used to get the tokenizer from the ckpt which is required to tokenize the evaluation input and output prompts.", default=None)
nemo_triton_http_port: Optional[int] = Field(description="HTTP port that was used for the PyTriton server in the deploy method. Default: 8000.", default=8000)

nemo_checkpoint_path: Optional[str] = Field(
description="Path for nemo 2.0 checkpoint. This is used to get the tokenizer from the ckpt which is required to tokenize the evaluation input and output prompts.",
default=None,
)
nemo_triton_http_port: Optional[int] = Field(
description="HTTP port that was used for the PyTriton server in the deploy method. Default: 8000.",
default=8000,
)


class EvaluationTarget(BaseModel):
api_endpoint: ApiEndpoint = Field(description="Api endpoint to be used for evaluation")


class ConfigParams(BaseModel):
parallelism: int = Field(description="Parallelism to be used", default=1)
top_p: float = Field(description="float value between 0 and 1. limits to the top tokens within a certain probability. top_p=0 means the model will only consider the single most likely token for the next prediction. Default: 0.9999999", default=0.9999999)
temperature: float = Field(description="float value between 0 and 1. temp of 0 indicates greedy decoding, where the token with highest prob is chosen. Temperature can't be set to 0.0 currently. Default: 0.0000001", default=0.0000001)
tokenizer_path: str = Field(description="Name of the tokenizer used for evaluation", default="meta-llama/Llama-3.1-70B-Instruct")
limit: Optional[int] = Field(description="Limit evaluation to `limit` samples. Default: use all samples", default=None)
first_n: Optional[int] = Field(description="Evaluate only on the first first_n samples. Default: use all samples", default=None)
top_p: float = Field(
description="float value between 0 and 1. limits to the top tokens within a certain probability. top_p=0 means the model will only consider the single most likely token for the next prediction. Default: 0.9999999",
default=0.9999999,
)
temperature: float = Field(
description="float value between 0 and 1. temp of 0 indicates greedy decoding, where the token with highest prob is chosen. Temperature can't be set to 0.0 currently. Default: 0.0000001",
default=0.0000001,
)
tokenizer_path: str = Field(
description="Name of the tokenizer used for evaluation", default="meta-llama/Llama-3.1-70B-Instruct"
)
limit: Optional[int] = Field(
description="Limit evaluation to `limit` samples. Default: use all samples", default=None
)
first_n: Optional[int] = Field(
description="Evaluate only on the first first_n samples. Default: use all samples", default=None
)
n_samples: Optional[int] = Field(description="Number of samples to be generated", default=1)
num_samples: Optional[int] = Field(description="Maximum number of samples to test (in ruler)", default=10)
num_fewshot: Optional[int] = Field(description="Number of examples in few-shot context. Default: None.", default=None)
num_fewshot: Optional[int] = Field(
description="Number of examples in few-shot context. Default: None.", default=None
)
max_tokens_to_generate: Optional[int] = Field(description="max tokens to generate. Default: 256.", default=256)
top_k: Optional[int] = Field(description="limits to a certain number (K) of the top tokens to consider. top_k=1 means the model will only consider the single most likely token for the next prediction. Default: 1", default=1)
add_bos: Optional[bool] = Field(description="whether a special token representing the beginning of a sequence should be added when encoding a string. Default: False.", default=False)
bootstrap_iters: int = Field(description="Number of iterations for bootstrap statistics, used when calculating stderrs. Set to 0 for no stderr calculations to be performed. Default: 100000.", default=100000)
top_k: Optional[int] = Field(
description="limits to a certain number (K) of the top tokens to consider. top_k=1 means the model will only consider the single most likely token for the next prediction. Default: 1",
default=1,
)
add_bos: Optional[bool] = Field(
description="whether a special token representing the beginning of a sequence should be added when encoding a string. Default: False.",
default=False,
)
bootstrap_iters: int = Field(
description="Number of iterations for bootstrap statistics, used when calculating stderrs. Set to 0 for no stderr calculations to be performed. Default: 100000.",
default=100000,
)


class EvaluationConfig(BaseModel):
type: str = Field(description="Name/type of the task")
params: ConfigParams = Field(description="Parameters to be used for evaluation", default=ConfigParams())
params: ConfigParams = Field(description="Parameters to be used for evaluation", default=ConfigParams())
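
The models above define the full evaluation configuration surface; only their formatting changes in this commit. A short sketch of constructing them directly, with illustrative values and assuming pydantic v2 (where model_dump() is available; on v1 the equivalent is .dict()):

# Illustrative values only; field names come from the models defined above.
from nemo.collections.llm.evaluation.api import ApiEndpoint, ConfigParams, EvaluationConfig, EvaluationTarget

params = ConfigParams(limit=32, num_fewshot=5, max_tokens_to_generate=256, top_k=1)
eval_cfg = EvaluationConfig(type="gsm8k", params=params)
target = EvaluationTarget(
    api_endpoint=ApiEndpoint(url="http://0.0.0.0:8000", model_id="megatron_model")
)

print(eval_cfg.model_dump())   # assumes pydantic v2; use .dict() on v1
print(target.model_dump())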
