Merge branch 'main' into feature-chat-models
sadra-barikbin committed Aug 27, 2024
2 parents b44362b + 24adaa2 commit f881dc3
Showing 46 changed files with 1,325 additions and 1,211 deletions.
20 changes: 19 additions & 1 deletion README.md
@@ -146,6 +146,18 @@ accelerate launch --multi_gpu --num_processes=<num_gpus> -m \

You can find the template of the expected model configuration in [examples/model_configs/base_model.yaml](./examples/model_configs/base_model.yaml).

### Evaluating a quantized model

If you want to evaluate a model with quantization, it can be loaded in `4bit` or `8bit`. Under the hood, this uses `BitsAndBytesConfig` and can drastically reduce memory requirements on consumer-grade hardware.

An example configuration can be found in [examples/model_configs/quantized_model.yaml](./examples/model_configs/quantized_model.yaml).
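As a rough illustration of what the `4bit` setting implies, here is a minimal sketch using `transformers` and `bitsandbytes` directly; lighteval builds the equivalent config internally, and the model name is the one from the example config:

```python
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Load the model quantized to 4 bit; use load_in_8bit=True for 8-bit instead.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
model = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceH4/zephyr-7b-beta",
    quantization_config=quantization_config,
    device_map="auto",
)
```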

### Evaluating a PEFT model

If you want to evaluate a model trained with `peft`, check out [examples/model_configs/peft_model.yaml](./examples/model_configs/peft_model.yaml).

Currently, `lighteval` supports applying `adapter` and `delta` weights to the base model.
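For `adapter` weights, the merge is conceptually equivalent to the following sketch with the `peft` library (lighteval performs an equivalent merge internally; this is not its exact code, and the model names are the ones from the example config):

```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

# Apply the trained adapters to the base model, then fold them into its weights.
base = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")
model = PeftModel.from_pretrained(base, "predibase/customer_support").merge_and_unload()
```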

### Evaluating a large model with pipeline parallelism

To evaluate models larger than ~40B parameters in 16-bit precision, you will need to shard the model across multiple GPUs to fit it in VRAM. You can do this by passing `model_parallel=True` and adapting `--num_processes` to the number of processes to use for data parallelism. For example, on a single node of 8 GPUs, you can run:
@@ -480,6 +492,12 @@ export CUDA_LAUNCH_BLOCKING=1
srun accelerate launch --multi_gpu --num_processes=8 -m lighteval accelerate --model_args "pretrained=your model name" --tasks examples/tasks/open_llm_leaderboard_tasks.txt --override_batch_size 1 --save_details --output_dir=your output dir
```

## Authentication

HuggingFace models (i.e. `base` models) are authenticated with a HuggingFace token; the `HF_TOKEN` is picked up directly from the environment.

For `tgi` models, authentication is provided in the config file. An example can be found at [tgi_model.yaml](./examples/model_configs/tgi_model.yaml).
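For `base` models, a minimal sketch of providing the token programmatically; the token value is a hypothetical placeholder, and in practice you would export `HF_TOKEN` in your shell instead:

```python
import os

# Hypothetical placeholder; lighteval picks HF_TOKEN up from the environment.
os.environ["HF_TOKEN"] = "hf_xxxxxxxxxxxxxxxxxxxxxxxx"
```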

## Releases

### Building the package
@@ -498,4 +516,4 @@ python3 -m build .
version = {0.3.0},
url = {https://github.com/huggingface/lighteval}
}
-```
+```
2 changes: 1 addition & 1 deletion community_tasks/_template.py
@@ -34,9 +34,9 @@
from lighteval.metrics import Metrics
from lighteval.metrics.metrics import SampleLevelMetric
from lighteval.metrics.utils import MetricCategory, MetricUseCase
+from lighteval.tasks.default_prompts import LETTER_INDICES
from lighteval.tasks.lighteval_task import LightevalTaskConfig
from lighteval.tasks.requests import Doc
-from lighteval.tasks.tasks_prompt_formatting import LETTER_INDICES


# DEFINE YOUR PROMPT FUNCTIONS
2 changes: 1 addition & 1 deletion community_tasks/arabic_evals.py
@@ -30,9 +30,9 @@
import re

from lighteval.metrics.metrics import Metrics
+from lighteval.tasks.default_prompts import LETTER_INDICES
from lighteval.tasks.lighteval_task import LightevalTaskConfig
from lighteval.tasks.requests import Doc
-from lighteval.tasks.tasks_prompt_formatting import LETTER_INDICES


# fmt: off
12 changes: 12 additions & 0 deletions examples/model_configs/peft_model.yaml
@@ -0,0 +1,12 @@
model:
  type: "base"
  base_params:
    model_args: "pretrained=predibase/customer_support,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ... For a PEFT model, the pretrained model should be the one trained with PEFT, and the base model below contains the original model on which the adapters will be applied.
    dtype: "4bit" # Specifying "4bit" loads the model in 4 bit via BitsAndBytesConfig. The other option is "8bit" quantization.
    compile: true
  merged_weights: # Ignore this section if you are not using PEFT models
    delta_weights: false # set to true if your model should be merged with a base model; you also need to provide the base model name
    adapter_weights: true # set to true if your model has been trained with peft; you also need to provide the base model name
    base_model: "mistralai/Mistral-7B-v0.1" # path to the base model - needs to be specified only if delta_weights or adapter_weights is set to true
  generation:
    multichoice_continuations_start_space: null # If true/false, will force multiple choice continuations to start/not start with a space. If null, will do nothing
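For reference, `delta_weights: true` means the checkpoint stores deltas relative to a base model, so the full weights are reconstructed as base + delta. A hedged sketch of that reconstruction (not lighteval's exact internal code; the delta model path is hypothetical):

```python
import torch
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")
delta = AutoModelForCausalLM.from_pretrained("your-org/your-delta-model")  # hypothetical

# Add the base weights onto the deltas in place: full = base + delta.
with torch.no_grad():
    for name, param in delta.named_parameters():
        param.add_(base.get_parameter(name))
```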
12 changes: 12 additions & 0 deletions examples/model_configs/quantized_model.yaml
@@ -0,0 +1,12 @@
model:
  type: "base"
  base_params:
    model_args: "pretrained=HuggingFaceH4/zephyr-7b-beta,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ...
    dtype: "4bit" # Specifying "4bit" loads the model in 4 bit via BitsAndBytesConfig. The other option is "8bit" quantization.
    compile: true
  merged_weights: # Ignore this section if you are not using PEFT models
    delta_weights: false # set to true if your model should be merged with a base model; you also need to provide the base model name
    adapter_weights: false # set to true if your model has been trained with peft; you also need to provide the base model name
    base_model: null # path to the base model - needs to be specified only if delta_weights or adapter_weights is set to true
  generation:
    multichoice_continuations_start_space: null # If true/false, will force multiple choice continuations to start/not start with a space. If null, will do nothing
4 changes: 2 additions & 2 deletions examples/nanotron/custom_evaluation_tasks.py
@@ -30,11 +30,11 @@
from dataclasses import asdict
from typing import Dict, List, Tuple

-import lighteval.tasks.tasks_prompt_formatting as prompt
+import lighteval.tasks.default_prompts as prompt
from lighteval.metrics import Metrics
+from lighteval.tasks.default_prompts import LETTER_INDICES
from lighteval.tasks.lighteval_task import LightevalTaskConfig
from lighteval.tasks.requests import Doc
-from lighteval.tasks.tasks_prompt_formatting import LETTER_INDICES


_TASKS_STRINGS: List[Tuple[LightevalTaskConfig, str]] = []
6 changes: 3 additions & 3 deletions src/lighteval/__main__.py
@@ -47,7 +47,7 @@ def cli_evaluate():
parser_nanotron(parser_b)

# Subparser for task utils functions
-parser_c = subparsers.add_parser("tasks", help="use nanotron as backend for evaluation.")
+parser_c = subparsers.add_parser("tasks", help="display information about available tasks and samples.")
parser_utils_tasks(parser_c)

args = parser.parse_args()
@@ -69,7 +69,7 @@ def cli_evaluate():
if args.inspect:
    print(f"Loading the tasks dataset to cache folder: {args.cache_dir}")
    print(
-        "All examples will be displayed without few shot, as few shot sample construction requires loading a model and using its tokenizer."
+        "All examples will be displayed without few shot, as few shot sample construction requires loading a model and using its tokenizer. "
    )
# Loading task
task_names_list, _ = taskinfo_selector(args.inspect)
@@ -78,7 +78,7 @@ def cli_evaluate():
print("-" * 10, name, "-" * 10)
if args.show_config:
print("-" * 10, "CONFIG")
task.print_config()
task.cfg.print()
for ix, sample in enumerate(task.eval_docs()[: int(args.num_samples)]):
if ix == 0:
print("-" * 10, "SAMPLES")
146 changes: 0 additions & 146 deletions src/lighteval/evaluator.py

This file was deleted.

8 changes: 7 additions & 1 deletion src/lighteval/logging/evaluation_tracker.py
@@ -30,6 +30,7 @@
from enum import Enum
from pathlib import Path

+import torch
from datasets import Dataset, load_dataset
from datasets.utils.metadata import MetadataConfigs
from huggingface_hub import DatasetCard, DatasetCardData, HfApi, HFSummaryWriter, hf_hub_url
@@ -42,7 +43,8 @@
TaskConfigLogger,
VersionsLogger,
)
-from lighteval.utils import NO_TENSORBOARDX_WARN_MSG, is_nanotron_available, is_tensorboardX_available, obj_to_markdown
+from lighteval.utils.imports import NO_TENSORBOARDX_WARN_MSG, is_nanotron_available, is_tensorboardX_available
+from lighteval.utils.utils import obj_to_markdown


if is_nanotron_available():
@@ -63,6 +65,8 @@ def default(self, o):
    return str(o)
if callable(o):
    return o.__name__
+if isinstance(o, torch.dtype):
+    return str(o)
if isinstance(o, Enum):
    return o.name
return super().default(o)
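For context, a hedged usage sketch of the new `torch.dtype` branch, assuming the enclosing class is lighteval's `EnhancedJSONEncoder` (the class name comes from the surrounding file, not this hunk):

```python
import json

import torch

from lighteval.logging.evaluation_tracker import EnhancedJSONEncoder  # assumed name

# With the branch above, dtypes serialize as strings instead of raising TypeError.
print(json.dumps({"model_dtype": torch.bfloat16}, cls=EnhancedJSONEncoder))
# -> {"model_dtype": "torch.bfloat16"}
```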
@@ -167,6 +171,8 @@ def save(self) -> None:

config_general = copy.deepcopy(self.general_config_logger)
config_general = asdict(config_general)
+# We remove the config from logging, which contains context/accelerator objects
+config_general.pop("config")

to_dump = {
    "config_general": config_general,
2 changes: 1 addition & 1 deletion src/lighteval/logging/hierarchical_logger.py
@@ -26,7 +26,7 @@
from logging import Logger
from typing import Any, Callable

-from lighteval.utils import is_accelerate_available, is_nanotron_available
+from lighteval.utils.imports import is_accelerate_available, is_nanotron_available


if is_nanotron_available():