# This file holds a list of scenarios that may be run.
# - To limit a run to a subset of the scenarios, use the --run-only-scenarios flag.
# - Each scenario is run against a particular acceleration framework
#   configuration if the framework_config: key is specified.
# - The arguments key holds the arguments to be passed to sft_trainer.
#   * Each argument takes a single value, except model_name_or_path, which
#     accepts a list of models.
# - Anything that is critical for the scenario MUST be specified here
#   and not in the defaults, e.g. fp16.
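#
# A minimal invocation sketch, for illustration only. The entrypoint name
# benchmark.py is an assumption (only the --run-only-scenarios flag is
# documented above); substitute the actual benchmark script:
#   python benchmark.py --run-only-scenarios full-finetuning
#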
# This stanza will be used in the future to replace the custom processing functions in data_processing.py:
# data_processing:
#   dataset_name: yahma/alpaca-cleaned
#   chat_template: |
#     {%- for message in messages %}
#     {% if message['input'] != '' %}
#     Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
#     {% else %}
#     Below is an instruction that describes a task. Write a response that appropriately completes the request.
#     {% endif %}
#     ### Instruction:
#     {{ message['instruction'] }}
#     {% if message['input'] != '' %}
#     ### Input:
#     {{ message['input'] }}
#     {% endif %}
#     ### Response:
#     {{ message['output'] + eos_token }}
#     {% endfor %}
#   tokenize: True
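#
# For illustration, a record with an empty input field, e.g. (sample record
# invented here, not taken from the dataset)
#   {"instruction": "Name three primary colors.", "input": "", "output": "Red, yellow, and blue."}
# would take the else branch of the template above and render roughly as:
#   Below is an instruction that describes a task. Write a response that appropriately completes the request.
#   ### Instruction:
#   Name three primary colors.
#   ### Response:
#   Red, yellow, and blue.</s>
# where </s> stands in for the model's actual eos_token.
#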
scenarios:
  - name: full-finetuning
    framework_config:
      - # a null entry runs the scenario without any acceleration framework
      - foak-fast-kernels
    arguments:
      learning_rate: 2e-5
      model_name_or_path:
        - 'bigcode/gpt_bigcode-santacoder'
        - 'mistralai/Mistral-7B-v0.1'
        - 'mistralai/Mixtral-8x7B-Instruct-v0.1'
        - 'NousResearch/Llama-2-70b-hf'
      torch_dtype: bfloat16
  - name: standard-peft
    framework_config:
      - # a null entry runs the scenario without any acceleration framework
      - foak-fast-kernels
    arguments:
      learning_rate: 2e-4
      torch_dtype: bfloat16
      peft_method: lora
      r: 16
      lora_alpha: 16
      lora_dropout: 0.1
      target_modules: ["q_proj", "k_proj", "v_proj", "o_proj"]
      model_name_or_path:
        - 'mistralai/Mistral-7B-v0.1'
        - 'mistralai/Mixtral-8x7B-Instruct-v0.1'
        - 'NousResearch/Llama-2-70b-hf'
  - name: baseline-peft-bnb
    framework_config:
      - baseline-peft-bnb
    arguments:
      bf16: True
      learning_rate: 2e-4
      torch_dtype: bfloat16
      peft_method: lora
      r: 16
      lora_alpha: 16
      lora_dropout: 0.1
      target_modules: ["q_proj", "k_proj", "v_proj", "o_proj"]
      model_name_or_path:
        - 'mistralai/Mistral-7B-v0.1'
        - 'mistralai/Mixtral-8x7B-Instruct-v0.1'
        - 'NousResearch/Llama-2-70b-hf'
  - name: accelerated-peft-bnb
    framework_config:
      - accelerated-peft-bnb
      - accelerated-peft-bnb-foak
    arguments:
      bf16: True
      learning_rate: 2e-4
      torch_dtype: bfloat16
      peft_method: lora
      r: 16
      lora_alpha: 16
      lora_dropout: 0.1
      target_modules: ["q_proj", "k_proj", "v_proj", "o_proj", "c_attn"]
      model_name_or_path:
        - 'bigcode/gpt_bigcode-santacoder'
        - 'mistralai/Mistral-7B-v0.1'
        - 'mistralai/Mixtral-8x7B-Instruct-v0.1'
        - 'NousResearch/Llama-2-70b-hf'
  - name: accelerated-peft-gptq
    framework_config:
      - accelerated-peft-autogptq
      - accelerated-peft-autogptq-foak
    arguments:
      learning_rate: 2e-4
      fp16: True # running gptq-lora in float16 is more performant, see issue
      torch_dtype: float16 # https://github.com/foundation-model-stack/fms-acceleration/issues/84
      peft_method: lora
      r: 16
      lora_alpha: 16
      lora_dropout: 0.1
      target_modules: ["q_proj", "k_proj", "v_proj", "o_proj"]
      model_name_or_path:
        - 'TheBloke/Mistral-7B-v0.1-GPTQ'
        - 'TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ'
        - 'TheBloke/Llama-2-70B-GPTQ'