# configs/model_configs/models.yaml

# StableLM Zephyr
# stabilityai/stablelm-zephyr-3b, fast tokenizer flag on, num_gpus 1; no dtype is set here.
stablelm_zephyr_3b:
  model:
    model_name_or_path: stabilityai/stablelm-zephyr-3b
    use_fast_tokenizer: true
  num_gpus: 1

# Phi-3
# microsoft/Phi-3-small-8k-instruct with the fast tokenizer flag on.
# trust_remote_code is enabled for this entry; no dtype is set.
phi_3_small_8k_instruct_fast:
  model:
    model_name_or_path: microsoft/Phi-3-small-8k-instruct
    use_fast_tokenizer: true
    trust_remote_code: true
  num_gpus: 1

# Starling Nexusflow
# Nexusflow/Starling-LM-7B-beta, fast tokenizer flag on, num_gpus 1; no dtype is set here.
starling_lm_7B_beta_fast:
  model:
    model_name_or_path: Nexusflow/Starling-LM-7B-beta
    use_fast_tokenizer: true
  num_gpus: 1

# Starling
# berkeley-nest/Starling-LM-7B-alpha, fast tokenizer flag on, num_gpus 1; no dtype is set here.
starling_lm_7B_alpha_fast:
  model:
    model_name_or_path: berkeley-nest/Starling-LM-7B-alpha
    use_fast_tokenizer: true
  num_gpus: 1

# Gemma IT
# google/gemma-2-2b-it with the fast tokenizer flag off; dtype float16, num_gpus 1.
gemma2_2b_it:
  model:
    model_name_or_path: google/gemma-2-2b-it
    use_fast_tokenizer: false
    dtype: float16
  num_gpus: 1

# google/gemma-7b-it with the fast tokenizer flag on; dtype float16, num_gpus 1.
gemma_7b_it_fast:
  model:
    model_name_or_path: google/gemma-7b-it
    use_fast_tokenizer: true
    dtype: float16
  num_gpus: 1

# google/gemma-7b-it with the fast tokenizer flag off; dtype float16, num_gpus 1.
gemma_7b_it:
  model:
    model_name_or_path: google/gemma-7b-it
    use_fast_tokenizer: false
    dtype: float16
  num_gpus: 1

# google/gemma-2-2b-it with the fast tokenizer flag on; dtype float16, num_gpus 1.
gemma2_2b_it_fast:
  model:
    model_name_or_path: google/gemma-2-2b-it
    use_fast_tokenizer: true
    dtype: float16
  num_gpus: 1
  
# google/gemma-2-9b-it with the fast tokenizer flag on; dtype float16, num_gpus 1.
gemma2_9b_it_fast:
  model:
    model_name_or_path: google/gemma-2-9b-it
    use_fast_tokenizer: true
    dtype: float16
  num_gpus: 1
# Llama-3-8B-Instruct-RR

# GraySwanAI/Llama-3-8B-Instruct-RR with the fast tokenizer flag on.
# An explicit pad_token string is configured for this entry.
llama3_8b_RR_fast:
  model:
    model_name_or_path: GraySwanAI/Llama-3-8B-Instruct-RR
    use_fast_tokenizer: true
    pad_token: "<pad>"
    dtype: float16
  num_gpus: 1

# Llama 3

# meta-llama/Meta-Llama-3-8B-Instruct with the fast tokenizer flag on.
# An explicit pad_token string is configured for this entry.
llama3_8b_fast:
  model:
    model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
    use_fast_tokenizer: true
    pad_token: "<pad>"
    dtype: float16
  num_gpus: 1

# meta-llama/Meta-Llama-3.1-8B-Instruct, fast tokenizer flag on; no pad_token override here.
llama3_1_8b_fast:
  model:
    model_name_or_path: meta-llama/Meta-Llama-3.1-8B-Instruct
    use_fast_tokenizer: true
    dtype: float16
  num_gpus: 1

# meta-llama/Llama-3.2-1B-Instruct, fast tokenizer flag on; dtype float16, num_gpus 1.
llama3_2_1b_fast:
  model:
    model_name_or_path: meta-llama/Llama-3.2-1B-Instruct
    use_fast_tokenizer: true
    dtype: float16
  num_gpus: 1

# meta-llama/Llama-3.2-3B-Instruct, fast tokenizer flag on; dtype float16, num_gpus 1.
llama3_2_3b_fast:
  model:
    model_name_or_path: meta-llama/Llama-3.2-3B-Instruct
    use_fast_tokenizer: true
    dtype: float16
  num_gpus: 1
  
# meta-llama/Meta-Llama-3-8B-Instruct with the fast tokenizer flag off.
# An explicit pad_token string is configured for this entry.
llama3_8b:
  model:
    model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
    use_fast_tokenizer: false
    pad_token: "<pad>"
    dtype: float16
  num_gpus: 1

# meta-llama/Meta-Llama-3-70B with the fast tokenizer flag on; num_gpus 2.
# NOTE(review): this repo id has no "-Instruct" suffix, unlike the other Llama 3
# entries in this file — confirm the non-Instruct checkpoint is intended.
llama3_70b_fast:
  model:
    model_name_or_path: meta-llama/Meta-Llama-3-70B
    use_fast_tokenizer: true
    dtype: float16
  num_gpus: 2

# Llama 2

# meta-llama/Llama-2-7b-chat-hf with the fast tokenizer flag on; dtype float16, num_gpus 1.
llama2_7b_fast:
  model:
    model_name_or_path: meta-llama/Llama-2-7b-chat-hf
    use_fast_tokenizer: true
    dtype: float16
  num_gpus: 1

# meta-llama/Llama-2-7b-chat-hf with the fast tokenizer flag off; dtype float16, num_gpus 1.
llama2_7b:
  model:
    model_name_or_path: meta-llama/Llama-2-7b-chat-hf
    use_fast_tokenizer: false
    dtype: float16
  num_gpus: 1

# meta-llama/Llama-2-13b-chat-hf with the fast tokenizer flag on; dtype float16, num_gpus 1.
llama2_13b_fast:
  model:
    model_name_or_path: meta-llama/Llama-2-13b-chat-hf
    use_fast_tokenizer: true
    dtype: float16
  num_gpus: 1

# meta-llama/Llama-2-13b-chat-hf with the fast tokenizer flag off; dtype float16, num_gpus 1.
llama2_13b:
  model:
    model_name_or_path: meta-llama/Llama-2-13b-chat-hf
    use_fast_tokenizer: false
    dtype: float16
  num_gpus: 1

# meta-llama/Llama-2-70b-chat-hf with the fast tokenizer flag on; dtype float16, num_gpus 2.
llama2_70b_fast:
  model:
    model_name_or_path: meta-llama/Llama-2-70b-chat-hf
    use_fast_tokenizer: true
    dtype: float16
  num_gpus: 2

# meta-llama/Llama-2-70b-chat-hf with the fast tokenizer flag off; dtype float16, num_gpus 2.
llama2_70b:
  model:
    model_name_or_path: meta-llama/Llama-2-70b-chat-hf
    use_fast_tokenizer: false
    dtype: float16
  num_gpus: 2

# Entry named `test`, pointing at lmsys/vicuna-7b-v1.5 with the fast tokenizer flag off.
# NOTE(review): settings are identical to vicuna_7b_v1_5 — presumably a scratch
# alias; confirm it is still needed.
test:
  model:
    model_name_or_path: lmsys/vicuna-7b-v1.5
    use_fast_tokenizer: false
    dtype: float16
  num_gpus: 1

# lmsys/vicuna-7b-v1.5 with the fast tokenizer flag on; dtype float16, num_gpus 1.
vicuna_7b_v1_5_fast:
  model:
    model_name_or_path: lmsys/vicuna-7b-v1.5
    use_fast_tokenizer: true
    dtype: float16
  num_gpus: 1

# lmsys/vicuna-7b-v1.5 with the fast tokenizer flag off; dtype float16, num_gpus 1.
vicuna_7b_v1_5:
  model:
    model_name_or_path: lmsys/vicuna-7b-v1.5
    use_fast_tokenizer: false
    dtype: float16
  num_gpus: 1

# lmsys/vicuna-13b-v1.5 with the fast tokenizer flag on; dtype float16, num_gpus 1.
vicuna_13b_v1_5_fast:
  model:
    model_name_or_path: lmsys/vicuna-13b-v1.5
    use_fast_tokenizer: true
    dtype: float16
  num_gpus: 1

# lmsys/vicuna-13b-v1.5 with the fast tokenizer flag off; dtype float16, num_gpus 1.
vicuna_13b_v1_5:
  model:
    model_name_or_path: lmsys/vicuna-13b-v1.5
    use_fast_tokenizer: false
    dtype: float16
  num_gpus: 1

# TheBloke/koala-7B-HF with the fast tokenizer flag off; dtype bfloat16, num_gpus 1.
koala_7b:
  model:
    model_name_or_path: TheBloke/koala-7B-HF
    use_fast_tokenizer: false
    dtype: bfloat16
  num_gpus: 1

# TheBloke/koala-13B-HF with the fast tokenizer flag off; dtype bfloat16, num_gpus 1.
koala_13b:
  model:
    model_name_or_path: TheBloke/koala-13B-HF
    use_fast_tokenizer: false
    dtype: bfloat16
  num_gpus: 1

# === Orca ===
# microsoft/Orca-2-7b with the fast tokenizer flag off; dtype bfloat16, num_gpus 1.
orca_2_7b:
  model:
    model_name_or_path: microsoft/Orca-2-7b
    use_fast_tokenizer: false
    dtype: bfloat16
  num_gpus: 1

# microsoft/Orca-2-13b with the fast tokenizer flag off; dtype bfloat16, num_gpus 1.
orca_2_13b:
  model:
    model_name_or_path: microsoft/Orca-2-13b
    use_fast_tokenizer: false
    dtype: bfloat16
  num_gpus: 1

# === SOLAR ===
# upstage/SOLAR-10.7B-Instruct-v1.0 with the fast tokenizer flag off; dtype float16, num_gpus 1.
solar_10_7b_instruct:
  model:
    model_name_or_path: upstage/SOLAR-10.7B-Instruct-v1.0
    use_fast_tokenizer: false
    dtype: float16
  num_gpus: 1

# openchat/openchat-3.5-1210 with the fast tokenizer flag off; dtype bfloat16, num_gpus 1.
openchat_3_5_1210:
  model:
    model_name_or_path: openchat/openchat-3.5-1210
    use_fast_tokenizer: false
    dtype: bfloat16
  num_gpus: 1

# berkeley-nest/Starling-LM-7B-alpha with the fast tokenizer flag on and dtype float16.
# NOTE(review): starling_lm_7B_alpha_fast targets the same repo without a dtype —
# confirm both aliases are needed.
starling_7b_fast:
  model:
    model_name_or_path: berkeley-nest/Starling-LM-7B-alpha
    use_fast_tokenizer: true
    dtype: float16
  num_gpus: 1

# Mistral / Mixtral

# mistralai/Mistral-7B-Instruct-v0.2 with the fast tokenizer flag on; dtype bfloat16, num_gpus 1.
mistral_7b_v2_fast:
  model:
    model_name_or_path: mistralai/Mistral-7B-Instruct-v0.2
    use_fast_tokenizer: true
    dtype: bfloat16
  num_gpus: 1

# mistralai/Mistral-7B-Instruct-v0.2 with the fast tokenizer flag off; dtype bfloat16, num_gpus 1.
mistral_7b_v2:
  model:
    model_name_or_path: mistralai/Mistral-7B-Instruct-v0.2
    use_fast_tokenizer: false
    dtype: bfloat16
  num_gpus: 1

# mistralai/Mixtral-8x7B-Instruct-v0.1 with the fast tokenizer flag on; dtype bfloat16, num_gpus 2.
mixtral_8x7b_fast:
  model:
    model_name_or_path: mistralai/Mixtral-8x7B-Instruct-v0.1
    use_fast_tokenizer: true
    dtype: bfloat16
  num_gpus: 2

# mistralai/Mixtral-8x7B-Instruct-v0.1 with the fast tokenizer flag off; dtype bfloat16, num_gpus 2.
mixtral_8x7b:
  model:
    model_name_or_path: mistralai/Mixtral-8x7B-Instruct-v0.1
    use_fast_tokenizer: false
    dtype: bfloat16
  num_gpus: 2

# Zephyr
# HuggingFaceH4/zephyr-7b-beta with the fast tokenizer flag off; dtype bfloat16, num_gpus 1.
zephyr_7b:
  model:
    model_name_or_path: HuggingFaceH4/zephyr-7b-beta
    use_fast_tokenizer: false
    dtype: bfloat16
  num_gpus: 1

# cais/zephyr_7b_r2d2 with the fast tokenizer flag on; dtype float16, num_gpus 1.
r2d2_fast:
  model:
    model_name_or_path: cais/zephyr_7b_r2d2
    use_fast_tokenizer: true
    dtype: float16
  num_gpus: 1

# Zephyr 7B checkpoint loaded from an absolute filesystem path rather than a repo id.
# NOTE(review): the path is machine-specific and its name suggests an
# adversarial-training run — confirm it exists wherever this config is used.
zephyr_7b_robust:
  model:
    model_name_or_path: /data/private_models/harmbench/adv_training/data2/zephyr-7b-sft-full_adv_training6/step_2000
    use_fast_tokenizer: false
    dtype: bfloat16
  # Stated explicitly, matching the other 7B entries in this file, instead of
  # relying on whatever default the consumer applies when the key is absent.
  num_gpus: 1


# Baichuan 2
# baichuan-inc/Baichuan2-7B-Chat with the fast tokenizer flag off; dtype bfloat16.
# trust_remote_code is enabled for this entry.
baichuan2_7b:
  model:
    model_name_or_path: baichuan-inc/Baichuan2-7B-Chat
    use_fast_tokenizer: false
    dtype: bfloat16
    trust_remote_code: true
  num_gpus: 1

# baichuan-inc/Baichuan2-13B-Chat with the fast tokenizer flag off; dtype bfloat16.
# trust_remote_code is enabled for this entry.
baichuan2_13b:
  model:
    model_name_or_path: baichuan-inc/Baichuan2-13B-Chat
    use_fast_tokenizer: false
    dtype: bfloat16
    trust_remote_code: true
  num_gpus: 1

# Qwen
# Qwen/Qwen-7B-Chat with the fast tokenizer flag on and trust_remote_code enabled.
# pad_token and eos_token are set explicitly; they are quoted so the special
# characters are unmistakably part of a string.
qwen_7b_chat:
  model:
    model_name_or_path: Qwen/Qwen-7B-Chat
    use_fast_tokenizer: true
    dtype: bfloat16
    pad_token: "<|extra_0|>"
    eos_token: "<|im_end|>"
    trust_remote_code: true
  num_gpus: 1

# Qwen/Qwen-14B-Chat with the fast tokenizer flag on and trust_remote_code enabled.
# pad_token and eos_token are set explicitly; they are quoted so the special
# characters are unmistakably part of a string.
qwen_14b_chat:
  model:
    model_name_or_path: Qwen/Qwen-14B-Chat
    use_fast_tokenizer: true
    trust_remote_code: true
    dtype: bfloat16
    pad_token: "<|extra_0|>"
    eos_token: "<|im_end|>"
  num_gpus: 1

# Qwen/Qwen-72B-Chat with the fast tokenizer flag on and trust_remote_code enabled; num_gpus 4.
# pad_token and eos_token are set explicitly; they are quoted so the special
# characters are unmistakably part of a string.
qwen_72b_chat:
  model:
    model_name_or_path: Qwen/Qwen-72B-Chat
    use_fast_tokenizer: true
    trust_remote_code: true
    dtype: bfloat16
    pad_token: "<|extra_0|>"
    eos_token: "<|im_end|>"
  num_gpus: 4

# ======== API Models =====
# TODO: remove tokens before release
# API entry; `token` holds the literal placeholder string "token", not a real credential.
gpt-3.5-turbo-1106:
  model:
    model_name_or_path: "gpt-3.5-turbo-1106"
    token: token

# API entry; `token` holds the literal placeholder string "token", not a real credential.
gpt-3.5-turbo-0613:
  model:
    model_name_or_path: "gpt-3.5-turbo-0613"
    token: token

# API entry; `token` holds the literal placeholder string "token", not a real credential.
gpt-4-1106-preview:
  model:
    model_name_or_path: "gpt-4-1106-preview"
    token: token
  
# API entry; `token` holds the literal placeholder string "token", not a real credential.
gpt-4-0613:
  model:
    model_name_or_path: "gpt-4-0613"
    token: token

# API entry; `token` holds the literal placeholder string "token", not a real credential.
gpt-4-vision-preview:
  model:
    model_name_or_path: "gpt-4-vision-preview"
    token: token


# API entry; `token` holds the literal placeholder string "token", not a real credential.
claude-instant-1:
  model:
    model_name_or_path: "claude-instant-1"
    token: token
  
# API entry; `token` holds the literal placeholder string "token", not a real credential.
# The model id is quoted so its version-like suffix is always read as text.
claude-2.1:
  model:
    model_name_or_path: "claude-2.1"
    token: token
  
# API entry; `token` holds the literal placeholder string "token", not a real credential.
claude-2:
  model:
    model_name_or_path: "claude-2"
    token: token

# API entry; the alias `gemini` maps to the model id gemini-pro.
# `token` holds the literal placeholder string "token", not a real credential.
gemini:
  model:
    model_name_or_path: "gemini-pro"
    token: token

# API entry for mistral-medium. Settings are nested under `model`, the same key
# every other entry in this file uses, so consumers looking up `model` find them.
# `token` holds the literal placeholder string "token", not a real credential.
mistral-medium:
  model:
    model_name_or_path: mistral-medium
    token: token

# Entry identified by name only; no token, dtype or num_gpus is configured here.
LLaVA:
  model:
    model_name_or_path: "LLaVA"

# Entry identified by name only; no token, dtype or num_gpus is configured here.
InstructBLIP:
  model:
    model_name_or_path: "InstructBLIP"

# Entry identified by name only; no token, dtype or num_gpus is configured here.
# Distinct from the lowercase qwen_*_chat entries, which carry full model settings.
QWen:
  model:
    model_name_or_path: "QWen"

# Entry identified by name only; no token, dtype or num_gpus is configured here.
GPT4V:
  model:
    model_name_or_path: "GPT4V"

# adv trained model