# configs/method_configs/AutoDan.yaml

# Default hyperparameters for the method. Presumably each experiment entry
# below inherits these unless it overrides a key (e.g. `test` sets its own
# num_steps) -- TODO confirm against the config loader.
default_method_hyperparameters:
  num_steps: 100
  num_test_cases_per_behavior: 1
  # Number of parallel mutations.
  batch_size: 64
  num_elites: 0.1
  crossover: 0.5
  num_points: 5
  mutation: 0.01
  targets_path: ./data/400_behavior_to_target.json

  target_len: -1

  # == Eval config ==
  eval_steps: 10
  # When early_stopping is enabled: stop as soon as
  # loss <= early_stopping_min_loss (checked every step).
  early_stopping: true
  early_stopping_min_loss: 0.05
  # When eval_with_check_refusal is enabled and loss <= check_refusal_min_loss:
  # stop once the generation is not a refusal (checked every eval_steps).
  # NOTE(review): an earlier inline remark on this key read "off" although the
  # value enables the check -- confirm which one is intended.
  eval_with_check_refusal: true
  check_refusal_min_loss: 0.05

  # == Filter config ==
  jb_filter_window_size: 8
  jb_filter_metric_name: perplexity
  jb_filter_threshold: -38276.122748721245
  seed: 0
  adaptive: true

  # Alternative (disabled): GPT mutate model.
  # mutate_target_model:
  #   model_name_or_path: gpt-3.5-turbo-1106
  #   token: <openai_api_token>

  # Active: open-source mutate model.
  mutate_model:
    # model_name_or_path: mistralai/Mistral-7B-Instruct-v0.2
    model_name_or_path: mistralai/Mixtral-8x7B-Instruct-v0.1
    # use_vllm: True
    num_gpus: 1
    dtype: bfloat16
    # Uncomment to use the Together API instead of GPUs; overrides num_gpus.
    # together_api_key: <together_api_key>

  # Alternative (disabled): Mistral-7B mutate model served with vLLM.
  # mutate_model:
  #   model_name_or_path: mistralai/Mistral-7B-Instruct-v0.2
  #   use_vllm: True
  #   num_gpus: 1
  #   dtype: bfloat16
  #   # Uncomment to use the Together API instead of GPUs; overrides num_gpus.
  #   # together_api_key: <together_api_key>

# Template entry: `<model_name1>` appears as a placeholder in both the key and
# the value. Presumably the config loader substitutes a concrete model name and
# resolves `['model']` against that model's own config -- TODO confirm.
<model_name1>:
  target_model: <model_name1>['model']

# Single-step run against Vicuna 7B v1.5 (num_steps overridden to 1).
test:
  num_steps: 1
  target_model:
    model_name_or_path: lmsys/vicuna-7b-v1.5
    use_fast_tokenizer: false
    use_flash_attention_2: true
    num_gpus: 1
    # AutoDan's original implementation has placeholders in its prompts for
    # the model name and for the developer name/company; these fill them.
    model_short_name: Vicuna
    developer_name: LMSYS

# Target: google/gemma-7b-it, fast tokenizer enabled.
gemma_7b_it_fast:
  target_model:
    model_name_or_path: google/gemma-7b-it
    use_fast_tokenizer: true
    model_short_name: Gemma
    developer_name: Google

# Target: google/gemma-2-2b-it in float16, fast tokenizer enabled.
gemma2_2b_it_fast:
  target_model:
    model_name_or_path: google/gemma-2-2b-it
    use_fast_tokenizer: true
    dtype: float16
    model_short_name: Gemma
    developer_name: Google

# Target: google/gemma-2-9b-it in float16, fast tokenizer enabled.
gemma2_9b_it_fast:
  target_model:
    model_name_or_path: google/gemma-2-9b-it
    use_fast_tokenizer: true
    dtype: float16
    model_short_name: Gemma
    developer_name: Google

# Target: lmsys/vicuna-7b-v1.5, slow tokenizer, use_flash_attention_2 set.
vicuna_7b_v1_5:
  target_model:
    model_name_or_path: lmsys/vicuna-7b-v1.5
    use_fast_tokenizer: false
    use_flash_attention_2: true
    # AutoDan's original implementation has placeholders in its prompts for
    # the model name and for the developer name/company; these fill them.
    model_short_name: Vicuna
    developer_name: LMSYS

# Target: lmsys/vicuna-13b-v1.5, slow tokenizer, use_flash_attention_2 set.
vicuna_13b_v1_5:
  target_model:
    model_name_or_path: lmsys/vicuna-13b-v1.5
    use_fast_tokenizer: false
    use_flash_attention_2: true
    num_gpus: 1
    # AutoDan's original implementation has placeholders in its prompts for
    # the model name and for the developer name/company; these fill them.
    model_short_name: Vicuna
    developer_name: LMSYS

# Target: lmsys/vicuna-13b-v1.5 in bfloat16, fast tokenizer enabled.
vicuna_13b_v1_5_fast:
  target_model:
    model_name_or_path: lmsys/vicuna-13b-v1.5
    # Must be a real boolean, as in the other `_fast` entries of this file:
    # a bare word such as `Fast` is loaded by YAML as a string, not a bool.
    use_fast_tokenizer: true
    use_flash_attention_2: true
    dtype: bfloat16
    num_gpus: 1
    # AutoDan's original implementation has placeholders in its prompts for
    # the model name and for the developer name/company; these fill them.
    model_short_name: Vicuna
    developer_name: LMSYS

# Target: HuggingFaceH4/zephyr-7b-beta, slow tokenizer,
# use_flash_attention_2 set.
zephyr_7b:
  target_model:
    model_name_or_path: HuggingFaceH4/zephyr-7b-beta
    use_fast_tokenizer: false
    use_flash_attention_2: true
    model_short_name: Zephyr
    developer_name: Hugging Face

# Target: `zephyr_robust` in bfloat16, slow tokenizer.
# NOTE(review): model_name_or_path is neither an org/name hub id nor an
# absolute path -- presumably resolved relative to the working directory or a
# local registry; confirm.
zephyr_7b_robust:
  target_model:
    model_name_or_path: zephyr_robust
    use_fast_tokenizer: false
    dtype: bfloat16
    model_short_name: Zephyr
    developer_name: Hugging Face

# Target: cais/zephyr_7b_r2d2 in bfloat16, fast tokenizer enabled.
r2d2_fast:
  target_model:
    model_name_or_path: cais/zephyr_7b_r2d2
    use_fast_tokenizer: true
    dtype: bfloat16
    model_short_name: Zephyr
    developer_name: Hugging Face


# Target: berkeley-nest/Starling-LM-7B-alpha in bfloat16, fast tokenizer.
# NOTE(review): the `starling_7b` entry for the same checkpoint uses
# model_short_name GPT-4 / developer_name OpenAI, whereas this entry uses
# Starling / Hugging Face -- confirm which pair is intended.
starling_7b_fast:
  target_model:
    use_fast_tokenizer: true
    model_name_or_path: berkeley-nest/Starling-LM-7B-alpha
    dtype: bfloat16
    model_short_name: Starling
    developer_name: Hugging Face


# Target: Koala 7B loaded from a local path, bfloat16, slow tokenizer.
koala_7b:
  target_model:
    model_name_or_path: /data/private_models/harmbench/models/koala-7B
    use_fast_tokenizer: false
    dtype: bfloat16
    developer_name: UC Berkeley
    model_short_name: Koala

# Target: Koala 13B loaded from a local path, bfloat16, slow tokenizer.
koala_13b:
  target_model:
    model_name_or_path: /data/private_models/harmbench/models/koala-13B
    use_fast_tokenizer: false
    dtype: bfloat16
    developer_name: UC Berkeley
    model_short_name: Koala

# Target: berkeley-nest/Starling-LM-7B-alpha in bfloat16, slow tokenizer,
# use_flash_attention_2 set.
starling_7b:
  target_model:
    model_name_or_path: berkeley-nest/Starling-LM-7B-alpha
    use_fast_tokenizer: false
    use_flash_attention_2: true
    # Name is set to GPT-4 on purpose: try asking the model `who are you?`
    model_short_name: GPT-4
    developer_name: OpenAI
    dtype: bfloat16

# Target: openchat-3.5-1210 loaded from a local path, bfloat16, slow
# tokenizer, use_flash_attention_2 set.
openchat_3_5_1210:
  target_model:
    model_name_or_path: /data/private_models/harmbench/models/openchat-3.5-1210
    use_fast_tokenizer: false
    use_flash_attention_2: true
    # Name is set to GPT-4 on purpose: try asking the model `who are you?`
    model_short_name: GPT-4
    developer_name: OpenAI
    dtype: bfloat16


# Target: meta-llama/Llama-2-7b-chat-hf in bfloat16, slow tokenizer.
# Sets eval_steps to 25 and num_test_cases_per_behavior to 1 at entry level.
llama2_7b:
  num_test_cases_per_behavior: 1
  eval_steps: 25
  target_model:
    model_name_or_path: meta-llama/Llama-2-7b-chat-hf
    use_fast_tokenizer: false
    num_gpus: 1
    dtype: bfloat16
    # AutoDan's original implementation has placeholders in its prompts for
    # the model name and for the developer name/company; these fill them.
    model_short_name: Llama-2
    developer_name: Meta

# Target: meta-llama/Llama-2-7b-chat-hf, fast tokenizer enabled.
llama2_7b_fast:
  target_model:
    # Local-checkout alternative (disabled):
    # model_name_or_path: /fast/apanfilov/huggingface/models--meta-llama--Llama-2-7b-chat-hf
    model_name_or_path: meta-llama/Llama-2-7b-chat-hf
    use_fast_tokenizer: true
    num_gpus: 1
    # AutoDan's original implementation has placeholders in its prompts for
    # the model name and for the developer name/company; these fill them.
    model_short_name: Llama-2
    developer_name: Meta

# Target: meta-llama/Llama-2-13b-chat-hf on 2 GPUs, slow tokenizer.
# Sets eval_steps to 25 at entry level.
llama2_13b:
  eval_steps: 25
  target_model:
    model_name_or_path: meta-llama/Llama-2-13b-chat-hf
    use_fast_tokenizer: false
    num_gpus: 2
    # AutoDan's original implementation has placeholders in its prompts for
    # the model name and for the developer name/company; these fill them.
    model_short_name: Llama-2
    developer_name: Meta

# Target: meta-llama/Llama-2-13b-chat-hf in bfloat16 on 2 GPUs, fast tokenizer.
llama2_13b_fast:
  target_model:
    model_name_or_path: meta-llama/Llama-2-13b-chat-hf
    use_fast_tokenizer: true
    num_gpus: 2
    dtype: bfloat16
    # AutoDan's original implementation has placeholders in its prompts for
    # the model name and for the developer name/company; these fill them.
    model_short_name: Llama-2
    developer_name: Meta

# Target: Llama-2 70B chat loaded from a local path, bfloat16 on 2 GPUs,
# slow tokenizer. Sets eval_steps to 25 at entry level.
llama2_70b:
  eval_steps: 25
  target_model:
    model_name_or_path: /data/private_models/llama2/llama-2-weights-hf-70b-chat
    use_fast_tokenizer: false
    num_gpus: 2
    dtype: bfloat16
    # AutoDan's original implementation has placeholders in its prompts for
    # the model name and for the developer name/company; these fill them.
    model_short_name: Llama-2
    developer_name: Meta

# Target: meta-llama/Meta-Llama-3-8B-Instruct in bfloat16, fast tokenizer.
llama3_8b_fast:
  target_model:
    model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
    use_fast_tokenizer: true
    dtype: bfloat16
    num_gpus: 1
    # AutoDan's original implementation has placeholders in its prompts for
    # the model name and for the developer name/company; these fill them.
    model_short_name: Llama-3
    developer_name: Meta

# Target: meta-llama/Meta-Llama-3.1-8B-Instruct in bfloat16, fast tokenizer.
llama3_1_8b_fast:
  target_model:
    model_name_or_path: meta-llama/Meta-Llama-3.1-8B-Instruct
    use_fast_tokenizer: true
    dtype: bfloat16
    num_gpus: 1
    # AutoDan's original implementation has placeholders in its prompts for
    # the model name and for the developer name/company; these fill them.
    model_short_name: Llama-3
    developer_name: Meta

# Target: meta-llama/Llama-3.2-1B-Instruct in float16, fast tokenizer.
llama3_2_1b_fast:
  target_model:
    model_name_or_path: meta-llama/Llama-3.2-1B-Instruct
    use_fast_tokenizer: true
    dtype: float16
    model_short_name: Llama-3
    developer_name: Meta

# Target: meta-llama/Llama-3.2-3B-Instruct in float16, fast tokenizer.
llama3_2_3b_fast:
  target_model:
    model_name_or_path: meta-llama/Llama-3.2-3B-Instruct
    use_fast_tokenizer: true
    dtype: float16
    model_short_name: Llama-3
    developer_name: Meta

# Target: microsoft/Orca-2-7b in bfloat16, slow tokenizer,
# use_flash_attention_2 set.
orca_2_7b:
  target_model:
    model_name_or_path: microsoft/Orca-2-7b
    use_fast_tokenizer: false
    use_flash_attention_2: true
    num_gpus: 1
    dtype: bfloat16
    # AutoDan's original implementation has placeholders in its prompts for
    # the model name and for the developer name/company; these fill them.
    model_short_name: Orca
    developer_name: Microsoft


# Target: microsoft/Orca-2-13b in bfloat16, slow tokenizer,
# use_flash_attention_2 set.
orca_2_13b:
  target_model:
    model_name_or_path: microsoft/Orca-2-13b
    use_fast_tokenizer: false
    use_flash_attention_2: true
    num_gpus: 1
    dtype: bfloat16
    # AutoDan's original implementation has placeholders in its prompts for
    # the model name and for the developer name/company; these fill them.
    model_short_name: Orca
    developer_name: Microsoft

# Target: Baichuan2 7B chat loaded from a local path, bfloat16 on 2 GPUs,
# slow tokenizer, trust_remote_code set.
baichuan2_7b:
  target_model:
    model_name_or_path: /data/private_models/baichuan2_fixed/baichuan2-7b-chat
    use_fast_tokenizer: false
    dtype: bfloat16
    trust_remote_code: true
    model_short_name: Baichuan
    developer_name: Baichuan
    num_gpus: 2
  # Entry-level setting (sibling of target_model, not nested inside it).
  forward_batch_size: 16

# Target: Baichuan2 13B chat loaded from a local path, bfloat16 on 2 GPUs,
# slow tokenizer, trust_remote_code set, device_map balanced_low_0.
baichuan2_13b:
  target_model:
    model_name_or_path: /data/private_models/baichuan2_fixed/baichuan2-13b-chat
    use_fast_tokenizer: false
    dtype: bfloat16
    trust_remote_code: true
    model_short_name: Baichuan
    developer_name: Baichuan
    device_map: balanced_low_0
    num_gpus: 2
  # forward_batch_size is pinned because the implementation keeps a lot of
  # cache after each auto_find_batch_size OOM; see
  # https://github.com/QwenLM/Qwen/issues/538
  forward_batch_size: 8


# Target: upstage/SOLAR-10.7B-Instruct-v1.0 in float16, slow tokenizer,
# trust_remote_code set.
solar_10_7b_instruct:
  target_model:
    model_name_or_path: upstage/SOLAR-10.7B-Instruct-v1.0
    use_fast_tokenizer: false
    dtype: float16
    trust_remote_code: true
    model_short_name: GPT
    developer_name: OpenAI
    num_gpus: 1


# Target: mistralai/Mistral-7B-Instruct-v0.2 in bfloat16, slow tokenizer,
# trust_remote_code set.
mistral_7b_v2:
  target_model:
    model_name_or_path: mistralai/Mistral-7B-Instruct-v0.2
    use_fast_tokenizer: false
    dtype: bfloat16
    trust_remote_code: true
    model_short_name: Mistral
    developer_name: Mistral AI
    num_gpus: 1

# Target: mistralai/Mixtral-8x7B-Instruct-v0.1 in bfloat16 on 2 GPUs,
# slow tokenizer, trust_remote_code set.
mixtral_8x7b:
  target_model:
    model_name_or_path: mistralai/Mixtral-8x7B-Instruct-v0.1
    use_fast_tokenizer: false
    dtype: bfloat16
    trust_remote_code: true
    model_short_name: Mistral
    developer_name: Mistral AI
    num_gpus: 2


# Target: Qwen 7B chat loaded from a local path, bfloat16 on 2 GPUs,
# fast tokenizer, trust_remote_code and use_flash_attn set.
qwen_7b_chat:
  target_model:
    model_name_or_path: /data/private_models/harmbench/models/Qwen-7B-Chat
    use_fast_tokenizer: true
    trust_remote_code: true
    use_flash_attn: true
    dtype: bfloat16
    # Special tokens are quoted so the `<`, `|`, `>` characters stay literal.
    pad_token: '<|extra_0|>'
    eos_token: '<|endoftext|>'
    model_short_name: QianWen
    developer_name: Alibaba Cloud
    num_gpus: 2
  # forward_batch_size is pinned because the Qwen implementation keeps a lot of
  # cache after each auto_find_batch_size OOM; see
  # https://github.com/QwenLM/Qwen/issues/538
  forward_batch_size: 16

# Target: Qwen 14B chat loaded from a local path, bfloat16 on 2 GPUs with
# device_map auto, fast tokenizer, trust_remote_code and use_flash_attn set.
qwen_14b_chat:
  target_model:
    model_name_or_path: /data/private_models/harmbench/models/Qwen-14B-Chat
    use_fast_tokenizer: true
    trust_remote_code: true
    use_flash_attn: true
    dtype: bfloat16
    # Special tokens are quoted so the `<`, `|`, `>` characters stay literal.
    pad_token: '<|extra_0|>'
    eos_token: '<|endoftext|>'
    model_short_name: QianWen
    developer_name: Alibaba Cloud
    num_gpus: 2
    device_map: auto
  # Entry-level setting (sibling of target_model, not nested inside it).
  forward_batch_size: 8

# Target: Qwen 72B chat loaded from a local path, bfloat16 on 3 GPUs,
# fast tokenizer, trust_remote_code set.
qwen_72b_chat:
  target_model:
    model_name_or_path: /data/private_models/harmbench/models/Qwen-72B-Chat
    use_fast_tokenizer: true
    trust_remote_code: true
    dtype: bfloat16
    # Special tokens are quoted so the `<`, `|`, `>` characters stay literal.
    pad_token: '<|extra_0|>'
    eos_token: '<|endoftext|>'
    model_short_name: QianWen
    developer_name: Alibaba Cloud
    num_gpus: 3
  # Entry-level setting (sibling of target_model, not nested inside it).
  forward_batch_size: 8