example-train-medalpaca.sh
#!/bin/bash
#SBATCH --job-name=alpaca-7 # Specify job name
#SBATCH --partition=pgpu # Specify partition name
#SBATCH --mem=0 # Use entire memory of node
#SBATCH --gres=gpu:8 # Generic resources; 8 GPUs
#SBATCH --exclusive # Do not share node
#SBATCH --time=48:00:00 # Set a limit on the total run time
#SBATCH --output=logs_alp-7.o%j # File name for standard output
#SBATCH --error=errors_alp-7.e%j # File name for standard error output
cd /path/to/gitrepo
# activate conda environment
source /home/user/miniconda3/etc/profile.d/conda.sh
conda activate medalpaca
# it is recommended to set the HF cache dir manually, as the downloaded model files are huge
export HF_HOME="/path/to/your/hfcache"
# feel free to adapt the command below to run the training in 8-bit with LoRA,
# fp16 with LoRA, or bf16 with FSDP (see the commented-out 8-bit LoRA variant after the command)
# note: with 8 GPUs and --per_device_batch_size 4, a --global_batch_size of 256
# effectively corresponds to gradient accumulation over 256 / (8 * 4) = 8 steps per update
torchrun --nproc_per_node=8 --master_port=9876 medalpaca/train.py \
--model 'decapoda-research/llama-7b-hf' \
--data_path 'medical_meadow_small.json' \
--output_dir './lora-alpaca-7b' \
--train_in_8bit False \
--use_lora False \
--bf16 True \
--tf32 True \
--fp16 False \
--gradient_checkpointing True \
--global_batch_size 256 \
--per_device_batch_size 4 \
--wandb_project 'medalpaca' \
--use_wandb False
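# The comment above names 8-bit with LoRA as one alternative; the commented-out
# command below is a minimal sketch of that variant. It reuses only flags already
# present in this script and flips the relevant booleans; the exact combination
# (fp16 alongside 8-bit, the output dir name, keeping gradient checkpointing) is
# an illustrative assumption, not a tested configuration.
#
# torchrun --nproc_per_node=8 --master_port=9876 medalpaca/train.py \
#     --model 'decapoda-research/llama-7b-hf' \
#     --data_path 'medical_meadow_small.json' \
#     --output_dir './lora-alpaca-7b-8bit' \
#     --train_in_8bit True \
#     --use_lora True \
#     --bf16 False \
#     --tf32 False \
#     --fp16 True \
#     --gradient_checkpointing True \
#     --global_batch_size 256 \
#     --per_device_batch_size 4 \
#     --wandb_project 'medalpaca' \
#     --use_wandb False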