#!/usr/bin/make
PYTHON_VENV_DIR?=./venv
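# PYTHON_VENV_DIR uses ?= so it can be overridden per invocation,
# e.g. (hypothetical path): make install PYTHON_VENV_DIR=/opt/venvs/llama
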
.PHONY: help collect install install-dev install-venv install-base uninstall uninstall-venv clean run train-original
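
# 'help' prints the PYTHON_VENV_DIR hint plus every target annotated with a
# trailing '## <description>' comment, extracted by grepping this Makefile.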
help:
	@echo 'ENVIRONMENT: ## PYTHON_VENV_DIR' | awk 'BEGIN {FS = ":.*?## "}; {printf "%-20s %s\n", $$1, $$2}'
	@grep -E '^[\.a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "%-20s %s\n", $$1, $$2}'

collect: ## Collect and show the environment (sensitive values!)
	$(foreach v, $(filter-out .VARIABLES,$(.VARIABLES)), $(info $(v) = $($(v))))
	@ . ${PYTHON_VENV_DIR}/bin/activate \
	&& python3 -m torch.utils.collect_env
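# Usage note: 'make collect' dumps every Make and environment variable
# (including any tokens in the environment), so review the output before sharing it.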

install: install-venv ## Install dependencies for production
	@ true \
	&& . ${PYTHON_VENV_DIR}/bin/activate \
	&& pip3 install -Ur requirements.txt
# customize!

install-dev: install-venv ## Install dependencies for development
	@ true \
	&& . ${PYTHON_VENV_DIR}/bin/activate \
	&& pip3 install -Ur requirements-dev.txt
# customize!

install-venv: install-base ## Install Linux packages and python (venv)
	@ true \
	&& sudo apt install -y patchelf ccache \
	&& sudo apt install -y zlib1g-dev libjpeg-dev libpq-dev git-lfs \
	&& test -d ${PYTHON_VENV_DIR} || python3 -m venv ${PYTHON_VENV_DIR} \
	&& . ${PYTHON_VENV_DIR}/bin/activate \
	&& pip3 install --upgrade pip \
	&& pip3 install -Ur requirements.txt \
	&& test -d tmp/AutoGPTQ || git clone https://github.com/PanQiWei/AutoGPTQ.git tmp/AutoGPTQ \
	&& cd tmp/AutoGPTQ && BUILD_CUDA_EXT=1 pip3 install -vvv --no-build-isolation -e .
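# Note: BUILD_CUDA_EXT=1 builds AutoGPTQ's CUDA extension from source, which
# assumes a CUDA toolkit (nvcc) compatible with the installed torch is available.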

install-base: ## Install base Linux packages and python
	@ true \
	&& sudo apt update \
	&& sudo apt install -y python3 \
	&& sudo apt install -y python3.10-venv python3-dev python3-setuptools \
	&& sudo apt install -y --no-install-recommends build-essential gcc \
	&& sudo apt install -y python3-pip \
	&& sudo apt clean
# customize!
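# Assumption: the install-* targets above expect a Debian/Ubuntu host with
# sudo access (apt, python3.10-venv); adapt the package names for other distros.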

clean: ## Delete the virtual environment and temporary folders
	rm -rf __pycache__ logs offload tmp "${PYTHON_VENV_DIR}"
# customize!

run: ## Run the Python application in the virtual environment
	@ . ${PYTHON_VENV_DIR}/bin/activate \
	&& mkdir -p logs \
	&& python3 main.py

# - https://huggingface.co/docs/trl/main/en/index
# - https://huggingface.co/docs/trl/main/en/sft_trainer
# - https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py
# - https://huggingface.co/docs/trl/main/en/clis
# train-original: ## Train 'original' model
# 	. ${PYTHON_VENV_DIR}/bin/activate \
# 	&& mkdir -p datasets \
# 	&& python3 dataset_format.py \
# 	&& trl sft \
# 	   --model_name_or_path meta-llama/Llama-2-7b-chat-hf \
# 	   --dataset_name datasets/olafrv-trl \
# 	   --dataset_text_field "instruction" \
# 	   --output_dir models/olafrv/Llama-2-7b-chat-hf-trained

# https://huggingface.co/docs/transformers/main/en//quantization
# Only 4-bit models are supported, and we recommend deactivating
# the ExLlama kernels if you're finetuning a quantized model with PEFT.
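# A minimal sketch (not part of this Makefile; API names assumed from the
# transformers docs) of loading the GPTQ model with ExLlama disabled for PEFT:
#   from transformers import AutoModelForCausalLM, GPTQConfig
#   model = AutoModelForCausalLM.from_pretrained(
#       "TheBloke/Llama-2-7b-Chat-GPTQ",
#       quantization_config=GPTQConfig(bits=4, use_exllama=False))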

# train-gptq: ## Train 'tlrsft' model
# 	. ${PYTHON_VENV_DIR}/bin/activate \
# 	&& mkdir -p datasets \
# 	&& python3 dataset_format.py \
# 	&& trl sft \
# 	   --model_name_or_path TheBloke/Llama-2-7b-Chat-GPTQ \
# 	   --dataset_name datasets/olafrv-trl \
# 	   --dataset_text_field "instruction" \
# 	   --output_dir models/olafrv/Llama-2-7b-Chat-GPTQ-trained

# https://huggingface.co/docs/autotrain/v0.8.21/llm_finetuning
# Llama 2 is now supported by AutoTrain, but training here ran out of CUDA memory.
# autotrain llm --help
# autotrain setup --update-torch  # Only if using Google Colab
# autotrain setup
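# ${HUGGINGFACE_TOKEN} is not defined in this Makefile; export it in the
# environment (or pass it as 'make HUGGINGFACE_TOKEN=...') before enabling
# the target below.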
# autotrain-original: ## Train 'original' model
# 	# HF_HUB_OFFLINE=1 HF_DATASETS_OFFLINE=1
# 	cd models/olafrv \
# 	&& autotrain llm --train --trainer sft \
# 	   --token ${HUGGINGFACE_TOKEN} \
# 	   --data_path ../../datasets/olafrv-autotrain \
# 	   --model meta-llama/Llama-2-7b-chat-hf \
# 	   --text_column text \
# 	   --project_name "Llama-2-7b-chat-hf-autotrain" \
# 	   > logs/training.log &
# 	tail -f logs/training.log