-
-
Notifications
You must be signed in to change notification settings - Fork 163
/
lib.include.sh
398 lines (334 loc) · 14.6 KB
/
lib.include.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
#!/usr/bin/env bash
set -e

# Detect absolute path to the directory where "lib.include.sh" resides.
# NOTE: The assignment is deliberately separated from "export". When written as
# "export VAR=$(...)", the exit status of "export" hides a failing "cd"/"pwd",
# so "set -e" would never notice that the directory could not be resolved.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
export SCRIPT_DIR

# Guard against including the library multiple times. Once the variable is
# read-only, a second inclusion fails when it tries to assign "SCRIPT_DIR" again.
readonly SCRIPT_DIR

# Ensure that all scripts change their working dir to the root of the project.
cd -- "${SCRIPT_DIR}"
# User-configurable environment variables.
# IMPORTANT: Don't modify the code below! Pass these variables via the environment!
# Every variable keeps a non-empty value provided by the caller, and otherwise
# falls back to the default shown here.
# NOTE: The "OT_CONDA_ENV" and "OT_PYTHON_VENV" are always relative to "SCRIPT_DIR"
# unless an absolute ("/home/foo/venv") or relative-traversal ("../venv") path is given.
# NOTE: The Conda detection prioritizes the user-provided value, otherwise the
# value of "$CONDA_EXE" (the env variable set by Conda's shell startup script),
# or lastly the "conda" binary (from PATH) as final fallback. We MUST use this
# order, otherwise we will fail to detect Conda if its startup script has executed,
# since their script shadows "conda" as a shell-function instead of a binary!
: "${OT_CONDA_CMD:=${CONDA_EXE:-conda}}"
: "${OT_CONDA_ENV:=conda_env}"
: "${OT_PYTHON_CMD:=python}"
: "${OT_PYTHON_VENV:=venv}"
: "${OT_PREFER_VENV:=false}"
: "${OT_CUDA_LOWMEM_MODE:=false}"
: "${OT_PLATFORM_REQUIREMENTS:=detect}"
: "${OT_SCRIPT_DEBUG:=false}"
export OT_CONDA_CMD OT_CONDA_ENV OT_PYTHON_CMD OT_PYTHON_VENV
export OT_PREFER_VENV OT_CUDA_LOWMEM_MODE OT_PLATFORM_REQUIREMENTS OT_SCRIPT_DEBUG
# Internal environment variables (always overwritten, never taken from the caller).
# NOTE: Version check supports "3", "3.1" and "3.1.5" specifier formats.
export OT_PYTHON_VERSION_MINIMUM="3.10" \
  OT_PYTHON_VERSION_TOO_HIGH="3.13" \
  OT_CONDA_USE_PYTHON_VERSION="3.10" \
  OT_MUST_INSTALL_REQUIREMENTS="false"
# Kernel name as reported by "uname -s" (compared against "Darwin"/"Linux" below
# and elsewhere in this library).
export OT_HOST_OS="$(uname -s)"

# Force PyTorch to use fallbacks on Mac systems.
case "${OT_HOST_OS}" in
  Darwin) export PYTORCH_ENABLE_MPS_FALLBACK="1" ;;
esac

# Change PyTorch memory allocation to reduce CUDA out-of-memory situations.
# Only applied when the user opted in with OT_CUDA_LOWMEM_MODE="true".
case "${OT_CUDA_LOWMEM_MODE}" in
  true) export PYTORCH_CUDA_ALLOC_CONF="garbage_collection_threshold:0.6,max_split_size_mb:128" ;;
esac
# Utility functions.
function escape_shell_command {
  # Prints all arguments as one shell-escaped command line on stdout,
  # separated by single spaces and without a trailing newline.
  # NOTE: "%q" ensures shell-compatible argument escaping.
  # NOTE: The result is built with "printf -v" and the leading separator is
  # stripped via parameter expansion, which avoids spawning "sed" and a pipeline
  # for every logged command.
  local escaped
  printf -v escaped ' %q' "$@"
  printf '%s' "${escaped# }"
}
print() {
  # Writes one prefixed log line to stdout. All arguments are joined into a
  # single message string first.
  # NOTE: "%b" parses escape-sequences, allowing us to output "\n" newlines.
  local message="$*"
  printf "[OneTrainer] %b\n" "${message}"
}
print_warning() {
  # Writes one prefixed warning line to stderr. Arguments are joined into a
  # single message, and "%b" expands escape-sequences such as "\n".
  local message="$*"
  printf "[OneTrainer] Warning: %b\n" "${message}" 1>&2
}
print_error() {
  # Writes one prefixed error line to stderr. Arguments are joined into a
  # single message, and "%b" expands escape-sequences such as "\n".
  local message="$*"
  printf "[OneTrainer] Error: %b\n" "${message}" 1>&2
}
print_debug() {
  # Forwards the message to "print", but only when the user has enabled
  # debugging with OT_SCRIPT_DEBUG="true". Otherwise it silently succeeds.
  if [[ "${OT_SCRIPT_DEBUG}" != "true" ]]; then
    return 0
  fi
  print "$*"
}
print_command() {
  # Logs a command line to stdout, prefixed with "+", after escaping every
  # argument through "escape_shell_command".
  # NOTE: "%s" prints the escaped command as-is without parsing escape-seqs.
  printf '%s%s\n' "[OneTrainer] + " "$(escape_shell_command "$@")"
}
regex_escape() {
  # Prints the arguments (joined into one string) with a backslash inserted in
  # front of each of these characters: ] [ \ . | $ ( ) { } ? + * ^
  local raw_text="$*"
  printf '%s\n' "${raw_text}" | sed 's/[][\.|$(){}?+*^]/\\&/g'
}
# Resolves the absolute path for an absolute or relative input path.
# Arguments: $1 - path of an existing directory.
# Outputs:   the resolved directory path on stdout.
# Returns:   0 on success; 1 if the argument is missing, is not a directory,
#            or the directory cannot be entered.
function absolute_path {
  if [[ -z "$1" ]]; then
    print_error "absolute_path requires 1 argument."
    return 1
  fi
  if [[ ! -d "$1" ]]; then
    print_error "absolute_path argument is not a directory: \"$1\"."
    return 1
  fi
  # NOTE: We resolve inside a subshell so the caller's working directory is
  # untouched. CDPATH is cleared so that a bare relative name can never be
  # redirected to a same-named directory somewhere else. The subshell's status
  # is checked, so a failing "cd" is reported instead of printing an empty path.
  if ! (CDPATH="" cd -- "$1" &>/dev/null && pwd); then
    print_error "absolute_path could not enter directory: \"$1\"."
    return 1
  fi
}
# Checks if a command exists and is executable.
# Arguments: $1 - command name (looked up via PATH) or path to a binary.
# Returns:   0 if it resolves to an executable regular file, otherwise 1.
function can_exec {
  if [[ -z "$1" ]]; then
    print_error "can_exec requires 1 argument."
    return 1
  fi
  # NOTE: Declaration and assignment are separate, because "local var=$(...)"
  # always reports the status of "local" and hides a failed lookup.
  local full_path
  full_path="$(command -v "$1" 2>/dev/null)" || return 1
  # NOTE: "command -v" prints just the bare name for shell functions, aliases
  # and builtins. The "-f" test rejects those even if a directory with the same
  # name exists in the working directory (directories pass a plain "-x" test).
  [[ -n "${full_path}" && -f "${full_path}" && -x "${full_path}" ]]
}
# Executes a shell command and displays the exact command for logging purposes.
# The command line is logged via "print_command" before it runs, and the
# function's status is the status of the executed command.
run_cmd() {
  print_command "$@"
  # Run the arguments verbatim as a command (first word is the program).
  "$@"
}
# Python command wrappers.
run_python() {
  # Runs the configured Python command ("OT_PYTHON_CMD") with the given
  # arguments, going through "run_cmd" so the invocation is logged.
  local -a python_argv=("${OT_PYTHON_CMD}" "$@")
  run_cmd "${python_argv[@]}"
}
run_pip() {
  # Invokes Pip as a module ("-m pip") of the configured Python command.
  local -a module_argv=(-m pip "$@")
  run_python "${module_argv[@]}"
}
run_venv() {
  # Invokes the "venv" module ("-m venv") of the configured Python command.
  local -a module_argv=(-m venv "$@")
  run_python "${module_argv[@]}"
}
has_python() {
  # Succeeds when the configured Python command ("OT_PYTHON_CMD") passes the
  # "can_exec" check.
  local python_cmd="${OT_PYTHON_CMD}"
  can_exec "${python_cmd}"
}
has_python_venv() {
  # Succeeds when the venv directory ("OT_PYTHON_VENV") contains the regular
  # file "bin/activate".
  local activate_script="${OT_PYTHON_VENV}/bin/activate"
  [[ -f "${activate_script}" ]]
}
create_python_venv() {
  # Creates the Python venv in "OT_PYTHON_VENV" and flags that the project
  # requirements must be installed afterwards.
  local venv_dir="${OT_PYTHON_VENV}"
  print "Creating Python Venv environment in \"${venv_dir}\"..."
  run_venv "${venv_dir}"
  # A new environment has no packages yet, so request a requirements install.
  export OT_MUST_INSTALL_REQUIREMENTS="true"
}
ensure_python_venv_exists() {
  # Creates the Python venv only when "has_python_venv" reports it is missing.
  has_python_venv || create_python_venv
}
activate_python_venv() {
  # Sources the venv's "bin/activate" script into the current shell.
  # NOTE: This rewrites PATH to make all subsequent Python commands prefer
  # to use the venv's binaries instead. You should only execute this once!
  source "${OT_PYTHON_VENV}/bin/activate"

  if [[ -n "${VIRTUAL_ENV}" ]]; then
    # We must now force the Python binary name back to normal, since the venv's
    # own, internal Python binary is ALWAYS named "python".
    export OT_PYTHON_CMD="python"
    return
  fi

  # NOTE: Sanity check just to ensure that the activate-script was real. It is
  # expected to have set "VIRTUAL_ENV"; without it we abort the whole script.
  print_error "Something went wrong when activating the Python Venv in \"${OT_PYTHON_VENV}\"."
  exit 1
}
# Conda command wrappers.
run_conda() {
  # Runs the configured Conda command ("OT_CONDA_CMD") with the given
  # arguments, going through "run_cmd" so the invocation is logged.
  local -a conda_argv=("${OT_CONDA_CMD}" "$@")
  run_cmd "${conda_argv[@]}"
}
# Cached answer of "has_conda": empty until the first check, then "true"/"false".
__HAS_CONDA__CACHE=""
has_conda() {
  # Succeeds when the configured Conda command ("OT_CONDA_CMD") is executable.
  # We cache the result of this check to speed up further "has_conda" calls.
  if [[ -z "${__HAS_CONDA__CACHE}" ]]; then
    __HAS_CONDA__CACHE="false"
    if can_exec "${OT_CONDA_CMD}"; then
      __HAS_CONDA__CACHE="true"
    fi
  fi
  [[ "${__HAS_CONDA__CACHE}" == "true" ]]
}
has_conda_env() {
  # Look for Conda's metadata to ensure it's a valid local Conda environment:
  # the directory "conda-meta" must exist inside "OT_CONDA_ENV".
  local metadata_dir="${OT_CONDA_ENV}/conda-meta"
  [[ -d "${metadata_dir}" ]]
}
function has_conda_global_env {
  # Checks for a globally installed (non-local) Conda environment by name.
  # Arguments: $1 - the environment name to look for.
  # Returns:   0 if a line of "conda info --envs" starts with exactly that name,
  #            otherwise non-zero (also when the argument is missing).
  if [[ -z "$1" ]]; then
    print_error "has_conda_global_env requires 1 argument."
    return 1
  fi
  # NOTE: We perform a strict, case-sensitive check for the exact env name.
  # - "regex_escape" escapes for extended regular expressions, so we must use
  #   "grep -E". In basic mode, sequences such as "\+" become operators instead
  #   of literal characters.
  # - The name must be followed by whitespace. A "\b" word boundary would also
  #   accept longer names such as "ot-legacy" when searching for "ot".
  local env_pattern
  env_pattern="^$(regex_escape "$1")[[:space:]]" || return 1
  run_conda info --envs | grep -E -q -- "${env_pattern}"
}
create_conda_env() {
  # Creates the local Conda environment in "OT_CONDA_ENV", flags that the
  # requirements must be installed, and warns about a leftover global "ot" env.
  print "Creating Conda environment in \"${OT_CONDA_ENV}\"..."

  # IMPORTANT: The ".*" suffix tells Conda to install the latest bugfix/patch
  # release of the desired Python version. For example, if we specify "3.12.*",
  # then it will pick the latest patch release, such as "3.12.5". It also works
  # correctly if we specify an EXACT patch release ourselves, such as "3.10.14.*",
  # or if we only specify a major version, such as "3.*" (gets the latest release).
  local -a package_specs=("python==${OT_CONDA_USE_PYTHON_VERSION}.*")

  # IMPORTANT: We MUST use "conda-forge" and EXPLICITLY switch to the version
  # of Tk that has libXft support on Linux, otherwise the GUI will have broken
  # fonts, inability to render Unicode, and no antialiasing! Doesn't affect Macs.
  # SEE: https://github.com/conda-forge/tk-feedstock/pull/40#issuecomment-2381409555
  # SEE: https://anaconda.org/conda-forge/tk/files (only Linux has "xft" variant).
  case "${OT_HOST_OS}" in
    Linux) package_specs+=("tk[build=xft_*]") ;;
  esac

  # NOTE: We install with strict channel priority and an explicit channel list,
  # which ensures that package names which exist in "conda-forge" will never
  # fall back to the "defaults" channel if "conda-forge" lacks the required
  # version. Protects against mismatched packages built with different settings.
  local -a create_argv=(
    create -y
    --prefix "${OT_CONDA_ENV}"
    --override-channels
    --strict-channel-priority
    --channel "conda-forge"
  )
  run_conda "${create_argv[@]}" "${package_specs[@]}"
  export OT_MUST_INSTALL_REQUIREMENTS="true"

  # Show a warning if the user has the legacy "ot" environment on their system.
  if has_conda_global_env "ot"; then
    # NOTE: We tell the user what to do, since automated removal is risky.
    print_warning "The deprecated \"ot\" Conda environment has been detected on your system. It is occupying several gigabytes of disk space, and can be deleted manually to reclaim the storage space.\n\nTo delete the outdated Conda environment, execute the following command:\n\"${OT_CONDA_CMD}\" remove -y --name \"ot\" --all"
  fi
}
ensure_conda_env_exists() {
  # Creates the local Conda environment only when "has_conda_env" reports that
  # it does not exist yet.
  has_conda_env || create_conda_env
}
run_in_conda_env() {
  # Executes the given command inside the local Conda environment via
  # "conda run", selecting the environment by its "OT_CONDA_ENV" prefix path.
  # NOTE: The "--no-capture-output" flag is necessary to print live to stdout/stderr.
  local -a conda_run_argv=(run --prefix "${OT_CONDA_ENV}" --no-capture-output)
  run_conda "${conda_run_argv[@]}" "$@"
}
run_python_in_conda_env() {
  # Runs Python with the given arguments inside the local Conda environment.
  # NOTE: Python is ALWAYS called "python" inside Conda's environment.
  local -a python_argv=(python "$@")
  run_in_conda_env "${python_argv[@]}"
}
run_pip_in_conda_env() {
  # Invokes Pip as a module ("-m pip") of the Conda environment's Python.
  local -a module_argv=(-m pip "$@")
  run_python_in_conda_env "${module_argv[@]}"
}
# Checks if the user hasn't requested Venv instead, and if Conda exists.
# Returns 0 only when OT_PREFER_VENV is not "true" AND "has_conda" succeeds.
should_use_conda() {
  # NOTE: This check is intentionally not cached, to allow changing preference
  # during runtime. Furthermore, "has_conda" uses caching for speed already.
  if [[ "${OT_PREFER_VENV}" == "true" ]]; then
    return 1
  fi
  has_conda
}
# Helpers which automatically run Python and Pip in either Conda or Venv/Host,
# depending on what's available on the system or user-preference overrides.
activate_chosen_env() {
  # Prepares whichever environment "should_use_conda" selects.
  if ! should_use_conda; then
    # Venv route: create the venv if it is missing, then activate it in this shell.
    print "Using Python Venv environment in \"${OT_PYTHON_VENV}\"..."
    ensure_python_venv_exists
    activate_python_venv
    return
  fi

  # Conda route: only make sure the environment exists. No activation step is
  # performed here.
  print "Using Conda environment in \"${OT_CONDA_ENV}\"..."
  ensure_conda_env_exists
}
run_python_in_active_env() {
  # Dispatches to the Conda Python wrapper when "should_use_conda" succeeds,
  # otherwise to the plain "run_python" wrapper. All arguments are forwarded.
  local runner="run_python"
  if should_use_conda; then
    runner="run_python_in_conda_env"
  fi
  "${runner}" "$@"
}
run_pip_in_active_env() {
  # Dispatches to the Conda Pip wrapper when "should_use_conda" succeeds,
  # otherwise to the plain "run_pip" wrapper. All arguments are forwarded.
  local runner="run_pip"
  if should_use_conda; then
    runner="run_pip_in_conda_env"
  fi
  "${runner}" "$@"
}
# Determines which requirements.txt file we need to install.
# Outputs: the requirements file name on stdout.
# Returns: 0 on success; 1 (with an error message) if the chosen file is
#          empty or does not exist relative to the current working directory.
get_platform_requirements_path() {
  # NOTE: The user can override our platform detection via the environment.
  # Any value other than "detect" is used as the file name as-is.
  local reqs_file="${OT_PLATFORM_REQUIREMENTS}"

  if [[ "${reqs_file}" == "detect" ]]; then
    # Start from "no GPU acceleration" and upgrade when a GPU is found.
    reqs_file="requirements-default.txt"
    # NOTE: We MUST prioritize NVIDIA first, since machines that contain
    # *both* AMD and NVIDIA GPUs are usually running integrated AMD graphics
    # that's built into their CPU, whereas their *dedicated* GPU is NVIDIA.
    if [[ -e "/dev/nvidia0" ]] || can_exec nvidia-smi || can_exec "/usr/lib/wsl/lib/nvidia-smi"; then
      # NVIDIA graphics.
      # "/dev/nvidia0": The "first" detected NVIDIA GPU in the system.
      # "nvidia-smi": Driver tool for all NVIDIA GPUs made after 2010.
      # "/usr/lib/wsl/lib/nvidia-smi": WSL's NVIDIA path (isn't in $PATH).
      # SEE: https://docs.nvidia.com/cuda/wsl-user-guide/
      reqs_file="requirements-cuda.txt"
    elif [[ -e "/dev/kfd" ]]; then
      # AMD graphics.
      reqs_file="requirements-rocm.txt"
    fi
  fi

  if [[ -n "${reqs_file}" ]] && [[ -f "${reqs_file}" ]]; then
    echo "${reqs_file}"
  else
    print_error "Requirements file \"${reqs_file}\" does not exist."
    return 1
  fi
}
# Installs the Global and Platform requirements into the active environment.
# Returns: 0 when everything was installed; non-zero if the platform
#          requirements file could not be determined or a Pip step failed.
#          "OT_MUST_INSTALL_REQUIREMENTS" is only reset to "false" on success.
function install_requirements_in_active_env {
  print "Installing requirements in active environment..."

  # Resolve the platform file up-front. A failure inside "$(...)" used directly
  # as a command argument would go unnoticed and hand an empty "-r" to Pip.
  # NOTE: "get_platform_requirements_path" reports the reason on stderr itself.
  local platform_reqs
  platform_reqs="$(get_platform_requirements_path)" || return 1

  # Ensure that we have the latest Python tools, and install the dependencies.
  # NOTE: The "eager" upgrade strategy is necessary for upgrading dependencies
  # when running in existing environments. It ensures that all libraries will
  # be upgraded to the same versions as a fresh reinstall of requirements.txt.
  # NOTE: Each step is checked explicitly, so the "must install" flag is never
  # cleared after a failed install, even if the caller runs us in a context
  # where "set -e" is ignored.
  run_pip_in_active_env install --upgrade --upgrade-strategy eager pip setuptools || return
  run_pip_in_active_env install --upgrade --upgrade-strategy eager -r requirements-global.txt -r "${platform_reqs}" || return

  export OT_MUST_INSTALL_REQUIREMENTS="false"
}
install_requirements_in_active_env_if_necessary() {
  # Installs the requirements unless "OT_MUST_INSTALL_REQUIREMENTS" is exactly
  # "false". Any other value (including an empty one) triggers the install.
  if [[ "${OT_MUST_INSTALL_REQUIREMENTS}" == "false" ]]; then
    return 0
  fi
  install_requirements_in_active_env
}
# Educates the user about the correct methods for installing Python or Conda.
# Prints one "Solution: ..." message, chosen by "should_use_conda".
show_runtime_solutions() {
  if ! should_use_conda; then
    print "Solution: Either install the required Python version via pyenv (https://github.com/pyenv/pyenv) and set the project directory's Python version with \"pyenv install <version>\" followed by \"pyenv local <version>\", or install Miniconda if you prefer that we automatically manage everything for you (https://docs.anaconda.com/miniconda/). Remember to manually delete any previous Venv or Conda environment which was created with a different Python version. Read \"LAUNCH-SCRIPTS.md\" for more detailed instructions."
    return
  fi

  # Resolve the absolute path to ensure user doesn't delete anything else.
  # The configured value is shown unchanged if the environment doesn't exist.
  local conda_env_path="${OT_CONDA_ENV}"
  if has_conda_env; then
    conda_env_path="$(absolute_path "${conda_env_path}")"
  fi

  # NOTE: We tell the user what to do, since automated removal is risky.
  print "Solution: Switch your Conda environment to the required Python version by deleting your old environment, and then run OneTrainer again.\n\nTo delete the outdated Conda environment, execute the following command:\n\"${OT_CONDA_CMD}\" remove -y --prefix \"${conda_env_path}\" --all"
}
# Ensures that Python or Conda exists on the host and can be executed.
# Exits the script with status 1 (after printing guidance) when neither does.
exit_if_no_runtime() {
  # NOTE: If "should_use_conda" is true, we have a usable Conda, so the Python
  # command is only probed when Conda is not going to be used.
  if should_use_conda || has_python; then
    return 0
  fi

  print_error "Python command \"${OT_PYTHON_CMD}\" does not exist on your system."
  show_runtime_solutions
  exit 1
}
# Verifies that Python version is ">= minimum and < too high" in Conda/Venv/Host.
# The check is delegated to "scripts/util/version_check.py", which receives the
# two version bounds as arguments. If it fails, guidance is printed and the
# script exits with status 1.
exit_if_active_env_wrong_python_version() {
  if run_python_in_active_env "scripts/util/version_check.py" "${OT_PYTHON_VERSION_MINIMUM}" "${OT_PYTHON_VERSION_TOO_HIGH}"; then
    return 0
  fi

  show_runtime_solutions
  exit 1
}
# Performs the most important startup sanity checks and environment preparation.
# Arguments: $1 - optional mode; "upgrade" forces a requirements install.
prepare_runtime_environment() {
  # Ensure that the chosen Conda or Python runtime exists.
  exit_if_no_runtime

  # Create and activate the chosen environment.
  activate_chosen_env

  # Protect against outdated Python environments created with older versions.
  exit_if_active_env_wrong_python_version

  # If this is an upgrade, always ensure that we have the latest dependencies,
  # otherwise only install requirements if the environment was newly created.
  case "$1" in
    upgrade) install_requirements_in_active_env ;;
    *) install_requirements_in_active_env_if_necessary ;;
  esac
}