Few tune for JP 6 #347

Open · wants to merge 14 commits into base: `dev`
1 change: 1 addition & 0 deletions jetson_containers/container.py
@@ -295,6 +295,7 @@ def test_container(name, package, simulate=False):
cmd += f"--volume {package['path']}:/test" + _NEWLINE_
cmd += f"--volume {os.path.join(_PACKAGE_ROOT, 'data')}:/data" + _NEWLINE_
cmd += f"--workdir /test" + _NEWLINE_
cmd += f"--entrypoint /usr/bin/env" + _NEWLINE_
cmd += name + _NEWLINE_

cmd += "/bin/bash -c '"
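Overriding the entrypoint with `/usr/bin/env` keeps package tests running the command line as given, even when an image defines its own `ENTRYPOINT` (several images in this PR now do). A minimal illustration of the effect, using a hypothetical image name:

```bash
# without the override, myimage's ENTRYPOINT would wrap (or swallow) the test command
docker run --entrypoint /usr/bin/env myimage /bin/bash -c 'echo test'
```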
3 changes: 1 addition & 2 deletions packages/audio/riva-client/Dockerfile.python
@@ -39,5 +39,4 @@ RUN pip3 show nvidia-riva-client && python3 -c 'import riva.client; print(riva.c
COPY list_audio_devices.py python-clients/scripts/
COPY loopback.py python-clients/scripts/


WORKDIR /
WORKDIR /
1 change: 0 additions & 1 deletion packages/cuda/cuda/Dockerfile
@@ -25,7 +25,6 @@ RUN apt-get update && \

RUN echo "Downloading ${CUDA_DEB}" && \
mkdir /tmp/cuda && cd /tmp/cuda && \
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/arm64/cuda-ubuntu2004.pin -O /etc/apt/preferences.d/cuda-repository-pin-600 && \
wget --quiet --show-progress --progress=bar:force:noscroll ${CUDA_URL} && \
dpkg -i *.deb && \
cp /var/cuda-tegra-repo-*/cuda-tegra-*-keyring.gpg /usr/share/keyrings/ && \
38 changes: 38 additions & 0 deletions packages/diffusion/sd-next/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#---
# name: sd-next
# group: diffusion
# depends: [python, pycuda, protobuf:apt, numba, numpy, tensorflow2, opencv, pytorch, torchvision, transformers, xformers, huggingface_hub]
# requires: '>=34.1.0'
# docs: docs.md
# notes: disabled on JetPack 4
#---
ARG BASE_IMAGE
FROM ${BASE_IMAGE}

ARG SD_NEXT_REPO=vladmandic/automatic
ARG SD_NEXT_VERSION=master

ADD https://api.github.com/repos/${SD_NEXT_REPO}/git/refs/heads/${SD_NEXT_VERSION} /tmp/sd_next_version.json

RUN cd /opt && \
git clone --branch ${SD_NEXT_VERSION} --depth=1 https://github.com/${SD_NEXT_REPO} && \
cd automatic && \
sed 's|^huggingface_hub.*||' -i requirements.txt && \
sed 's|^transformers.*||' -i requirements.txt && \
sed 's|^protobuf.*||' -i requirements.txt && \
sed 's|^numba.*||' -i requirements.txt && \
sed 's|^numpy.*||' -i requirements.txt && \
cat requirements.txt && \
TENSORFLOW_PACKAGE=https://nvidia.box.com/shared/static/wp43cd8e0lgen2wdqic3irdwagpgn0iz.whl python3 ./launch.py --skip-torch --use-cuda --reinstall --test

# partially initialized module 'cv2' has no attribute 'gapi_wip_gst_GStreamerPipeline'
RUN cd /opt && ./opencv_install.sh

# set the cache dir for models
ENV DIFFUSERS_CACHE=/data/models/diffusers

COPY docker-entrypoint.sh /usr/local/bin

WORKDIR /opt/automatic

ENTRYPOINT ["docker-entrypoint.sh"]
110 changes: 110 additions & 0 deletions packages/diffusion/sd-next/README.md
@@ -0,0 +1,110 @@
# sd-next

> [`CONTAINERS`](#user-content-containers) [`IMAGES`](#user-content-images) [`RUN`](#user-content-run) [`BUILD`](#user-content-build)


<img src="https://raw.githubusercontent.com/dusty-nv/jetson-containers/docs/docs/images/diffusion_webui.jpg">

* SD.Next from https://github.com/vladmandic/automatic (found under `/opt/automatic`)
* with TensorRT extension from https://github.com/AUTOMATIC1111/stable-diffusion-webui-tensorrt
* see the tutorial at the [**Jetson Generative AI Lab**](https://nvidia-ai-iot.github.io/jetson-generative-ai-playground/tutorial_diffusion.html)

This container has a default run command that will automatically start the webserver like this:

```bash
cd /opt/automatic && python3 launch.py \
--data=/data/models/stable-diffusion \
--enable-insecure-extension-access \
--xformers \
--listen \
--port=7860
```

After starting the container, you can navigate your browser to `http://$IP_ADDRESS:7860` (substitute the address or hostname of your device). The server will automatically download the default model ([`stable-diffusion-1.5`](https://huggingface.co/runwayml/stable-diffusion-v1-5)) during startup.
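To confirm the server came up (the first-start model download can take a while), a quick status check from the device is, assuming the default port:

```bash
# expect HTTP 200 once the web UI is ready (assumes default port 7860)
curl -s -o /dev/null -w '%{http_code}\n' http://localhost:7860
```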

Other configuration arguments can be found at [AUTOMATIC1111/stable-diffusion-webui/wiki/Command-Line-Arguments-and-Settings](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Command-Line-Arguments-and-Settings)

* `--medvram` (sacrifice some performance for low VRAM usage)
* `--lowvram` (sacrifice a lot of speed for very low VRAM usage)
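For example, a sketch of a memory-constrained launch, reusing the flags from the default run command above with `--medvram` swapped in:

```bash
# hypothetical low-memory invocation; --medvram trades some speed for lower VRAM usage
cd /opt/automatic && python3 launch.py \
    --data=/data/models/stable-diffusion \
    --medvram \
    --listen \
    --port=7860
```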

See the [`stable-diffusion`](/packages/diffusion/stable-diffusion) container to run image generation from a script (`txt2img.py`) as opposed to the web UI.

### Tips & Tricks

Negative prompts: https://huggingface.co/spaces/stabilityai/stable-diffusion/discussions/7857

Stable Diffusion XL
* https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl
* https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
* https://huggingface.co/stabilityai/stable-diffusion-xl-refiner-1.0
* https://stable-diffusion-art.com/sdxl-model/
<details open>
<summary><b><a id="containers">CONTAINERS</a></b></summary>
<br>

| **`stable-diffusion-webui`** | |
| :-- | :-- |
| &nbsp;&nbsp;&nbsp;Builds | [![`stable-diffusion-webui_jp51`](https://img.shields.io/github/actions/workflow/status/dusty-nv/jetson-containers/stable-diffusion-webui_jp51.yml?label=stable-diffusion-webui:jp51)](https://github.com/dusty-nv/jetson-containers/actions/workflows/stable-diffusion-webui_jp51.yml) [![`stable-diffusion-webui_jp60`](https://img.shields.io/github/actions/workflow/status/dusty-nv/jetson-containers/stable-diffusion-webui_jp60.yml?label=stable-diffusion-webui:jp60)](https://github.com/dusty-nv/jetson-containers/actions/workflows/stable-diffusion-webui_jp60.yml) |
| &nbsp;&nbsp;&nbsp;Requires | `L4T >=34.1.0` |
| &nbsp;&nbsp;&nbsp;Dependencies | [`build-essential`](/packages/build-essential) [`cuda`](/packages/cuda/cuda) [`cudnn`](/packages/cuda/cudnn) [`python`](/packages/python) [`tensorrt`](/packages/tensorrt) [`numpy`](/packages/numpy) [`cmake`](/packages/cmake/cmake_pip) [`onnx`](/packages/onnx) [`pytorch`](/packages/pytorch) [`torchvision`](/packages/pytorch/torchvision) [`huggingface_hub`](/packages/llm/huggingface_hub) [`rust`](/packages/rust) [`transformers`](/packages/llm/transformers) [`xformers`](/packages/llm/xformers) [`pycuda`](/packages/cuda/pycuda) [`opencv`](/packages/opencv) |
| &nbsp;&nbsp;&nbsp;Dependants | [`l4t-diffusion`](/packages/l4t/l4t-diffusion) |
| &nbsp;&nbsp;&nbsp;Dockerfile | [`Dockerfile`](Dockerfile) |
| &nbsp;&nbsp;&nbsp;Images | [`dustynv/stable-diffusion-webui:r35.2.1`](https://hub.docker.com/r/dustynv/stable-diffusion-webui/tags) `(2023-12-05, 7.1GB)`<br>[`dustynv/stable-diffusion-webui:r35.3.1`](https://hub.docker.com/r/dustynv/stable-diffusion-webui/tags) `(2023-11-05, 7.1GB)`<br>[`dustynv/stable-diffusion-webui:r35.4.1`](https://hub.docker.com/r/dustynv/stable-diffusion-webui/tags) `(2023-11-05, 7.1GB)`<br>[`dustynv/stable-diffusion-webui:r36.2.0`](https://hub.docker.com/r/dustynv/stable-diffusion-webui/tags) `(2023-12-05, 8.8GB)` |
| &nbsp;&nbsp;&nbsp;Notes | disabled on JetPack 4 |

</details>

<details open>
<summary><b><a id="images">CONTAINER IMAGES</a></b></summary>
<br>

| Repository/Tag | Date | Arch | Size |
| :-- | :--: | :--: | :--: |
| &nbsp;&nbsp;[`dustynv/stable-diffusion-webui:r35.2.1`](https://hub.docker.com/r/dustynv/stable-diffusion-webui/tags) | `2023-12-05` | `arm64` | `7.1GB` |
| &nbsp;&nbsp;[`dustynv/stable-diffusion-webui:r35.3.1`](https://hub.docker.com/r/dustynv/stable-diffusion-webui/tags) | `2023-11-05` | `arm64` | `7.1GB` |
| &nbsp;&nbsp;[`dustynv/stable-diffusion-webui:r35.4.1`](https://hub.docker.com/r/dustynv/stable-diffusion-webui/tags) | `2023-11-05` | `arm64` | `7.1GB` |
| &nbsp;&nbsp;[`dustynv/stable-diffusion-webui:r36.2.0`](https://hub.docker.com/r/dustynv/stable-diffusion-webui/tags) | `2023-12-05` | `arm64` | `8.8GB` |

> <sub>Container images are compatible with other minor versions of JetPack/L4T:</sub><br>
> <sub>&nbsp;&nbsp;&nbsp;&nbsp;• L4T R32.7 containers can run on other versions of L4T R32.7 (JetPack 4.6+)</sub><br>
> <sub>&nbsp;&nbsp;&nbsp;&nbsp;• L4T R35.x containers can run on other versions of L4T R35.x (JetPack 5.1+)</sub><br>
</details>

<details open>
<summary><b><a id="run">RUN CONTAINER</a></b></summary>
<br>

To start the container, you can use the [`run.sh`](/docs/run.md)/[`autotag`](/docs/run.md#autotag) helpers or manually put together a [`docker run`](https://docs.docker.com/engine/reference/commandline/run/) command:
```bash
# automatically pull or build a compatible container image
./run.sh $(./autotag sd-next)

# or explicitly specify one of the container images above
./run.sh dustynv/sd-next:r35.2.1

# or if using 'docker run' (specify image and mounts/etc)
sudo docker run --runtime nvidia -it --rm --network=host dustynv/sd-next:r35.2.1
```
> <sup>[`run.sh`](/docs/run.md) forwards arguments to [`docker run`](https://docs.docker.com/engine/reference/commandline/run/) with some defaults added (like `--runtime nvidia`, mounts a `/data` cache, and detects devices)</sup><br>
> <sup>[`autotag`](/docs/run.md#autotag) finds a container image that's compatible with your version of JetPack/L4T - either locally, pulled from a registry, or by building it.</sup>

To mount your own directories into the container, use the [`-v`](https://docs.docker.com/engine/reference/commandline/run/#volume) or [`--volume`](https://docs.docker.com/engine/reference/commandline/run/#volume) flags:
```bash
./run.sh -v /path/on/host:/path/in/container $(./autotag sd-next)
```
To launch the container running a command, as opposed to an interactive shell:
```bash
./run.sh $(./autotag sd-next) my_app --abc xyz
```
You can pass any options to [`run.sh`](/docs/run.md) that you would to [`docker run`](https://docs.docker.com/engine/reference/commandline/run/), and it'll print out the full command that it constructs before executing it.
</details>
<details open>
<summary><b><a id="build">BUILD CONTAINER</b></summary>
<br>

If you use [`autotag`](/docs/run.md#autotag) as shown above, it'll ask to build the container for you if needed. To manually build it, first do the [system setup](/docs/setup.md), then run:
```bash
./build.sh sd-next
```
The dependencies from above will be built into the container, and it'll be tested during the build. See [`./build.sh --help`](/jetson_containers/build.py) for build options.
</details>
14 changes: 14 additions & 0 deletions packages/diffusion/sd-next/docker-entrypoint.sh
@@ -0,0 +1,14 @@
#!/usr/bin/env bash

set -e

cd /opt/automatic

if [[ ! -z "${ACCELERATE}" ]] && [ ${ACCELERATE}="True" ] && [ -x "$(command -v accelerate)" ]
then
echo "Launching accelerate launch.py..."
exec accelerate launch --num_cpu_threads_per_process=6 launch.py --data=/data/models/stable-diffusion --skip-all --use-xformers --use-cuda --listen --port=7860 "$@"
else
echo "Launching launch.py..."
exec python3 launch.py --data=/data/models/stable-diffusion --skip-all --use-xformers --use-cuda --listen --port=7860 "$@"
fi
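Since the `ACCELERATE` toggle is read from the environment, it can be flipped at launch without rebuilding the image; a sketch using the helpers documented in the README (the `-e` flag is forwarded to `docker run`):

```bash
# run with HuggingFace accelerate enabled (only takes effect if accelerate is installed)
./run.sh -e ACCELERATE=True $(./autotag sd-next)

# equivalent with docker run directly
sudo docker run --runtime nvidia -it --rm --network=host -e ACCELERATE=True dustynv/sd-next:r35.2.1
```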
36 changes: 36 additions & 0 deletions packages/diffusion/sd-next/docs.md
@@ -0,0 +1,36 @@

<img src="https://raw.githubusercontent.com/dusty-nv/jetson-containers/docs/docs/images/diffusion_webui.jpg">

* SD.Next from https://github.com/vladmandic/automatic (found under `/opt/automatic`)
* with TensorRT extension from https://github.com/AUTOMATIC1111/stable-diffusion-webui-tensorrt
* see the tutorial at the [**Jetson Generative AI Lab**](https://nvidia-ai-iot.github.io/jetson-generative-ai-playground/tutorial_diffusion.html)

This container has a default run command that will automatically start the webserver like this:

```bash
cd /opt/automatic && python3 launch.py \
--data=/data/models/stable-diffusion \
--enable-insecure-extension-access \
--xformers \
--listen \
--port=7860
```

After starting the container, you can navigate your browser to `http://$IP_ADDRESS:7860` (substitute the address or hostname of your device). The server will automatically download the default model ([`stable-diffusion-1.5`](https://huggingface.co/runwayml/stable-diffusion-v1-5)) during startup.

Other configuration arguments can be found at [AUTOMATIC1111/stable-diffusion-webui/wiki/Command-Line-Arguments-and-Settings](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Command-Line-Arguments-and-Settings)

* `--medvram` (sacrifice some performance for low VRAM usage)
* `--lowvram` (sacrifice a lot of speed for very low VRAM usage)

See the [`stable-diffusion`](/packages/diffusion/stable-diffusion) container to run image generation from a script (`txt2img.py`) as opposed to the web UI.

### Tips & Tricks

Negative prompts: https://huggingface.co/spaces/stabilityai/stable-diffusion/discussions/7857

Stable Diffusion XL
* https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl
* https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
* https://huggingface.co/stabilityai/stable-diffusion-xl-refiner-1.0
* https://stable-diffusion-art.com/sdxl-model/
6 changes: 4 additions & 2 deletions packages/diffusion/stable-diffusion-webui/Dockerfile
@@ -34,6 +34,8 @@ RUN cd /opt/stable-diffusion-webui && \
# set the cache dir for models
ENV DIFFUSERS_CACHE=/data/models/diffusers

# default start-up command
CMD /bin/bash -c "cd /opt/stable-diffusion-webui && python3 launch.py --data=/data/models/stable-diffusion --enable-insecure-extension-access --xformers --listen --port=7860"
COPY docker-entrypoint.sh /usr/local/bin

WORKDIR /opt/stable-diffusion-webui

ENTRYPOINT ["docker-entrypoint.sh"]
5 changes: 5 additions & 0 deletions packages/diffusion/stable-diffusion-webui/docker-entrypoint.sh
@@ -0,0 +1,5 @@
#!/usr/bin/env bash

set -e

cd /opt/stable-diffusion-webui && python3 launch.py --data=/data/models/stable-diffusion --enable-insecure-extension-access --xformers --listen --port=7860 "$@"
9 changes: 5 additions & 4 deletions packages/jupyterlab/Dockerfile
@@ -14,7 +14,8 @@ RUN pip3 install --no-cache-dir --verbose jupyter 'jupyterlab<4' && \
RUN jupyter lab --version && jupyter lab --generate-config
RUN python3 -c "from notebook.auth.security import set_password; set_password('nvidia', '/root/.jupyter/jupyter_notebook_config.json')"

CMD /bin/bash -c "jupyter lab --ip 0.0.0.0 --port 8888 --allow-root &> /var/log/jupyter.log" & \
echo "allow 10 sec for JupyterLab to start @ http://$(hostname -I | cut -d' ' -f1):8888 (password nvidia)" && \
echo "JupterLab logging location: /var/log/jupyter.log (inside the container)" && \
/bin/bash
COPY docker-entrypoint.sh /usr/local/bin

ENTRYPOINT ["docker-entrypoint.sh"]

CMD ["/bin/bash"]
9 changes: 9 additions & 0 deletions packages/jupyterlab/docker-entrypoint.sh
@@ -0,0 +1,9 @@
#!/usr/bin/env bash

set -e

jupyter lab --ip 0.0.0.0 --port 8888 --allow-root &> /var/log/jupyter.log &
echo "allow 10 sec for JupyterLab to start @ http://$(hostname -I | cut -d' ' -f1):8888 (password nvidia)"

echo "JupterLab logging location: /var/log/jupyter.log (inside the container)"
exec "$@"
27 changes: 13 additions & 14 deletions packages/llm/bitsandbytes/Dockerfile
@@ -1,7 +1,7 @@
#---
# name: bitsandbytes
# group: llm
# requires: '==35.*'
# requires: '>=35.*'
# depends: [transformers]
# test: test.py
# notes: fork of https://github.com/TimDettmers/bitsandbytes for Jetson
@@ -10,23 +10,22 @@ ARG BASE_IMAGE
FROM ${BASE_IMAGE}

# upstream version is https://github.com/TimDettmers/bitsandbytes (the fork below includes some patches for Jetson)
ARG BITSANDBYTES_REPO=dusty-nv/bitsandbytes
ARG BITSANDBYTES_BRANCH=main
ARG BITSANDBYTES_REPO=jasl/bitsandbytes
ARG BITSANDBYTES_BRANCH=jetson

# force rebuild on new git commits - https://stackoverflow.com/a/56945508
ADD https://api.github.com/repos/${BITSANDBYTES_REPO}/git/refs/heads/${BITSANDBYTES_BRANCH} /tmp/bitsandbytes_version.json

RUN pip3 uninstall -y bitsandbytes && \
cd /opt && \
git clone --depth=1 https://github.com/${BITSANDBYTES_REPO} bitsandbytes && \
cd bitsandbytes && \
CUDA_VERSION=114 make -j$(nproc) cuda11x && \
CUDA_VERSION=114 make -j$(nproc) cuda11x_nomatmul && \
python3 setup.py --verbose build_ext --inplace -j$(nproc) bdist_wheel && \
cp dist/bitsandbytes*.whl /opt && \
pip3 install --no-cache-dir --verbose /opt/bitsandbytes*.whl && \
cd ../ && \
rm -rf bitsandbytes
WORKDIR /opt

RUN git clone --branch=${BITSANDBYTES_BRANCH} --depth=1 https://github.com/${BITSANDBYTES_REPO} bitsandbytes
RUN cd bitsandbytes && \
CUDA_VERSION=122 make -j$(nproc) cuda12x && \
CUDA_VERSION=122 make -j$(nproc) cuda12x_nomatmul && \
pip3 wheel --wheel-dir=dist --no-deps --verbose . && \
cp dist/bitsandbytes*.whl /opt

RUN pip3 install --no-cache-dir --verbose --no-build-isolation /opt/bitsandbytes*.whl

RUN pip3 install --no-cache-dir --verbose scipy
RUN pip3 show bitsandbytes && python3 -c 'import bitsandbytes'
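Beyond the import check above, a minimal functional sanity test for the CUDA 12 build might run a quantized layer on the GPU; this is a sketch assuming `torch` is present in the image:

```bash
python3 - <<'EOF'
import torch
import bitsandbytes as bnb

# 8-bit quantized linear layer; forward a random fp16 batch through it on the GPU
layer = bnb.nn.Linear8bitLt(64, 64, has_fp16_weights=False).cuda()
x = torch.randn(4, 64, dtype=torch.float16, device='cuda')
print(layer(x).shape)  # torch.Size([4, 64])
EOF
```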
23 changes: 23 additions & 0 deletions packages/llm/flash_attn/Dockerfile
@@ -0,0 +1,23 @@
#---
# name: flash_attn
# group: llm
# docs: docs.md
# depends: [pytorch]
# requires: '>=34.1.0'
#---
ARG BASE_IMAGE
FROM ${BASE_IMAGE}

ARG FLASH_ATTN_REPO=jasl/flash-attention
ARG FLASH_ATTN_BRANCH=aarch64

ADD https://api.github.com/repos/${FLASH_ATTN_REPO}/git/refs/heads/${FLASH_ATTN_BRANCH} /tmp/flash_attn_version.json

WORKDIR /opt

RUN git clone --branch=${FLASH_ATTN_BRANCH} --depth=1 https://github.com/${FLASH_ATTN_REPO} --recursive flash_attn
RUN cd flash_attn && \
MAX_JOBS=8 FORCE_BUILD=True CUDA_GENCODE='arch=compute_87,code=sm_87' pip3 wheel --wheel-dir=dist --no-deps --verbose . && \
cp dist/flash_attn*.whl /opt

RUN pip3 install --no-cache-dir --verbose --no-build-isolation flash_attn*.whl
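A hedged smoke test for the built wheel could call the core kernel directly (input shapes are `(batch, seqlen, heads, headdim)`; fp16 tensors on the GPU are required):

```bash
python3 - <<'EOF'
import torch
from flash_attn import flash_attn_func

q, k, v = (torch.randn(1, 128, 8, 64, dtype=torch.float16, device='cuda') for _ in range(3))
out = flash_attn_func(q, k, v, causal=True)  # causal self-attention over 128 tokens
print(out.shape)  # torch.Size([1, 128, 8, 64])
EOF
```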