nod-ai · smedegaard · Nov 1, 2024 · Nov 11, 2024 · Nov 11, 2024 · Nov 12, 2024
diff --git a/.gitignore b/.gitignore
@@ -45,3 +45,9 @@ instances.yaml.backup
 # cpp
 cpp/_build
 cpp/third-party
+
+# projects
+.tool-versions
+**/*/.classpath
+**/*/.settings
+**/*/.project
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -11,18 +11,7 @@ Your contributions will fall into two categories:
     - Search for your issue here: https://github.com/pytorch/serve/issues (look for the "good first issue" tag if you're a first time contributor)
     - Pick an issue and comment on the task that you want to work on this feature.
     - To ensure your changes doesn't break any of the existing features run the sanity suite as follows from serve directory:
-        - Install dependencies (if not already installed)
-          For CPU
-
-          ```bash
-          python ts_scripts/install_dependencies.py --environment=dev
-          ```
-
-         For GPU
-           ```bash
-           python ts_scripts/install_dependencies.py --environment=dev --cuda=cu121
-           ```
-            > Supported cuda versions as cu121, cu118, cu117, cu116, cu113, cu111, cu102, cu101, cu92
+        - [Install dependencies](#install-torchserve-for-development) (if not already installed)
         - Install `pre-commit` to your Git flow:
             ```bash
             pre-commit install
@@ -60,26 +49,30 @@ pytest -k  test/pytest/test_mnist_template.py
 
 If you plan to develop with TorchServe and change some source code, you must install it from source code.
 
-Ensure that you have `python3` installed, and the user has access to the site-packages or `~/.local/bin` is added to the `PATH` environment variable.
-
-Run the following script from the top of the source directory.
-
-NOTE: This script force re-installs `torchserve`, `torch-model-archiver` and `torch-workflow-archiver` if existing installations are found
-
-#### For Debian Based Systems/ MacOS
-
-```
-python ./ts_scripts/install_dependencies.py --environment=dev
-python ./ts_scripts/install_from_src.py --environment=dev
-```
-
-Use `--cuda` flag with `install_dependencies.py` for installing cuda version specific dependencies. Possible values are `cu111`, `cu102`, `cu101`, `cu92`
-
-#### For Windows
-
-Refer to the documentation [here](docs/torchserve_on_win_native.md).
-
-For information about the model archiver, see [detailed documentation](model-archiver/README.md).
+1. Clone the repository, including third-party modules, with `git clone --recurse-submodules --remote-submodules git@github.com:pytorch/serve.git`
+2. Ensure that you have `python3` installed, and the user has access to the site-packages or `~/.local/bin` is added to the `PATH` environment variable.
+3. Run the following script from the top of the source directory. NOTE: This script force re-installs `torchserve`, `torch-model-archiver` and `torch-workflow-archiver` if existing installations are found
+
+    #### For Debian Based Systems/MacOS
+
+    ```
+    python ./ts_scripts/install_dependencies.py --environment=dev
+    python ./ts_scripts/install_from_src.py --environment=dev
+    ```
+    ##### Installing Dependencies for Accelerator Support
+    Use the optional `--rocm` or `--cuda` flag with `install_dependencies.py` for installing accelerator specific dependencies.
+
+    Possible values are
+    - rocm: `rocm61`, `rocm60`
+    - cuda: `cu111`, `cu102`, `cu101`, `cu92`
+
+    For example `python ./ts_scripts/install_dependencies.py --environment=dev --rocm=rocm61`
+
+    #### For Windows
+
+    Refer to the documentation [here](docs/torchserve_on_win_native.md).
+
+    For information about the model archiver, see [detailed documentation](model-archiver/README.md).
 
 ### What to Contribute?
 

diff --git a/README.md b/README.md
@@ -13,7 +13,9 @@ TorchServe now enforces token authorization enabled and model API control disabl
 
 TorchServe is a flexible and easy-to-use tool for serving and scaling PyTorch models in production.
 
-Requires python >= 3.8
+Requires: 
+- python >= 3.8
+- Java >= 17
 
 ```bash
 curl http://127.0.0.1:8080/predictions/bert -T input.txt
@@ -22,7 +24,10 @@ curl http://127.0.0.1:8080/predictions/bert -T input.txt
 
 ```bash
 # Install dependencies
-# cuda is optional
+python ./ts_scripts/install_dependencies.py
+
+# Include dependencies for accelerator support with the relevant optional flags
+python ./ts_scripts/install_dependencies.py --rocm=rocm61
 python ./ts_scripts/install_dependencies.py --cuda=cu121
 
 # Latest release
@@ -36,7 +41,10 @@ pip install torchserve-nightly torch-model-archiver-nightly torch-workflow-archi
 
 ```bash
 # Install dependencies
-# cuda is optional
+python ./ts_scripts/install_dependencies.py
+
+# Include dependencies for accelerator support with the relevant optional flags
+python ./ts_scripts/install_dependencies.py --rocm=rocm61
 python ./ts_scripts/install_dependencies.py --cuda=cu121
 
 # Latest release

diff --git a/docker/Dockerfile.dev b/docker/Dockerfile.dev
@@ -10,7 +10,7 @@
 #       For reference:
 #           https://docs.docker.com/develop/develop-images/build_enhancements/
 
-ARG BASE_IMAGE=ubuntu:rolling
+ARG BASE_IMAGE=ubuntu:24.04
 ARG BUILD_TYPE=dev
 FROM ${BASE_IMAGE} AS compile-image
 
@@ -19,6 +19,7 @@ ARG BRANCH_NAME=master
 ARG REPO_URL=https://github.com/pytorch/serve.git
 ARG MACHINE_TYPE=cpu
 ARG CUDA_VERSION
+ARG ROCM_VERSION
 
 ARG BUILD_WITH_IPEX
 ARG IPEX_VERSION=1.11.0
@@ -41,14 +42,16 @@ RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
     git \
     python$PYTHON_VERSION \
     python$PYTHON_VERSION-dev \
-    python3-distutils \
+    python3-setuptools \
     python$PYTHON_VERSION-venv \
     python3-venv \
     build-essential \
     openjdk-17-jdk \
     curl \
     vim \
     numactl \
+    zip \
+    wget \
     && if [ "$BUILD_WITH_IPEX" = "true" ]; then apt-get update && apt-get install -y libjemalloc-dev libgoogle-perftools-dev libomp-dev && ln -s /usr/lib/x86_64-linux-gnu/libjemalloc.so /usr/lib/libjemalloc.so && ln -s /usr/lib/x86_64-linux-gnu/libtcmalloc.so /usr/lib/libtcmalloc.so && ln -s /usr/lib/x86_64-linux-gnu/libiomp5.so /usr/lib/libiomp5.so; fi \
     && rm -rf /var/lib/apt/lists/* \
     && cd /tmp \
@@ -58,19 +61,43 @@ RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
 RUN update-alternatives --install /usr/bin/python python /usr/bin/python$PYTHON_VERSION 1 \
     && update-alternatives --install /usr/local/bin/pip pip /usr/local/bin/pip3 1
 
+RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
+    if [ -n "$ROCM_VERSION" ]; then \
+        apt-get update \
+        && wget https://repo.radeon.com/amdgpu-install/6.2.2/ubuntu/noble/amdgpu-install_6.2.60202-1_all.deb \
+        && DEBIAN_FRONTEND=noninteractive sudo apt-get install -y ./amdgpu-install_6.2.60202-1_all.deb \
+        && sudo apt-get update \
+        && sudo apt-get install --no-install-recommends -y amdgpu-dkms rocm \
+        && cd /home/; \
+    else \
+        echo "Skip ROCm installation"; \
+    fi
+
 # Build Dev Image
 FROM compile-image AS dev-image
 ARG MACHINE_TYPE=cpu
 ARG CUDA_VERSION
-RUN if [ "$MACHINE_TYPE" = "gpu" ]; then export USE_CUDA=1; fi \
+RUN if [ "$MACHINE_TYPE" = "nvidia_gpu" ]; then export USE_CUDA=1; fi \
     && git clone $REPO_URL \
     && cd serve \
     && git checkout ${BRANCH_NAME} \
     && python$PYTHON_VERSION -m venv /home/venv
 ENV PATH="/home/venv/bin:$PATH"
 WORKDIR serve
+
+COPY . .
+
 RUN python -m pip install -U pip setuptools \
-    && if [ -z "$CUDA_VERSION" ]; then python ts_scripts/install_dependencies.py --environment=dev; else python ts_scripts/install_dependencies.py --environment=dev  --cuda $CUDA_VERSION; fi \
+    && if ([ -z "$CUDA_VERSION" ] && [ -z "$ROCM_VERSION" ]); then \
+        python ts_scripts/install_dependencies.py --environment=dev; \
+    elif [ -n "$ROCM_VERSION" ]; then \
+        python ts_scripts/install_dependencies.py --environment=dev  --rocm $ROCM_VERSION \
+        && cd /opt/rocm/share/amd_smi \
+        && pip install . \
+        && cd /serve/; \
+    else \
+        python ts_scripts/install_dependencies.py --environment=dev  --cuda $CUDA_VERSION; \
+    fi \
     && if [ "$BUILD_WITH_IPEX" = "true" ]; then python -m pip install --no-cache-dir intel_extension_for_pytorch==${IPEX_VERSION} -f ${IPEX_URL}; fi \
     && python ts_scripts/install_from_src.py \
     && useradd -m model-server \
@@ -83,7 +110,6 @@ RUN python -m pip install -U pip setuptools \
     && chown -R model-server /home/venv
 
 EXPOSE 8080 8081 8082 7070 7071
-USER model-server
 WORKDIR /home/model-server
 ENV TEMP=/home/model-server/tmp
 ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"]
@@ -112,4 +138,5 @@ RUN set -ex \
 
 FROM ${BUILD_TYPE}-image AS final-image
 ARG BUILD_TYPE
+ENV CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
 RUN echo "${BUILD_TYPE} image creation completed"
diff --git a/frontend/build.gradle b/frontend/build.gradle
@@ -37,8 +37,8 @@ def javaProjects() {
 
 configure(javaProjects()) {
     apply plugin: 'java-library'
-    sourceCompatibility = 1.8
-    targetCompatibility = 1.8
+    sourceCompatibility = JavaVersion.VERSION_17
+    targetCompatibility = JavaVersion.VERSION_17
 
     defaultTasks 'jar'
 

diff --git a/frontend/server/src/main/java/org/pytorch/serve/device/Accelerator.java b/frontend/server/src/main/java/org/pytorch/serve/device/Accelerator.java
@@ -0,0 +1,90 @@
+package org.pytorch.serve.device;
+
+import java.text.MessageFormat;
+import org.pytorch.serve.device.interfaces.IAcceleratorUtility;
+
+public class Accelerator { // Describes one accelerator device: fixed identity (id, vendor, model) plus mutable usage/memory readings.
+    public final Integer id; // device index; taken from the constructor's gpuId argument
+    public final AcceleratorVendor vendor; // vendor tag supplied at construction
+    public final String model; // model name; taken from the constructor's acceleratorName argument
+    public IAcceleratorUtility acceleratorUtility; // not assigned by the constructor; stays null until set from outside this class
+    public Float usagePercentage; // starts at 0.0
+    public Float memoryUtilizationPercentage; // starts at 0.0
+    public Integer memoryAvailableMegabytes; // starts at 0; not touched by updateDynamicAttributes
+    public Integer memoryUtilizationMegabytes; // starts at 0
+
+    public Accelerator(String acceleratorName, AcceleratorVendor vendor, Integer gpuId) { // fixes identity fields and zeroes every metric
+        this.model = acceleratorName;
+        this.vendor = vendor;
+        this.id = gpuId;
+        this.usagePercentage = (float) 0.0;
+        this.memoryUtilizationPercentage = (float) 0.0;
+        this.memoryAvailableMegabytes = 0;
+        this.memoryUtilizationMegabytes = 0;
+    }
+
+    // Getters
+    public Integer getMemoryAvailableMegaBytes() { // note: "MegaBytes" casing differs from the backing field memoryAvailableMegabytes
+        return memoryAvailableMegabytes;
+    }
+
+    public AcceleratorVendor getVendor() {
+        return vendor;
+    }
+
+    public String getAcceleratorModel() { // returns the model field
+        return model;
+    }
+
+    public Integer getAcceleratorId() { // returns the id field
+        return id;
+    }
+
+    public Float getUsagePercentage() {
+        return usagePercentage;
+    }
+
+    public Float getMemoryUtilizationPercentage() {
+        return memoryUtilizationPercentage;
+    }
+
+    public Integer getMemoryUtilizationMegabytes() {
+        return memoryUtilizationMegabytes;
+    }
+
+    // Setters
+    public void setMemoryAvailableMegaBytes(Integer memoryAvailable) { // stores the value as-is; no null or range validation
+        this.memoryAvailableMegabytes = memoryAvailable;
+    }
+
+    public void setUsagePercentage(Float acceleratorUtilization) { // stores the value as-is; no null or range validation
+        this.usagePercentage = acceleratorUtilization;
+    }
+
+    public void setMemoryUtilizationPercentage(Float memoryUtilizationPercentage) { // stores the value as-is; no null or range validation
+        this.memoryUtilizationPercentage = memoryUtilizationPercentage;
+    }
+
+    public void setMemoryUtilizationMegabytes(Integer memoryUtilizationMegabytes) { // stores the value as-is; no null or range validation
+        this.memoryUtilizationMegabytes = memoryUtilizationMegabytes;
+    }
+
+    // Other Methods
+    public String utilizationToString() { // renders id, usage %, memory % and used memory into a single line of text
+        final String message = // NOTE(review): MessageFormat formats numeric arguments with the default locale's NumberFormat (grouping separators, decimal symbol) - confirm consumers of this string tolerate that
+                MessageFormat.format(
+                        "gpuId::{0} utilization.gpu::{1} % utilization.memory::{2} % memory.used::{3} MiB",
+                        id,
+                        usagePercentage,
+                        memoryUtilizationPercentage,
+                        memoryUtilizationMegabytes);
+
+        return message;
+    }
+
+    public void updateDynamicAttributes(Accelerator updated) { // copies the three utilization readings from updated; id, vendor, model and memoryAvailableMegabytes are left unchanged
+        this.usagePercentage = updated.usagePercentage;
+        this.memoryUtilizationPercentage = updated.memoryUtilizationPercentage;
+        this.memoryUtilizationMegabytes = updated.memoryUtilizationMegabytes;
+    }
+}
diff --git a/frontend/server/src/main/java/org/pytorch/serve/device/AcceleratorVendor.java b/frontend/server/src/main/java/org/pytorch/serve/device/AcceleratorVendor.java
@@ -0,0 +1,9 @@
+package org.pytorch.serve.device;
+
+public enum AcceleratorVendor { // Vendor tag for an accelerator device.
+    AMD,
+    NVIDIA,
+    INTEL,
+    APPLE,
+    UNKNOWN // presumably the fallback when the vendor cannot be determined - confirm against the detection code
+}