diff --git a/dockerfiles/README.md b/dockerfiles/README.md index 4758e76..9b6e153 100644 --- a/dockerfiles/README.md +++ b/dockerfiles/README.md @@ -49,10 +49,27 @@ This script can be used as reference to build docker images for Gaudi. make build ``` + #### Build triton vllm backend (default OS - ubuntu22.04): + ``` + cd triton_vllm_backend + make build BUILD_OS=ubuntu22.04 + ``` + 3. Build command variables #### Optional Parameters * BUILD_OS - set the OS to build (default ubuntu22.04) * BUILD_DIR - the folder where the build be executed from (default dockerbuild in image folder) * VERBOSE - set to TRUE to echo the commands (default FALSE) - * DOCKER_CACHE - set to TRUE to use cache for building docker image (default FALSE) \ No newline at end of file + * DOCKER_CACHE - set to TRUE to use cache for building docker image (default FALSE) + +4. Instructions for the triton-vllm-backend server + + * Run the backend container as described in [habana docs](https://docs.habana.ai/en/latest/PyTorch/Inference_on_PyTorch/Triton_Inference.html?highlight=triton%20inference#run-the-backend-container) + * Start the Triton server + ```bash + tritonserver --model-repository samples/model_repository + ``` + The current samples/model_repository/vllm_model contains a Llama 2 7B 1x configuration. Sample model files for Llama 2 7B/70B and Qwen2-7B are also provided under the samples/model_repository/test_models folder. To use them, copy the corresponding model.json and config.pbtxt into the vllm_model folder structure (see the sketch below). + * To test with a client, follow the instructions [here](https://github.com/triton-inference-server/vllm_backend?tab=readme-ov-file#sending-your-first-inference) + diff --git a/dockerfiles/base/Dockerfile.amzn2 b/dockerfiles/base/Dockerfile.amzn2 index f091ab3..dfc548d 100644 --- a/dockerfiles/base/Dockerfile.amzn2 +++ b/dockerfiles/base/Dockerfile.amzn2 @@ -24,8 +24,10 @@ RUN amazon-linux-extras enable python3.8 && \ wget \ lsof \ tar \ - mesa-libGL && \ - yum clean all && rm -rf /var/cache/yum + mesa-libGL \ + sox-devel && \ + yum clean all && rm -rf /var/cache/yum && \ + rm -f /etc/ssh/ssh_host_*_key* # Install jemalloc-3.6.0-1.el7.x86_64 package with required /lib64/libjemalloc.so.1 lib need for topologies RUN yum install -y https://archives.fedoraproject.org/pub/archive/epel/7/x86_64/Packages/e/epel-release-7-14.noarch.rpm && \ @@ -39,7 +41,7 @@ RUN yum install -y sudo system-lsb-core cmake COPY install_efa.sh .
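Referring back to step 4 of the README hunk above: a minimal sketch of how the sample model files could be wired up and exercised, assuming the layout of the upstream triton vllm_backend sample (config.pbtxt at the model root, model.json inside the 1/ version directory) and a hypothetical qwen2-7b subfolder name under test_models; verify the actual paths in samples/model_repository before copying.

```bash
# Sketch only: the test_models subfolder name and the 1/ version directory are
# assumptions taken from the upstream triton vllm_backend sample layout.
cd samples/model_repository
cp test_models/qwen2-7b/config.pbtxt vllm_model/config.pbtxt
cp test_models/qwen2-7b/model.json   vllm_model/1/model.json
tritonserver --model-repository "$(pwd)" &    # serve the copied model
# Then test with a client, mirroring the linked "Sending your first inference" example:
curl -X POST localhost:8000/v2/models/vllm_model/generate \
  -d '{"text_input": "What is Habana Gaudi?", "parameters": {"stream": false, "temperature": 0}}'
```

The copied model.json carries the vLLM engine arguments that the backend's model.py (added later in this diff) loads at initialization, so it is the place to point at a different model or tokenizer.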
RUN ./install_efa.sh && rm install_efa.sh && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh -ENV LIBFABRIC_VERSION="1.20.0" +ENV LIBFABRIC_VERSION="1.22.0" ENV LIBFABRIC_ROOT="/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}" ENV MPI_ROOT=/opt/amazon/openmpi ENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH @@ -55,7 +57,7 @@ ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib RUN echo "[habanalabs]" > /etc/yum.repos.d/habanalabs.repo && \ echo "name=Habana AWS Linux repo" >> /etc/yum.repos.d/habanalabs.repo && \ echo "baseurl=https://${ARTIFACTORY_URL}/artifactory/AmazonLinux2" >> /etc/yum.repos.d/habanalabs.repo && \ - echo "gpgkey=https://${ARTIFACTORY_URL}/artifactory/api/v2/repositories/AmazonLinux2/keyPairs/primary/public" >> /etc/yum.repos.d/habanalabs.repo + echo "gpgkey=https://${ARTIFACTORY_URL}/artifactory/AmazonLinux2/repodata/repomd.xml.key" >> /etc/yum.repos.d/habanalabs.repo RUN yum makecache && \ yum install -y habanalabs-rdma-core-"$VERSION"-"$REVISION".amzn2 && \ @@ -74,7 +76,6 @@ RUN sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config && sed -i 's/[ #]\(.*ForwardAgent \).*/ \1yes/g' /etc/ssh/ssh_config && \ echo " UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \ sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config && \ - ssh-keygen -A && \ mkdir -p /var/run/sshd && echo "/usr/sbin/sshd -p 3022" | tee -a ~/.bashrc # There is no need to store pip installation files inside docker image @@ -94,7 +95,7 @@ RUN wget -nv -O /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archi cd / && \ rm -rf /tmp/main.zip /tmp/hccl_ofi_wrapper-main -RUN python3 -m pip install pip==23.3.1 setuptools==67.3.3 wheel==0.38.4 +RUN python3 -m pip install pip==24.2 setuptools==75.1.0 wheel==0.44.0 RUN python3 -m pip install habana_media_loader=="${VERSION}"."${REVISION}" diff --git a/dockerfiles/base/Dockerfile.rhel8.6 b/dockerfiles/base/Dockerfile.rhel8.6 index ecefcd2..2e836c7 100644 --- a/dockerfiles/base/Dockerfile.rhel8.6 +++ b/dockerfiles/base/Dockerfile.rhel8.6 @@ -38,6 +38,8 @@ RUN dnf install -y \ llvm \ lsof \ python38-devel \ + bzip2 \ + bzip2-devel \ openssh-clients \ libjpeg-devel \ openssh-server \ @@ -50,7 +52,8 @@ RUN dnf install -y \ # update pkgs (except OS version) for resolving potentials CVEs dnf versionlock add redhat-release* && \ dnf update -y && \ - dnf clean all && rm -rf /var/cache/yum + dnf clean all && rm -rf /var/cache/yum && \ + rm -f /etc/ssh/ssh_host_*_key* # CVE-2023-47038 RHSA-2024:3128 RUN dnf module reset perl -y && \ @@ -67,20 +70,22 @@ RUN echo "[appstream]" > /etc/yum.repos.d/CentOS-Linux-AppStream.repo && \ COPY install_efa.sh . 
RUN ./install_efa.sh && rm install_efa.sh && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh +ENV OPENMPI_VERSION=4.1.6 ENV LIBFABRIC_VERSION="1.20.0" ENV LIBFABRIC_ROOT="/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}" -ENV MPI_ROOT=/opt/amazon/openmpi +ENV MPI_ROOT=/opt/habanalabs/openmpi ENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH ENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH ENV OPAL_PREFIX=${MPI_ROOT} ENV MPICC=${MPI_ROOT}/bin/mpicc ENV RDMAV_FORK_SAFE=1 -ENV FI_EFA_USE_DEVICE_RDMA=1 +ENV FI_EFA_USE_DEVICE_RDMA=0 +ENV OMPI_MCA_btl=^openib RUN echo "[habanalabs]" > /etc/yum.repos.d/habanalabs.repo && \ echo "name=Habana RH8 Linux repo" >> /etc/yum.repos.d/habanalabs.repo && \ echo "baseurl=https://${ARTIFACTORY_URL}/artifactory/rhel/8/8.6" >> /etc/yum.repos.d/habanalabs.repo && \ - echo "gpgkey=https://${ARTIFACTORY_URL}/artifactory/api/v2/repositories/rhel/keyPairs/primary/public" >> /etc/yum.repos.d/habanalabs.repo + echo "gpgkey=https://${ARTIFACTORY_URL}/artifactory/rhel/8/8.6/repodata/repomd.xml.key" >> /etc/yum.repos.d/habanalabs.repo RUN echo "[powertools]" > /etc/yum.repos.d/powertools.repo && \ echo "name=powertools" >> /etc/yum.repos.d/powertools.repo && \ @@ -109,6 +114,12 @@ RUN wget -nv -O /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 https://github.com/o ./configure --prefix=$LIBFABRIC_ROOT --enable-psm3-verbs --enable-verbs=yes --with-synapseai=/usr && \ make && make install && cd / && rm -rf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 /tmp/libfabric-${LIBFABRIC_VERSION} +RUN wget -q -O /tmp/openmpi-${OPENMPI_VERSION}.tar.gz https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-${OPENMPI_VERSION}.tar.gz && \ + tar -xzf /tmp/openmpi-${OPENMPI_VERSION}.tar.gz -C /tmp && \ + cd /tmp/openmpi-${OPENMPI_VERSION} && \ + ./configure --prefix=${MPI_ROOT} --with-libfabric=$LIBFABRIC_ROOT --with-verbs && \ + make -j$(nproc) && make install && cd / && rm -rf /tmp/openmpi-${OPENMPI_VERSION}.tar.gz /tmp/openmpi-${OPENMPI_VERSION} + RUN wget -nv -O /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archive/refs/heads/main.zip && \ unzip /tmp/main.zip -d /tmp && \ cd /tmp/hccl_ofi_wrapper-main && \ @@ -117,7 +128,7 @@ RUN wget -nv -O /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archi rm -rf /tmp/main.zip /tmp/hccl_ofi_wrapper-main ENV PYTHON_VERSION=3.8 -RUN python3.8 -m pip install pip==23.3.1 setuptools==67.3.3 wheel==0.38.4 +RUN python3.8 -m pip install pip==24.2 setuptools==75.1.0 wheel==0.44.0 RUN alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 2 && \ alternatives --install /usr/bin/python3 python3 /usr/bin/python3.6 1 && \ @@ -131,7 +142,6 @@ RUN mkdir -p /var/run/sshd && \ sed -i 's/#\(ForwardAgent \).*/\1yes/g' /etc/ssh/ssh_config && \ echo " UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \ sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config && \ - ssh-keygen -A && \ mkdir -p /var/run/sshd && echo "/usr/sbin/sshd -p 3022" | tee -a ~/.bashrc ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so diff --git a/dockerfiles/base/Dockerfile.rhel9.2 b/dockerfiles/base/Dockerfile.rhel9.2 index 0fad818..4900b21 100644 --- a/dockerfiles/base/Dockerfile.rhel9.2 +++ b/dockerfiles/base/Dockerfile.rhel9.2 @@ -53,6 +53,7 @@ RUN dnf install -y \ wget \ git \ libffi-devel \ + bzip2 \ bzip2-devel \ zlib-devel \ mesa-libGL \ @@ -61,7 +62,8 @@ RUN dnf install -y \ # update pkgs (except OS version) for resolving potentials CVEs dnf versionlock add redhat-release* && \ dnf 
update -y && \ - dnf clean all && rm -rf /var/cache/yum + dnf clean all && rm -rf /var/cache/yum && \ + rm -f /etc/ssh/ssh_host_*_key* ENV PYTHON_VERSION=3.10 COPY install-python310.sh . @@ -71,29 +73,31 @@ ENV LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH COPY install_efa.sh . RUN ./install_efa.sh && rm install_efa.sh && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh +ENV OPENMPI_VERSION=4.1.6 ENV LIBFABRIC_VERSION="1.20.0" ENV LIBFABRIC_ROOT="/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}" -ENV MPI_ROOT=/opt/amazon/openmpi +ENV MPI_ROOT=/opt/habanalabs/openmpi ENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH ENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH ENV OPAL_PREFIX=${MPI_ROOT} ENV MPICC=${MPI_ROOT}/bin/mpicc ENV RDMAV_FORK_SAFE=1 -ENV FI_EFA_USE_DEVICE_RDMA=1 +ENV FI_EFA_USE_DEVICE_RDMA=0 +ENV OMPI_MCA_btl=^openib RUN echo "[habanalabs]" > /etc/yum.repos.d/habanalabs.repo && \ echo "name=Habana RH9 Linux repo" >> /etc/yum.repos.d/habanalabs.repo && \ echo "baseurl=https://${ARTIFACTORY_URL}/artifactory/rhel/9/9.2" >> /etc/yum.repos.d/habanalabs.repo && \ - echo "gpgkey=https://${ARTIFACTORY_URL}/artifactory/api/v2/repositories/rhel/keyPairs/primary/public" >> /etc/yum.repos.d/habanalabs.repo && \ + echo "gpgkey=https://${ARTIFACTORY_URL}/artifactory/rhel/9/9.2/repodata/repomd.xml.key" >> /etc/yum.repos.d/habanalabs.repo && \ echo "gpgcheck=1" >> /etc/yum.repos.d/habanalabs.repo # for Habana GPG key with SHA-1 signature RUN update-crypto-policies --set DEFAULT:SHA1 RUN dnf install -y habanalabs-rdma-core-"$VERSION"-"$REVISION".el9 \ - habanalabs-thunk-"$VERSION"-"$REVISION".el9 \ - habanalabs-firmware-tools-"$VERSION"-"$REVISION".el9 \ - habanalabs-graph-"$VERSION"-"$REVISION".el9 && \ + habanalabs-thunk-"$VERSION"-"$REVISION".el9 \ + habanalabs-firmware-tools-"$VERSION"-"$REVISION".el9 \ + habanalabs-graph-"$VERSION"-"$REVISION".el9 && \ rm -f /etc/yum.repos.d/habanalabs.repo && rm -f /etc/yum.repos.d/habana.repo && rm -rf /tmp/* && \ dnf clean all && rm -rf /var/cache/yum @@ -111,6 +115,12 @@ RUN wget -nv -O /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 https://github.com/o ./configure --prefix=$LIBFABRIC_ROOT --enable-psm3-verbs --enable-verbs=yes --with-synapseai=/usr && \ make && make install && cd / && rm -rf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 /tmp/libfabric-${LIBFABRIC_VERSION} +RUN wget -q -O /tmp/openmpi-${OPENMPI_VERSION}.tar.gz https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-${OPENMPI_VERSION}.tar.gz && \ + tar -xzf /tmp/openmpi-${OPENMPI_VERSION}.tar.gz -C /tmp && \ + cd /tmp/openmpi-${OPENMPI_VERSION} && \ + ./configure --prefix=${MPI_ROOT} --with-libfabric=$LIBFABRIC_ROOT --with-verbs && \ + make -j$(nproc) && make install && cd / && rm -rf /tmp/openmpi-${OPENMPI_VERSION}.tar.gz /tmp/openmpi-${OPENMPI_VERSION} + RUN wget -nv -O /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archive/refs/heads/main.zip && \ unzip /tmp/main.zip -d /tmp && \ cd /tmp/hccl_ofi_wrapper-main && \ @@ -118,7 +128,7 @@ RUN wget -nv -O /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archi cd / && \ rm -rf /tmp/main.zip /tmp/hccl_ofi_wrapper-main -RUN python3.10 -m pip install pip==23.3.1 setuptools==67.3.3 wheel==0.38.4 +RUN python3.10 -m pip install pip==24.2 setuptools==75.1.0 wheel==0.44.0 RUN ln -s /usr/bin/python3 /usr/bin/python @@ -130,7 +140,6 @@ RUN mkdir -p /var/run/sshd && \ sed -i 's/#\(ForwardAgent \).*/\1yes/g' /etc/ssh/ssh_config && \ echo " UserKnownHostsFile /dev/null" >> 
/etc/ssh/ssh_config && \ sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config && \ - ssh-keygen -A && \ mkdir -p /var/run/sshd && echo "/usr/sbin/sshd -p 3022" | tee -a ~/.bashrc ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so diff --git a/dockerfiles/base/Dockerfile.rhel9.4 b/dockerfiles/base/Dockerfile.rhel9.4 index 53b62c7..a00aa3a 100644 --- a/dockerfiles/base/Dockerfile.rhel9.4 +++ b/dockerfiles/base/Dockerfile.rhel9.4 @@ -51,14 +51,15 @@ RUN dnf install -y \ lsof \ python3-devel \ openssh-clients \ - openssl-1:3.0.7-27.el9 \ - openssl-devel-1:3.0.7-27.el9 \ + openssl-1:3.0.7-28.el9_4 \ + openssl-devel-1:3.0.7-28.el9_4 \ libjpeg-devel \ openssh-server \ lsb_release \ wget \ git \ libffi-devel \ + bzip2 \ bzip2-devel \ zlib-devel \ mesa-libGL \ @@ -66,13 +67,14 @@ RUN dnf install -y \ python3.11 \ python3.11-pip \ python3.11-devel \ + python3.11-rpm \ ffmpeg-free \ - perl-Net-SSLeay-1.92-2.el9 \ python3-dnf-plugin-versionlock && \ # update pkgs (except OS version) for resolving potentials CVEs - dnf versionlock add redhat-release* openssl* perl-Net-SSLeay && \ + dnf versionlock add redhat-release* openssl* libcurl-minimal curl-minimal ima-evm-utils python3-rpm rpm* && \ dnf update -y && \ - dnf clean all && rm -rf /var/cache/yum + dnf clean all && rm -rf /var/cache/yum && \ + rm -f /etc/ssh/ssh_host_*_key* RUN alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 2 && \ alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1 && \ @@ -84,29 +86,31 @@ RUN alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 2 && \ COPY install_efa.sh . RUN ./install_efa.sh && rm install_efa.sh && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh -ENV LIBFABRIC_VERSION="1.20.0" +ENV OPENMPI_VERSION=4.1.6 +ENV LIBFABRIC_VERSION="1.22.0" ENV LIBFABRIC_ROOT="/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}" -ENV MPI_ROOT=/opt/amazon/openmpi +ENV MPI_ROOT=/opt/habanalabs/openmpi ENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH ENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH ENV OPAL_PREFIX=${MPI_ROOT} ENV MPICC=${MPI_ROOT}/bin/mpicc ENV RDMAV_FORK_SAFE=1 -ENV FI_EFA_USE_DEVICE_RDMA=1 +ENV FI_EFA_USE_DEVICE_RDMA=0 +ENV OMPI_MCA_btl=^openib RUN echo "[habanalabs]" > /etc/yum.repos.d/habanalabs.repo && \ echo "name=Habana RH9 Linux repo" >> /etc/yum.repos.d/habanalabs.repo && \ echo "baseurl=https://${ARTIFACTORY_URL}/artifactory/rhel/9/9.4" >> /etc/yum.repos.d/habanalabs.repo && \ - echo "gpgkey=https://${ARTIFACTORY_URL}/artifactory/api/v2/repositories/rhel/keyPairs/primary/public" >> /etc/yum.repos.d/habanalabs.repo && \ + echo "gpgkey=https://${ARTIFACTORY_URL}/artifactory/rhel/9/9.4/repodata/repomd.xml.key" >> /etc/yum.repos.d/habanalabs.repo && \ echo "gpgcheck=1" >> /etc/yum.repos.d/habanalabs.repo # for Habana GPG key with SHA-1 signature RUN update-crypto-policies --set DEFAULT:SHA1 RUN dnf install -y habanalabs-rdma-core-"$VERSION"-"$REVISION".el9 \ - habanalabs-thunk-"$VERSION"-"$REVISION".el9 \ - habanalabs-firmware-tools-"$VERSION"-"$REVISION".el9 \ - habanalabs-graph-"$VERSION"-"$REVISION".el9 && \ + habanalabs-thunk-"$VERSION"-"$REVISION".el9 \ + habanalabs-firmware-tools-"$VERSION"-"$REVISION".el9 \ + habanalabs-graph-"$VERSION"-"$REVISION".el9 && \ rm -f /etc/yum.repos.d/habanalabs.repo && rm -f /etc/yum.repos.d/habana.repo && rm -rf /tmp/* && \ dnf clean all && rm -rf /var/cache/yum @@ -124,6 +128,12 @@ RUN wget -nv -O /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 https://github.com/o 
./configure --prefix=$LIBFABRIC_ROOT --enable-psm3-verbs --enable-verbs=yes --with-synapseai=/usr && \ make && make install && cd / && rm -rf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 /tmp/libfabric-${LIBFABRIC_VERSION} +RUN wget -q -O /tmp/openmpi-${OPENMPI_VERSION}.tar.gz https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-${OPENMPI_VERSION}.tar.gz && \ + tar -xzf /tmp/openmpi-${OPENMPI_VERSION}.tar.gz -C /tmp && \ + cd /tmp/openmpi-${OPENMPI_VERSION} && \ + ./configure --prefix=${MPI_ROOT} --with-libfabric=$LIBFABRIC_ROOT --with-verbs && \ + make -j$(nproc) && make install && cd / && rm -rf /tmp/openmpi-${OPENMPI_VERSION}.tar.gz /tmp/openmpi-${OPENMPI_VERSION} + RUN wget -nv -O /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archive/refs/heads/main.zip && \ unzip /tmp/main.zip -d /tmp && \ cd /tmp/hccl_ofi_wrapper-main && \ @@ -131,7 +141,7 @@ RUN wget -nv -O /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archi cd / && \ rm -rf /tmp/main.zip /tmp/hccl_ofi_wrapper-main -RUN python3.11 -m pip install pip==23.3.1 setuptools==67.3.3 wheel==0.38.4 +RUN python3.11 -m pip install pip==24.2 setuptools==75.1.0 wheel==0.44.0 RUN ln -s /usr/bin/python3 /usr/bin/python @@ -143,7 +153,6 @@ RUN mkdir -p /var/run/sshd && \ sed -i 's/#\(ForwardAgent \).*/\1yes/g' /etc/ssh/ssh_config && \ echo " UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \ sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config && \ - ssh-keygen -A && \ mkdir -p /var/run/sshd && echo "/usr/sbin/sshd -p 3022" | tee -a ~/.bashrc ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so diff --git a/dockerfiles/base/Dockerfile.suse15.5 b/dockerfiles/base/Dockerfile.suse15.5 new file mode 100644 index 0000000..f53bd3c --- /dev/null +++ b/dockerfiles/base/Dockerfile.suse15.5 @@ -0,0 +1,121 @@ +# Copyright (c) 2024 Habana Labs, Ltd. +# +# SPDX-License-Identifier: Apache-2.0 +# +# HabanaLabs Dockerfile base installer layer for SUSE 15.5 +FROM registry.suse.com/suse/sle15:15.5.36.11.33 +ARG ARTIFACTORY_URL +ARG VERSION +ARG REVISION + +# for RHEL certification +LABEL vendor="Habanalabs Ltd." +LABEL release="${VERSION}-${REVISION}" + +COPY LICENSE /licenses/ + +RUN zypper addrepo -f http://download.opensuse.org/distribution/leap/15.5/repo/oss/ OpenSUSI && \ + echo "gpgcheck=0" >> /etc/zypp/repos.d/OpenSUSI.repo && \ + echo "repo_gpgcheck=0" >> /etc/zypp/repos.d/OpenSUSI.repo + +RUN zypper addrepo -f http://download.opensuse.org/source/distribution/leap/15.5/repo/oss/ OpenSUSISrc && \ + echo "gpgcheck=0" >> /etc/zypp/repos.d/OpenSUSISrc.repo && \ + echo "repo_gpgcheck=0" >> /etc/zypp/repos.d/OpenSUSISrc.repo + +RUN zypper install -y --allow-downgrade \ + clang \ + cmake \ + ffmpeg \ + gcc \ + gcc-c++ \ + git \ + glibc-devel \ + iproute \ + jemalloc \ + lbzip2 \ + libarchive-devel \ + libffi-devel \ + libjpeg-devel \ + libksba \ + linux-glibc-devel \ + llvm \ + lsof \ + Mesa-libGL-devel \ + Mesa-libGL1 \ + openssh-clients \ + openssh-server \ + openssl openssl-devel \ + python311 \ + python311-devel \ + python311-pip \ + unzip \ + wget \ + zlib-devel && \ + rm -f /etc/ssh/ssh_host_*_key* + +RUN alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 2 && \ + alternatives --install /usr/bin/python3 python3 /usr/bin/python3.6 1 && \ + alternatives --set python3 /usr/bin/python3.11 && \ + alternatives --install /usr/bin/pip3 pip3 /usr/bin/pip3.11 1 && \ + alternatives --set pip3 /usr/bin/pip3.11 + +COPY install_efa.sh . 
+RUN ./install_efa.sh && rm install_efa.sh && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh + +ENV LIBFABRIC_VERSION="1.22.0" +ENV LIBFABRIC_ROOT="/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}" +ENV MPI_ROOT=/opt/amazon/openmpi +ENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH +ENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH +ENV OPAL_PREFIX=${MPI_ROOT} +ENV MPICC=${MPI_ROOT}/bin/mpicc +ENV RDMAV_FORK_SAFE=1 +ENV FI_EFA_USE_DEVICE_RDMA=1 + +RUN echo "[habanalabs]" > /etc/zypp/repos.d/habanalabs.repo && \ + echo "name=Habana SUSE Linux repo" >> /etc/zypp/repos.d/habanalabs.repo && \ + echo "baseurl=https://${ARTIFACTORY_URL}/artifactory/sles/15/15.5" >> /etc/zypp/repos.d/habanalabs.repo && \ + echo "gpgkey=https://${ARTIFACTORY_URL}/artifactory/sles/15/15.5/repodata/repomd.xml.key" >> /etc/zypp/repos.d/habanalabs.repo && \ + echo "gpgcheck=1" >> /etc/zypp/repos.d/habanalabs.repo + +RUN zypper --gpg-auto-import-keys install -y habanalabs-rdma-core-"$VERSION"-"$REVISION" \ + habanalabs-thunk-"$VERSION"-"$REVISION" \ + habanalabs-firmware-tools-"$VERSION"-"$REVISION" \ + habanalabs-graph-"$VERSION"-"$REVISION" && \ + rm -f /etc/zypp/repos.d/habanalabs.repo + +ENV PIP_NO_CACHE_DIR=on +ENV PIP_DISABLE_PIP_VERSION_CHECK=1 +ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src +ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib + +RUN wget -nv -O /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 https://github.com/ofiwg/libfabric/releases/download/v${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION}.tar.bz2 && \ + cd /tmp/ && tar xf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 && \ + cd /tmp/libfabric-${LIBFABRIC_VERSION} && \ + ./configure --prefix=$LIBFABRIC_ROOT --enable-psm3-verbs --enable-verbs=yes --with-synapseai=/usr && \ + make && make install && cd / && rm -rf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 /tmp/libfabric-${LIBFABRIC_VERSION} + +RUN wget -nv -O /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archive/refs/heads/main.zip && \ + unzip /tmp/main.zip -d /tmp && \ + cd /tmp/hccl_ofi_wrapper-main && \ + make && cp -f libhccl_ofi_wrapper.so /usr/lib/habanalabs/libhccl_ofi_wrapper.so && \ + cd / && \ + rm -rf /tmp/main.zip /tmp/hccl_ofi_wrapper-main + +RUN python3.11 -m pip install pip==24.2 setuptools==75.1.0 wheel==0.44.0 + + +RUN python3.11 -m pip install habana_media_loader=="${VERSION}"."${REVISION}" + +# SSH configuration necessary to support mpi-operator v2 +RUN mkdir -p /var/run/sshd && \ + sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config && \ + sed -i 's/#\(ForwardAgent \).*/\1yes/g' /etc/ssh/ssh_config && \ + echo " UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \ + sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config && \ + mkdir -p /var/run/sshd && echo "/usr/sbin/sshd -p 3022" | tee -a ~/.bashrc + +ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so +ENV HABANA_LOGS=/var/log/habana_logs/ +ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw +ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins diff --git a/dockerfiles/base/Dockerfile.tencentos3.1 b/dockerfiles/base/Dockerfile.tencentos3.1 index 2cf70aa..c5a28a7 100644 --- a/dockerfiles/base/Dockerfile.tencentos3.1 +++ b/dockerfiles/base/Dockerfile.tencentos3.1 @@ -42,7 +42,8 @@ RUN dnf install -y \ iproute \ python3-dnf-plugin-versionlock && \ dnf versionlock add redhat-release* && \ - dnf clean all && rm -rf /var/cache/yum + dnf clean all && rm -rf /var/cache/yum && \ + rm -f /etc/ssh/ssh_host_*_key* COPY
install-python310.sh . RUN ./install-python310.sh tencentos3.1 && rm install-python310.sh @@ -65,7 +66,7 @@ ENV FI_EFA_USE_DEVICE_RDMA=1 RUN echo "[habanalabs]" > /etc/yum.repos.d/habanalabs.repo && \ echo "name=Habana TC31 Linux repo" >> /etc/yum.repos.d/habanalabs.repo && \ echo "baseurl=https://${ARTIFACTORY_URL}/artifactory/tencentos/3/3.1" >> /etc/yum.repos.d/habanalabs.repo && \ - echo "gpgkey=https://${ARTIFACTORY_URL}/artifactory/api/v2/repositories/tencentos/keyPairs/primary/public" >> /etc/yum.repos.d/habanalabs.repo + echo "gpgkey=https://${ARTIFACTORY_URL}/artifactory/tencentos/3/3.1/repodata/repomd.xml.key" >> /etc/yum.repos.d/habanalabs.repo RUN dnf install -y habanalabs-rdma-core-"$VERSION"-"$REVISION".tl3 \ habanalabs-thunk-"$VERSION"-"$REVISION".tl3 \ @@ -95,7 +96,7 @@ RUN wget -nv -O /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archi cd / && \ rm -rf /tmp/main.zip /tmp/hccl_ofi_wrapper-main -RUN python3 -m pip install pip==23.3.1 setuptools==67.3.3 wheel==0.38.4 +RUN python3 -m pip install pip==24.2 setuptools==75.1.0 wheel==0.44.0 RUN python3 -m pip install habana_media_loader=="${VERSION}"."${REVISION}" @@ -105,7 +106,6 @@ RUN mkdir -p /var/run/sshd && \ sed -i 's/#\(ForwardAgent \).*/\1yes/g' /etc/ssh/ssh_config && \ echo " UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \ sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config && \ - ssh-keygen -A && \ mkdir -p /var/run/sshd && echo "/usr/sbin/sshd -p 3022" | tee -a ~/.bashrc ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so diff --git a/dockerfiles/base/Dockerfile.ubuntu22.04 b/dockerfiles/base/Dockerfile.ubuntu22.04 index 4a29c98..b322cbd 100644 --- a/dockerfiles/base/Dockerfile.ubuntu22.04 +++ b/dockerfiles/base/Dockerfile.ubuntu22.04 @@ -48,13 +48,14 @@ RUN apt-get update && \ libkrb5-3 \ libgnutls30 \ wget && \ - apt-get autoremove && apt-get clean + apt-get autoremove && apt-get clean && \ + rm -f /etc/ssh/ssh_host_*_key* # There is no need to store pip installation files inside docker image ENV PIP_NO_CACHE_DIR=on ENV PIP_DISABLE_PIP_VERSION_CHECK=1 -RUN python3 -m pip install pip==23.3.1 setuptools==67.3.3 wheel==0.38.4 +RUN python3 -m pip install pip==24.2 setuptools==75.1.0 wheel==0.44.0 COPY install_efa.sh . 
RUN ./install_efa.sh && rm install_efa.sh && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh @@ -76,11 +77,14 @@ RUN wget -O- https://${ARTIFACTORY_URL}/artifactory/api/gpg/key/public | gpg --d chmod 644 /usr/share/keyrings/habana-artifactory.gpg && \ echo "deb [signed-by=/usr/share/keyrings/habana-artifactory.gpg] https://${ARTIFACTORY_URL}/artifactory/debian jammy main" | tee -a /etc/apt/sources.list && \ apt-get update && \ + cp /etc/dpkg/dpkg.cfg.d/excludes /etc/dpkg/dpkg.cfg.d/excludes.bak && \ + sed -i '/path-exclude=\/usr\/share\/doc/d' /etc/dpkg/dpkg.cfg.d/excludes && \ apt-get install -y habanalabs-rdma-core="$VERSION"-"$REVISION" \ habanalabs-thunk="$VERSION"-"$REVISION" \ habanalabs-firmware-tools="$VERSION"-"$REVISION" \ habanalabs-graph="$VERSION"-"$REVISION" && \ apt-get autoremove --yes && apt-get clean && rm -rf /var/lib/apt/lists/* && \ + mv -f /etc/dpkg/dpkg.cfg.d/excludes.bak /etc/dpkg/dpkg.cfg.d/excludes && \ sed --in-place "/$ARTIFACTORY_URL/d" /etc/apt/sources.list RUN wget -nv -O /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 https://github.com/ofiwg/libfabric/releases/download/v${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION}.tar.bz2 && \ diff --git a/dockerfiles/base/Dockerfile.ubuntu24.04 b/dockerfiles/base/Dockerfile.ubuntu24.04 new file mode 100644 index 0000000..7f47c08 --- /dev/null +++ b/dockerfiles/base/Dockerfile.ubuntu24.04 @@ -0,0 +1,114 @@ +# Copyright (c) 2024 HabanaLabs, Ltd. +# +# SPDX-License-Identifier: Apache-2.0 +# +# HabanaLabs Dockerfile base installer layer for Ubuntu 24.04 +FROM ubuntu:noble +ARG ARTIFACTORY_URL +ARG VERSION +ARG REVISION + +ENV DEBIAN_FRONTEND=noninteractive +ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so +ENV HABANA_LOGS=/var/log/habana_logs/ +ENV OS_NUMBER=2404 +ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw +ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + apt-transport-https \ + apt-utils \ + bc \ + build-essential \ + ca-certificates \ + dkms \ + ethtool \ + gcc \ + git \ + gnupg \ + gpg-agent \ + graphviz \ + libgl1 \ + libgoogle-glog0v6t64 \ + libjemalloc2 \ + libpq-dev \ + lsof \ + make \ + openssh-client \ + openssh-server \ + protobuf-compiler \ + python3 \ + python3-dev \ + python3-pip \ + python3-tk \ + python3-venv \ + unzip \ + vim \ + libkrb5-3 \ + libgnutls30 \ + wget && \ + apt-get autoremove && apt-get clean && \ + rm -f /etc/ssh/ssh_host_*_key* + +# There is no need to store pip installation files inside docker image +ENV PIP_NO_CACHE_DIR=on +ENV PIP_DISABLE_PIP_VERSION_CHECK=1 + +RUN python3 -m pip install pip==24.0 setuptools==75.1.0 wheel==0.42.0 --break-system-packages + +COPY install_efa.sh . 
+RUN ./install_efa.sh && rm install_efa.sh && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh + +ENV LIBFABRIC_VERSION="1.20.0" +ENV LIBFABRIC_ROOT="/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}" +ENV MPI_ROOT=/opt/amazon/openmpi +ENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH +ENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH +ENV OPAL_PREFIX=${MPI_ROOT} +ENV MPICC=${MPI_ROOT}/bin/mpicc +ENV RDMAV_FORK_SAFE=1 +ENV FI_EFA_USE_DEVICE_RDMA=1 +ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src +ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib + +RUN wget -O- https://${ARTIFACTORY_URL}/artifactory/api/gpg/key/public | gpg --dearmor -o /usr/share/keyrings/habana-artifactory.gpg && \ + chown root:root /usr/share/keyrings/habana-artifactory.gpg && \ + chmod 644 /usr/share/keyrings/habana-artifactory.gpg && \ + echo "deb [signed-by=/usr/share/keyrings/habana-artifactory.gpg] https://${ARTIFACTORY_URL}/artifactory/debian noble main" | tee -a /etc/apt/sources.list && \ + apt-get update && \ + cp /etc/dpkg/dpkg.cfg.d/excludes /etc/dpkg/dpkg.cfg.d/excludes.bak && \ + sed -i '/path-exclude=\/usr\/share\/doc/d' /etc/dpkg/dpkg.cfg.d/excludes && \ + apt-get install -y habanalabs-rdma-core="$VERSION"-"$REVISION" \ + habanalabs-thunk="$VERSION"-"$REVISION" \ + habanalabs-firmware-tools="$VERSION"-"$REVISION" \ + habanalabs-graph="$VERSION"-"$REVISION" && \ + apt-get autoremove --yes && apt-get clean && rm -rf /var/lib/apt/lists/* && \ + mv -f /etc/dpkg/dpkg.cfg.d/excludes.bak /etc/dpkg/dpkg.cfg.d/excludes && \ + sed --in-place "/$ARTIFACTORY_URL/d" /etc/apt/sources.list + +RUN wget -nv -O /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 https://github.com/ofiwg/libfabric/releases/download/v${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION}.tar.bz2 && \ + cd /tmp/ && tar xf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 && \ + cd /tmp/libfabric-${LIBFABRIC_VERSION} && \ + ./configure --prefix=$LIBFABRIC_ROOT --enable-psm3-verbs --enable-verbs=yes --with-synapseai=/usr && \ + make && make install && cd / && rm -rf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 /tmp/libfabric-${LIBFABRIC_VERSION} + +RUN wget -nv -O /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archive/refs/heads/main.zip && \ + unzip /tmp/main.zip -d /tmp && \ + cd /tmp/hccl_ofi_wrapper-main && \ + make && cp -f libhccl_ofi_wrapper.so /usr/lib/habanalabs/libhccl_ofi_wrapper.so && \ + cd / && \ + rm -rf /tmp/main.zip /tmp/hccl_ofi_wrapper-main + +RUN python3 -m pip install habana_media_loader=="${VERSION}"."${REVISION}" --break-system-packages + +# SSH configuration necessary to support mpi-operator v2 +RUN mkdir -p /var/run/sshd && \ + sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config && \ + sed -i 's/#\(ForwardAgent \).*/\1yes/g' /etc/ssh/ssh_config && \ + echo " UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \ + sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config && \ + echo "/etc/init.d/ssh start \"-p 3022\"" >> ~/.bashrc && \ + sed -i '/[ -z "$PS1" ] && return/s/^/#/g' ~/.bashrc + +RUN mv /usr/lib/python3.12/EXTERNALLY-MANAGED /usr/lib/python3.12/EXTERNALLY-MANAGED.old \ No newline at end of file diff --git a/dockerfiles/base/install_efa.sh b/dockerfiles/base/install_efa.sh index bb6f680..e651dff 100755 --- a/dockerfiles/base/install_efa.sh +++ b/dockerfiles/base/install_efa.sh @@ -1,22 +1,38 @@ #!/bin/bash -ex -DEFAULT_EFA_INSTALLER_VER=1.29.0 +DEFAULT_EFA_INSTALLER_VER=1.34.0 efa_installer_version=${1:-$DEFAULT_EFA_INSTALLER_VER} 
tmp_dir=$(mktemp -d) wget -nv https://efa-installer.amazonaws.com/aws-efa-installer-$efa_installer_version.tar.gz -P $tmp_dir tar -xf $tmp_dir/aws-efa-installer-$efa_installer_version.tar.gz -C $tmp_dir +RUN_EFA_INSTALLER="./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify" pushd $tmp_dir/aws-efa-installer -case $(. /etc/os-release ; echo -n $ID) in +. /etc/os-release +case $ID in rhel) # we cannot install dkms packages on RHEL images due to OCP rules - rm -f RPMS/RHEL8/x86_64/dkms*.rpm + find RPMS/ -name 'dkms*.rpm' -exec rm -f {} \; + find RPMS/ -name 'efa-*.rpm' -exec rm -f {} \; + case $VERSION_ID in + 8*) + dnf install -y RPMS/ROCKYLINUX8/x86_64/rdma-core/*.rpm + ;; + 9*) + dnf install -y RPMS/ROCKYLINUX9/x86_64/rdma-core/*.rpm + ;; + *) + echo "Unsupported RHEL version: $VERSION_ID" + exit 1 + ;; + esac + RUN_EFA_INSTALLER="echo 'Skipping EFA installer on RHEL'" ;; tencentos) - dnf install -y RPMS/ROCKYLINUX8/x86_64/rdma-core/libibverbs-46.0-1.el8.x86_64.rpm RPMS/ROCKYLINUX8/x86_64/rdma-core/libibverbs-utils-46.0-1.el8.x86_64.rpm + dnf install -y RPMS/ROCKYLINUX8/x86_64/rdma-core/*.rpm patch -f -p1 -i /tmp/tencentos_efa_patch.txt --reject-file=tencentos_efa_patch.rej --no-backup-if-mismatch ;; esac -./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify +eval $RUN_EFA_INSTALLER popd rm -rf $tmp_dir diff --git a/dockerfiles/base/tencentos_efa_patch.txt b/dockerfiles/base/tencentos_efa_patch.txt index 9b08bed..cfd050d 100644 --- a/dockerfiles/base/tencentos_efa_patch.txt +++ b/dockerfiles/base/tencentos_efa_patch.txt @@ -1,5 +1,5 @@ diff --git a/common.sh b/common.sh -index cae76fc..afe440a 100755 +index 3c3a0e4..b463f42 100755 --- a/common.sh +++ b/common.sh @@ -50,6 +50,15 @@ has_substring() { @@ -18,70 +18,88 @@ index cae76fc..afe440a 100755 is_amazon_linux_2() { . 
/etc/os-release if [ "$NAME" = "Amazon Linux" ] && [ "$VERSION_ID" = "2" ]; then -@@ -183,7 +192,7 @@ is_suse_15() { +@@ -164,7 +173,7 @@ is_suse_15() { } install_cmd() { -- if is_amazon_linux_2 || is_amazon_linux_2023 || is_centos_7 || is_rhel_7 || is_centos_8 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9; then -+ if is_amazon_linux_2 || is_amazon_linux_2023 || is_centos_7 || is_rhel_7 || is_centos_8 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9 || is_tencentos_3; then +- if is_amazon_linux_2 || is_amazon_linux_2023 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9; then ++ if is_amazon_linux_2 || is_amazon_linux_2023 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9 || is_tencentos_3; then if [ $1 == "localinstall" ]; then shift - if is_centos_8; then -@@ -207,7 +216,7 @@ install_cmd() { + yum -y localinstall $@ +@@ -181,7 +190,7 @@ install_cmd() { fi } search_cmd() { -- if is_amazon_linux_2 || is_amazon_linux_2023 || is_centos_7 || is_rhel_7 || is_centos_8 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9; then -+ if is_amazon_linux_2 || is_amazon_linux_2023 || is_centos_7 || is_rhel_7 || is_centos_8 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9 || is_tencentos_3; then +- if is_amazon_linux_2 || is_amazon_linux_2023 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9; then ++ if is_amazon_linux_2 || is_amazon_linux_2023 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9 || is_tencentos_3; then yum list installed $@ elif is_suse_15; then zypper search --installed-only --match-exact $@ -@@ -219,7 +228,7 @@ search_cmd() { - fi +@@ -194,7 +203,7 @@ search_cmd() { } remove_cmd() { -- if is_amazon_linux_2 || is_amazon_linux_2023 || is_centos_7 || is_rhel_7 || is_centos_8 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9; then -+ if is_amazon_linux_2 || is_amazon_linux_2023 || is_centos_7 || is_rhel_7 || is_centos_8 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9 || is_tencentos_3; then - yum -y remove $@ - elif is_suse_15; then - zypper remove -y $@ + # we don't remove the dependencies of the efa packages as it may have reverse dependencies on other system packages +- if is_amazon_linux_2 || is_amazon_linux_2023 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9 || is_suse_15; then ++ if is_amazon_linux_2 || is_amazon_linux_2023 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9 || is_suse_15 || is_tencentos_3; then + rpm --erase --nodeps $@ + elif is_debian_10 || is_debian_11 || is_ubuntu_2004 || is_ubuntu_2204 || is_ubuntu_2404; then + # purge is identical to remove except that packages are removed and purged +@@ -207,7 +216,7 @@ remove_cmd() { + } + # Get the list of file installed by the package name + query_file_list_cmd() { +- if is_amazon_linux_2 || is_amazon_linux_2023 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9 || is_suse_15; then ++ if is_amazon_linux_2 || is_amazon_linux_2023 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9 || is_suse_15 || is_tencentos_3; then + rpm -ql $@ + elif is_debian_10 || is_debian_11 || is_ubuntu_2004 || is_ubuntu_2204 || is_ubuntu_2404; then + dpkg -L $@ +@@ -220,7 +229,7 @@ query_file_list_cmd() { + # reverse dependencies (some other installed packages depend on them) + # this command will return non-zero + remove_dryrun_cmd() { +- if is_amazon_linux_2 || is_amazon_linux_2023 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9 || 
is_suse_15; then ++ if is_amazon_linux_2 || is_amazon_linux_2023 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9 || is_suse_15 || is_tencentos_3; then + rpm --erase --test $@ + elif is_debian_10 || is_debian_11 || is_ubuntu_2004 || is_ubuntu_2204 || is_ubuntu_2404; then + dpkg -r --dry-run $@ diff --git a/efa_installer.sh b/efa_installer.sh -index 35a3628..5e94a21 100755 +index 544673f..faf3369 100755 --- a/efa_installer.sh +++ b/efa_installer.sh -@@ -49,7 +49,7 @@ EOF +@@ -97,7 +97,7 @@ select_mpi() { } detect_os() { -- if is_amazon_linux_2 || is_amazon_linux_2023 || is_centos_7 || is_rhel_7 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9; then -+ if is_amazon_linux_2 || is_amazon_linux_2023 || is_centos_7 || is_rhel_7 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9 || is_tencentos_3; then +- if is_amazon_linux_2 || is_amazon_linux_2023 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9; then ++ if is_amazon_linux_2 || is_amazon_linux_2023 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9 || is_tencentos_3; then PACKAGE_TYPE="rpm" KERNEL_SEARCH_STRING=kernel INSTALL_ARGS="--setopt=skip_missing_names_on_install=False" -@@ -170,7 +170,7 @@ setup_install_package_paths() { - if is_centos_7 || is_rhel_7; then - base_dir="RPMS/CENT7/${arch}" - debug_dir="RPMS/CENT7/${arch}/debug" -- elif is_rhel_8 || is_rockylinux_8; then -+ elif is_rhel_8 || is_rockylinux_8 || is_tencentos_3; then +@@ -209,7 +209,7 @@ setup_install_package_paths() { + local kmod_path + + if [ "${PACKAGE_TYPE}" = "rpm" ]; then +- if is_rhel_8 || is_rockylinux_8; then ++ if is_rhel_8 || is_rockylinux_8|| is_tencentos_3; then base_dir="RPMS/ROCKYLINUX8/${arch}" debug_dir="RPMS/ROCKYLINUX8/${arch}/debug" elif is_rockylinux_9 || is_rhel_9; then -@@ -390,7 +390,7 @@ install_apt_package() { +@@ -465,7 +465,7 @@ install_apt_package() { install_dependencies() { local packages -- if is_amazon_linux_2 || is_amazon_linux_2023 || is_centos_7 || is_rhel_7 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9; then -+ if is_amazon_linux_2 || is_amazon_linux_2023 || is_centos_7 || is_rhel_7 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9 || is_tencentos_3; then +- if is_amazon_linux_2 || is_amazon_linux_2023 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9; then ++ if is_amazon_linux_2 || is_amazon_linux_2023 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9 || is_tencentos_3; then packages="pciutils rpmdevtools" if [ ${SKIP_KMOD} -eq 0 ]; then for kernel in ${INSTALLED_KERNELS[@]}; do -@@ -642,7 +642,7 @@ uninstall_efa() { +@@ -785,7 +785,7 @@ uninstall_efa() { uninstall_old_efa_packages() { # Uninstall 'openmpi' and 'libfabric' if packaged by AWS. -- if is_amazon_linux_2 || is_amazon_linux_2023 || is_centos_7 || is_rhel_7 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9; then -+ if is_amazon_linux_2 || is_amazon_linux_2023 || is_centos_7 || is_rhel_7 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9 | is_tencentos_3; then +- if is_amazon_linux_2 || is_amazon_linux_2023 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9; then ++ if is_amazon_linux_2 || is_amazon_linux_2023 || is_rhel_8 || is_rockylinux_8 || is_rockylinux_9 || is_rhel_9 || is_tencentos_3; then for pkg in openmpi libfabric libfabric-debuginfo; do rpm -ql $pkg | grep -q /opt/amazon if [ $? 
-eq 0 ]; then diff --git a/dockerfiles/common.mk b/dockerfiles/common.mk index 4404cd5..6e29640 100644 --- a/dockerfiles/common.mk +++ b/dockerfiles/common.mk @@ -5,9 +5,9 @@ BUILD_OS ?= ubuntu22.04 BUILD_DIR ?= $(CURDIR)/dockerbuild REPO_SERVER ?= vault.habana.ai -PT_VERSION ?= 2.3.1 -RELEASE_VERSION ?= 1.17.1 -RELEASE_BUILD_ID ?= 40 +PT_VERSION ?= 2.4.0 +RELEASE_VERSION ?= 1.18.0 +RELEASE_BUILD_ID ?= 524 BASE_IMAGE_URL ?= base-installer-$(BUILD_OS) IMAGE_URL = $(IMAGE_NAME):$(RELEASE_VERSION)-$(RELEASE_BUILD_ID) diff --git a/dockerfiles/pytorch/Dockerfile.rhel9.4 b/dockerfiles/pytorch/Dockerfile.rhel9.4 index 4e65878..d09fafe 100644 --- a/dockerfiles/pytorch/Dockerfile.rhel9.4 +++ b/dockerfiles/pytorch/Dockerfile.rhel9.4 @@ -26,7 +26,7 @@ RUN echo "[CRB]" > /etc/yum.repos.d/CentOS-Linux-CRB.repo && \ echo "gpgcheck=1" >> /etc/yum.repos.d/CentOS-Linux-CRB.repo RUN dnf install --allowerasing -y \ - curl \ + curl-7.76.1-29.el9_4.1 \ cairo-devel \ numactl-devel \ iproute \ @@ -38,10 +38,19 @@ RUN dnf install --allowerasing -y \ gperftools-devel && \ dnf clean all && rm -rf /var/cache/yum -RUN dnf config-manager --add-repo https://yum.repos.intel.com/mkl/setup/intel-mkl.repo -y && \ - dnf install --allowerasing -y intel-mkl-64bit-2020.4-912 && \ +RUN echo "[oneAPI]" >> /etc/yum.repos.d/oneAPI.repo && \ + echo "name=Intel® oneAPI repository" >> /etc/yum.repos.d/oneAPI.repo && \ + echo "baseurl=https://yum.repos.intel.com/oneapi" >> /etc/yum.repos.d/oneAPI.repo && \ + echo 'enabled=1' >> /etc/yum.repos.d/oneAPI.repo && \ + echo "gpgcheck=1" >> /etc/yum.repos.d/oneAPI.repo && \ + echo "repo_gpgcheck=1" >> /etc/yum.repos.d/oneAPI.repo && \ + echo "gpgkey=https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB" >> /etc/yum.repos.d/oneAPI.repo + +RUN dnf install --allowerasing -y intel-oneapi-mkl-2024.2.0 && \ dnf clean all && rm -rf /var/cache/yum +ENV LD_LIBRARY_PATH=/opt/intel/oneapi/mkl/2024.2/lib:${LD_LIBRARY_PATH} + COPY install_packages.sh . RUN ./install_packages.sh && rm -f install_packages.sh && \ diff --git a/dockerfiles/pytorch/Dockerfile.suse15.5 b/dockerfiles/pytorch/Dockerfile.suse15.5 new file mode 100644 index 0000000..8fe9f54 --- /dev/null +++ b/dockerfiles/pytorch/Dockerfile.suse15.5 @@ -0,0 +1,47 @@ +# Copyright (c) 2024 HabanaLabs, Ltd. +# +# SPDX-License-Identifier: Apache-2.0 +# +# HabanaLabs Dockerfile PyTorch installer layer for SUSE 15.5 +ARG BASE_NAME +ARG VERSION +ARG REVISION +FROM ${BASE_NAME}:${VERSION}-${REVISION} +ARG PT_VERSION +ARG VERSION +ARG REVISION +ARG BASE_NAME +ARG ARTIFACTORY_URL + +# for RHEL certification +LABEL name="PyTorch Installer" +LABEL summary="Habanalabs PyTorch installer layer for SUSE 15.5" +LABEL description="Image with pre installed Habanalabs packages for PyTorch" + +ENV PYTHONPATH=/root:/usr/lib/habanalabs/ + +RUN zypper install -y --allow-downgrade \ + cairo-devel \ + numactl \ + lapack-devel \ + numactl \ + gperftools-devel + +RUN zypper addrepo -f https://yum.repos.intel.com/oneapi oneAPI && \ + echo "gpgcheck=0" >> /etc/zypp/repos.d/oneAPI.repo && \ + echo "repo_gpgcheck=0" >> /etc/zypp/repos.d/oneAPI.repo + +RUN zypper install -y intel-oneapi-mkl-2021.1.1 intel-oneapi-mkl-devel-2021.1.1 + + +COPY install_packages.sh .
+ +RUN ./install_packages.sh && rm -f install_packages.sh && \ + /sbin/ldconfig && echo "source /etc/profile.d/habanalabs.sh" >> ~/.bashrc + +# Set LD_PRELOAD after all required installations to +# avoid warnings during docker creation +ENV LD_PRELOAD=/usr/lib64/libtcmalloc.so.4 +ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768 + +RUN rm -rf /tmp/* \ No newline at end of file diff --git a/dockerfiles/pytorch/install_packages.sh b/dockerfiles/pytorch/install_packages.sh index a2cf8dd..396ab29 100755 --- a/dockerfiles/pytorch/install_packages.sh +++ b/dockerfiles/pytorch/install_packages.sh @@ -4,6 +4,9 @@ set -ex pt_package_name="pytorch_modules-v${PT_VERSION}_${VERSION}_${REVISION}.tgz" os_string="ubuntu${OS_NUMBER}" case "${BASE_NAME}" in + *sles15.5* | *suse15.5*) + os_string="suse155" + ;; *rhel9.2*) os_string="rhel92" ;; diff --git a/dockerfiles/triton/Dockerfile b/dockerfiles/triton/Dockerfile index 8da6a12..d8ac15a 100644 --- a/dockerfiles/triton/Dockerfile +++ b/dockerfiles/triton/Dockerfile @@ -52,8 +52,8 @@ RUN apt-get update && apt-get install -y \ libgoogle-perftools-dev && \ apt-get clean && rm -rf /var/lib/apt/lists/* -RUN python3 -m pip install pip==23.3.1 --disable-pip-version-check && \ - python3 -m pip install setuptools==67.3.3 --disable-pip-version-check && \ +RUN python3 -m pip install pip==24.2 --disable-pip-version-check && \ + python3 -m pip install setuptools==75.1.0 --disable-pip-version-check && \ python3 -m pip install habana_media_loader=="${VERSION}"."${REVISION}" --disable-pip-version-check RUN ln -s /usr/bin/python3.10 /usr/bin/python && wget --no-verbose "${PT_ARTIFACT_PATH}/${PT_PACKAGE_NAME}" && \ diff --git a/dockerfiles/triton_vllm_backend/Dockerfile b/dockerfiles/triton_vllm_backend/Dockerfile new file mode 100644 index 0000000..6011cf2 --- /dev/null +++ b/dockerfiles/triton_vllm_backend/Dockerfile @@ -0,0 +1,79 @@ +# Copyright (c) 2023 HabanaLabs, Ltd. +# +# SPDX-License-Identifier: Apache-2.0 +# +# HabanaLabs Dockerfile triton installer layer for Ubuntu 22.04 +FROM nvcr.io/nvidia/tritonserver:24.06-py3 +ARG ARTIFACTORY_URL +ARG PT_VERSION +ARG VERSION +ARG REVISION +ARG HABANA_PIP_VERSION="22.3" +ARG PT_BUILD_REPO=gaudi-pt-modules +ARG PT_PACKAGE_NAME="pytorch_modules-v"${PT_VERSION}"_"${VERSION}"_"${REVISION}".tgz" +ARG PT_ARTIFACT_PATH="https://"${ARTIFACTORY_URL}"/artifactory/${PT_BUILD_REPO}/"${VERSION}"/"${REVISION}"/pytorch/ubuntu2204" +ENV DEBIAN_FRONTEND=noninteractive +ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so +ENV HABANA_LOGS=/var/log/habana_logs/ +ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw +ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins +ENV PIP_NO_CACHE_DIR=on +ENV PIP_DEFAULT_TIMEOUT=1000 +ENV MPI_ROOT=/opt/hpcx/ompi +ENV LD_LIBRARY_PATH=${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH +ENV PATH=${MPI_ROOT}/bin:$PATH +ENV OPAL_PREFIX=${MPI_ROOT} +ENV MPICC=${MPI_ROOT}/bin/mpicc +ENV RDMAV_FORK_SAFE=1 +ENV PYTHONPATH=/root:/usr/lib/habanalabs/ + +ADD model.py . 
+RUN echo "deb https://${ARTIFACTORY_URL}/artifactory/debian jammy main" | tee -a /etc/apt/sources.list && \ + wget "https://${ARTIFACTORY_URL}/artifactory/api/gpg/key/public" && \ + apt-key add public && rm public && apt-get update && \ + apt-get install -y habanalabs-rdma-core="$VERSION"-"$REVISION" \ + habanalabs-thunk="$VERSION"-"$REVISION" \ + habanalabs-firmware-tools="$VERSION"-"$REVISION" \ + habanalabs-graph="$VERSION"-"$REVISION" && \ + apt-get autoremove --yes && apt-get clean && rm -rf /var/lib/apt/lists/* && \ + sed --in-place "/$ARTIFACTORY_URL/d" /etc/apt/sources.list + +RUN apt-get update && apt-get install -y \ + libjemalloc2 \ + libcairo2-dev \ + libglib2.0-dev \ + libhdf5-dev \ + libnuma-dev \ + libpcre2-dev \ + libjpeg-dev \ + liblapack-dev \ + libopenblas-dev \ + numactl \ + libgoogle-perftools-dev && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +RUN python3 -m pip install pip==23.3.1 --disable-pip-version-check && \ + python3 -m pip install setuptools==67.3.3 --disable-pip-version-check && \ + python3 -m pip install habana_media_loader=="${VERSION}"."${REVISION}" --disable-pip-version-check + +RUN ln -s /usr/bin/python3.10 /usr/bin/python && wget --no-verbose "${PT_ARTIFACT_PATH}/${PT_PACKAGE_NAME}" && \ + mkdir -p /root/habanalabs/pytorch_temp && \ + tar -xf pytorch_modules-v"${PT_VERSION}"_"${VERSION}"_"${REVISION}".tgz -C /root/habanalabs/pytorch_temp/. && \ + python3 -m pip install pip=="${HABANA_PIP_VERSION}" && \ + pip install mpi4py==3.1.4 --disable-pip-version-check && \ + #pip install $(grep -ivE "#|lightning" /root/habanalabs/pytorch_temp/requirements-pytorch.txt | grep .) --no-warn-script-location --disable-pip-version-check && \ + pip install /root/habanalabs/pytorch_temp/*.whl --disable-pip-version-check && \ + pip install $(grep "lightning" /root/habanalabs/pytorch_temp/requirements-pytorch.txt) --disable-pip-version-check && \ + echo "source /etc/profile.d/habanalabs.sh" >> ~/.bashrc && \ + pip uninstall -y pillow && \ + pip uninstall -y pillow-simd && \ + pip install pillow-simd==7.0.0.post3 --disable-pip-version-check && \ + rm -rf /root/habanalabs pytorch_modules-v"${PT_VERSION}"_"${VERSION}"_"${REVISION}".tgz /tmp/* +#RUN python3 -m pip install --no-cache-dir git+https://github.com/HabanaAI/vllm-fork.git@v0.4.2-Gaudi-1.16.0 +RUN python3 -m pip install --no-cache-dir git+https://github.com/HabanaAI/vllm-fork.git@275e3250ba6ed8cc13b2d6e4928db73df420e64b + +RUN mkdir -p /opt/tritonserver/backends/vllm +COPY model.py /opt/tritonserver/backends/vllm/ + +ENV LD_PRELOAD=/lib/x86_64-linux-gnu/libtcmalloc.so.4 +ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768 diff --git a/dockerfiles/triton_vllm_backend/Makefile b/dockerfiles/triton_vllm_backend/Makefile new file mode 100644 index 0000000..d749807 --- /dev/null +++ b/dockerfiles/triton_vllm_backend/Makefile @@ -0,0 +1,15 @@ + +include ../common.mk + +IMAGE_NAME = triton-installer-$(PT_VERSION)-${BUILD_OS} +DOCKER_BUILD_ARGS := $(DOCKER_BUILD_ARGS) --build-arg PT_VERSION=$(PT_VERSION) + +init: +ifneq ($(BUILD_OS), ubuntu22.04) + $(error triton is only supported on ubuntu22.04) +endif + $(HIDE)mkdir -p $(BUILD_DIR) + $(HIDE)cp $(CURDIR)/Dockerfile $(BUILD_DIR)/Dockerfile + $(HIDE)cp $(CURDIR)/model.py $(BUILD_DIR)/model.py + +build: init diff --git a/dockerfiles/triton_vllm_backend/model.py b/dockerfiles/triton_vllm_backend/model.py new file mode 100755 index 0000000..450b35c --- /dev/null +++ b/dockerfiles/triton_vllm_backend/model.py @@ -0,0 +1,520 @@ +# Copyright 2023-2024, NVIDIA CORPORATION & 
AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import asyncio +import json +import os +import threading +from typing import Dict, List + +import numpy as np +import torch +import triton_python_backend_utils as pb_utils +from vllm.engine.arg_utils import AsyncEngineArgs +from vllm.engine.async_llm_engine import AsyncLLMEngine +from vllm.lora.request import LoRARequest +from vllm.sampling_params import SamplingParams +from vllm.utils import random_uuid +from copy import deepcopy +from transformers import AutoTokenizer +_VLLM_ENGINE_ARGS_FILENAME = "model.json" +_MULTI_LORA_ARGS_FILENAME = "multi_lora.json" +#from https://github.com/triton-inference-server/vllm_backend/commit/18a96e365caa2032eb900ac116753e1384c624c8 +# add chat-template for qwen2 + +class TritonPythonModel: + @staticmethod + def auto_complete_config(auto_complete_model_config): + inputs = [ + {"name": "text_input", "data_type": "TYPE_STRING", "dims": [1]}, + { + "name": "stream", + "data_type": "TYPE_BOOL", + "dims": [1], + "optional": True, + }, + { + "name": "sampling_parameters", + "data_type": "TYPE_STRING", + "dims": [1], + "optional": True, + }, + { + "name": "exclude_input_in_output", + "data_type": "TYPE_BOOL", + "dims": [1], + "optional": True, + }, + ] + outputs = [{"name": "text_output", "data_type": "TYPE_STRING", "dims": [-1]}] + + # Store the model configuration as a dictionary. + config = auto_complete_model_config.as_dict() + input_names = [] + output_names = [] + for input in config["input"]: + input_names.append(input["name"]) + for output in config["output"]: + output_names.append(output["name"]) + + # Add only missing inputs and output to the model configuration. + for input in inputs: + if input["name"] not in input_names: + auto_complete_model_config.add_input(input) + for output in outputs: + if output["name"] not in output_names: + auto_complete_model_config.add_output(output) + + # We need to use decoupled transaction policy for saturating + # vLLM engine for max throughtput. 
+ # TODO [DLIS:5233]: Allow asynchronous execution to lift this + # restriction for cases there is exactly a single response to + # a single request. + auto_complete_model_config.set_model_transaction_policy(dict(decoupled=True)) + + # Disabling batching in Triton, let vLLM handle the batching on its own. + auto_complete_model_config.set_max_batch_size(0) + + return auto_complete_model_config + + def initialize(self, args): + self.args = args + self.logger = pb_utils.Logger + self.model_config = json.loads(args["model_config"]) + output_config = pb_utils.get_output_config_by_name( + self.model_config, "text_output" + ) + self.output_dtype = pb_utils.triton_string_to_numpy(output_config["data_type"]) + + # Prepare vLLM engine + self.init_engine() + + # Counter to keep track of ongoing request counts + self.ongoing_request_count = 0 + + # Starting asyncio event loop to process the received requests asynchronously. + self._loop = asyncio.get_event_loop() + self._loop_thread = threading.Thread( + target=self.engine_loop, args=(self._loop,) + ) + self._shutdown_event = asyncio.Event() + self._loop_thread.start() + + def init_engine(self): + # Currently, Triton needs to use decoupled policy for asynchronously + # forwarding requests to vLLM engine, so assert it. + self.using_decoupled = pb_utils.using_decoupled_model_transaction_policy( + self.model_config + ) + assert ( + self.using_decoupled + ), "vLLM Triton backend must be configured to use decoupled model transaction policy" + + engine_args_filepath = os.path.join( + pb_utils.get_model_dir(), _VLLM_ENGINE_ARGS_FILENAME + ) + assert os.path.isfile( + engine_args_filepath + ), f"'{_VLLM_ENGINE_ARGS_FILENAME}' containing vllm engine args must be provided in '{pb_utils.get_model_dir()}'" + with open(engine_args_filepath) as file: + self.vllm_engine_config = json.load(file) + self.tokenizer = AutoTokenizer.from_pretrained(self.vllm_engine_config["tokenizer"], resume_download=True) + # Validate device and multi-processing settings are currently set based on model/configs. + self.validate_device_config() + + self.chat_template = self.vllm_engine_config.get("chat_template", None) and self.tokenizer.chat_template + self.vllm_engine_config.pop("chat_template", None) + # Check for LoRA config and set it up if enabled + self.setup_lora() + + + # Create an AsyncLLMEngine from the config from JSON + self.llm_engine = AsyncLLMEngine.from_engine_args( + AsyncEngineArgs(**self.vllm_engine_config) + ) + + def setup_lora(self): + self.enable_lora = False + + if ( + "enable_lora" in self.vllm_engine_config.keys() + and self.vllm_engine_config["enable_lora"].lower() == "true" + ): + # create Triton LoRA weights repository + multi_lora_args_filepath = os.path.join( + pb_utils.get_model_dir(), _MULTI_LORA_ARGS_FILENAME + ) + try: + with open(multi_lora_args_filepath) as lora_file: + lora_repository: Dict[str, str] = json.load(lora_file) + self.lora_repository = lora_repository + self.supported_loras: List[str] = list(self.lora_repository.keys()) + self.supported_loras_len = len(self.supported_loras) + self.enable_lora = True + except FileNotFoundError: + raise FileNotFoundError( + f"Triton backend cannot find {multi_lora_args_filepath}." + ) + + def validate_device_config(self): + triton_kind = self.args["model_instance_kind"] + triton_device_id = int(self.args["model_instance_device_id"]) + triton_instance = f"{self.args['model_name']}_{triton_device_id}" + + # Triton's current definition of KIND_GPU makes assumptions that + # models only use a single GPU. 
For multi-GPU models, the recommendation + # is to specify KIND_MODEL to acknowledge that the model will take control + # of the devices made available to it. + # NOTE: Consider other parameters that would indicate multi-GPU in the future. + tp_size = int(self.vllm_engine_config.get("tensor_parallel_size", 1)) + if tp_size > 1 and triton_kind == "GPU": + raise ValueError( + "KIND_GPU is currently for single-GPU models, please specify KIND_MODEL " + "in the model's config.pbtxt for multi-GPU models" + ) + + # If KIND_GPU is specified, specify the device ID assigned by Triton to ensure that + # multiple model instances do not oversubscribe the same default device. + if triton_kind == "GPU" and triton_device_id >= 0: + self.logger.log_info( + f"Detected KIND_GPU model instance, explicitly setting GPU device={triton_device_id} for {triton_instance}" + ) + # vLLM doesn't currently (v0.4.2) expose device selection in the APIs + torch.cuda.set_device(triton_device_id) + + def create_task(self, coro): + """ + Creates a task on the engine's event loop which is running on a separate thread. + """ + assert ( + self._shutdown_event.is_set() is False + ), "Cannot create tasks after shutdown has been requested" + + return asyncio.run_coroutine_threadsafe(coro, self._loop) + + def engine_loop(self, loop): + """ + Runs the engine's event loop on a separate thread. + """ + asyncio.set_event_loop(loop) + self._loop.run_until_complete(self.await_shutdown()) + + async def await_shutdown(self): + """ + Primary coroutine running on the engine event loop. This coroutine is responsible for + keeping the engine alive until a shutdown is requested. + """ + # first await the shutdown signal + while self._shutdown_event.is_set() is False: + await asyncio.sleep(5) + + # Wait for the ongoing_requests + while self.ongoing_request_count > 0: + self.logger.log_info( + "[vllm] Awaiting remaining {} requests".format( + self.ongoing_request_count + ) + ) + await asyncio.sleep(5) + + for task in asyncio.all_tasks(loop=self._loop): + if task is not asyncio.current_task(): + task.cancel() + + self.logger.log_info("[vllm] Shutdown complete") + + def get_sampling_params_dict(self, params_json): + """ + This functions parses the dictionary values into their + expected format. + """ + + params_dict = json.loads(params_json) + + # Special parsing for the supported sampling parameters + bool_keys = ["ignore_eos", "skip_special_tokens", "use_beam_search"] + for k in bool_keys: + if k in params_dict: + params_dict[k] = bool(params_dict[k]) + + float_keys = [ + "frequency_penalty", + "length_penalty", + "presence_penalty", + "temperature", + "top_p", + ] + for k in float_keys: + if k in params_dict: + params_dict[k] = float(params_dict[k]) + + int_keys = ["best_of", "max_tokens", "min_tokens", "n", "top_k"] + for k in int_keys: + if k in params_dict: + params_dict[k] = int(params_dict[k]) + + return params_dict + + def create_response(self, vllm_output, prepend_input): + """ + Parses the output from the vLLM engine into Triton + response. 
+ """ + prompt = "" + if prepend_input: + prompt = vllm_output.prompt + if prompt: + text_outputs = [ + (prompt + output.text).encode("utf-8") for output in vllm_output.outputs + ] + else: + text_outputs = [ + output.text.encode("utf-8") for output in vllm_output.outputs + ] + + triton_output_tensor = pb_utils.Tensor( + "text_output", np.asarray(text_outputs, dtype=self.output_dtype) + ) + + return pb_utils.InferenceResponse(output_tensors=[triton_output_tensor]) + + def create_stream_response(self, vllm_output, previous_outputs_lengths): + """ + Parses the output from the vLLM engine, extracts only newly generated + text and packs it into Triton response. + """ + if previous_outputs_lengths is None: + return self.create_response(vllm_output, prepend_input=False) + + text_outputs = [ + (output.text[prev_output_length:]).encode("utf-8") + for output, prev_output_length in zip( + vllm_output.outputs, previous_outputs_lengths + ) + ] + triton_output_tensor = pb_utils.Tensor( + "text_output", np.asarray(text_outputs, dtype=self.output_dtype) + ) + return pb_utils.InferenceResponse(output_tensors=[triton_output_tensor]) + + async def generate(self, request): + """ + Forwards single request to LLM engine and returns responses. + """ + response_sender = request.get_response_sender() + self.ongoing_request_count += 1 + try: + request_id = random_uuid() + prompt = pb_utils.get_input_tensor_by_name( + request, "text_input" + ).as_numpy()[0] + if isinstance(prompt, bytes): + prompt = prompt.decode("utf-8") + stream = pb_utils.get_input_tensor_by_name(request, "stream") + if stream: + stream = stream.as_numpy()[0] + else: + stream = False + prepend_input = pb_utils.get_input_tensor_by_name( + request, "exclude_input_in_output" + ) + if prepend_input: + # When `exclude_input_in_output` is False, we want to prepend + # input prompt to output, thus prepend_input should be True, + # and vice versa. + prepend_input = not prepend_input.as_numpy()[0] + elif prepend_input is None and stream: + prepend_input = False + else: + prepend_input = True + + if prepend_input and stream: + raise ValueError( + "When streaming, `exclude_input_in_output` = False is not allowed." + ) + + # Request parameters are not yet supported via + # BLS. 
Provide an optional mechanism to receive serialized + # parameters as an input tensor until support is added + + parameters_input_tensor = pb_utils.get_input_tensor_by_name( + request, "sampling_parameters" + ) + if parameters_input_tensor: + parameters = parameters_input_tensor.as_numpy()[0].decode("utf-8") + else: + parameters = request.parameters() + + sampling_params_dict = self.get_sampling_params_dict(parameters) + lora_name = sampling_params_dict.pop("lora_name", None) + sampling_params = SamplingParams(**sampling_params_dict) + last_output = None + prev_outputs = None + lora_request = None + if lora_name is not None: + lora_id = str(self.supported_loras.index(lora_name) + 1) + lora_int_id = int(lora_id) + lora_local_path = self.lora_repository[lora_name] + lora_request = LoRARequest(lora_id, lora_int_id, lora_local_path) + if self.chat_template: + message = self.build_message(prompt) + message_template = self.tokenizer.apply_chat_template( + message, + tokenize=False, + add_generation_prompt=True + ) + + model_inputs = self.tokenizer(message_template).input_ids + inputs = {} + inputs["prompt_token_ids"] = model_inputs + else: + inputs = prompt + async for output in self.llm_engine.generate( + inputs, sampling_params, request_id, lora_request=lora_request, + ): + if response_sender.is_cancelled(): + self.logger.log_info("[vllm] Cancelling the request") + await self.llm_engine.abort(request_id) + self.logger.log_info("[vllm] Successfully cancelled the request") + break + + if stream: + prev_outputs_lengths = None + if prev_outputs is not None: + prev_outputs_lengths = [ + len(prev_output.text) + for prev_output in prev_outputs.outputs + ] + if output.finished: + response_sender.send( + self.create_stream_response(output, prev_outputs_lengths), + flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL, + ) + else: + response_sender.send( + self.create_stream_response(output, prev_outputs_lengths) + ) + prev_outputs = output + + last_output = output + + if not stream: + response_sender.send( + self.create_response(last_output, prepend_input), + flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL, + ) + + except Exception as e: + self.logger.log_info(f"[vllm] Error generating stream: {e}") + error = pb_utils.TritonError(f"Error generating stream: {e}") + triton_output_tensor = pb_utils.Tensor( + "text_output", np.asarray(["N/A"], dtype=self.output_dtype) + ) + response = pb_utils.InferenceResponse( + output_tensors=[triton_output_tensor], error=error + ) + response_sender.send( + response, flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL + ) + raise e + finally: + self.ongoing_request_count -= 1 + + def verify_loras(self, request): + # We will check if the requested lora exists here, if not we will send a + # response with `LoRA not found` information. In this way we may avoid + # further processing. + verified_request = None + lora_error = None + lora_name = None + parameters_input_tensor = pb_utils.get_input_tensor_by_name( + request, "sampling_parameters" + ) + if parameters_input_tensor: + parameters = parameters_input_tensor.as_numpy()[0].decode("utf-8") + sampling_params_dict = self.get_sampling_params_dict(parameters) + lora_name = sampling_params_dict.pop("lora_name", None) + + if lora_name is not None: + if not self.enable_lora: + lora_error = pb_utils.TritonError("LoRA feature is not enabled.") + self.logger.log_info( + "[vllm] LoRA is not enabled, please restart the backend with LoRA enabled." 
+ ) + elif lora_name not in self.supported_loras: + lora_error = pb_utils.TritonError( + f"LoRA {lora_name} is not supported, we currently support {self.supported_loras}" + ) + self.logger.log_info(f"[vllm] LoRA {lora_name} not found.") + + if lora_error is not None: + output_tensor = pb_utils.Tensor( + "text_output", + np.asarray(["[Error] Unsupported LoRA."], dtype=self.output_dtype), + ) + response = pb_utils.InferenceResponse( + output_tensors=[output_tensor], error=lora_error + ) + response_sender = request.get_response_sender() + response_sender.send( + response, flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL + ) + else: + verified_request = request + return verified_request + + def execute(self, requests): + """ + Triton core issues requests to the backend via this method. + + When this method returns, new requests can be issued to the backend. Blocking + this function would prevent the backend from pulling additional requests from + Triton into the vLLM engine. This can be done if the kv cache within vLLM engine + is too loaded. + We are pushing all the requests on vllm and let it handle the full traffic. + """ + for request in requests: + request = self.verify_loras(request) + if request is not None: + self.create_task(self.generate(request)) + return None + + def finalize(self): + """ + Triton virtual method; called when the model is unloaded. + """ + self.logger.log_info("[vllm] Issuing finalize to vllm backend") + self._shutdown_event.set() + if self._loop_thread is not None: + self._loop_thread.join() + self._loop_thread = None + + def build_message(self, prompt: str, history: List[Dict] = None): + history = deepcopy(history) + if len(history or []) == 0: + history = [{"role": "system", "content": "You are a helpful assistant."}] + history.append({"role": "user", "content": prompt}) + return history diff --git a/dockerfiles/triton_vllm_backend/samples/client.py b/dockerfiles/triton_vllm_backend/samples/client.py new file mode 100755 index 0000000..390a365 --- /dev/null +++ b/dockerfiles/triton_vllm_backend/samples/client.py @@ -0,0 +1,280 @@ +#!/usr/bin/env python3 + +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import argparse +import asyncio +import json +import sys + +import numpy as np +import tritonclient.grpc.aio as grpcclient +from tritonclient.utils import * + + +class LLMClient: + def __init__(self, flags: argparse.Namespace): + self._client = grpcclient.InferenceServerClient( + url=flags.url, verbose=flags.verbose + ) + self._flags = flags + self._loop = asyncio.get_event_loop() + self._results_dict = {} + + async def async_request_iterator( + self, prompts, sampling_parameters, exclude_input_in_output + ): + try: + for iter in range(self._flags.iterations): + for i, prompt in enumerate(prompts): + prompt_id = self._flags.offset + (len(prompts) * iter) + i + self._results_dict[str(prompt_id)] = [] + yield self.create_request( + prompt, + self._flags.streaming_mode, + prompt_id, + sampling_parameters, + exclude_input_in_output, + ) + except Exception as error: + print(f"Caught an error in the request iterator: {error}") + + async def stream_infer(self, prompts, sampling_parameters, exclude_input_in_output): + try: + # Start streaming + response_iterator = self._client.stream_infer( + inputs_iterator=self.async_request_iterator( + prompts, sampling_parameters, exclude_input_in_output + ), + stream_timeout=self._flags.stream_timeout, + ) + async for response in response_iterator: + yield response + except InferenceServerException as error: + print(error) + sys.exit(1) + + async def process_stream( + self, prompts, sampling_parameters, exclude_input_in_output + ): + # Clear results in between process_stream calls + self.results_dict = [] + success = True + # Read response from the stream + async for response in self.stream_infer( + prompts, sampling_parameters, exclude_input_in_output + ): + result, error = response + if error: + print(f"Encountered error while processing: {error}") + success = False + else: + output = result.as_numpy("text_output") + for i in output: + self._results_dict[result.get_response().id].append(i) + return success + + async def run(self): + # Sampling parameters for text generation + # including `temperature`, `top_p`, top_k`, `max_tokens`, `early_stopping`. 
+ # Full list available at: + # https://github.com/vllmproject/vllm/blob/5255d99dc595f9ae7647842242d6542aa4145a4f/vllm/sampling_params.py#L23 + sampling_parameters = { + "temperature": "0.1", + "top_p": "0.95", + "max_tokens": "100", + } + exclude_input_in_output = self._flags.exclude_inputs_in_outputs + if self._flags.lora_name is not None: + sampling_parameters["lora_name"] = self._flags.lora_name + with open(self._flags.input_prompts, "r") as file: + print(f"Loading inputs from `{self._flags.input_prompts}`...") + prompts = file.readlines() + + success = await self.process_stream( + prompts, sampling_parameters, exclude_input_in_output + ) + + with open(self._flags.results_file, "w") as file: + for id in self._results_dict.keys(): + for result in self._results_dict[id]: + file.write(result.decode("utf-8")) + + file.write("\n") + file.write("\n=========\n\n") + print(f"Storing results into `{self._flags.results_file}`...") + + if self._flags.verbose: + with open(self._flags.results_file, "r") as file: + print(f"\nContents of `{self._flags.results_file}` ===>") + print(file.read()) + if success: + print("PASS: vLLM example") + else: + print("FAIL: vLLM example") + + def run_async(self): + self._loop.run_until_complete(self.run()) + + def create_request( + self, + prompt, + stream, + request_id, + sampling_parameters, + exclude_input_in_output, + send_parameters_as_tensor=True, + ): + inputs = [] + prompt_data = np.array([prompt.encode("utf-8")], dtype=np.object_) + try: + inputs.append(grpcclient.InferInput("text_input", [1], "BYTES")) + inputs[-1].set_data_from_numpy(prompt_data) + except Exception as error: + print(f"Encountered an error during request creation: {error}") + + stream_data = np.array([stream], dtype=bool) + inputs.append(grpcclient.InferInput("stream", [1], "BOOL")) + inputs[-1].set_data_from_numpy(stream_data) + + # Request parameters are not yet supported via BLS. Provide an + # optional mechanism to send serialized parameters as an input + # tensor until support is added + + if send_parameters_as_tensor: + sampling_parameters_data = np.array( + [json.dumps(sampling_parameters).encode("utf-8")], dtype=np.object_ + ) + inputs.append(grpcclient.InferInput("sampling_parameters", [1], "BYTES")) + inputs[-1].set_data_from_numpy(sampling_parameters_data) + + inputs.append(grpcclient.InferInput("exclude_input_in_output", [1], "BOOL")) + inputs[-1].set_data_from_numpy(np.array([exclude_input_in_output], dtype=bool)) + + # Add requested outputs + outputs = [] + outputs.append(grpcclient.InferRequestedOutput("text_output")) + + # Issue the asynchronous sequence inference. + return { + "model_name": self._flags.model, + "inputs": inputs, + "outputs": outputs, + "request_id": str(request_id), + "parameters": sampling_parameters, + } + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "-m", + "--model", + type=str, + required=False, + default="vllm_model", + help="Model name", + ) + parser.add_argument( + "-v", + "--verbose", + action="store_true", + required=False, + default=False, + help="Enable verbose output", + ) + parser.add_argument( + "-u", + "--url", + type=str, + required=False, + default="localhost:8001", + help="Inference server URL and its gRPC port. Default is localhost:8001.", + ) + parser.add_argument( + "-t", + "--stream-timeout", + type=float, + required=False, + default=None, + help="Stream timeout in seconds. 
Default is None.", + ) + parser.add_argument( + "--offset", + type=int, + required=False, + default=0, + help="Add offset to request IDs used", + ) + parser.add_argument( + "--input-prompts", + type=str, + required=False, + default="prompts.txt", + help="Text file with input prompts", + ) + parser.add_argument( + "--results-file", + type=str, + required=False, + default="results.txt", + help="The file with output results", + ) + parser.add_argument( + "--iterations", + type=int, + required=False, + default=1, + help="Number of iterations through the prompts file", + ) + parser.add_argument( + "-s", + "--streaming-mode", + action="store_true", + required=False, + default=False, + help="Enable streaming mode", + ) + parser.add_argument( + "--exclude-inputs-in-outputs", + action="store_true", + required=False, + default=False, + help="Exclude prompt from outputs", + ) + parser.add_argument( + "-l", + "--lora-name", + type=str, + required=False, + default=None, + help="The querying LoRA name", + ) + FLAGS = parser.parse_args() + + client = LLMClient(FLAGS) + client.run_async() diff --git a/dockerfiles/triton_vllm_backend/samples/model_repository/vllm_model/1/model.json b/dockerfiles/triton_vllm_backend/samples/model_repository/vllm_model/1/model.json new file mode 100755 index 0000000..f801b3b --- /dev/null +++ b/dockerfiles/triton_vllm_backend/samples/model_repository/vllm_model/1/model.json @@ -0,0 +1,12 @@ +{ + "model":"meta-llama/Llama-2-7b-hf", + "tokenizer":"meta-llama/Llama-2-7b-hf", + "disable_log_requests": "false", + "gpu_memory_utilization": 0.5, + "enforce_eager": "true", + "max_num_seqs": 512, + "swap_space": 16, + "dtype": "bfloat16", + "tensor_parallel_size": 1, + "max_num_batched_tokens": 8192 +} diff --git a/dockerfiles/triton_vllm_backend/samples/model_repository/vllm_model/config.pbtxt b/dockerfiles/triton_vllm_backend/samples/model_repository/vllm_model/config.pbtxt new file mode 100644 index 0000000..48871c6 --- /dev/null +++ b/dockerfiles/triton_vllm_backend/samples/model_repository/vllm_model/config.pbtxt @@ -0,0 +1,36 @@ +# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# Note: You do not need to change any fields in this configuration. + +backend: "vllm" +# The usage of device is deferred to the vLLM engine +instance_group [ + { + count: 1 + kind: KIND_MODEL + } +] diff --git a/dockerfiles/triton_vllm_backend/samples/prompts.txt b/dockerfiles/triton_vllm_backend/samples/prompts.txt new file mode 100644 index 0000000..133800e --- /dev/null +++ b/dockerfiles/triton_vllm_backend/samples/prompts.txt @@ -0,0 +1,4 @@ +Hello, my name is +The most dangerous animal is +The capital of France is +The future of AI is diff --git a/dockerfiles/triton_vllm_backend/samples/test_models/llama70b_8x/1/model.json b/dockerfiles/triton_vllm_backend/samples/test_models/llama70b_8x/1/model.json new file mode 100644 index 0000000..f576654 --- /dev/null +++ b/dockerfiles/triton_vllm_backend/samples/test_models/llama70b_8x/1/model.json @@ -0,0 +1,12 @@ +{ + "model":"meta-llama/Llama-2-70b-hf", + "tokenizer":"meta-llama/Llama-2-70b-hf", + "disable_log_requests": "false", + "gpu_memory_utilization": 0.5, + "enforce_eager": "true", + "max_num_seqs": 512, + "swap_space": 16, + "dtype": "bfloat16", + "tensor_parallel_size": 8, + "max_num_batched_tokens": 8192 +} diff --git a/dockerfiles/triton_vllm_backend/samples/test_models/llama70b_8x/config.pbtxt b/dockerfiles/triton_vllm_backend/samples/test_models/llama70b_8x/config.pbtxt new file mode 100644 index 0000000..48871c6 --- /dev/null +++ b/dockerfiles/triton_vllm_backend/samples/test_models/llama70b_8x/config.pbtxt @@ -0,0 +1,36 @@ +# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# Note: You do not need to change any fields in this configuration. + +backend: "vllm" +# The usage of device is deferred to the vLLM engine +instance_group [ + { + count: 1 + kind: KIND_MODEL + } +] diff --git a/dockerfiles/triton_vllm_backend/samples/test_models/llama7b_1x/1/model.json b/dockerfiles/triton_vllm_backend/samples/test_models/llama7b_1x/1/model.json new file mode 100755 index 0000000..f801b3b --- /dev/null +++ b/dockerfiles/triton_vllm_backend/samples/test_models/llama7b_1x/1/model.json @@ -0,0 +1,12 @@ +{ + "model":"meta-llama/Llama-2-7b-hf", + "tokenizer":"meta-llama/Llama-2-7b-hf", + "disable_log_requests": "false", + "gpu_memory_utilization": 0.5, + "enforce_eager": "true", + "max_num_seqs": 512, + "swap_space": 16, + "dtype": "bfloat16", + "tensor_parallel_size": 1, + "max_num_batched_tokens": 8192 +} diff --git a/dockerfiles/triton_vllm_backend/samples/test_models/llama7b_1x/config.pbtxt b/dockerfiles/triton_vllm_backend/samples/test_models/llama7b_1x/config.pbtxt new file mode 100644 index 0000000..48871c6 --- /dev/null +++ b/dockerfiles/triton_vllm_backend/samples/test_models/llama7b_1x/config.pbtxt @@ -0,0 +1,36 @@ +# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# Note: You do not need to change any fields in this configuration. 
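Tying the sample `model.json` files to the backend: `init_engine` in model.py expands the JSON (minus the backend-only `chat_template` key) directly into vLLM's `AsyncEngineArgs`. A rough sketch, assuming vLLM is installed and the path points at one of the sample files in this patch:

```python
import json

from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine

# Path is illustrative; every sample model.json in this patch has the same shape.
with open("samples/model_repository/test_models/llama7b_1x/1/model.json") as f:
    engine_config = json.load(f)

engine_config.pop("chat_template", None)  # consumed by model.py, not by vLLM
llm_engine = AsyncLLMEngine.from_engine_args(AsyncEngineArgs(**engine_config))
```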
+ +backend: "vllm" +# The usage of device is deferred to the vLLM engine +instance_group [ + { + count: 1 + kind: KIND_MODEL + } +] diff --git a/dockerfiles/triton_vllm_backend/samples/test_models/qwen_7b_chat/1/model.json b/dockerfiles/triton_vllm_backend/samples/test_models/qwen_7b_chat/1/model.json new file mode 100644 index 0000000..ef8a958 --- /dev/null +++ b/dockerfiles/triton_vllm_backend/samples/test_models/qwen_7b_chat/1/model.json @@ -0,0 +1,13 @@ +{ + "model":"Qwen/Qwen2-7B-Instruct", + "tokenizer":"Qwen/Qwen2-7B-Instruct", + "disable_log_requests": "false", + "gpu_memory_utilization": 0.5, + "enforce_eager": "true", + "max_num_seqs": 512, + "swap_space": 16, + "dtype": "bfloat16", + "tensor_parallel_size": 1, + "max_num_batched_tokens": 131072, + "chat_template": "true" +} diff --git a/dockerfiles/triton_vllm_backend/samples/test_models/qwen_7b_chat/config.pbtxt b/dockerfiles/triton_vllm_backend/samples/test_models/qwen_7b_chat/config.pbtxt new file mode 100644 index 0000000..48871c6 --- /dev/null +++ b/dockerfiles/triton_vllm_backend/samples/test_models/qwen_7b_chat/config.pbtxt @@ -0,0 +1,36 @@ +# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# Note: You do not need to change any fields in this configuration. 
+ +backend: "vllm" +# The usage of device is deferred to the vLLM engine +instance_group [ + { + count: 1 + kind: KIND_MODEL + } +] diff --git a/utils/intel_gaudi_health_screen/IGNodes.py b/utils/intel_gaudi_health_screen/IGNodes.py index be2a1f2..6fdbab8 100644 --- a/utils/intel_gaudi_health_screen/IGNodes.py +++ b/utils/intel_gaudi_health_screen/IGNodes.py @@ -114,21 +114,25 @@ def __init__(self, name="", health_report=HealthReport(), num_checks_link_state= def scan_cards(self): self.logger.info(f"Scanning cards info on Node: {self.name}") - cmd = "hl-smi -Q index,module_id,bus_id,memory.used,temperature.aip -f csv,noheader" + cmd = "hl-smi -Q index,module_id,bus_id,memory.used,temperature.aip,name -f csv,noheader" output = run_cmd(cmd) reader = csv.reader(output.split('\n'), delimiter=',') for row in reader: if len(row) == 0: continue + elif len(row) < 6: + _logger.error("hl-smi output is not correct: Recieved output: {row}") + continue i = row[0] module_id = row[1].strip() pci_address = row[2] memory_used = int(row[3].split()[0]) temperature_C = int(row[4].split()[0]) + system_name = row[5] - card = IGCard(index=i, module_id=module_id, pci_address=pci_address, memory_used=memory_used, temperature=temperature_C, logger=self.logger) + card = IGCard(system_name=system_name, index=i, module_id=module_id, pci_address=pci_address, memory_used=memory_used, temperature=temperature_C, logger=self.logger) self.cards[i] = card self.cards = dict(sorted(self.cards.items())) @@ -190,7 +194,8 @@ def write_json(self, cards): class IGCard(): - def __init__(self, index=-1, module_id=-1, pci_address="", memory_used=-1, framework="pytorch", temperature=-1, logger=None): + def __init__(self, system_name="", index=-1, module_id=-1, pci_address="", memory_used=-1, framework="pytorch", temperature=-1, logger=None): + self.system_name = system_name self.node_id = "" self.logger = logger self.index = index @@ -286,14 +291,22 @@ def check_device_acquire_fail(self): return self.device_acquire_fail def check_temperature_state(self): - max_good_temperature = 83 - base_temperature = 25 - max_delta = 25 + if self.system_name == "HL-325": + # Gaudi-3 System + max_good_temperature = 200 + base_temperature = 45 + max_delta = 80 + else: + # Gaudi-2 System + max_good_temperature = 83 + base_temperature = 25 + max_delta = 25 + if self.temperature_C >= max_good_temperature: self.temperature_state_C = "CRITICAL" self.is_infected = True - elif self.temperature_C - base_temperature >= max_delta: + elif abs(self.temperature_C - base_temperature) >= max_delta: self.temperature_state_C = "WARN" self.is_infected = True else: diff --git a/utils/intel_gaudi_health_screen/README.md b/utils/intel_gaudi_health_screen/README.md index 7d67984..f0a537c 100644 --- a/utils/intel_gaudi_health_screen/README.md +++ b/utils/intel_gaudi_health_screen/README.md @@ -1,4 +1,4 @@ -# Intel Gaudi Health Screen 2.2.0 +# Intel Gaudi Health Screen 2.2.2 A large scale Intel Gaudi cluster contains a lot of moving parts. To ensure distributed training proceeds smoothly, it is recommended to check the cluster network health. Troubleshooting issues on a large cluster can be a tedious act. To simplify the debugging process the @@ -127,16 +127,16 @@ been tested, such as having missing cards, it is occupied by another session, or ## Setup IGHS is compatible with python3 default packages and does not require additional packages -to be installed +to be installed. 
-If your setup envionrment requires custom configruation, update the yaml files located in the templates folder.
+If your setup environment requires custom configuration, update the yaml files located in the templates folder.
 
 If running on bare metal system, then install `pdsh` to your system.
 
-Update [config.yaml](config.yaml) to match your system envionrment
+Update [config.yaml](config.yaml) to match your system environment.
 
 ``` yaml
-# Sets IGHS to screen for K8s or Bare Metal Envionrment (k8s, bare-metal).
+# Sets IGHS to screen for K8s or Bare Metal Environment (k8s, bare-metal).
 system-info:
   type: "k8s"
   # Namespace is only required for k8s settings
@@ -149,7 +149,7 @@ system-info:
   tcp-interface: "10.3.124.0/24"
 
 # Image to run Intel Gaudi Health Screen
-image: "vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest"
+image: "vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest"
 
 # Node Label used to identify a Intel Gaudi Node
 gaudi-node-label: "habana.ai/gaudi:NoSchedule"
@@ -220,7 +220,7 @@ IGHS can alternatively be run through below script:
 To run on bare-metal systems update the [config.yaml](config.yaml) to use bare-metal configuration.
 
 ``` yaml
-# Sets IGHS to screen for K8s or Bare Metal Envionrment (k8s, bare-metal).
+# Sets IGHS to screen for K8s or Bare Metal Environment (k8s, bare-metal).
 system-info:
   type: "bare-metal"
   # Namespace is only required for k8s settings
@@ -233,7 +233,7 @@ system-info:
   tcp-interface: "10.3.124.0/24"
 
 # Image to run Intel Gaudi Health Screen
-image: "vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest"
+image: "vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest"
 
 # Node Label used to identify a Intel Gaudi Node
 gaudi-node-label: "habana.ai/gaudi:NoSchedule"
@@ -260,11 +260,13 @@ Before running the screening test, you need to generate the ssh key used for pas
 
 ``` bash
 # Keys to setup initial bare-metal passwordless ssh connection between systems
+mkdir -p ssh;
 ssh-keygen -t rsa -f ssh/ighs_rsa;
 chmod 600 ssh/ighs_rsa;
 chmod 644 ssh/ighs_rsa.pub;
 
 # Keys to setup containers passwordless ssh connection
+mkdir -p template/bare-metal/ssh;
 ssh-keygen -t rsa -f template/bare-metal/ssh/id_rsa;
 chmod 600 template/bare-metal/ssh/id_rsa;
 chmod 644 template/bare-metal/ssh/id_rsa.pub;
diff --git a/utils/intel_gaudi_health_screen/config.yaml b/utils/intel_gaudi_health_screen/config.yaml
index b9c3ae0..f3aef5b 100644
--- a/utils/intel_gaudi_health_screen/config.yaml
+++ b/utils/intel_gaudi_health_screen/config.yaml
@@ -12,7 +12,7 @@ system-info:
   tcp-interface: "10.3.124.0/24"
 
 # Image to run Intel Gaudi Health Screen
-image: "vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest"
+image: "vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest"
 
 # Node Label used to identify a Intel Gaudi Node
 gaudi-node-label: "habana.ai/gaudi:NoSchedule"
@@ -32,4 +32,4 @@ level-2:
   run: true
   timeout_s: 130
   # Number of times to check Network connections between nodes
-  num-rounds: 5
\ No newline at end of file
+  num-rounds: 5
diff --git a/utils/intel_gaudi_health_screen/hccl_demo_helper.py b/utils/intel_gaudi_health_screen/hccl_demo_helper.py
index d0d492c..107d8ba 100644
--- a/utils/intel_gaudi_health_screen/hccl_demo_helper.py
+++ b/utils/intel_gaudi_health_screen/hccl_demo_helper.py
@@ -35,10 +35,11 @@ def find_groups(healthy_nodes, watch_nodes, groups_tracker):
max_num_groups = num_nodes // 2 max_combinations = (math.factorial(num_nodes)) / (math.factorial(num_nodes-2) * 2) max_attempts = 10 + groups_tracker = set(groups_tracker) if num_nodes == 1: - _logger.warn(f"Need more than 1 Node to test pair all_reduce") - return False + _logger.warning(f"Need more than 1 Node to test pair all_reduce") + return node_groups, list(groups_tracker) while len(node_groups) < max_num_groups and found_unique: i = 0 @@ -49,27 +50,27 @@ def find_groups(healthy_nodes, watch_nodes, groups_tracker): break node_group, group_id, (h_i, w_i) = find_group_id(healthy_nodes, watch_nodes, h_i, w_i) - - if node_group[0] == node_group[1]: - _logger.info(f"Found duplicate nodes in node_group {node_group}. Exiting group id search") + i += 1 + if len(node_group) < 2 or node_group[0] == node_group[1]: + _logger.info(f"Found invalid node_group {node_group}. Exiting group id search") found_unique = False break while group_id in groups_tracker: - if i > max_attempts: - _logger.warn(f"Max attempt {max_attempts} reached for finding unique pair combination.") + if i >= max_attempts: + _logger.warning(f"Max attempt {max_attempts} reached for finding unique pair combination.") found_unique = False break node_group, group_id, (h_i, w_i) = find_group_id(healthy_nodes, watch_nodes, h_i, w_i) - if group_id == "" and node_group[0] == node_group[1]: + i += 1 + if len(node_group) < 2 or node_group[0] == node_group[1]: + _logger.info(f"Internal while Found invalid node_group {node_group}. Exiting group id search") found_unique = False break - i += 1 - if found_unique: - groups_tracker.append(group_id) + groups_tracker.add(group_id) node_groups.append(node_group) for n in node_group: @@ -81,7 +82,7 @@ def find_groups(healthy_nodes, watch_nodes, groups_tracker): if len(watch_nodes) == 0: break - return node_groups, groups_tracker + return node_groups, list(groups_tracker) def find_group_id(healthy_nodes, watch_nodes, h_i=0, w_i=0): """ Finds a group of nodes and combines to form a group id @@ -111,10 +112,10 @@ def find_group_id(healthy_nodes, watch_nodes, h_i=0, w_i=0): node_group.append(healthy_nodes[h_i]) h_i += 1 - if h_i > len(healthy_nodes): + if h_i >= len(healthy_nodes): random.shuffle(healthy_nodes) h_i = 0 - if w_i > len(watch_nodes): + if w_i >= len(watch_nodes): random.shuffle(watch_nodes) w_i = 0 diff --git a/utils/intel_gaudi_health_screen/screen.py b/utils/intel_gaudi_health_screen/screen.py index 3f644e1..18fd25d 100644 --- a/utils/intel_gaudi_health_screen/screen.py +++ b/utils/intel_gaudi_health_screen/screen.py @@ -14,7 +14,7 @@ import argparse import logging -from utilities import download_repos, clear_ighs_pods, create_logger, get_logging_level +from utilities import download_repos, create_logger, get_logging_level from hccl_demo_helper import hccl_demo_check from system_utils import KubeUtils, BareMetalUtils @@ -77,11 +77,11 @@ def monitor_ighs_status(system_mode, level, nodes, timeout_s=240, round=0): if level == 1: nodes.healthy_nodes = set(healthy_nodes) - _logger.info(f"Infected {len(infected_nodes)} Node: {infected_nodes}") + _logger.info(f"Detected {len(detected_nodes)} Node: {detected_nodes}") + _logger.info(f" Healthy {len(healthy_nodes)} Node: {healthy_nodes}") + _logger.info(f" Infected {len(infected_nodes)} Node: {infected_nodes}") _logger.info(f"Missing {len(missing_nodes)} Node: {missing_nodes}") _logger.info(f"Unverified {len(watch_nodes)} Node: {watch_nodes}") - _logger.info(f"Healthy {len(healthy_nodes)} Node: {healthy_nodes}") - _logger.info(f"Detected 
{len(detected_nodes)} Node: {detected_nodes}") return healthy_nodes, infected_nodes, missing_nodes @@ -111,18 +111,21 @@ def main(args): with open(args.config, 'r') as f: config_data = yaml.safe_load(f) + hostfile = "" + if "hostfile" in config_data["system-info"]: + hostfile = config_data["system-info"]["hostfile"] + log_level = get_logging_level(config_data["log-level"]) _logger, _ = create_logger(logger_name="health_screener", logger_file_name="screener", f_path=args.logs_dir, level=log_level) if config_data["system-info"]["type"] == "k8s": system_mode = KubeUtils(image=config_data["image"], - hostfile=config_data["system-info"]["hostfile"], + hostfile=hostfile, namespace=config_data["system-info"]["namespace"], log_dir=args.logs_dir) elif config_data["system-info"]["type"] == "bare-metal": - system_mode = BareMetalUtils(image=config_data["image"], - hostfile=config_data["system-info"]["hostfile"], + hostfile=hostfile, ssh_path=config_data["system-info"]["ssh-path"], tcp_interface=config_data["system-info"]["tcp-interface"], log_dir=args.logs_dir) @@ -145,7 +148,6 @@ def main(args): intel_gaudi_nodes = IGNodes(health_report=health_report) intel_gaudi_nodes.all_nodes = system_mode.collect_nodes(gaudi_node_label=config_data["gaudi-node-label"]) - intel_gaudi_nodes.watch_nodes = set(intel_gaudi_nodes.all_nodes) healthy_nodes, infected_nodes, missing_nodes = list(), list(), list() occupied_nodes, missing_cards_nodes, misc_nodes = list(), list(), list() @@ -162,7 +164,7 @@ def main(args): level=1, nodes=intel_gaudi_nodes, timeout_s=config_data["level-1"]["timeout_s"]) - occupied_nodes, missing_cards_nodes, misc_nodes = system_mode.diagnose_unhealthy_nodes(infected_nodes, missing_nodes) + occupied_nodes, missing_cards_nodes, misc_nodes = system_mode.diagnose_missing_nodes(missing_nodes) system_mode.clear_ighs_pods() summary = { @@ -184,7 +186,8 @@ def main(args): os.makedirs(f"{health_report.f_dir}/L2") intel_gaudi_nodes.healthy_nodes = set() - intel_gaudi_nodes.watch_nodes = set(intel_gaudi_nodes.all_nodes) + intel_gaudi_nodes.watch_nodes = set(intel_gaudi_nodes.all_nodes).difference(set(missing_nodes)) + intel_gaudi_nodes.missing_nodes = set(missing_nodes) for i in range(config_data["level-2"]["num-rounds"]): nodes_initialized = system_mode.initialize_node_jobs(level=2, @@ -200,7 +203,7 @@ def main(args): nodes=intel_gaudi_nodes, timeout_s=config_data["level-2"]["timeout_s"], round=i) - occupied_nodes, missing_cards_nodes, misc_nodes = system_mode.diagnose_unhealthy_nodes(infected_nodes, missing_nodes) + occupied_nodes, missing_cards_nodes, misc_nodes = system_mode.diagnose_missing_nodes(missing_nodes) system_mode.clear_ighs_pods(job_type="mpijobs") if len(intel_gaudi_nodes.watch_nodes) == 0: diff --git a/utils/intel_gaudi_health_screen/system_utils.py b/utils/intel_gaudi_health_screen/system_utils.py index 99d0671..76551eb 100644 --- a/utils/intel_gaudi_health_screen/system_utils.py +++ b/utils/intel_gaudi_health_screen/system_utils.py @@ -71,7 +71,7 @@ def collect_nodes(self, gaudi_node_label): output = run_cmd(cmd) all_nodes = output.strip().split() - _logger.info(f"Collected Nodes: {all_nodes}") + _logger.info(f"Collected {len(all_nodes)} k8s Nodes: {all_nodes}") return all_nodes @@ -109,7 +109,7 @@ def initialize_node_jobs(self, level, job_path = f"{job_base_path}/L2/r{round}" if len(node_groups) == 0 : - _logger.warn(f"No Node Groups to test found during initialization") + _logger.warning(f"No Node Groups to test found during initialization") return nodes_initialized @@ -266,53 
+266,56 @@ def check_screen_complete(self, current_run_status, health_report, level, round= pods = output.split("\n") for p in pods: - p_name, status, state = p.split() - if status == "Succeeded": - cmd = f"kubectl logs -n {self.namespace} {p_name}" - output = run_cmd(cmd).strip().split("\n") - - start_analyze = False - for l in output: - if "START of Node Report" in l: - start_analyze = True - continue - elif "END of Node Report" in l: - start_analyze = False - continue - - #### analyze output - if start_analyze: - # Ignore Logger output level - bracket_index = l.index("{") - node_status_txt = l[bracket_index:] - status_dict = json.loads(node_status_txt) - - if not p_name in current_run_status: - with open(f"{log_dir}/{p_name}.json", 'w', encoding ='utf8') as f: - json.dump(status_dict, f, indent=4) - with open(f"{log_dir}/{p_name}.log", 'w', encoding ='utf8') as f: - f.write('\n'.join(output)) - - if level == 1: - health_report.write_rows(data=status_dict["cards"], level=level) - current_run_status[p_name] = True - elif level == 2: - health_report.write_rows(data=[status_dict], level=level) - current_run_status[p_name] = (True, status_dict["num_nodes"]) - elif state == "CrashLoopBackOff" and level==2 or (final_check and "launcher" in p_name and status=="Running"): - cmd = f"kubectl logs -n {self.namespace} {p_name}" - output = run_cmd(cmd).strip().split("\n") - - hccL_results = hccl_demo_check(job_id=p_name, health_report=health_report, hccl_log=output, write=False) - - if not p_name in current_run_status: - with open(f"{log_dir}/{p_name}.json", 'w', encoding ='utf8') as f: - json.dump(hccL_results, f, indent=4) - with open(f"{log_dir}/{p_name}.log", 'w', encoding ='utf8') as f: - f.write('\n'.join(output)) - - health_report.write_rows(data=[hccL_results], level=level) - current_run_status[p_name] = (True, hccL_results["num_nodes"]) + try: + p_name, status, state = p.split() + if status == "Succeeded": + cmd = f"kubectl logs -n {self.namespace} {p_name}" + output = run_cmd(cmd).strip().split("\n") + + start_analyze = False + for l in output: + if "START of Node Report" in l: + start_analyze = True + continue + elif "END of Node Report" in l: + start_analyze = False + continue + + #### analyze output + if start_analyze: + # Ignore Logger output level + bracket_index = l.index("{") + node_status_txt = l[bracket_index:] + status_dict = json.loads(node_status_txt) + + if not p_name in current_run_status: + with open(f"{log_dir}/{p_name}.json", 'w', encoding ='utf8') as f: + json.dump(status_dict, f, indent=4) + with open(f"{log_dir}/{p_name}.log", 'w', encoding ='utf8') as f: + f.write('\n'.join(output)) + + if level == 1: + health_report.write_rows(data=status_dict["cards"], level=level) + current_run_status[p_name] = True + elif level == 2: + health_report.write_rows(data=[status_dict], level=level) + current_run_status[p_name] = (True, status_dict["num_nodes"]) + elif state == "CrashLoopBackOff" and level==2 or (final_check and "launcher" in p_name and status=="Running"): + cmd = f"kubectl logs -n {self.namespace} {p_name}" + output = run_cmd(cmd).strip().split("\n") + + hccL_results = hccl_demo_check(job_id=p_name, health_report=health_report, hccl_log=output, write=False) + + if not p_name in current_run_status: + with open(f"{log_dir}/{p_name}.json", 'w', encoding ='utf8') as f: + json.dump(hccL_results, f, indent=4) + with open(f"{log_dir}/{p_name}.log", 'w', encoding ='utf8') as f: + f.write('\n'.join(output)) + + health_report.write_rows(data=[hccL_results], level=level) + 
current_run_status[p_name] = (True, hccL_results["num_nodes"]) + except ValueError: + _logger.error(f"Not able to retrieve Running Pods. Expected to recieve list of pods but got output: {pods}") if level == 1: num_nodes = len(current_run_status) @@ -325,10 +328,11 @@ def check_screen_complete(self, current_run_status, health_report, level, round= return num_nodes - def diagnose_unhealthy_nodes(self, infected_nodes, missing_nodes): + def diagnose_missing_nodes(self, missing_nodes): in_use_set = set() missing_cards_set = set() misc_set = set() + _logger.info(f"Diagnose {len(missing_nodes)} missing_nodes:") for n in missing_nodes: cmd = f"kubectl describe nodes -n {self.namespace} {n}" @@ -359,11 +363,11 @@ def diagnose_unhealthy_nodes(self, infected_nodes, missing_nodes): misc_list = sorted(list(set(missing_nodes).difference(in_use_set).difference(missing_cards_set))) if(len(in_use_list)): - _logger.info(f"{len(in_use_list)} Occupied Nodes: {in_use_list}") + _logger.info(f" {len(in_use_list)} Occupied Nodes: {in_use_list}") if(len(missing_cards_list)): - _logger.info(f"{len(missing_cards_list)} Nodes w/ missing cards: {missing_cards_list}") + _logger.info(f" {len(missing_cards_list)} Nodes w/ missing cards: {missing_cards_list}") if(len(misc_list)): - _logger.info(f"{len(misc_list)} Unaccounted Nodes: {misc_list}") + _logger.info(f" {len(misc_list)} Untested Nodes: {misc_list}") return in_use_list, missing_cards_list, misc_list @@ -426,7 +430,7 @@ def initialize_system(self): def collect_nodes(self, gaudi_node_label=""): - _logger.info(f"Collected Nodes: {self.hosts}") + _logger.info(f"Collected {len(self.hosts)} Nodes: {self.hosts}") return self.hosts @@ -460,7 +464,7 @@ def initialize_node_jobs(self, level, nodes.worker_nodes = list() if len(node_groups) == 0: - _logger.warn(f"No Node Groups to test found during initialization") + _logger.warning(f"No Node Groups to test found during initialization") return nodes_initialized self.update_yaml_job(source_file="config.yaml", out_dir="tmp", out_file="config.yaml", yaml_type="config") @@ -618,63 +622,66 @@ def check_screen_complete(self, current_run_status, health_report, level, round= pods = output.split("\n") for p in pods: - if ":" not in p: - continue - - colon_index = p.index(":") - name = p[:colon_index] - data_txt = p[colon_index+1:] - - data = json.loads(data_txt) - - if data["State"] == "exited": - cmd = f"ssh {name} {check_log_cmd}" - output = run_cmd(cmd).strip().split("\n") - - start_analyze = False - for l in output: - if "START of Node Report" in l: - start_analyze = True - continue - elif "END of Node Report" in l: - start_analyze = False - continue - - #### analyze output - if start_analyze: - # Ignore Logger output level - bracket_index = l.index("{") - node_status_txt = l[bracket_index:] - status_dict = json.loads(node_status_txt) - - if not name in current_run_status: - if level == 1: - health_report.write_rows(data=status_dict["cards"], level=level) - current_run_status[name] = True - elif level == 2: - health_report.write_rows(data=[status_dict], level=level) - current_run_status[name] = (True, status_dict["num_nodes"]) - name = f"ighs-hccl-r{status_dict['round']}-{status_dict['group_id']}" - - with open(f"{log_dir}/{name}.json", 'w', encoding ='utf8') as f: - json.dump(status_dict, f, indent=4) - with open(f"{log_dir}/{name}.log", 'w', encoding ='utf8') as f: - f.write('\n'.join(output)) - elif level==2 and final_check: - cmd = f"ssh {name} {check_log_cmd}" - output = run_cmd(cmd).strip().split("\n") - - if not name in 
current_run_status: - hccL_results = hccl_demo_check(job_id=name, health_report=health_report, hccl_log=output, write=False) - f_name = f"ighs-hccl-r{hccL_results['round']}-{hccL_results['group_id']}" - - with open(f"{log_dir}/{f_name}.json", 'w', encoding ='utf8') as f: - json.dump(hccL_results, f, indent=4) - with open(f"{log_dir}/{f_name}.log", 'w', encoding ='utf8') as f: - f.write('\n'.join(output)) - - health_report.write_rows(data=[hccL_results], level=level) - current_run_status[name] = (True, hccL_results["num_nodes"]) + try: + if ":" not in p: + continue + + colon_index = p.index(":") + name = p[:colon_index] + data_txt = p[colon_index+1:] + + data = json.loads(data_txt) + + if data["State"] == "exited": + cmd = f"ssh {name} {check_log_cmd}" + output = run_cmd(cmd).strip().split("\n") + + start_analyze = False + for l in output: + if "START of Node Report" in l: + start_analyze = True + continue + elif "END of Node Report" in l: + start_analyze = False + continue + + #### analyze output + if start_analyze: + # Ignore Logger output level + bracket_index = l.index("{") + node_status_txt = l[bracket_index:] + status_dict = json.loads(node_status_txt) + + if not name in current_run_status: + if level == 1: + health_report.write_rows(data=status_dict["cards"], level=level) + current_run_status[name] = True + elif level == 2: + health_report.write_rows(data=[status_dict], level=level) + current_run_status[name] = (True, status_dict["num_nodes"]) + name = f"ighs-hccl-r{status_dict['round']}-{status_dict['group_id']}" + + with open(f"{log_dir}/{name}.json", 'w', encoding ='utf8') as f: + json.dump(status_dict, f, indent=4) + with open(f"{log_dir}/{name}.log", 'w', encoding ='utf8') as f: + f.write('\n'.join(output)) + elif level==2 and final_check: + cmd = f"ssh {name} {check_log_cmd}" + output = run_cmd(cmd).strip().split("\n") + + if not name in current_run_status: + hccL_results = hccl_demo_check(job_id=name, health_report=health_report, hccl_log=output, write=False) + f_name = f"ighs-hccl-r{hccL_results['round']}-{hccL_results['group_id']}" + + with open(f"{log_dir}/{f_name}.json", 'w', encoding ='utf8') as f: + json.dump(hccL_results, f, indent=4) + with open(f"{log_dir}/{f_name}.log", 'w', encoding ='utf8') as f: + f.write('\n'.join(output)) + + health_report.write_rows(data=[hccL_results], level=level) + current_run_status[name] = (True, hccL_results["num_nodes"]) + except: + _logger.error(f"Not able to retrieve Running Pods. 
Expected to recieve list of pods but got output: {pods}") if level == 1: num_nodes = len(current_run_status) @@ -687,5 +694,5 @@ def check_screen_complete(self, current_run_status, health_report, level, round= return num_nodes - def diagnose_unhealthy_nodes(self, infected_nodes, missing_nodes): - pass + def diagnose_missing_nodes(self, missing_nodes): + return [],[],[] diff --git a/utils/intel_gaudi_health_screen/template/bare-metal/dockerfile b/utils/intel_gaudi_health_screen/template/bare-metal/dockerfile index e57131c..9a0d218 100644 --- a/utils/intel_gaudi_health_screen/template/bare-metal/dockerfile +++ b/utils/intel_gaudi_health_screen/template/bare-metal/dockerfile @@ -3,6 +3,7 @@ FROM ${BASE_IMAGE} RUN mkdir ~/.ssh && \ cd ~/.ssh && \ +ssh-keygen -A && \ sed -i 's/#Port 22/Port 3122/g' /etc/ssh/sshd_config && \ sed -i 's/# Port 22/ Port 3122/g' /etc/ssh/ssh_config && \ sed -i 's/3022/3122/g' ~/.bashrc && \ @@ -11,6 +12,7 @@ echo "ForwardAgent yes" >> ~/.ssh/config && \ echo "StrictHostKeyChecking no" >> ~/.ssh/config && \ echo "UserKnownHostsFile /dev/null" >> ~/.ssh/config && \ echo "LogLevel ERROR" >> ~/.ssh/config && \ +service ssh start && \ chmod 600 ~/.ssh/config diff --git a/utils/intel_gaudi_health_screen/template/k8s/intel-gaudi-health-screen-L1.yaml b/utils/intel_gaudi_health_screen/template/k8s/intel-gaudi-health-screen-L1.yaml index 2dab422..d1f6941 100644 --- a/utils/intel_gaudi_health_screen/template/k8s/intel-gaudi-health-screen-L1.yaml +++ b/utils/intel_gaudi_health_screen/template/k8s/intel-gaudi-health-screen-L1.yaml @@ -36,6 +36,9 @@ spec: command: ["/bin/bash", "-c"] args: - >- + ssh-keygen -A; + service ssh start; + while [ ! -d /workdir/intel_gaudi_health_screen ]; do sleep 2s; done; diff --git a/utils/intel_gaudi_health_screen/template/k8s/intel-gaudi-health-screen-L2_hccl-demo.yaml b/utils/intel_gaudi_health_screen/template/k8s/intel-gaudi-health-screen-L2_hccl-demo.yaml index 6319743..04c50c0 100644 --- a/utils/intel_gaudi_health_screen/template/k8s/intel-gaudi-health-screen-L2_hccl-demo.yaml +++ b/utils/intel_gaudi_health_screen/template/k8s/intel-gaudi-health-screen-L2_hccl-demo.yaml @@ -48,8 +48,10 @@ spec: command: ["/bin/bash", "-c"] args: - >- - set -eo pipefail; + set -eo pipefail; echo "Target Nodes: $TARGET_NODES"; + ssh-keygen -A; + service ssh start; while [ ! -d /workdir/intel_gaudi_health_screen ]; do sleep 2s; @@ -155,5 +157,6 @@ spec: args: - >- printenv | grep "MY" >> /etc/environment; + ssh-keygen -A; service ssh start; sleep 365d; diff --git a/utils/intel_gaudi_health_screen/utilities.py b/utils/intel_gaudi_health_screen/utilities.py index 47f5458..cfcd893 100644 --- a/utils/intel_gaudi_health_screen/utilities.py +++ b/utils/intel_gaudi_health_screen/utilities.py @@ -91,7 +91,7 @@ def run_cmd(cmd, timeout_s=1_800, verbose=False): if (verbose): _logger.debug(f"Running cmd: {cmd}") - _logger.info(result.stdout) + _logger.debug(result.stdout) return result.stdout @@ -161,41 +161,3 @@ def clear_job(job): _logger.info(f"Attempt {attempts} Pods are still up. Will wait 10 seconds to check again") time.sleep(10) - - -def clear_ighs_pods(job_type="jobs"): - """ Clear Pods with label=ighs,ighs-hccl - - Args: - job_type (str, optional): Type of Job to delete. Options: [jobs, mpijobs]. Defaults to "jobs". 
- """ - _logger.info(f"Checking for existing IGHS Pods ({job_type})") - - metadata_app = "ighs" if (job_type == "jobs") else "ighs-hccl" - - cmd = f"kubectl get pods -n default -l app={metadata_app} -o=custom-columns='NAME:.metadata.name' --no-headers" - output = run_cmd(cmd).strip() - - if len(output) > 0: - _logger.info(f"Found existing IGHS Pods ({job_type}). Will delete.") - - cmd = f"kubectl get {job_type} -n default -l app={metadata_app} -o=custom-columns='NAME:.metadata.name' --no-headers" - output = run_cmd(cmd).strip() - jobs = output.split() - - _logger.info(f"Deleting jobs {jobs}") - for job in jobs: - cmd = f"kubectl delete {job_type} -n default {job}" - output = run_cmd(cmd) - - cmd = f"kubectl get pods -n default -l app={metadata_app} -o=custom-columns='NAME:.metadata.name' --no-headers" - max_attempt = 15 - for attempts in range(max_attempt): - output = run_cmd(cmd).strip() - - if(len(output) == 0): - break - - _logger.info(f"Attempt {attempts}: Pods are still up. Will wait 10 seconds to check again") - time.sleep(10) - diff --git a/utils/intel_gaudi_health_screen/version.txt b/utils/intel_gaudi_health_screen/version.txt index e3a4f19..7e541ae 100644 --- a/utils/intel_gaudi_health_screen/version.txt +++ b/utils/intel_gaudi_health_screen/version.txt @@ -1 +1 @@ -2.2.0 \ No newline at end of file +2.2.2 \ No newline at end of file