diff --git a/dockerfiles/worker.Dockerfile b/dockerfiles/worker.Dockerfile
index aaae7e87..35976677 100644
--- a/dockerfiles/worker.Dockerfile
+++ b/dockerfiles/worker.Dockerfile
@@ -17,48 +17,46 @@
 # under the License.
 #
 
-FROM nvidia/cuda:9.0-base-ubuntu16.04
+FROM ubuntu:16.04
 
-RUN apt-get update && apt-get -y upgrade
-
-# `tensorflow-gpu` dependencies
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-    build-essential \
-    cuda-command-line-tools-9-0 \
-    cuda-cublas-9-0 \
-    cuda-cufft-9-0 \
-    cuda-curand-9-0 \
-    cuda-cusolver-9-0 \
-    cuda-cusparse-9-0 \
-    libcudnn7=7.2.1.38-1+cuda9.0 \
-    libnccl2=2.2.13-1+cuda9.0 \
-    libfreetype6-dev \
-    libhdf5-serial-dev \
-    libpng12-dev \
-    libzmq3-dev \
-    pkg-config \
-    software-properties-common \
-    unzip \
-    && \
+RUN apt-get update && apt-get -y upgrade && \
+    apt-get install -y vim && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-RUN apt-get update && \
-    apt-get install nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda9.0 && \
-    apt-get update && \
-    apt-get install libnvinfer4=4.1.2-1+cuda9.0
+
+# update and install dependencies
+RUN apt-get update && \
+    apt-get install -y \
+    software-properties-common \
+    wget \
+    && add-apt-repository -y ppa:ubuntu-toolchain-r/test \
+    && apt-get update \
+    && apt-get install -y \
+    make \
+    git \
+    curl \
+    vim \
+    vim-gnome \
+    && apt-get install -y cmake=3.5.1-1ubuntu3 \
+    && apt-get install -y \
+    gcc-4.9 g++-4.9 gcc-4.9-base \
+    gcc-4.8 g++-4.8 gcc-4.8-base \
+    gcc-4.7 g++-4.7 gcc-4.7-base \
+    gcc-4.6 g++-4.6 gcc-4.6-base \
+    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.9 100 \
+    && update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-4.9 100
 
 # Install conda with pip and python 3.6
 ARG CONDA_ENVIORNMENT
-RUN apt-get -y install curl bzip2 \
-    && curl -sSL https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -o /tmp/miniconda.sh \
-    && bash /tmp/miniconda.sh -bfp /usr/local \
-    && rm -rf /tmp/miniconda.sh \
-    && conda create -y --name $CONDA_ENVIORNMENT python=3.6 \
-    && conda clean --all --yes
+RUN apt-get update --fix-missing && apt-get -y upgrade && \
+    apt-get install -y curl bzip2 && \
+    curl -sSL https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -o /tmp/miniconda.sh && \
+    bash /tmp/miniconda.sh -bfp /usr/local && \
+    rm -rf /tmp/miniconda.sh && \
+    conda create -y --name $CONDA_ENVIORNMENT python=3.6 && \
+    conda clean --all --yes
 ENV PATH /usr/local/envs/$CONDA_ENVIORNMENT/bin:$PATH
-
 RUN pip install --upgrade pip
 ENV PYTHONUNBUFFERED 1
 
@@ -68,22 +66,24 @@ WORKDIR $DOCKER_WORKDIR_PATH
 ENV PYTHONPATH $DOCKER_WORKDIR_PATH
 
 # Install python dependencies
-RUN mkdir ~/.pip
-#COPY ./pip.conf /root/.pip/pip.conf
-COPY singa_auto/requirements.txt singa_auto/requirements.txt
+COPY singa_auto/ singa_auto/
+
+RUN mkdir -p /root/.config/pip/
+COPY ./.config/pip/pip.conf /root/.config/pip/pip.conf
+
+COPY ./backup_lib/torch-1.6.0-cp36-cp36m-manylinux1_x86_64.whl /root/torch-1.6.0-cp36-cp36m-manylinux1_x86_64.whl
+RUN pip install /root/torch-1.6.0-cp36-cp36m-manylinux1_x86_64.whl
+COPY ./backup_lib/opencv_python-4.4.0.46-cp36-cp36m-manylinux2014_x86_64.whl /root/opencv_python-4.4.0.46-cp36-cp36m-manylinux2014_x86_64.whl
+RUN pip install /root/opencv_python-4.4.0.46-cp36-cp36m-manylinux2014_x86_64.whl
+
 RUN pip install -r singa_auto/requirements.txt
-COPY singa_auto/utils/requirements.txt singa_auto/utils/requirements.txt
 RUN pip install -r singa_auto/utils/requirements.txt
-COPY singa_auto/meta_store/requirements.txt singa_auto/meta_store/requirements.txt
 RUN pip install -r singa_auto/meta_store/requirements.txt
-COPY singa_auto/redis/requirements.txt singa_auto/redis/requirements.txt
 RUN pip install -r singa_auto/redis/requirements.txt
-COPY singa_auto/kafka/requirements.txt singa_auto/kafka/requirements.txt
 RUN pip install -r singa_auto/kafka/requirements.txt
-COPY singa_auto/advisor/requirements.txt singa_auto/advisor/requirements.txt
 RUN pip install -r singa_auto/advisor/requirements.txt
+RUN pip install -r singa_auto/worker/requirements.txt
 
-COPY singa_auto/ singa_auto/
 COPY scripts/ scripts/
 RUN mkdir data/
diff --git a/dockerfiles/worker_cu100.Dockerfile b/dockerfiles/worker_cu100.Dockerfile
new file mode 100644
index 00000000..08d65947
--- /dev/null
+++ b/dockerfiles/worker_cu100.Dockerfile
@@ -0,0 +1,110 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+FROM nvidia/cuda:10.0-base-ubuntu16.04
+
+RUN apt-get update && apt-get -y upgrade && \
+    apt-get install -y vim && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# `tensorflow-gpu` dependencies
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends --allow-unauthenticated\
+    build-essential \
+    cuda-command-line-tools-10-0 \
+    cuda-cublas-dev-10-0 \
+    cuda-cudart-dev-10-0 \
+    cuda-cufft-dev-10-0 \
+    cuda-curand-dev-10-0 \
+    cuda-cusolver-dev-10-0 \
+    cuda-cusparse-dev-10-0 \
+    libcudnn7=7.5.1.10-1+cuda10.0 \
+    libfreetype6-dev \
+    libhdf5-serial-dev \
+    libnccl-dev=2.4.7-1+cuda10.0 \
+    libnccl2=2.4.7-1+cuda10.0 \
+    libpng-dev \
+    libgl1-mesa-glx \
+    libsm6 \
+    libxrender1 \
+    libzmq3-dev \
+    pkg-config \
+    software-properties-common \
+    unzip \
+    lsb-core \
+    && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN apt-get update && \
+    apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda10.0 && \
+    apt-get update && \
+    apt-get install -y --no-install-recommends \
+    libnvinfer5=5.1.5-1+cuda10.0 \
+    libnvinfer6=6.0.1-1+cuda10.0 \
+    libnvinfer7=7.0.0-1+cuda10.0 \
+    libnvinfer-dev=5.1.5-1+cuda10.0 \
+    libnvinfer-dev=6.0.1-1+cuda10.0 \
+    libnvinfer-dev=7.0.0-1+cuda10.0 \
+    && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+
+# install cuda/bin
+# RUN mkdir -p /usr/local/cuda-10.1/bin
+# COPY /usr/local/cuda-10.1/bin/ /usr/local/cuda-10.1/bin/
+
+# Install conda with pip and python 3.6
+ARG CONDA_ENVIORNMENT
+RUN apt-get update --fix-missing && apt-get -y upgrade && \
+    apt-get install -y curl bzip2 && \
+    curl -sSL https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -o /tmp/miniconda.sh && \
+    bash /tmp/miniconda.sh -bfp /usr/local && \
+    rm -rf /tmp/miniconda.sh && \
+    conda create -y --name $CONDA_ENVIORNMENT python=3.6 && \
+    conda clean --all --yes
+ENV PATH /usr/local/envs/$CONDA_ENVIORNMENT/bin:$PATH
+
+RUN pip install --upgrade pip
+ENV PYTHONUNBUFFERED 1
+
+ARG DOCKER_WORKDIR_PATH
+RUN mkdir -p $DOCKER_WORKDIR_PATH
+WORKDIR $DOCKER_WORKDIR_PATH
+ENV PYTHONPATH $DOCKER_WORKDIR_PATH
+
+# Install python dependencies
+COPY singa_auto/ singa_auto/
+
+RUN mkdir -p /root/.config/pip/
+COPY ./.config/pip/pip.conf /root/.config/pip/pip.conf
+
+RUN pip install -r singa_auto/requirements.txt
+RUN pip install -r singa_auto/utils/requirements.txt
+RUN pip install -r singa_auto/meta_store/requirements.txt
+RUN pip install -r singa_auto/redis/requirements.txt
+RUN pip install -r singa_auto/kafka/requirements.txt
+RUN pip install -r singa_auto/advisor/requirements.txt
+
+COPY scripts/ scripts/
+RUN mkdir data/
+
+CMD ["python", "scripts/start_worker.py"]
diff --git a/dockerfiles/worker_cu101.Dockerfile b/dockerfiles/worker_cu101.Dockerfile
new file mode 100644
index 00000000..af161da0
--- /dev/null
+++ b/dockerfiles/worker_cu101.Dockerfile
@@ -0,0 +1,117 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+FROM nvidia/cuda:10.1-base-ubuntu16.04
+
+RUN apt-get update && apt-get -y upgrade && \
+    apt-get install -y vim && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# `tensorflow-gpu` dependencies
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    build-essential \
+    cuda-command-line-tools-10-1 \
+    cuda-cufft-10-1 \
+    cuda-curand-10-1 \
+    cuda-cusolver-10-1 \
+    cuda-cusparse-10-1 \
+    libcublas10=10.2.3.254-1 \
+    libcublas-dev=10.2.3.254-1 \
+    libcudnn7=7.6.4.38-1+cuda10.1 \
+    libcudnn7-dev=7.6.4.38-1+cuda10.1 \
+    libfreetype6-dev \
+    libhdf5-serial-dev \
+    libpng-dev \
+    libgl1-mesa-glx \
+    libsm6 \
+    libxrender1 \
+    libzmq3-dev \
+    pkg-config \
+    software-properties-common \
+    unzip \
+    lsb-core \
+    && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# cuda-10.1 package install cublas in cuda-10.2
+# call ldconfig to link them
+RUN cp -r /usr/local/cuda-10.2/* /usr/local/cuda-10.1/ && \
+    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/ && \
+    ldconfig /etc/ld.so.conf.d
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    libnvinfer5=5.1.5-1+cuda10.1 \
+    libnvinfer6=6.0.1-1+cuda10.1 \
+    libnvinfer-dev=5.1.5-1+cuda10.1 \
+    libnvinfer-dev=6.0.1-1+cuda10.1 \
+    && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+
+# install cuda/bin
+# RUN mkdir -p /usr/local/cuda-10.1/bin
+# COPY /usr/local/cuda-10.1/bin/ /usr/local/cuda-10.1/bin/
+
+# Install conda with pip and python 3.6
+ARG CONDA_ENVIORNMENT
+RUN apt-get update --fix-missing && apt-get -y upgrade && \
+    apt-get install -y curl bzip2 && \
+    curl -sSL https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -o /tmp/miniconda.sh && \
+    bash /tmp/miniconda.sh -bfp /usr/local && \
+    rm -rf /tmp/miniconda.sh && \
+    conda create -y --name $CONDA_ENVIORNMENT python=3.6 && \
+    conda clean --all --yes
+ENV PATH /usr/local/envs/$CONDA_ENVIORNMENT/bin:$PATH
+
+RUN pip install --upgrade pip
+ENV PYTHONUNBUFFERED 1
+
+ARG DOCKER_WORKDIR_PATH
+RUN mkdir -p $DOCKER_WORKDIR_PATH
+WORKDIR $DOCKER_WORKDIR_PATH
+ENV PYTHONPATH $DOCKER_WORKDIR_PATH
+
+# Install python dependencies
+COPY singa_auto/ singa_auto/
+
+RUN mkdir -p /root/.config/pip/
+COPY ./.config/pip/pip.conf /root/.config/pip/pip.conf
+
+COPY ./backup_lib/torch-1.6.0-cp36-cp36m-manylinux1_x86_64.whl /root/torch-1.6.0-cp36-cp36m-manylinux1_x86_64.whl
+RUN pip install /root/torch-1.6.0-cp36-cp36m-manylinux1_x86_64.whl
+COPY ./backup_lib/opencv_python-4.4.0.46-cp36-cp36m-manylinux2014_x86_64.whl /root/opencv_python-4.4.0.46-cp36-cp36m-manylinux2014_x86_64.whl
+RUN pip install /root/opencv_python-4.4.0.46-cp36-cp36m-manylinux2014_x86_64.whl
+
+RUN pip install -r singa_auto/requirements.txt
+RUN pip install -r singa_auto/utils/requirements.txt
+RUN pip install -r singa_auto/meta_store/requirements.txt
+RUN pip install -r singa_auto/redis/requirements.txt
+RUN pip install -r singa_auto/kafka/requirements.txt
+RUN pip install -r singa_auto/advisor/requirements.txt
+RUN pip install -r singa_auto/worker/requirements.txt
+
+COPY scripts/ scripts/
+RUN mkdir data/
+
+CMD ["python", "scripts/start_worker.py"]
diff --git a/dockerfiles/worker_cu110.Dockerfile b/dockerfiles/worker_cu110.Dockerfile
new file mode 100644
index 00000000..d86d2cc7
--- /dev/null
+++ b/dockerfiles/worker_cu110.Dockerfile
@@ -0,0 +1,123 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+FROM nvidia/cuda:11.0-base-ubuntu16.04
+
+RUN apt-get update && apt-get -y upgrade && \
+    apt-get install -y vim && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# `tensorflow-gpu` dependencies
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    build-essential \
+    cuda-command-line-tools-11-0 \
+    cuda-cudart-11-0 \
+    cuda-cudart-dev-11-0 \
+    # cuda-cufft-11-0 \
+    # cuda-curand-11-0 \
+    # cuda-cusolver-11-0 \
+    # cuda-cusparse-11-0 \
+    # libcublas-11-0==11.2.0.252-1 \
+    # libcublas-dev-11-0==11.2.0.252-1 \
+    # libcudnn8==8.0.4.30-1+cuda11.0 \
+    # libcudnn8-dev==8.0.4.30-1+cuda11.0 \
+    libcufft-11-0 \
+    libcufft-dev-11-0 \
+    libcurand-11-0 \
+    libcurand-dev-11-0 \
+    libcusolver-11-0 \
+    libcusolver-dev-11-0 \
+    libcusparse-11-0 \
+    libcusparse-dev-11-0 \
+    libcublas-11-0 \
+    libcublas-dev-11-0 \
+    libcudnn8 \
+    libcudnn8-dev \
+    libfreetype6-dev \
+    libhdf5-serial-dev \
+    libpng-dev \
+    libgl1-mesa-glx \
+    libsm6 \
+    libxrender1 \
+    libzmq3-dev \
+    pkg-config \
+    software-properties-common \
+    unzip \
+    lsb-core \
+    && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# # cuda-10.1 package install cublas in cuda-10.2
+# # call ldconfig to link them
+# RUN cp -r /usr/local/cuda-10.2/* /usr/local/cuda-10.1/ && \
+#     export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/ && \
+#     ldconfig /etc/ld.so.conf.d
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    libnvinfer7=7.1.3-1+cuda11.0 \
+    libnvinfer-dev=7.1.3-1+cuda11.0 \
+    && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+
+# install cuda/bin
+# RUN mkdir -p /usr/local/cuda-10.1/bin
+# COPY /usr/local/cuda-10.1/bin/ /usr/local/cuda-10.1/bin/
+
+# Install conda with pip and python 3.6
+ARG CONDA_ENVIORNMENT
+RUN apt-get update --fix-missing && apt-get -y upgrade && \
+    apt-get install -y curl bzip2 && \
+    curl -sSL https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -o /tmp/miniconda.sh && \
+    bash /tmp/miniconda.sh -bfp /usr/local && \
+    rm -rf /tmp/miniconda.sh && \
+    conda create -y --name $CONDA_ENVIORNMENT python=3.6 && \
+    conda clean --all --yes
+ENV PATH /usr/local/envs/$CONDA_ENVIORNMENT/bin:$PATH
+
+RUN pip install --upgrade pip
+ENV PYTHONUNBUFFERED 1
+
+ARG DOCKER_WORKDIR_PATH
+RUN mkdir -p $DOCKER_WORKDIR_PATH
+WORKDIR $DOCKER_WORKDIR_PATH
+ENV PYTHONPATH $DOCKER_WORKDIR_PATH
+
+# Install python dependencies
+COPY singa_auto/ singa_auto/
+
+RUN mkdir -p /root/.config/pip/
+COPY ./.config/pip/pip.conf /root/.config/pip/pip.conf
+
+RUN pip install -r singa_auto/requirements.txt
+RUN pip install -r singa_auto/utils/requirements.txt
+RUN pip install -r singa_auto/meta_store/requirements.txt
+RUN pip install -r singa_auto/redis/requirements.txt
+RUN pip install -r singa_auto/kafka/requirements.txt
+RUN pip install -r singa_auto/advisor/requirements.txt
+
+COPY scripts/ scripts/
+RUN mkdir data/
+
+CMD ["python", "scripts/start_worker.py"]
diff --git a/dockerfiles/worker_cu90.Dockerfile b/dockerfiles/worker_cu90.Dockerfile
new file mode 100644
index 00000000..7a079432
--- /dev/null
+++ b/dockerfiles/worker_cu90.Dockerfile
@@ -0,0 +1,90 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+FROM nvidia/cuda:9.0-base-ubuntu16.04
+
+RUN apt-get update && apt-get -y upgrade
+
+# `tensorflow-gpu` dependencies
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    build-essential \
+    cuda-command-line-tools-9-0 \
+    cuda-cublas-9-0 \
+    cuda-cufft-9-0 \
+    cuda-curand-9-0 \
+    cuda-cusolver-9-0 \
+    cuda-cusparse-9-0 \
+    libcudnn7=7.2.1.38-1+cuda9.0 \
+    libfreetype6-dev \
+    libhdf5-serial-dev \
+    libnccl2=2.2.13-1+cuda9.0 \
+    libpng12-dev \
+    libgl1-mesa-glx \
+    libsm6 \
+    libxrender1 \
+    libzmq3-dev \
+    pkg-config \
+    software-properties-common \
+    unzip \
+    && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN apt-get update && \
+    apt-get install nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda9.0 && \
+    apt-get update && \
+    apt-get install libnvinfer4=4.1.2-1+cuda9.0
+
+# Install conda with pip and python 3.6
+ARG CONDA_ENVIORNMENT
+RUN apt-get update --fix-missing && apt-get -y upgrade && \
+    apt-get install -y curl bzip2 && \
+    curl -sSL https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -o /tmp/miniconda.sh && \
+    bash /tmp/miniconda.sh -bfp /usr/local && \
+    rm -rf /tmp/miniconda.sh && \
+    conda create -y --name $CONDA_ENVIORNMENT python=3.6 && \
+    conda clean --all --yes
+ENV PATH /usr/local/envs/$CONDA_ENVIORNMENT/bin:$PATH
+
+RUN pip install --upgrade pip
+ENV PYTHONUNBUFFERED 1
+
+ARG DOCKER_WORKDIR_PATH
+RUN mkdir -p $DOCKER_WORKDIR_PATH
+WORKDIR $DOCKER_WORKDIR_PATH
+ENV PYTHONPATH $DOCKER_WORKDIR_PATH
+
+# Install python dependencies
+COPY singa_auto/ singa_auto/
+
+RUN mkdir -p /root/.config/pip/
+COPY ./.config/pip/pip.conf /root/.config/pip/pip.conf
+
+RUN pip install -r singa_auto/requirements.txt
+RUN pip install -r singa_auto/utils/requirements.txt
+RUN pip install -r singa_auto/meta_store/requirements.txt
+RUN pip install -r singa_auto/redis/requirements.txt
+RUN pip install -r singa_auto/kafka/requirements.txt
+RUN pip install -r singa_auto/advisor/requirements.txt
+
+COPY scripts/ scripts/
+RUN mkdir data/
+
+CMD ["python", "scripts/start_worker.py"]
diff --git a/examples/data/image_segmentaion/2007_000862.jpg b/examples/data/image_segmentaion/2007_000862.jpg
new file mode 100644
index 00000000..d2eb8f81
Binary files /dev/null and b/examples/data/image_segmentaion/2007_000862.jpg differ
diff --git a/examples/data/image_segmentaion/2007_001397.jpg b/examples/data/image_segmentaion/2007_001397.jpg
new file mode 100644
index 00000000..8fbf68f8
Binary files /dev/null and b/examples/data/image_segmentaion/2007_001397.jpg differ
diff --git a/examples/data/image_segmentaion/Persian_120.jpg b/examples/data/image_segmentaion/Persian_120.jpg
new file mode 100644
index 00000000..90097e16
Binary files /dev/null and b/examples/data/image_segmentaion/Persian_120.jpg differ
diff --git a/examples/data/image_segmentaion/pomeranian_159.jpg b/examples/data/image_segmentaion/pomeranian_159.jpg
new file mode 100644
index 00000000..48f501af
Binary files /dev/null and b/examples/data/image_segmentaion/pomeranian_159.jpg differ
diff --git a/examples/data/object_detection/cat.jpg b/examples/data/object_detection/cat.jpg
new
file mode 100644 index 00000000..d9331e09 Binary files /dev/null and b/examples/data/object_detection/cat.jpg differ diff --git a/examples/models/image_object_detection/SaYolo.py b/examples/models/image_object_detection/SaYolo.py new file mode 100644 index 00000000..8378ba11 --- /dev/null +++ b/examples/models/image_object_detection/SaYolo.py @@ -0,0 +1,664 @@ +import os +os.environ['CUDA_VISIBLE_DEVICES'] = "0" + +import sys +sys.path.append(os.getcwd()) + +import base64 +import copy +import cv2 +import io +import json +import logging +import numpy as np +import random +import tempfile +import torch +import torchvision +import zipfile + +import PIL + +from PIL import Image +from PIL import ImageFile +ImageFile.LOAD_TRUNCATED_IMAGES = True + +from terminaltables import AsciiTable +from torch.optim import lr_scheduler +from torch.utils.data import DataLoader +from torchvision.transforms import transforms +from typing import List + +# Singa-auto Dependency +from singa_auto.darknet.model import DarkNet +from singa_auto.darknet.utils import ap_per_class +from singa_auto.darknet.utils import get_batch_statistics +from singa_auto.darknet.utils import non_max_suppression +from singa_auto.darknet.utils import pad_to_square +from singa_auto.darknet.utils import rescale_boxes +from singa_auto.darknet.utils import resize +from singa_auto.darknet.utils import weights_init_normal +from singa_auto.darknet.utils import xywh2xyxy +from singa_auto.datasets.image_detection_dataset import YoloDataset +from singa_auto.datasets.image_detection_dataset import fetch_from_train_set +from singa_auto.datasets.image_detection_dataset import split_dataset +from singa_auto.model.dev import test_model_class +from singa_auto.model.knob import FixedKnob +# from singa_auto.model.model import BaseModel +from singa_auto.model.object_detection import ObjtDetModel +from singa_auto.model.utils import utils + + +logger = logging.getLogger(__name__) + + +class SaYolo(ObjtDetModel): + """ + implements a yolo + """ + def __init__(self, **knobs): + super().__init__(**knobs) + self._knobs = knobs + + self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') + print("using device", self.device) + logger.info("using device", self.device) + + self.model = None + self.dataset_name = None + self.gradient_accumulations = 2 + + # default is cat , only one class + self.filter_classes = ['cat'] + + # # make sure results folder exists + # if os.path.exists(r"./results/"): + # import shutil + # shutil.rmtree(r"./results/") + # os.makedirs(r"./results/") + + @staticmethod + def get_knob_config(): + return { + "conf_thresh": FixedKnob(0.1), + "lr": FixedKnob(0.01), + "model_def": FixedKnob("./singa_auto/darknet/yolov3-tiny.cfg"), + "nms_thresh": FixedKnob(0.2), + "pretrained_weights": FixedKnob("./singa_auto/darknet/darknet53.conv.74"), + } + + def is_predict_valid(self, box_info, class_info, image_size): + """ + make sure predicted result is valid, ie coordinates and labels are correct + """ + if box_info[6] - 1 in range(len(class_info)) and min(box_info[0:4]) >= 0 and max(box_info[0:4]) < image_size: + return True + else: + return False + + def __collate_fn(self, batch): + return tuple(zip(*batch)) + + def train(self, dataset_path, **kwargs): + logger.info("Training params: {}".format(json.dumps(kwargs))) + + # num_classes = len(self._knobs.get("filter_classes")) + num_epoch = kwargs["num_epoch"] if "num_epoch" in kwargs else 2 + batch_size = kwargs["batch_size"] if "batch_size" in kwargs else 8 + + if 
"filter_classes" in kwargs: + self.filter_classes = kwargs["filter_classes"] + + print("{} in train.".format(self.filter_classes)) + logger.info("{} in train.".format(self.filter_classes)) + + # root_path = r"/home/taomingyang/dataset/coco_mini_cat/" + + # load + dataset_zipfile = zipfile.ZipFile(dataset_path, 'r') + train_folder = tempfile.TemporaryDirectory() + dataset_zipfile.extractall(path=train_folder.name) + root_path = train_folder.name + print("root_path: {}".format(root_path)) + logger.info("root_path: {}".format(root_path)) + + print("prepare dataset") + logger.info("prepare dataset") + if os.path.isdir(os.path.join(root_path, "image")): + print("split train/val subsets...") + logger.info("split train/val subsets...") + split_dataset(root_path) + elif os.path.isdir(os.path.join(root_path, "train")): + if not os.path.exists(os.path.join(root_path, "val")): + logger.info("fetch val from train") + fetch_from_train_set(root_path) + else: + print("unsupported dataset format!") + logger.info("unsupported dataset format!") + return None + + image_train = os.path.join(root_path, "train", "image") + image_val = os.path.join(root_path, "val", "image") + annotation_train = os.path.join(root_path, "train", "annotation") + annotation_val = os.path.join(root_path, "val", "annotation") + + # Get dataloader + dataset_train = YoloDataset( + image_train, + annotation_train, + is_single_json_file=False, + filter_classes=self.filter_classes, + is_train=True, + augment=True, + multiscale=True + ) + # Get dataloader + dataset_test = YoloDataset( + image_val, + annotation_val, + is_single_json_file=False, + filter_classes=self.filter_classes, + is_train=False, + augment=False, + multiscale=False + ) + + print("Training the model YOLO using {}".format(self.device)) + logger.info("Training the model YOLO using {}".format(self.device)) + + # define training and validation data loaders + data_loader_train = torch.utils.data.DataLoader( + dataset_train, batch_size=batch_size, shuffle=True, collate_fn=dataset_train.collate_fn + ) + + data_loader_test = torch.utils.data.DataLoader( + dataset_test, batch_size=batch_size, shuffle=False, collate_fn=dataset_test.collate_fn + ) + + # get the model using our helper function + self.model = DarkNet(config_path=self._knobs.get("model_def")).to(self.device) + self.model.apply(weights_init_normal) + + # pretrained weights + if self._knobs.get("pretrained_weights"): + pretrained_weights_path = self._knobs.get("pretrained_weights") + if pretrained_weights_path.endswith(".pth"): + if os.path.exists(pretrained_weights_path): + self.model.load_state_dict(torch.load(pretrained_weights_path, map_location="cpu")) + logger.info("using pretrained_weights {}".format(pretrained_weights_path)) + else: + logger.warning("pretrained_weights {} not exists.".format(pretrained_weights_path)) + else: + if not os.path.exists(pretrained_weights_path): + import wget + os.makedirs(os.path.dirname(pretrained_weights_path), exist_ok=True) + pretrained_weights_path = wget.download(r"https://pjreddie.com/media/files/darknet53.conv.74", out=os.path.dirname(pretrained_weights_path)) + self.model.load_darknet_weights(pretrained_weights_path) + logger.info("using pretrained_weights {}".format(pretrained_weights_path)) + + # # move model to the right device + # self.model.to(self.device) + + # construct an optimizer + optimizer = torch.optim.Adam( + self.model.parameters(), + lr=self._knobs.get("lr"), + ) + + torch.manual_seed(1) + + for epoch in range(num_epoch): + # train for one epoch, printing 
every 10 iterations + loss_value = self._train_one_epoch(self.model, optimizer, data_loader_train, epoch) + + print("loss is {}".format(loss_value)) + logger.info("loss is {}".format(loss_value)) + + if loss_value is None: + break + + # update the learning rate + # lr_scheduler.step() + + print("begin to evalute after epoch: {}".format(epoch)) + logger.info("begin to evalute after epoch: {}".format(epoch)) + precision, recall, AP, f1, ap_class = self._evaluate(data_loader_test) + print("Average Precisions:") + logger.info("Average Precisions:") + for i, c in enumerate(ap_class): + info_str = "\t+ Class \"{}\" ({}) - AP: {:.5f}".format(c, dataset_test.coco.cats[dataset_test.label_to_cat[c]]['name'], AP[i]) + print(info_str) + logger.info(info_str) + print("mAP: {:.9f}".format(AP.mean())) + logger.info("mAP: {:.9f}".format(AP.mean())) + + def _train_one_epoch(self, model, optimizer, data_loader, epoch): + model.train() + + # lr_scheduler = None + # if epoch == 0: + # warmup_factor = 1. / 1000 + # warmup_iters = min(1000, len(data_loader) - 1) + # + # lr_scdheduler = self.__warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor) + + logger.info("On Epoch {}, begin to train".format(epoch)) + # loss_value = 0 + + metrics = [ + "grid_size", + "loss", + "x", + "y", + "w", + "h", + "conf", + "cls", + "cls_acc", + "recall50", + "recall75", + "precision", + "conf_obj", + "conf_noobj", + ] + + for batch_i, (_, images, targets) in enumerate(data_loader): + logger.info("\t batch {}/{}".format(batch_i, len(data_loader))) + batches_done = len(data_loader) * epoch + batch_i + + images = images.to(self.device) + targets = targets.to(self.device) + + loss, outputs = model(images, targets) + + if not np.math.isfinite(loss): + logger.info("Loss is {}, stopping training".format(loss)) + return None + + loss.backward() + + if batches_done % self.gradient_accumulations: + optimizer.step() + optimizer.zero_grad() + + # log_str = "\n---- [Epoch %d, Batch %d/%d] ----\n" % (epoch, batch_i, len(data_loader)) + # + # metric_table = [["Metrics", *[f"YOLO Layer {i}" for i in range(len(self.model.yolo_layers))]]] + # + # # Log metrics at each YOLO layer + # for i, metric in enumerate(metrics): + # formats = {m: "%.6f" for m in metrics} + # formats["grid_size"] = "%2d" + # formats["cls_acc"] = "%.2f%%" + # row_metrics = [formats[metric] % yolo.metrics.get(metric, 0) for yolo in self.model.yolo_layers] + # metric_table += [[metric, *row_metrics]] + # + # # # Tensorboard logging + # # tensorboard_log = [] + # # for j, yolo in enumerate(model.yolo_layers): + # # for name, metric in yolo.metrics.items(): + # # if name != "grid_size": + # # tensorboard_log += [(f"{name}_{j+1}", metric)] + # # tensorboard_log += [("loss", loss.item())] + # # summary_logger.list_of_scalars_summary(tensorboard_log, batches_done) + # + # log_str += AsciiTable(metric_table).table + # log_str += f"\nTotal loss {loss.item()}" + # print(log_str) + # logger.info(log_str) + # + # if lr_scheduler is not None: + # lr_scheduler.step() + + model.seen += images.size(0) + + return loss.item() + + def dump_parameters(self): + """ + dump parameters to local file + """ + params = {} + with tempfile.NamedTemporaryFile() as tmp: + # Save whole model to temp h5 file + torch.save(self.model.state_dict(), tmp.name) + # Read from temp h5 file & encode it to base64 string + with open(tmp.name, 'rb') as f: + weight_base64 = f.read() + params['weight_base64'] = base64.b64encode(weight_base64).decode('utf-8') + params["module_cfg"] = json.dumps(self.model.model_cfg) + 
return params + + def load_parameters(self, params): + """ + load parameters from local file + """ + + logger.info("load parameters") + weight_base64 = params['weight_base64'] + self.module_cfg = json.loads(params["module_cfg"]) + + weight_base64_bytes = base64.b64decode(weight_base64.encode('utf-8')) + + state_dict = torch.load(io.BytesIO(weight_base64_bytes), map_location=self.device) + self.model = DarkNet(model_cfg=self.module_cfg).to(self.device) + self.model.load_state_dict(state_dict) + # self.model.cuda() + + def evaluate(self, dataset_path, **kwargs): + print(kwargs) + + # root_path = r"/home/taomingyang/dataset/coco_mini_cat/" + + # load + dataset_zipfile = zipfile.ZipFile(dataset_path, 'r') + evaluate_folder = tempfile.TemporaryDirectory() + dataset_zipfile.extractall(path=evaluate_folder.name) + root_path = evaluate_folder.name + print(root_path) + logger.info("root_path: {}".format(root_path)) + + print("prepare dataset") + if os.path.isdir(os.path.join(root_path, "image")): + print("split train/val subsets...") + logger.info("split train/val subsets...") + split_dataset(root_path) + elif os.path.isdir(os.path.join(root_path, "train")): + if not os.path.exists(os.path.join(root_path, "val")): + fetch_from_train_set(root_path) + logger.info("fetch val from train") + else: + print("unsupported dataset format!") + logger.info("unsupported dataset format!") + return None + + image_val = os.path.join(root_path, "val", "image") + annotation_val = os.path.join(root_path, "val", "annotation") + + dataset_valid = YoloDataset( + image_val, + annotation_val, + is_single_json_file=False, + filter_classes=self.filter_classes, + is_train=False, + ) + data_loader_valid = torch.utils.data.DataLoader( + dataset_valid, + batch_size=1, + shuffle=False, + collate_fn=dataset_valid.collate_fn + ) + + logger.info("dataset prepared") + + # perform an evaluate + precision, recall, AP, f1, ap_class = self._evaluate(data_loader_valid) + + return np.mean(precision) + + @torch.no_grad() + def _evaluate(self, data_loader): + self.model.eval() + + labels = [] + sample_metrics = [] # List of tuples (TP, confs, pred) + + for batch_i, (names, images, targets) in enumerate(data_loader): + # Extract labels + labels += targets[:, 1].tolist() + # Rescale target + targets[:, 2:] = xywh2xyxy(targets[:, 2:]) + targets[:, 2:] *= 416 + + images = images.to(self.device) + + with torch.no_grad(): + outputs = self.model(images) + outputs = non_max_suppression(outputs, conf_thresh=self._knobs.get("conf_thresh"), nms_thresh=self._knobs.get("nms_thresh")) + + for name, image, output in zip(names, images, outputs): + tmp = image.cpu().detach().permute((1, 2, 0)).mul(255).clamp(0, 255).numpy() + tmp = cv2.cvtColor(tmp, cv2.COLOR_RGB2BGR) + if output is not None: + for rect_info in output: + coord = rect_info.cpu().numpy() + if self.is_predict_valid(coord, self.filter_classes, image.size(-1)): + cv2.rectangle(tmp, (coord[0], coord[1]), (coord[2], coord[3]), (0, 255, 0), 3) + + cv2.imwrite('./results/{}'.format(os.path.basename(name)), tmp) + + sample_metrics += get_batch_statistics(outputs, targets, iou_thresh=0.5) + + # return score, evaluate_res_str + if 0 == len(sample_metrics): + ap_class = np.array(list(set(labels)), dtype=np.int32) + precision = recall = AP = f1 = np.array([0 for x in ap_class], dtype=np.float64) + else: + # Concatenate sample statistics + true_positives, pred_scores, pred_labels = [np.concatenate(x, 0) for x in list(zip(*sample_metrics))] + precision, recall, AP, f1, ap_class = 
ap_per_class(true_positives, pred_scores, pred_labels, labels) + + return precision, recall, AP, f1, ap_class + + def predict(self, queries: List[PIL.Image.Image]) -> List[dict]: + """ + predict with trained model + """ + + os.makedirs("./results/", exist_ok=True) + + result = list() + + for img in queries: + img_res = dict() + + if isinstance(img, List): + print(len(img)) + img = np.array(img[0]) + img_data = Image.fromarray(np.uint8(img)) + elif isinstance(img, np.ndarray): + img_data = Image.fromarray(img) + else: + img_data = img + + # get prediction + res = self.__get_prediction(img_data) + if res is None: + img_with_box = img_with_segmentation = img_data + boxes, pred_cls = None, None + else: + boxes, pred_cls = res + img_data = np.asarray(img_data).astype(np.uint8) + img_with_box = self.__get_bounding_box(img_data, boxes, pred_cls) + + # the response format is only used to show on origin web ui + img_res['explanations'] = {} + # img_res['explanations']['lime_img'] = self.__convert_img_to_str(img_with_box) + # img_res['explanations']['box_info'] = boxes + # img_res['explanations']['classes'] = pred_cls + + img_res['explanations']['box_info'] = [] + + if boxes is not None and pred_cls is not None and len(boxes) == len(pred_cls): + for box_coord, class_name in zip(boxes, pred_cls): + img_res['explanations']['box_info'].append({ + "coord": box_coord, + "class_name": class_name, + }) + img_res['mc_dropout'] = [] + + result.append(img_res) + + return result + + def __warmup_lr_scheduler(self, optimizer, warmup_iters, warmup_factor): + def f(x): + if x >= warmup_iters: + return 1 + alpha = float(x) / warmup_iters + return warmup_factor * (1 - alpha) + alpha + + return torch.optim.lr_scheduler.LambdaLR(optimizer, f) + + def __get_prediction(self, img): + self.model.eval() + + img = torchvision.transforms.ToTensor()(img) + # Handle images with less than three channels + if len(img.shape) != 3: + img = img.unsqueeze(0) + img = img.expand((3, img.shape[1], img.shape[2])) + elif len(img.shape) == 3 and img.shape[0] == 1: + img = img.expand((3, img.shape[1], img.shape[2])) + + ori_size = img.shape[-2:] + + # Pad to square resolution + img, pad = pad_to_square(img, 0) + img = torch.unsqueeze(resize(img, 416), 0) + img = img.to(self.device) + pred = self.model(img) + pred = non_max_suppression(pred, conf_thresh=self._knobs.get("conf_thresh"), nms_thresh=self._knobs.get("nms_thresh")) + pred_class = [] + pred_boxes = [] + if pred[0] is None: + return None + + box_info = rescale_boxes(pred[0], 416, ori_size) + num_box = box_info.size()[0] + + # get predicted info + for rect_info in box_info: + coord = rect_info.cpu().numpy() + if self.is_predict_valid(coord, self.filter_classes, img.size(-1)): + pred_class.append(self.filter_classes[np.int(coord[6])-1]) + pred_boxes.append((np.int(coord[0]), np.int(coord[1]), np.int(coord[2]), np.int(coord[3]))) + + if len(pred_boxes) == 0: + return None + else: + return pred_boxes, pred_class + + def __get_bounding_box(self, img, boxes, pred_cls, rect_th=3, text_size=1, text_th=3): + """ + draw the bounding box on img + """ + + img = copy.deepcopy(img) + img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) + for i in range(len(boxes)): + cv2.rectangle(img, (boxes[i][0], boxes[i][1]), (boxes[i][2], boxes[i][3]), (0, 255, 0), rect_th) + cv2.putText(img, pred_cls[i], (boxes[i][0], boxes[i][1]), cv2.FONT_HERSHEY_SIMPLEX, text_size, (0, 255, 0), + thickness=text_th) + + cv2.imwrite("./results/{:04d}.png".format(random.randint(0, 9999)), img) + return img + + def 
__get_segmentation(self, img, masks): + """ + draw the segmentation box on img + """ + def random_colour_masks(image): + """ + for display the prediction image + """ + colours = [ + [0, 255, 0], + [0, 0, 255], + [255, 0, 0], + [0, 255, 255], + [255, 255, 0], + [255, 0, 255], + [80, 70, 180], + [250, 80, 190], + [245, 145, 50], + [70, 150, 250], + [50, 190, 190] + ] + r = np.zeros_like(image).astype(np.uint8) + g = np.zeros_like(image).astype(np.uint8) + b = np.zeros_like(image).astype(np.uint8) + r[image == 1], g[image == 1], b[image == 1] = colours[random.randrange(0, 10)] + coloured_mask = np.stack([r, g, b], axis=2) + return coloured_mask + + img = copy.deepcopy(img) + for i in range(len(masks)): + rgb_mask = random_colour_masks(masks[i]) + img = cv2.addWeighted(img, 1, rgb_mask, 0.5, 0) + return img + + def __convert_img_to_str(self, arr): + im = Image.fromarray(arr.astype("uint8")) + rawBytes = io.BytesIO() + encoding = 'utf-8' + im.save(rawBytes, "PNG") + rawBytes.seek(0) + return base64.b64encode(rawBytes.read()).decode(encoding) + + def __get_iou_types(self, model): + model_without_ddp = model + if isinstance(model, torch.nn.parallel.DistributedDataParallel): + model_without_ddp = model.module + iou_types = ["bbox"] + if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN): + iou_types.append("segm") + if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN): + iou_types.append("keypoints") + return iou_types + + +if __name__ == "__main__": + import argparse + from singa_auto.model.dev import test_model_class + + parser = argparse.ArgumentParser() + parser.add_argument( + '--train_path', + type=str, + default='/home/taomingyang/dataset/package/coco_mini.zip', + help='Path to train dataset' + ) + parser.add_argument( + '--val_path', + type=str, + default='/home/taomingyang/dataset/package/coco_mini.zip', + help='Path to validation dataset' + ) + parser.add_argument( + '--query_path', + type=str, + default='./examples/data/object_detection/cat.jpg', + help='Path(s) to query image(s), delimited by commas' + ) + + (args, _) = parser.parse_known_args() + + queries = utils.dataset.load_images(args.query_path.split(',')) + test_model_class( + model_file_path=__file__, + model_class='SaYolo', + task='OBJECT_DETECTION', + dependencies={ + "opencv-python": "4.4.0.46", + "terminaltables": "3.1.0", + "torch": "1.4.0", + "torchvision": "0.5.0", + }, + train_dataset_path=args.train_path, + val_dataset_path=args.val_path, + train_args={ + "batch_size": 8, + "model_def": "./singa_auto/darknet/yolov3-tiny.cfg", + "filter_classes": ['cat'], + "num_epoch": 1, + "pretrained_weights": "./singa_auto/darknet/darknet53.conv.74", + }, + queries=queries + ) + + """ + Test the model out of singa-auto platform + python -c "import torch;print(torch.cuda.is_available())" + """ + diff --git a/examples/models/image_object_detection/food_detection/FoodlgNet.py b/examples/models/image_object_detection/food_detection/FoodlgNet.py new file mode 100644 index 00000000..0441d54e --- /dev/null +++ b/examples/models/image_object_detection/food_detection/FoodlgNet.py @@ -0,0 +1,1339 @@ +import os +from os.path import join +os.environ['CUDA_VISIBLE_DEVICES'] = "0" + +import sys +sys.path.append(os.getcwd()) + +import time +import json +import base64 +import random +import logging +import zipfile +import tempfile +import datetime +import requests +from PIL import Image +from io import BytesIO +from typing import List +from collections import OrderedDict + +from singa_auto.model import 
ObjtDetModel, FixedKnob +from singa_auto.model.dev import test_model_class +from singa_auto.model.utils import dataset + +import torch +import numpy as np +from torch import nn +from torch.utils.data import Dataset, DataLoader +from torchvision import transforms +import torch.nn.functional as F + +logger = logging.getLogger(__name__) + +class FoodlgNet(ObjtDetModel): + + @staticmethod + def get_knob_config(): + return { + 'learning_rate': FixedKnob(1e-10), + 'momentum': FixedKnob(0.7), + 'epoch': FixedKnob(0), + 'batch_size': FixedKnob(4) + } + + def __init__(self, **knobs): + # load model parameters and configurations + self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + + # image preprocessing function + self.cls_transform = transforms.Compose([ + transforms.Resize((299,299), interpolation=2), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ]) + + # initialize other parameters + self.lr = knobs.get('learning_rate') + self.momentum = knobs.get('momentum') + self.epoch = knobs.get('epoch') + self.batch_size = knobs.get('batch_size') + + def _initialize_model(self, paths=None): + # two networks, yolo detection network (to train) and resnext classification network (fixed) + + if paths is None: + # here you need to set an extra file server to provide config/pretrained weights files + # the codes below will download files from file server. + # During prediction, this process is not required any more. + dst_folder = tempfile.TemporaryDirectory() + dst_folder_path = dst_folder.name + # object_names_path = load_url(save_path=dst_folder_path, url='http://192.168.100.203:8000/FoodlgNet/classes.names') + model_config_path = load_url(save_path=dst_folder_path, url='http://192.168.100.203:8000/FoodlgNet/foodlg_yolo.cfg') + pretrain_model_path = load_url(save_path=dst_folder_path, url='http://192.168.100.203:8000/FoodlgNet/yolov3_ckpt_SGD_94.pth') + + classify_names_path = load_url(save_path=dst_folder_path, url='http://192.168.100.203:8000/FoodlgNet/food783.name') + classify_model_path = load_url(save_path=dst_folder_path, url='http://192.168.100.203:8000/FoodlgNet/resnext101_ckpt.pth') + + else: + model_config_path = paths['model_config_path'] + pretrain_model_path = paths['pretrain_model_path'] + classify_model_path = paths['classify_model_path'] + classify_names_path = paths['classify_names_path'] + + + # initiate detection model + self.conf_thres = 0.5 + self.nms_thres = 0.4 + self.img_size = 416 + with open(model_config_path, encoding = 'utf-8') as f: + self.model_config_path = f.readlines() + + det_model = Darknet(model_config_path, img_size=self.img_size) + det_model.load_state_dict(torch.load(pretrain_model_path, map_location='cpu')) + det_model = det_model.to(self.device) + det_model.eval() + self.det_model = det_model + self.det_classes = ['food'] + + # initiate classification model + self.num_classes = 783 + self.clf_classes = load_classes(classify_names_path) + + from torchvision.models.resnet import Bottleneck, ResNet + clf_model = ResNet(block=Bottleneck, layers=[3, 4, 23, 3], groups=32, width_per_group=16) + clf_model.fc = nn.Linear(2048, self.num_classes) + + ckpt = torch.load(classify_model_path, map_location='cpu') + ckpt = OrderedDict({k.replace('module.', ''): v for k, v in ckpt.items()}) + clf_model.load_state_dict(ckpt) + clf_model = clf_model.to(self.device) + clf_model.eval() + self.clf_model = clf_model + + + def train(self, dataset_path, shared_params=None, **train_args): + # fine-tune yolov3 
model for detectection part + self._initialize_model() + + # load and process data + dataset_folder = load_zip(dataset_path) + # split dataset and then save to txt files + train_path, valid_path = split_dataset_save(dataset_folder, ratio=0.9) + # load data to torch dataloader + dataset = ListDataset(train_path, augment=True, multiscale=True) + dataloader = torch.utils.data.DataLoader( + dataset, + batch_size=self.batch_size, + shuffle=False, + num_workers=8, + pin_memory=True, + collate_fn=dataset.collate_fn, + ) + # other settings for training + optimizer = torch.optim.SGD(self.det_model.parameters(), lr=self.lr, momentum=self.momentum) + + metrics = [ + "grid_size", + "loss", + "x", + "y", + "w", + "h", + "conf", + "cls", + "cls_acc", + "recall50", + "recall75", + "precision", + "conf_obj", + "conf_noobj", + ] + + # start training + start_epoch = 0 + for epoch in range(start_epoch, self.epoch): + self.det_model.train() + start_time = time.time() + for batch_i, (_, imgs, targets) in enumerate(dataloader): + + batches_done = len(dataloader) * epoch + batch_i + + imgs = imgs.to(self.device) + targets = targets.to(self.device) + + loss, outputs = self.det_model(imgs, targets) + loss.backward() + + if batches_done % 2: + # Accumulates gradient before each step + optimizer.step() + optimizer.zero_grad() + + # ---------------- + # Log progress + # ---------------- + + log_str = "\n---- [Epoch %d/%d, Batch %d/%d] ----\n" % (epoch, self.epoch, batch_i, len(dataloader)) + metric_table = [["Metrics", *[f"YOLO Layer {i}" for i in range(len(self.det_model.yolo_layers))]]] + + # Log metrics at each YOLO layer + for i, metric in enumerate(metrics): + formats = {m: "%.6f" for m in metrics} + formats["grid_size"] = "%2d" + formats["cls_acc"] = "%.2f%%" + row_metrics = [formats[metric] % yolo.metrics.get(metric, 0) for yolo in self.det_model.yolo_layers] + metric_table += [[metric, *row_metrics]] + + if batch_i%50==0: + log_str += toAscii(metric_table) + log_str += f"\nTotal loss {loss.item()}" + + # Determine approximate time left for epoch + epoch_batches_left = len(dataloader) - (batch_i + 1) + time_left = datetime.timedelta(seconds=epoch_batches_left * (time.time() - start_time) / (batch_i + 1)) + log_str += f"\n---- ETA {time_left}" + + print(log_str) + + self.det_model.seen += imgs.size(0) + + print("\n---- Evaluating Model ----") + # Evaluate the model on the validation set + precision, recall, AP, f1, ap_class = _evaluate( + self.det_model, + path=valid_path, + iou_thres=0.5, + conf_thres=self.conf_thres, + nms_thres=self.nms_thres, + img_size=self.img_size, + batch_size=self.batch_size, + device=self.device + ) + evaluation_metrics = [ + ("val_precision", precision.mean()), + ("val_recall", recall.mean()), + ("val_mAP", AP.mean()), + ("val_f1", f1.mean()), + ] + + # Print class APs and mAP + ap_table = [["Index", "Class name", "AP"]] + for i, c in enumerate(ap_class): + ap_table += [[c, self.det_classes[c], "%.5f" % AP[i]]] + # print(AsciiTable(ap_table).table) + print(toAscii(ap_table)) + print(f"---- mAP {AP.mean()}") + + + def dump_parameters(self): + params = {} + # get models weights + det_model_weights = self.det_model.state_dict() + clf_model_weights = self.clf_model.state_dict() + + # convert model weights and configs to json format + params['det_model_weights'] = serialize_state_dict(det_model_weights) + params['clf_model_weights'] = serialize_state_dict(clf_model_weights) + params['det_model_cfg'] = json.dumps(self.model_config_path) + params['clf_classes'] = 
json.dumps(self.clf_classes) + + return params + + def evaluate(self, dataset_path, **kargs): + # load and process data + dataset_folder = load_zip(dataset_path) + # split dataset and then save to txt files + train_path, valid_path = split_dataset_save(dataset_folder, ratio=0.9) + # evaluate process + precision, recall, AP, f1, ap_class = _evaluate( + self.det_model, + path=valid_path, + iou_thres=0.5, + conf_thres=self.conf_thres, + nms_thres=self.nms_thres, + img_size=self.img_size, + batch_size=self.batch_size, + device=self.device + ) + + return f1[0] + + def load_parameters(self, params): + paths = {} + # prepare tmp file paths + dst_folder = tempfile.TemporaryDirectory().name + os.mkdir(dst_folder) + + model_config_path = os.path.join(dst_folder,'det_config.pth') + pretrain_model_path = os.path.join(dst_folder, 'det_model.pth') + classify_model_path = os.path.join(dst_folder, 'clf_model.names') + classify_names_path = os.path.join(dst_folder, 'clf_class.cfg') + + # convert params to python object and save to tmp paths + det_model_weights = deserialize_state_dict(params['det_model_weights']) + clf_model_weights = deserialize_state_dict(params['clf_model_weights']) + + torch.save(det_model_weights, pretrain_model_path) + torch.save(clf_model_weights, classify_model_path) + + with open(model_config_path,'w', encoding = 'utf-8') as f: + f.writelines(json.loads(params['det_model_cfg'])) + + with open(classify_names_path,'w', encoding = 'utf-8') as f: + for name in json.loads(params['clf_classes']): + f.write(name + '\n') + + # record these paths + paths['model_config_path'] = model_config_path + paths['pretrain_model_path'] = pretrain_model_path + paths['classify_model_path'] = classify_model_path + paths['classify_names_path'] = classify_names_path + + # initiate model + self._initialize_model(paths=paths) + + + def predict(self, queries): + logger.info(f'the length of queries is {len(queries)}') + # queries is a list of PIL.Image object + ########## + # yolo part + input_imgs = [] + widths = [] + heights = [] + + queries = unifyImageType(queries) + for img in queries: + w, h = img.size + widths.append(w) + heights.append(h) + + img = transforms.ToTensor()(img) # Extract image as PyTorch tensor + img, _ = pad_to_square(img, 0) # Pad to square resolution + img = resize(img, self.img_size) # Resize + input_imgs.append(img.unsqueeze(0)) + + input_imgs = torch.cat(input_imgs) + input_imgs = input_imgs.to(self.device) + + + # Get detections + with torch.no_grad(): + detections = self.det_model(input_imgs) + detections = non_max_suppression(detections, self.conf_thres, self.nms_thres) + + cls_results = [] + for det_res, width, height, img in zip(detections, widths, heights, queries): + + if det_res is None: # no food detected, skip irv2 part + cls_results.append({ + 'explanations': { + 'box_info': [] + }, + 'raw_preds': [], + 'mc_dropout': [], # not used + }) + continue + + ########## + # irv2 part + # pass each detection to classification model + + cropped_imgs = [] + bbox_values = [] + confs = [] + predictions = [] + + det_res = rescale_boxes(det_res, self.img_size, (height, width)) + for x1, y1, x2, y2, conf, cls_conf, cls_pred in det_res: + + x1, y1, x2, y2 = list(map(lambda x: x.tolist(), (x1, y1, x2, y2))) + cropped = img.crop((x1, y1, x2, y2)) # (left, upper, right, lower) + + cropped_imgs.append(cropped) + bbox_values.append([x1, y1, x2, y2]) + confs.append(conf.cpu().item()) + + cropped_imgs = [self.cls_transform(img) for img in cropped_imgs] + test_dataloader = 
DataLoader(cropped_imgs, batch_size=self.batch_size, shuffle=False) + + for batch in test_dataloader: + + batch = batch.to(self.device) + # parallelly batch prediction + with torch.no_grad(): + prediction = self.clf_model(batch) + + predictions.extend(p.cpu().numpy() for p in prediction) + + # post processing to make result compatible with different front ends + result = { + 'explanations':{ + 'box_info': [] + }, + 'raw_preds': [], + 'mc_dropout': [], # not used + } + for idx in range(len(predictions)): + class_id = np.argsort(predictions[idx])[::-1][:1] + str_class = ' '.join(self.clf_classes[i] for i in class_id) + + jbox = {} + jbox['label_id'] = str(class_id[0]) + jbox['label'] = str(str_class) + jbox['probability'] = confs[idx] + + x1, y1, x2, y2 = bbox_values[idx] + jbox['detection_box'] = [ + max(0, y1 / height), + max(0, x1 / width), + min(1, y2 / height), + min(1, x2 / width) + ] + + exp_box = {} + exp_box['coord'] = [int(x1), int(y1), int(x2), int(y2)] + exp_box['class_name'] = str(str_class) + + result['explanations']['box_info'].append(exp_box) + result['raw_preds'].append(jbox) + + cls_results.append(result) + + logger.info(f'Predict result: {cls_results}') + return cls_results + +def unifyImageType(imgs): + # to check if the image is PIL.Image or numpy.ndarray + # and convert all to PIL.Image + results = [] + for img in imgs: + if isinstance(img, List): + # used for accepting image from forkcloud frontend + img = np.uint8(np.array(img[0])) + results.append(Image.fromarray(img)) + elif isinstance(img, np.ndarray): + results.append(Image.fromarray(img)) + else: + results.append(img) + return results + + +def load_zip(zip_path): + logger.info(zip_path) + # extract uploaded zipfile + if not os.path.exists(zip_path): + raise FileNotFoundError(f'zip file {zip_path} does not exist') + + dst_folder = tempfile.TemporaryDirectory().name + zip_data = zipfile.ZipFile(zip_path, 'r') + zip_data.extractall(path=dst_folder) + return dst_folder + +def load_url(save_path, url): + # download file and save + res = requests.get(url, timeout=300) + filename = join(save_path, url.split('/')[-1]) + with open(filename, 'wb') as f: + f.write(res.content) + return filename + +def serialize_state_dict(state_dict): + with tempfile.NamedTemporaryFile() as tmp: + torch.save(state_dict, tmp.name) + with open(tmp.name, 'rb') as f: + weights = f.read() + + return base64.b64encode(weights).decode('utf-8') + +def deserialize_state_dict(b64bytes): + b64bytes = base64.b64decode(b64bytes.encode('utf-8')) + state_dict = torch.load(BytesIO(b64bytes), map_location='cpu') + + return state_dict + +def split_dataset_save(dataset_folder, ratio = 0.9): + image_paths = os.listdir(join(dataset_folder, 'images')) + image_paths = [join(dataset_folder, 'images', path) for path in image_paths] + + # split dataset + train_size = round(len(image_paths) * ratio) + valid_size = len(image_paths) - train_size + random.seed(42) + random_idx = random.sample(range(len(image_paths)), k=train_size) + random_idx = sorted(random_idx) + + train_set = [] + valid_set = [] + idx_pointer = 0 + for i, path in enumerate(image_paths): + if i == random_idx[idx_pointer]: + train_set.append(path) + idx_pointer += 1 + else: + valid_set.append(path) + + train_file_path = join(dataset_folder, 'train.txt') + valid_file_path = join(dataset_folder, 'valid.txt') + with open(train_file_path, 'w') as f: + for line in train_set: + f.write(line + '\n') + + with open(valid_file_path, 'w') as f: + for line in valid_set: + f.write(line + '\n') + + return 
train_file_path, valid_file_path + + +def toAscii(data_list): + # convert evaluate result list to string + res = '' + for line in data_list: + line = [str(l) for l in line] + line = ','.join(line) + '\n' + res += line + return res + +def _evaluate(model, path, iou_thres, conf_thres, nms_thres, img_size, batch_size, device): + model.eval() + + # Get dataloader + dataset = ListDataset(path, img_size=img_size, augment=False, multiscale=False) + dataloader = torch.utils.data.DataLoader( + dataset, batch_size=batch_size, shuffle=False, num_workers=1, collate_fn=dataset.collate_fn + ) + + Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor + + labels = [] + sample_metrics = [] # List of tuples (TP, confs, pred) + for batch_i, (_, imgs, targets) in enumerate(dataloader): + + # Extract labels + labels += targets[:, 1].tolist() + # Rescale target + targets[:, 2:] = xywh2xyxy(targets[:, 2:]) + targets[:, 2:] *= img_size + + + with torch.no_grad(): + imgs = imgs.to(device) + outputs = model(imgs) + outputs = non_max_suppression(outputs, conf_thres=conf_thres, nms_thres=nms_thres) + + sample_metrics += get_batch_statistics(outputs, targets, iou_threshold=iou_thres) + + # Concatenate sample statistics + true_positives, pred_scores, pred_labels = [np.concatenate(x, 0) for x in list(zip(*sample_metrics))] + precision, recall, AP, f1, ap_class = ap_per_class(true_positives, pred_scores, pred_labels, labels) + + return precision, recall, AP, f1, ap_class + + + + +############################# +# YOLOv3 part + +def to_cpu(tensor): + return tensor.detach().cpu() + +def load_classes(path): + """ + Loads class labels at 'path' + """ + fp = open(path, "r", encoding = 'utf-8') + names = fp.read().split("\n")[:-1] + return names + +def parse_model_config(path): + """Parses the yolo-v3 layer configuration file and returns module definitions""" + file = open(path, 'r', encoding = 'utf-8') + lines = file.read().split('\n') + lines = [x for x in lines if x and not x.startswith('#')] + lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces + module_defs = [] + for line in lines: + if line.startswith('['): # This marks the start of a new block + module_defs.append({}) + module_defs[-1]['type'] = line[1:-1].rstrip() + if module_defs[-1]['type'] == 'convolutional': + module_defs[-1]['batch_normalize'] = 0 + else: + key, value = line.split("=") + value = value.strip() + module_defs[-1][key.rstrip()] = value.strip() + + return module_defs + +def pad_to_square(img, pad_value): + c, h, w = img.shape + dim_diff = np.abs(h - w) + # (upper / left) padding and (lower / right) padding + pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2 + # Determine padding + pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0) + # Add padding + img = F.pad(img, pad, "constant", value=pad_value) + + return img, pad + +def resize(image, size): + image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0) + return image + +def horisontal_flip(images, targets): + images = torch.flip(images, [-1]) + targets[:, 2] = 1 - targets[:, 2] + return images, targets + +def ap_per_class(tp, conf, pred_cls, target_cls): + """ Compute the average precision, given the recall and precision curves. + Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. + # Arguments + tp: True positives (list). + conf: Objectness value from 0-1 (list). + pred_cls: Predicted object classes (list). + target_cls: True object classes (list). 
+ # Returns + The average precision as computed in py-faster-rcnn. + """ + + # Sort by objectness + i = np.argsort(-conf) + tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] + + # Find unique classes + unique_classes = np.unique(target_cls) + + # Create Precision-Recall curve and compute AP for each class + ap, p, r = [], [], [] + for c in unique_classes: + i = pred_cls == c + n_gt = (target_cls == c).sum() # Number of ground truth objects + n_p = i.sum() # Number of predicted objects + + if n_p == 0 and n_gt == 0: + continue + elif n_p == 0 or n_gt == 0: + ap.append(0) + r.append(0) + p.append(0) + else: + # Accumulate FPs and TPs + fpc = (1 - tp[i]).cumsum() + tpc = (tp[i]).cumsum() + + # Recall + recall_curve = tpc / (n_gt + 1e-16) + r.append(recall_curve[-1]) + + # Precision + precision_curve = tpc / (tpc + fpc) + p.append(precision_curve[-1]) + + # AP from recall-precision curve + ap.append(compute_ap(recall_curve, precision_curve)) + + # Compute F1 score (harmonic mean of precision and recall) + p, r, ap = np.array(p), np.array(r), np.array(ap) + f1 = 2 * p * r / (p + r + 1e-16) + + return p, r, ap, f1, unique_classes.astype("int32") + +def compute_ap(recall, precision): + """ Compute the average precision, given the recall and precision curves. + Code originally from https://github.com/rbgirshick/py-faster-rcnn. + + # Arguments + recall: The recall curve (list). + precision: The precision curve (list). + # Returns + The average precision as computed in py-faster-rcnn. + """ + # correct AP calculation + # first append sentinel values at the end + mrec = np.concatenate(([0.0], recall, [1.0])) + mpre = np.concatenate(([0.0], precision, [0.0])) + + # compute the precision envelope + for i in range(mpre.size - 1, 0, -1): + mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + i = np.where(mrec[1:] != mrec[:-1])[0] + + # and sum (\Delta recall) * prec + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) + return ap + +def get_batch_statistics(outputs, targets, iou_threshold): + """ Compute true positives, predicted scores and predicted labels per sample """ + batch_metrics = [] + for sample_i in range(len(outputs)): + + if outputs[sample_i] is None: + continue + + output = outputs[sample_i] + pred_boxes = output[:, :4] + pred_scores = output[:, 4] + pred_labels = output[:, -1] + + true_positives = np.zeros(pred_boxes.shape[0]) + + annotations = targets[targets[:, 0] == sample_i][:, 1:] + target_labels = annotations[:, 0] if len(annotations) else [] + if len(annotations): + detected_boxes = [] + target_boxes = annotations[:, 1:] + + for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)): + + # If targets are found break + if len(detected_boxes) == len(annotations): + break + + # Ignore if label is not one of the target labels + if pred_label not in target_labels: + continue + + iou, box_index = bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0) + if iou >= iou_threshold and box_index not in detected_boxes: + true_positives[pred_i] = 1 + detected_boxes += [box_index] + batch_metrics.append([true_positives, pred_scores, pred_labels]) + return batch_metrics + +def bbox_wh_iou(wh1, wh2): + wh2 = wh2.t() + w1, h1 = wh1[0], wh1[1] + w2, h2 = wh2[0], wh2[1] + inter_area = torch.min(w1, w2) * torch.min(h1, h2) + union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area + return inter_area / union_area + +def bbox_iou(box1, box2, x1y1x2y2=True): + """ + Returns the IoU of two bounding 
boxes + """ + if not x1y1x2y2: + # Transform from center and width to exact coordinates + b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2 + b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2 + b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2 + b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2 + else: + # Get the coordinates of bounding boxes + b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3] + + # get the corrdinates of the intersection rectangle + inter_rect_x1 = torch.max(b1_x1, b2_x1) + inter_rect_y1 = torch.max(b1_y1, b2_y1) + inter_rect_x2 = torch.min(b1_x2, b2_x2) + inter_rect_y2 = torch.min(b1_y2, b2_y2) + # Intersection area + inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp( + inter_rect_y2 - inter_rect_y1 + 1, min=0 + ) + # Union Area + b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1) + b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1) + + iou = inter_area / (b1_area + b2_area - inter_area + 1e-16) + + return iou + +def weights_init_normal(m): + classname = m.__class__.__name__ + if classname.find("Conv") != -1: + torch.nn.init.normal_(m.weight.data, 0.0, 0.02) + elif classname.find("BatchNorm2d") != -1: + torch.nn.init.normal_(m.weight.data, 1.0, 0.02) + torch.nn.init.constant_(m.bias.data, 0.0) + +def rescale_boxes(boxes, current_dim, original_shape): + """ Rescales bounding boxes to the original shape """ + orig_h, orig_w = original_shape + # The amount of padding that was added + pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape)) + pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape)) + # Image height and width after padding is removed + unpad_h = current_dim - pad_y + unpad_w = current_dim - pad_x + # Rescale bounding boxes to dimension of original image + boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w + boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h + boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w + boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h + return boxes + +def xywh2xyxy(x): + y = x.new(x.shape) + y[..., 0] = x[..., 0] - x[..., 2] / 2 + y[..., 1] = x[..., 1] - x[..., 3] / 2 + y[..., 2] = x[..., 0] + x[..., 2] / 2 + y[..., 3] = x[..., 1] + x[..., 3] / 2 + return y + +def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4): + """ + Removes detections with lower object confidence score than 'conf_thres' and performs + Non-Maximum Suppression to further filter detections. 
+ Returns detections with shape: + (x1, y1, x2, y2, object_conf, class_score, class_pred) + """ + + # From (center x, center y, width, height) to (x1, y1, x2, y2) + prediction[..., :4] = xywh2xyxy(prediction[..., :4]) + output = [None for _ in range(len(prediction))] + for image_i, image_pred in enumerate(prediction): + # Filter out confidence scores below threshold + image_pred = image_pred[image_pred[:, 4] >= conf_thres] + # If none are remaining => process next image + if not image_pred.size(0): + continue + # Object confidence times class confidence + score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0] + # Sort by it + image_pred = image_pred[(-score).argsort()] + class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True) + detections = torch.cat((image_pred[:, :5], class_confs.float(), class_preds.float()), 1) + # Perform non-maximum suppression + keep_boxes = [] + while detections.size(0): + large_overlap = bbox_iou(detections[0, :4].unsqueeze(0), detections[:, :4]) > nms_thres + label_match = detections[0, -1] == detections[:, -1] + # Indices of boxes with lower confidence scores, large IOUs and matching labels + invalid = large_overlap & label_match + weights = detections[invalid, 4:5] + # Merge overlapping bboxes by order of confidence + detections[0, :4] = (weights * detections[invalid, :4]).sum(0) / weights.sum() + keep_boxes += [detections[0]] + detections = detections[~invalid] + if keep_boxes: + output[image_i] = torch.stack(keep_boxes) + + return output + +def build_targets(pred_boxes, pred_cls, target, anchors, ignore_thres): + + # ByteTensor = torch.cuda.ByteTensor if pred_boxes.is_cuda else torch.ByteTensor + ByteTensor = torch.cuda.BoolTensor if pred_boxes.is_cuda else torch.BoolTensor + FloatTensor = torch.cuda.FloatTensor if pred_boxes.is_cuda else torch.FloatTensor + + nB = pred_boxes.size(0) + nA = pred_boxes.size(1) + nC = pred_cls.size(-1) + nG = pred_boxes.size(2) + + # Output tensors + obj_mask = ByteTensor(nB, nA, nG, nG).fill_(0) + noobj_mask = ByteTensor(nB, nA, nG, nG).fill_(1) + class_mask = FloatTensor(nB, nA, nG, nG).fill_(0) + iou_scores = FloatTensor(nB, nA, nG, nG).fill_(0) + tx = FloatTensor(nB, nA, nG, nG).fill_(0) + ty = FloatTensor(nB, nA, nG, nG).fill_(0) + tw = FloatTensor(nB, nA, nG, nG).fill_(0) + th = FloatTensor(nB, nA, nG, nG).fill_(0) + tcls = FloatTensor(nB, nA, nG, nG, nC).fill_(0) + + + # # note: solver multi gpu problem + # target = target[target.sum(dim=1) != 0] + + # Convert to position relative to box + target_boxes = target[:, 2:6] * nG + gxy = target_boxes[:, :2] + gwh = target_boxes[:, 2:] + # Get anchors with best iou + ious = torch.stack([bbox_wh_iou(anchor, gwh) for anchor in anchors]) + best_ious, best_n = ious.max(0) + # Separate target values + b, target_labels = target[:, :2].long().t() + gx, gy = gxy.t() + gw, gh = gwh.t() + gi, gj = gxy.long().t() + + # prevent index out of boundary + gi = gi.clamp(0, nG - 1) + gj = gj.clamp(0, nG - 1) + + # Set masks + obj_mask[b, best_n, gj, gi] = 1 + noobj_mask[b, best_n, gj, gi] = 0 + + # Set noobj mask to zero where iou exceeds ignore threshold + for i, anchor_ious in enumerate(ious.t()): + noobj_mask[b[i], anchor_ious > ignore_thres, gj[i], gi[i]] = 0 + + # Coordinates + tx[b, best_n, gj, gi] = gx - gx.floor() + ty[b, best_n, gj, gi] = gy - gy.floor() + # Width and height + tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16) + th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16) + # One-hot encoding of label + tcls[b, best_n, gj, gi, 
target_labels] = 1 + # Compute label correctness and iou at best anchor + class_mask[b, best_n, gj, gi] = (pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels).float() + iou_scores[b, best_n, gj, gi] = bbox_iou(pred_boxes[b, best_n, gj, gi], target_boxes, x1y1x2y2=False) + + tconf = obj_mask.float() + return iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf + +def create_modules(module_defs): + """ + Constructs module list of layer blocks from module configuration in module_defs + """ + hyperparams = module_defs.pop(0) + output_filters = [int(hyperparams["channels"])] + module_list = nn.ModuleList() + for module_i, module_def in enumerate(module_defs): + modules = nn.Sequential() + + if module_def["type"] == "convolutional": + bn = int(module_def["batch_normalize"]) + filters = int(module_def["filters"]) + kernel_size = int(module_def["size"]) + pad = (kernel_size - 1) // 2 + modules.add_module( + f"conv_{module_i}", + nn.Conv2d( + in_channels=output_filters[-1], + out_channels=filters, + kernel_size=kernel_size, + stride=int(module_def["stride"]), + padding=pad, + bias=not bn, + ), + ) + if bn: + modules.add_module(f"batch_norm_{module_i}", nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5)) + if module_def["activation"] == "leaky": + modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1)) + + elif module_def["type"] == "maxpool": + kernel_size = int(module_def["size"]) + stride = int(module_def["stride"]) + if kernel_size == 2 and stride == 1: + modules.add_module(f"_debug_padding_{module_i}", nn.ZeroPad2d((0, 1, 0, 1))) + maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2)) + modules.add_module(f"maxpool_{module_i}", maxpool) + + elif module_def["type"] == "upsample": + upsample = Upsample(scale_factor=int(module_def["stride"]), mode="nearest") + modules.add_module(f"upsample_{module_i}", upsample) + + elif module_def["type"] == "route": + layers = [int(x) for x in module_def["layers"].split(",")] + filters = sum([output_filters[1:][i] for i in layers]) + modules.add_module(f"route_{module_i}", EmptyLayer()) + + elif module_def["type"] == "shortcut": + filters = output_filters[1:][int(module_def["from"])] + modules.add_module(f"shortcut_{module_i}", EmptyLayer()) + + elif module_def["type"] == "yolo": + anchor_idxs = [int(x) for x in module_def["mask"].split(",")] + # Extract anchors + anchors = [int(x) for x in module_def["anchors"].split(",")] + anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)] + anchors = [anchors[i] for i in anchor_idxs] + num_classes = int(module_def["classes"]) + img_size = int(hyperparams["height"]) + # Define detection layer + yolo_layer = YOLOLayer(anchors, num_classes, img_size) + modules.add_module(f"yolo_{module_i}", yolo_layer) + # Register module list and number of output filters + module_list.append(modules) + output_filters.append(filters) + + return hyperparams, module_list + + +class Upsample(nn.Module): + """ nn.Upsample is deprecated """ + + def __init__(self, scale_factor, mode="nearest"): + super(Upsample, self).__init__() + self.scale_factor = scale_factor + self.mode = mode + + def forward(self, x): + x = F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode) + return x + + +class EmptyLayer(nn.Module): + """Placeholder for 'route' and 'shortcut' layers""" + + def __init__(self): + super(EmptyLayer, self).__init__() + + +class YOLOLayer(nn.Module): + """Detection layer""" + + def __init__(self, anchors, num_classes, img_dim=416): + 
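+        # Box decoding applied later in forward(): for an anchor at grid cell (cx, cy),
+        #   bx = sigmoid(tx) + cx,      by = sigmoid(ty) + cy,
+        #   bw = anchor_w * exp(tw),    bh = anchor_h * exp(th),
+        # all expressed in grid units and then multiplied by the stride to map back to pixels.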
super(YOLOLayer, self).__init__() + self.anchors = anchors + self.num_anchors = len(anchors) + self.num_classes = num_classes + self.ignore_thres = 0.5 + self.mse_loss = nn.MSELoss() + self.bce_loss = nn.BCELoss() + self.obj_scale = 1 + self.noobj_scale = 100 + self.metrics = {} + self.img_dim = img_dim + self.grid_size = 0 # grid size + + def compute_grid_offsets(self, grid_size, cuda=True): + self.grid_size = grid_size + g = self.grid_size + FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor + self.stride = self.img_dim / self.grid_size + # Calculate offsets for each grid + self.grid_x = torch.arange(g).repeat(g, 1).view([1, 1, g, g]).type(FloatTensor) + self.grid_y = torch.arange(g).repeat(g, 1).t().view([1, 1, g, g]).type(FloatTensor) + self.scaled_anchors = FloatTensor([(a_w / self.stride, a_h / self.stride) for a_w, a_h in self.anchors]) + self.anchor_w = self.scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1)) + self.anchor_h = self.scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1)) + + def forward(self, x, targets=None, img_dim=None): + + # Tensors for cuda support + FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor + LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor + ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor + + self.img_dim = img_dim + num_samples = x.size(0) + grid_size = x.size(2) + + prediction = ( + x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size) + .permute(0, 1, 3, 4, 2) + .contiguous() + ) + + # Get outputs + x = torch.sigmoid(prediction[..., 0]) # Center x + y = torch.sigmoid(prediction[..., 1]) # Center y + w = prediction[..., 2] # Width + h = prediction[..., 3] # Height + pred_conf = torch.sigmoid(prediction[..., 4]) # Conf + pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred. + + # If grid size does not match current we compute new offsets + if grid_size != self.grid_size: + self.compute_grid_offsets(grid_size, cuda=x.is_cuda) + + # Add offset and scale with anchors + pred_boxes = FloatTensor(prediction[..., :4].shape) + pred_boxes[..., 0] = x.data + self.grid_x + pred_boxes[..., 1] = y.data + self.grid_y + pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w + pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h + + output = torch.cat( + ( + pred_boxes.view(num_samples, -1, 4) * self.stride, + pred_conf.view(num_samples, -1, 1), + pred_cls.view(num_samples, -1, self.num_classes), + ), + -1, + ) + + if targets is None: + return output, 0 + else: + iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets( + pred_boxes=pred_boxes, + pred_cls=pred_cls, + target=targets, + anchors=self.scaled_anchors, + ignore_thres=self.ignore_thres, + ) + + # Loss : Mask outputs to ignore non-existing objects (except with conf. 
loss) + loss_x = self.mse_loss(x[obj_mask], tx[obj_mask]) + loss_y = self.mse_loss(y[obj_mask], ty[obj_mask]) + loss_w = self.mse_loss(w[obj_mask], tw[obj_mask]) + loss_h = self.mse_loss(h[obj_mask], th[obj_mask]) + loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask]) + loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask]) + loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj + loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask]) + total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls + + # Metrics + cls_acc = 100 * class_mask[obj_mask].mean() + conf_obj = pred_conf[obj_mask].mean() + conf_noobj = pred_conf[noobj_mask].mean() + conf50 = (pred_conf > 0.5).float() + iou50 = (iou_scores > 0.5).float() + iou75 = (iou_scores > 0.75).float() + detected_mask = conf50 * class_mask * tconf + precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16) + recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16) + recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16) + + self.metrics = { + "loss": to_cpu(total_loss).item(), + "x": to_cpu(loss_x).item(), + "y": to_cpu(loss_y).item(), + "w": to_cpu(loss_w).item(), + "h": to_cpu(loss_h).item(), + "conf": to_cpu(loss_conf).item(), + "cls": to_cpu(loss_cls).item(), + "cls_acc": to_cpu(cls_acc).item(), + "recall50": to_cpu(recall50).item(), + "recall75": to_cpu(recall75).item(), + "precision": to_cpu(precision).item(), + "conf_obj": to_cpu(conf_obj).item(), + "conf_noobj": to_cpu(conf_noobj).item(), + "grid_size": grid_size, + } + + return output, total_loss + + +class Darknet(nn.Module): + """YOLOv3 object detection model""" + + def __init__(self, config_path, img_size=416): + super(Darknet, self).__init__() + self.module_defs = parse_model_config(config_path) + self.hyperparams, self.module_list = create_modules(self.module_defs) + self.yolo_layers = [layer[0] for layer in self.module_list if hasattr(layer[0], "metrics")] + self.img_size = img_size + self.seen = 0 + self.header_info = np.array([0, 0, 0, self.seen, 0], dtype=np.int32) + + def forward(self, x, targets=None): + img_dim = x.shape[2] + loss = 0 + layer_outputs, yolo_outputs = [], [] + for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)): + if module_def["type"] in ["convolutional", "upsample", "maxpool"]: + x = module(x) + elif module_def["type"] == "route": + x = torch.cat([layer_outputs[int(layer_i)] for layer_i in module_def["layers"].split(",")], 1) + elif module_def["type"] == "shortcut": + layer_i = int(module_def["from"]) + x = layer_outputs[-1] + layer_outputs[layer_i] + elif module_def["type"] == "yolo": + x, layer_loss = module[0](x, targets, img_dim) + loss += layer_loss + yolo_outputs.append(x) + layer_outputs.append(x) + yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1)) + return yolo_outputs if targets is None else (loss, yolo_outputs) + + def load_darknet_weights(self, weights_path): + """Parses and loads the weights stored in 'weights_path'""" + + # Open the weights file + with open(weights_path, "rb") as f: + header = np.fromfile(f, dtype=np.int32, count=5) # First five are header values + self.header_info = header # Needed to write header when saving weights + self.seen = header[3] # number of images seen during training + weights = np.fromfile(f, dtype=np.float32) # The rest are weights + + # Establish cutoff for loading backbone weights + cutoff = None + if "darknet53.conv.74" in weights_path: + cutoff = 75 + + ptr = 0 + 
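+        # Layout of the darknet weight file, as consumed by the loop below:
+        #   - 5 x int32 header (header[3] = number of images seen during training),
+        #   - then one flat float32 array holding, for every convolutional block in
+        #     module_defs order:
+        #       * with batch_normalize: BN bias, BN weight, BN running mean,
+        #         BN running variance, then the conv weights;
+        #       * without batch_normalize: conv bias, then the conv weights.
+        # `ptr` walks through the flat array and each slice is reshaped with
+        # view_as() to match the corresponding module parameter.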
for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)): + if i == cutoff: + break + if module_def["type"] == "convolutional": + conv_layer = module[0] + if module_def["batch_normalize"]: + # Load BN bias, weights, running mean and running variance + bn_layer = module[1] + num_b = bn_layer.bias.numel() # Number of biases + # Bias + bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.bias) + bn_layer.bias.data.copy_(bn_b) + ptr += num_b + # Weight + bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.weight) + bn_layer.weight.data.copy_(bn_w) + ptr += num_b + # Running Mean + bn_rm = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_mean) + bn_layer.running_mean.data.copy_(bn_rm) + ptr += num_b + # Running Var + bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_var) + bn_layer.running_var.data.copy_(bn_rv) + ptr += num_b + else: + # Load conv. bias + num_b = conv_layer.bias.numel() + conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(conv_layer.bias) + conv_layer.bias.data.copy_(conv_b) + ptr += num_b + # Load conv. weights + num_w = conv_layer.weight.numel() + conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(conv_layer.weight) + conv_layer.weight.data.copy_(conv_w) + ptr += num_w + + def save_darknet_weights(self, path, cutoff=-1): + """ + @:param path - path of the new weights file + @:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved) + """ + fp = open(path, "wb") + self.header_info[3] = self.seen + self.header_info.tofile(fp) + + # Iterate through layers + for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): + if module_def["type"] == "convolutional": + conv_layer = module[0] + # If batch norm, load bn first + if module_def["batch_normalize"]: + bn_layer = module[1] + bn_layer.bias.data.cpu().numpy().tofile(fp) + bn_layer.weight.data.cpu().numpy().tofile(fp) + bn_layer.running_mean.data.cpu().numpy().tofile(fp) + bn_layer.running_var.data.cpu().numpy().tofile(fp) + # Load conv bias + else: + conv_layer.bias.data.cpu().numpy().tofile(fp) + # Load conv weights + conv_layer.weight.data.cpu().numpy().tofile(fp) + + fp.close() + + +class ListDataset(Dataset): + def __init__(self, list_path, img_size=416, augment=True, multiscale=True, normalized_labels=True): + with open(list_path, "r") as file: + self.img_files = file.readlines() + + self.label_files = [ + # path.replace("images", "labels").replace(".png", ".txt").replace(".jpg", ".txt") + # just remove the postfix '.txt' + path.replace("/images/", "/labels/").rstrip() + '.txt' + for path in self.img_files + ] + self.img_size = img_size + self.max_objects = 100 + self.augment = augment + self.multiscale = multiscale + self.normalized_labels = normalized_labels + self.min_size = self.img_size - 3 * 32 + self.max_size = self.img_size + 3 * 32 + self.batch_count = 0 + + def __getitem__(self, index): + + # --------- + # Image + # --------- + + img_path = self.img_files[index % len(self.img_files)].rstrip() + + # Extract image as PyTorch tensor + img = transforms.ToTensor()(Image.open(img_path).convert('RGB')) + + # Handle images with less than three channels + if len(img.shape) != 3: + img = img.unsqueeze(0) + img = img.expand((3, img.shape[1:])) + + _, h, w = img.shape + h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1) + # Pad to square resolution + img, pad = pad_to_square(img, 0) + _, padded_h, padded_w = img.shape + + # 
--------- + # Label + # --------- + + label_path = self.label_files[index % len(self.img_files)].rstrip() + + targets = None + if os.path.exists(label_path): + boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5)) + # Extract coordinates for unpadded + unscaled image + x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2) + y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2) + x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2) + y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2) + # Adjust for added padding + x1 += pad[0] + y1 += pad[2] + x2 += pad[1] + y2 += pad[3] + # Returns (x, y, w, h) + boxes[:, 1] = ((x1 + x2) / 2) / padded_w + boxes[:, 2] = ((y1 + y2) / 2) / padded_h + boxes[:, 3] *= w_factor / padded_w + boxes[:, 4] *= h_factor / padded_h + + targets = torch.zeros((len(boxes), 6)) + targets[:, 1:] = boxes + + # Apply augmentations + if self.augment: + if np.random.random() < 0.5: + img, targets = horisontal_flip(img, targets) + + return img_path, img, targets + + def collate_fn(self, batch): + paths, imgs, targets = list(zip(*batch)) + # Remove empty placeholder targets + targets = [boxes for boxes in targets if boxes is not None] + # Add sample index to targets + for i, boxes in enumerate(targets): + boxes[:, 0] = i + targets = torch.cat(targets, 0) + # Selects new image size every tenth batch + if self.multiscale and self.batch_count % 10 == 0: + self.img_size = random.choice(range(self.min_size, self.max_size + 1, 32)) + # Resize images to input shape + imgs = torch.stack([resize(img, self.img_size) for img in imgs]) + self.batch_count += 1 + return paths, imgs, targets + + def __len__(self): + return len(self.img_files) + + +if __name__ == "__main__": + os.environ['WORKDIR_PATH'] = os.getcwd() + os.environ['PARAMS_DIR_PATH'] = os.getcwd() + + test_img_paths = [ + '/home/jiahua/food_all/三明治/99_sanmingzhi.jpg', + '/home/jiahua/food_all/三明治/0_sanmingzhi.jpg' + ] + imgs = dataset.load_images(test_img_paths) + + # # forkcloud format test + # imgs = [[np.array(imgs[0]).tolist()]] + + test_model_class(model_file_path=__file__, + model_class='FoodlgNet', + task='IMAGE_DETECTION', + dependencies={}, + train_dataset_path='/home/jiahua/singa-local/dataset.zip', + val_dataset_path='/home/jiahua/singa-local/dataset.zip', + train_args={}, + queries=imgs) diff --git a/examples/models/image_object_detection/onnx_tiny_yolov2.py b/examples/models/image_object_detection/onnx_tiny_yolov2.py index 2e20c447..2ac0f6d2 100644 --- a/examples/models/image_object_detection/onnx_tiny_yolov2.py +++ b/examples/models/image_object_detection/onnx_tiny_yolov2.py @@ -24,7 +24,7 @@ from singa_auto.model import BaseModel, utils from singa_auto.constants import ModelDependency -from singa_auto.model.dev import make_predictions, _check_model_class, _print_header, _check_dependencies, inform_user +from singa_auto.model.dev import make_predictions_json, _check_model_class, _print_header, _check_dependencies, inform_user from singa_auto.model.utils import load_model_class from singa_auto.advisor.constants import Proposal, ParamsType @@ -215,7 +215,7 @@ def softmax(x): proposal = Proposal(trial_no=0, knobs={}, params_type=ParamsType.LOCAL_RECENT) - (predictions, model_inst) = make_predictions(queries, task, + (predictions, model_inst) = make_predictions_json(queries, task, py_model_class, proposal, fine_tune_dataset_path=None, diff --git a/examples/models/image_segmentation/PyPandaResUnet.py b/examples/models/image_segmentation/PyPandaResUnet.py new file mode 100644 index 00000000..e1e01d70 --- /dev/null +++ 
b/examples/models/image_segmentation/PyPandaResUnet.py @@ -0,0 +1,549 @@ +import os + +os.environ['CUDA_VISIBLE_DEVICES'] = "0" + +import sys +sys.path.append(os.getcwd()) + + +import base64 +import json +import logging +import os +import tempfile +import zipfile +from collections.abc import Sequence +from copy import deepcopy +from io import BytesIO +from typing import List +from glob import glob + +import cv2 +import numpy as np +import PIL +import torch +import torch.nn as nn +import torchvision +from PIL import Image +from torch.utils.data import DataLoader, Dataset +from torch.utils.data.sampler import RandomSampler +from torchvision import models +from torchvision.transforms import functional as F +from torchvision.transforms.transforms import Pad, Resize +from tqdm import tqdm + +from singa_auto.model import SegmentationModel, CategoricalKnob, FixedKnob, utils +from singa_auto.model.knob import BaseKnob +# from singa_auto.utils.metrics import do_kaggle_metric + +from singa_auto.datasets.image_segmentation_dataset import * + + +# define model +def convrelu(in_channels, out_channels, kernel, padding): + return nn.Sequential( + nn.Conv2d(in_channels, out_channels, kernel, padding=padding), + nn.ReLU(inplace=True), + ) + + +class ResNetUNet(nn.Module): + def __init__(self, n_class): + super().__init__() + + self.base_model = models.resnet18(pretrained=False) + self.base_layers = list(self.base_model.children()) + + self.layer0 = nn.Sequential(*self.base_layers[:3]) # size=(N, 64, x.H/2, x.W/2) + self.layer0_1x1 = convrelu(64, 64, 1, 0) + self.layer1 = nn.Sequential(*self.base_layers[3:5]) # size=(N, 64, x.H/4, x.W/4) + self.layer1_1x1 = convrelu(64, 64, 1, 0) + self.layer2 = self.base_layers[5] # size=(N, 128, x.H/8, x.W/8) + self.layer2_1x1 = convrelu(128, 128, 1, 0) + self.layer3 = self.base_layers[6] # size=(N, 256, x.H/16, x.W/16) + self.layer3_1x1 = convrelu(256, 256, 1, 0) + self.layer4 = self.base_layers[7] # size=(N, 512, x.H/32, x.W/32) + self.layer4_1x1 = convrelu(512, 512, 1, 0) + + self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) + + self.conv_up3 = convrelu(256 + 512, 512, 3, 1) + self.conv_up2 = convrelu(128 + 512, 256, 3, 1) + self.conv_up1 = convrelu(64 + 256, 256, 3, 1) + self.conv_up0 = convrelu(64 + 256, 128, 3, 1) + + self.conv_original_size0 = convrelu(3, 64, 3, 1) + self.conv_original_size1 = convrelu(64, 64, 3, 1) + self.conv_original_size2 = convrelu(64 + 128, 64, 3, 1) + + self.conv_last = nn.Conv2d(64, n_class, 1) + + def forward(self, input): + x_original = self.conv_original_size0(input) + x_original = self.conv_original_size1(x_original) + + layer0 = self.layer0(input) + layer1 = self.layer1(layer0) + layer2 = self.layer2(layer1) + layer3 = self.layer3(layer2) + layer4 = self.layer4(layer3) + + layer4 = self.layer4_1x1(layer4) + x = self.upsample(layer4) + layer3 = self.layer3_1x1(layer3) + x = torch.cat([x, layer3], dim=1) + x = self.conv_up3(x) + + x = self.upsample(x) + layer2 = self.layer2_1x1(layer2) + x = torch.cat([x, layer2], dim=1) + x = self.conv_up2(x) + + x = self.upsample(x) + layer1 = self.layer1_1x1(layer1) + x = torch.cat([x, layer1], dim=1) + x = self.conv_up1(x) + + x = self.upsample(x) + layer0 = self.layer0_1x1(layer0) + x = torch.cat([x, layer0], dim=1) + x = self.conv_up0(x) + + x = self.upsample(x) + x = torch.cat([x, x_original], dim=1) + x = self.conv_original_size2(x) + + out = self.conv_last(x) + + return out + + +# pre-process: resize image to the target scale keeping aspect ratio then pad to square 
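+# Worked example (illustrative numbers): with target_length=512, an 800x600 (w x h)
+# image is first resized to 512x384 so the aspect ratio is kept, then padded with
+# 64 rows on top and 64 rows at the bottom to reach 512x512. The pad fill value of
+# 255 matches the ignore_index knob used below, so padded pixels in a mask are
+# excluded from the loss.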
+class ResizeSquarePad(Resize, Pad): + def __init__(self, target_length, interpolation_strategy): + if not isinstance(target_length, (int, Sequence)): + raise TypeError("Size should be int or sequence. Got {}".format(type(target_length))) + if isinstance(target_length, Sequence) and len(target_length) not in (1, 2): + raise ValueError("If size is a sequence, it should have 1 or 2 values") + + self.target_length = target_length + self.interpolation_strategy = interpolation_strategy + Resize.__init__(self, size=(512, 512), interpolation=self.interpolation_strategy) + Pad.__init__(self, padding=(0,0,0,0), fill=255, padding_mode="constant") + + + def __call__(self, img): + w, h = img.size + if w > h: + self.size = (int(np.round(self.target_length * (h / w))), self.target_length) + img = Resize.__call__(self, img) + + total_pad = self.size[1] - self.size[0] + half_pad = total_pad // 2 + self.padding = (0, half_pad, 0, total_pad - half_pad) + return Pad.__call__(self, img) + else: + self.size = (self.target_length, int(np.round(self.target_length * (w / h)))) + img = Resize.__call__(self, img) + + total_pad = self.size[0] - self.size[1] + half_pad = total_pad // 2 + self.padding = (half_pad, 0, total_pad - half_pad, 0) + return Pad.__call__(self, img) + + +logger = logging.getLogger(__name__) + + +# main process procedure +class PyPandaResUnet(SegmentationModel): + ''' + train UNet + ''' + @staticmethod + def get_knob_config(): + return { + # hyper parameters + "lr": FixedKnob(1e-4), + "ignore_index": FixedKnob(255), + "batch_size": FixedKnob(4), + "epoch": FixedKnob(2), + + # application parameters + # "num_classes": FixedKnob(1), + "fine_size": FixedKnob(512), + + } + + + def __init__(self, **knobs): + super().__init__(**knobs) + self._knobs = knobs + + self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') + print("self.device", self.device) + logger.info(self.device) + + self.model = None + + self.fine_size = self._knobs.get("fine_size") + + # define preprocessing procedure + self.transform_img = torchvision.transforms.Compose([ + ResizeSquarePad(self.fine_size, Image.BILINEAR), + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225]) + ]) + + self.transform_mask = torchvision.transforms.Compose([ + ResizeSquarePad(self.fine_size, Image.NEAREST) + ]) + + + def train(self, dataset_path, **kwargs): + # hyper parameters + self.batch_size = self._knobs.get("batch_size") + self.epoch = self._knobs.get("epoch") + snapshot = 2 + + self.lr = self._knobs.get("lr") + self.ignore_index = self._knobs.get("ignore_index") + + logger.info("Training params: {}".format(json.dumps(kwargs))) + + + # extract uploaded zipfile + dataset_zipfile = zipfile.ZipFile(dataset_path, 'r') + + train_folder = tempfile.TemporaryDirectory() + folder_name = train_folder.name + dataset_zipfile.extractall(path=folder_name) + + # load train params from zipfile + with open(os.path.join(folder_name, 'param.json'),'r') as load_f: + load_dict = json.load(load_f) + self.num_classes = load_dict["num_classes"] if "num_classes" in list(load_dict.keys()) else 21 # default class number(21) is the same as voc2012 + + # load images from zipfile + if os.path.isdir(os.path.join(folder_name, "image")): + print("split train/val subsets...") + logger.info("split train/val subsets...") + image_train, mask_train, image_val, mask_val = ImageFetch(folder_name) + self.num_image = len(image_train) + print("Total training images : ", self.num_image) + 
logger.info(f"Total training images : {self.num_image}") + elif os.path.isdir(os.path.join(folder_name, "train")): + print("directly load train/val datasets...") + logger.info("directly load train/val datasets...") + image_train, mask_train = trainImageFetch(folder_name) + image_val, mask_val = valImageFetch(folder_name) + self.num_image = len(image_train) + print("Total training images : ", self.num_image) + logger.info(f"Total training images : {self.num_image}") + else: + print("unsupported dataset format!") + logger.info("unsupported dataset format!") + + # load dataset + train_data = SegDataset(image_train, mask_train, self.transform_img, self.transform_mask) + val_data = SegDataset(image_val, mask_val, self.transform_img, self.transform_mask) + + logger.info("Training the model ResUNet using {}".format(self.device)) + print("Training the model ResUNet using {}".format(self.device)) + + # define training and validation data loaders + train_loader = DataLoader(train_data, + shuffle=RandomSampler(train_data), + batch_size=self.batch_size) + + val_loader = DataLoader(val_data, + shuffle=False, + batch_size=self.batch_size) + + # get the model using our helper function + self.model = ResNetUNet(self.num_classes) + self.model.to(self.device) + + self.criterion = nn.CrossEntropyLoss(ignore_index=self.ignore_index) + + self.optimizer_ft = torch.optim.Adam(filter(lambda p: p.requires_grad, self.model.parameters()), lr=self.lr) + self.exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer_ft, step_size=30, gamma=0.1) + + # start training + for epoch_ in range(self.epoch): + train_loss = self._train_one_epoch(train_loader, self.model) + val_loss, accuracy = self._evaluate(val_loader, self.model) + self.exp_lr_scheduler.step() + + print('epoch: {} train_loss: {:.3f} val_loss: {:.3f} val_accuracy: {:.3f}'.format(epoch_ + 1, train_loss, val_loss, accuracy)) + logger.info('epoch: {} train_loss: {:.3f} val_loss: {:.3f} val_accuracy: {:.3f}'.format(epoch_ + 1, train_loss, val_loss, accuracy)) + + + def _train_one_epoch(self, train_loader, model): + ''' + consider as a sub-train function inside singa-auto framework + ''' + running_loss = 0.0 + data_size = len(train_loader) + + model.train() + + for inputs, masks in tqdm(train_loader): + inputs, masks = inputs.to(self.device), masks.long().to(self.device) + self.optimizer_ft.zero_grad() + + logit = model(inputs) + + loss = self.criterion(logit, masks.squeeze(1)) # cross_entropy loss + loss.backward() + self.optimizer_ft.step() + running_loss += loss.item() * self.batch_size + + epoch_loss = running_loss / data_size + return epoch_loss + + def _evaluate(self, test_loader, model): + ''' + validation per epoch + ''' + running_loss = 0.0 + acc = 0.0 + data_size = len(test_loader) + + model.eval() + + with torch.no_grad(): + for inputs, masks in test_loader: + inputs, masks = inputs.to(self.device), masks.long().to(self.device) + + outputs = self.model(inputs) + + predict = torch.argmax(nn.Softmax(dim=1)(outputs), dim=1) # extract argmax as the final prediction + + # we do not consider the ignore_index + pure_mask = masks.masked_select(masks.ne(self.ignore_index)) + pure_predict = predict.masked_select(masks.ne(self.ignore_index)) + + acc += pure_mask.cpu().eq(pure_predict.cpu()).sum().item()/len(pure_mask) # find the correct piixels + + loss = self.criterion(outputs.squeeze(1), masks.squeeze(1)) + running_loss += loss.item() * inputs.size(0) + + epoch_loss = running_loss / data_size + accuracy = acc / data_size + return epoch_loss, accuracy + + 
+ def evaluate(self, val_dataset_path, **kwargs): + # extract validation datasets + dataset_zipfile = zipfile.ZipFile(val_dataset_path, 'r') + val_folder = tempfile.TemporaryDirectory() + dataset_zipfile.extractall(path=val_folder.name) + folder_name = val_folder.name + + if os.path.isdir(os.path.join(folder_name, "image")): + print("split train/val subsets...") + logger.info("split train/val subsets...") + image_train, mask_train, X_val, y_val = ImageFetch(folder_name) + self.num_image = len(X_val) + print("Total val images : ", self.num_image) + logger.info(f"Total val images : {self.num_image}") + elif os.path.isdir(os.path.join(folder_name, "train")): + print("directly load train/val datasets...") + logger.info("directly load train/val datasets...") + image_train, mask_train = trainImageFetch(folder_name) + X_val, y_val = valImageFetch(folder_name) + self.num_image = len(X_val) + print("Total val images : ", self.num_image) + logger.info(f"Total val images : {self.num_image}") + else: + print("unsupported dataset format!") + logger.info("unsupported dataset format!") + + val_data = SegDataset(X_val, y_val, self.transform_img, self.transform_mask) + + val_loader = DataLoader(val_data, + shuffle=False, + batch_size=4) + # compute MIoU metric(consider as accuracy) + temp_miou = {} + for i in range(self.num_classes): + temp_miou[i] = [0, 0.0] + + self.model.eval() + + with torch.no_grad(): + for inputs, masks in val_loader: + inputs, masks = inputs.to(self.device), masks.long().to(self.device) + + outputs = self.model(inputs) + + predict = torch.argmax(nn.Softmax(dim=1)(outputs), dim=1) + pure_mask = masks.masked_select(masks.ne(255)) + pure_predict = predict.masked_select(masks.ne(255)) + + for class_value in pure_mask.unique(): + valued_mask = pure_mask.masked_select(pure_mask.eq(class_value)) + real_len = len(valued_mask) + + valued_predict = pure_predict.masked_select(pure_mask.eq(class_value)) + cross_len = valued_mask.eq(valued_predict).sum().item() + + predict_len = len(pure_predict.masked_select(pure_predict.eq(class_value))) + + temp_miou[class_value.item()][1] += cross_len / (real_len + predict_len - cross_len) + temp_miou[class_value.item()][0] += 1 + + miou_overall = 0.0 + existed_classes = 0 + for key in temp_miou.keys(): + if temp_miou[key][0] != 0: + miou_overall += (temp_miou[key][1] / temp_miou[key][0]) + existed_classes += 1 + temp_miou['overall'] = [1, miou_overall / existed_classes] + + for key in temp_miou.keys(): + if temp_miou[key][0] != 0: + print(f"class {key} accuracy: {temp_miou[key][1] / temp_miou[key][0]}") + return temp_miou['overall'][1] + + + def dump_parameters(self): + params = {} + with tempfile.NamedTemporaryFile() as tmp: + # Save whole model to a tempfile + torch.save(self.model, tmp.name) + # Read from tempfile & encode it to base64 string + with open(tmp.name, 'rb') as f: + weight_base64 = f.read() + params['weight_base64'] = base64.b64encode(weight_base64).decode('utf-8') + return params + + + def load_parameters(self, params): + weight_base64 = params['weight_base64'] + + weight_base64_bytes = base64.b64decode(weight_base64.encode('utf-8')) + + self.model = torch.load(BytesIO(weight_base64_bytes), map_location=self.device) + + def _get_prediction(self, img): + + image = self.transform_img(img) + + image = image.to(self.device) + predict = self.model(image.unsqueeze(0)) + + predict = predict.squeeze(0) + predict = nn.Softmax(dim=0)(predict) + predict = torch.argmax(predict, dim=0) + + # transform result image into original size + w, h = img.size + 
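+        # Undo the square padding added by ResizeSquarePad: crop the padded rows
+        # (or columns) out of the fine_size x fine_size prediction, then resize the
+        # crop back to the original (w, h). Nearest-neighbour interpolation is used
+        # so that the integer class ids are preserved.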
if w > h: + re_h = int(np.round(self.fine_size * (h / w))) + total_pad = self.fine_size - re_h + half_pad = total_pad // 2 + out = predict[half_pad : half_pad + re_h, :] + else: + re_w = int(np.round(self.fine_size * (w / h))) + total_pad = self.fine_size - re_w + half_pad = total_pad // 2 + out = predict[:, half_pad : half_pad + re_w] + + out = cv2.resize(out.cpu().numpy(), (w, h), interpolation=cv2.INTER_NEAREST) + + return out + + + + def predict(self, queries: List[PIL.Image.Image]) -> List[dict]: + + result = list() + + for idx, img in enumerate(queries): + res_raw = self._get_prediction(img) + + # add color palette (we follow the VOC2012 color map ant the max num_class is 21) + res_raw = res_raw.astype(np.uint8) + res = Image.fromarray(res_raw) + palette = [] + for i in range(256): + palette.extend((i, i, i)) + palette[:3*21] = np.array([[0, 0, 0], + [128, 0, 0], + [0, 128, 0], + [128, 128, 0], + [0, 0, 128], + [128, 0, 128], + [0, 128, 128], + [128, 128, 128], + [64, 0, 0], + [192, 0, 0], + [64, 128, 0], + [192, 128, 0], + [64, 0, 128], + [192, 0, 128], + [64, 128, 128], + [192, 128, 128], + [0, 64, 0], + [128, 64, 0], + [0, 192, 0], + [128, 192, 0], + [0, 64, 128] + ], dtype='uint8').flatten() + res.putpalette(palette) + + name = f"./query_{idx}.png" + res.save(name) + + result.append(name) + + return result + + +if __name__ == "__main__": + import argparse + + from singa_auto.model.dev import test_model_class + parser = argparse.ArgumentParser() + parser.add_argument('--train_path', + type=str, + default='/home/taomingyang/dataset/package/voc2012_mini.zip', + help='Path to train dataset') + parser.add_argument('--val_path', + type=str, + default='/home/taomingyang/dataset/package/voc2012_mini.zip', + help='Path to validation dataset') + + # parser.add_argument('--annotation_dataset_path', + # type=str, + # default='./dataset/voc2012/val2014.zip', + # help='Path to validation dataset') + + # parser.add_argument('--test_path', + # type=str, + # default='/hdd1/PennFudanPed.zip', + # help='Path to test dataset') + parser.add_argument('--query_path', + type=str, + default='/home/taomingyang/git/singa_auto_hub/examples/data/image_segmentaion/2007_000862.jpg,/home/taomingyang/git/singa_auto_hub/examples/data/image_segmentaion/2007_001397.jpg', + help='Path(s) to query image(s), delimited by commas') + + (args, _) = parser.parse_known_args() + + # print(args.query_path.split(',')) + + queries = utils.dataset.load_images(args.query_path.split(',')) + test_model_class(model_file_path=__file__, + model_class='PyPandaResUnet', + task='IMAGE_SEGMENTATION', + dependencies={"torch": "1.6.0+cu101", + "torchvision": "0.7.0+cu101", + "opencv-python": "4.4.0.46", + "tqdm": "4.28.0"}, + train_dataset_path=args.train_path, + val_dataset_path=args.val_path, + test_dataset_path=None, + train_args={"num_classes": 21}, + queries=queries) diff --git a/examples/models/image_segmentation/SaDeeplab.py b/examples/models/image_segmentation/SaDeeplab.py new file mode 100644 index 00000000..d829aafe --- /dev/null +++ b/examples/models/image_segmentation/SaDeeplab.py @@ -0,0 +1,714 @@ +import os +os.environ['CUDA_VISIBLE_DEVICES'] = "0, 1, 2, 3" + +import sys +sys.path.append(os.getcwd()) + +import base64 +import json +import logging +import tempfile +import zipfile +from collections.abc import Sequence +from copy import deepcopy +from io import BytesIO +from typing import List +from glob import glob +from time import time +import requests +from singa_auto.model import BaseModel, CategoricalKnob, FixedKnob, utils 
+from singa_auto.model.knob import BaseKnob + + +import tensorflow as tf + +tf.random.set_seed(100) + +from tensorflow import keras +from tensorflow.keras import layers +import numpy as np +from tensorflow.keras.preprocessing.image import load_img +import random +import cv2 +from PIL import Image +import h5py +from tensorflow.python.keras.saving import hdf5_format + +class OxfordPets(keras.utils.Sequence): + """Helper to iterate over the data (as Numpy arrays).""" + + def __init__(self, batch_size, img_size, input_img_paths, target_img_paths): + self.batch_size = batch_size + self.img_size = img_size + self.input_img_paths = input_img_paths + self.target_img_paths = target_img_paths + + def __len__(self): + return len(self.target_img_paths) // self.batch_size + + def __getitem__(self, idx): + """Returns tuple (input, target) correspond to batch #idx.""" + i = idx * self.batch_size + batch_input_img_paths = self.input_img_paths[i : i + self.batch_size] + batch_target_img_paths = self.target_img_paths[i : i + self.batch_size] + x = np.zeros((self.batch_size,) + (self.img_size, self.img_size) + (3,), dtype="float32") + for j, path in enumerate(batch_input_img_paths): + img = load_img(path, target_size=(self.img_size, self.img_size)) + x[j] = img + # y = np.zeros((self.batch_size,) + self.img_size + (1,), dtype="uint8") + # for j, path in enumerate(batch_target_img_paths): + # img = load_img(path, target_size=self.img_size, color_mode="grayscale") + # y[j] = np.expand_dims(img, 2) + # # Ground truth labels are 1, 2, 3. Subtract one to make them 0, 1, 2: + # y[j] -= 1 + + y = np.zeros((self.batch_size,) + (self.img_size * self.img_size,) + (1,), dtype="uint8") + for j, path in enumerate(batch_target_img_paths): + img = load_img(path, target_size=(self.img_size, self.img_size), color_mode="grayscale") + img = np.array(img).flatten() + y[j] = np.expand_dims(img, 1) + # Ground truth labels are 1, 2, 3. 
Subtract one to make them 0, 1, 2: (already update dataset, this operation has expired) + # y[j] -= 1 + + sample_weight = np.zeros((self.batch_size,) + (self.img_size * self.img_size,), dtype="float32") + for k in range(self.batch_size): + unique_class = np.unique(y) + if len(unique_class): + class_weights = {class_id: 1.0 for class_id in unique_class} + class_weights[unique_class[-1]] = 0.0 + for yy in unique_class: + np.putmask(sample_weight[k], y[k]==yy, class_weights[yy]) + np.putmask(sample_weight[k], y[k]==unique_class[-1], class_weights[unique_class[-1]]) + + return x, y, sample_weight + + +def Bottleneck(input_shape, output_channels, stride=1, dilation=1): + ''' + a classic residual convolution module + ''' + inputs = tf.keras.Input(input_shape) + residual = inputs + + # residual conv branch + x = layers.Conv2D(output_channels, (1, 1), padding='same', use_bias=False)(residual) + x = layers.BatchNormalization()(x) + x = layers.ReLU()(x) + + x = layers.Conv2D(output_channels, (3, 3), strides=(stride, stride), padding='same', + dilation_rate=(dilation, dilation), use_bias=False)(x) + x = layers.BatchNormalization()(x) + x = layers.ReLU()(x) + + x = layers.Conv2D(output_channels * 4, (1, 1), padding='same', use_bias=False)(x) + x = layers.BatchNormalization()(x) + + # original branch + if stride != 1 or inputs.shape[-1] != x.shape[-1]: + residual = layers.Conv2D(output_channels * 4, (1, 1), padding='same', + strides=(stride, stride), use_bias=False)(residual) + residual = layers.BatchNormalization()(residual) + + # merge two branches + x = layers.Add()([x, residual]) + x = layers.ReLU()(x) + + # export model + return keras.Model(inputs=inputs, outputs=x) + + +def ResNetAtrous(layer_num=[3, 4, 6, 3], dilations=[1, 2, 1]): + ''' + an atrous conv version resnet50 model + ''' + inputs = keras.Input((None, None, 3)) + strides = [2, 1, 1] + + # conv + x = layers.Conv2D(64, (7, 7), (2, 2), padding='same', use_bias=False)(inputs) + x = layers.BatchNormalization()(x, training=False) + x = layers.ReLU()(x) + + # down-sampling + x = layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x) + + # resblock 1 + for i in range(layer_num[0]): + x = Bottleneck(x.shape[1:], 64, stride=1, dilation=1)(x) + low = x # save low level features + + # resblock 2 + for i in range(layer_num[1]): + x = Bottleneck(x.shape[1:], 128, stride=strides[0] if i == 0 else 1, dilation=1)(x) + + # resblock 3 + for i in range(layer_num[2]): + x = Bottleneck(x.shape[1:], 256, stride=strides[1] if i == 0 else 1, dilation=1)(x) + + # resblock 4 + for i in range(layer_num[3]): + x = Bottleneck(x.shape[1:], 512, stride=strides[2] if i == 0 else 1, dilation=dilations[i])(x) + high = x + + return keras.Model(inputs=inputs, outputs=(low, high)) + + +def ASPP(input_channels): + inputs = layers.Input((None, None, input_channels)) + + # global pooling + global_mean = layers.Lambda(lambda x: tf.math.reduce_mean(x, [1, 2], keepdims=True))(inputs) # size (b, 1, 1, c) + + global_mean = layers.Conv2D(256, (1, 1), padding='same', + kernel_initializer=tf.keras.initializers.he_normal(), use_bias=False)(global_mean) + global_mean = layers.BatchNormalization()(global_mean) + global_mean = layers.ReLU()(global_mean) # size (b, 1, 1, 256) + + global_mean = layers.Lambda(lambda x: tf.image.resize(x[0], (tf.shape(x[1])[1], tf.shape(x[1])[2]), + method=tf.image.ResizeMethod.BILINEAR))([global_mean, inputs]) # size (b, h, w, 256) + + # dilation with rate 1 + dilated_1 = layers.Conv2D(256, (1, 1), dilation_rate=1, padding='same', + 
kernel_initializer=tf.keras.initializers.he_normal(), use_bias=False)(inputs) + dilated_1 = layers.BatchNormalization()(dilated_1) + dilated_1 = layers.ReLU()(dilated_1) + + # dilation with rate 6 + dilated_6 = layers.Conv2D(256, (3, 3), dilation_rate=6, padding='same', + kernel_initializer=tf.keras.initializers.he_normal(), use_bias=False)(inputs) + dilated_6 = layers.BatchNormalization()(dilated_6) + dilated_6 = layers.ReLU()(dilated_6) + + # dilation with rate 12 + dilated_12 = layers.Conv2D(256, (3, 3), dilation_rate=12, padding='same', + kernel_initializer=tf.keras.initializers.he_normal(), use_bias=False)(inputs) + dilated_12 = layers.BatchNormalization()(dilated_12) + dilated_12 = layers.ReLU()(dilated_12) + + # dilation with rate 18 + dilated_18 = layers.Conv2D(256, (3, 3), dilation_rate=18, padding='same', + kernel_initializer=tf.keras.initializers.he_normal(), use_bias=False)(inputs) + dilated_18 = layers.BatchNormalization()(dilated_18) + dilated_18 = layers.ReLU()(dilated_18) + + # concate pyramid + x = layers.Concatenate(axis=-1)([global_mean, dilated_1, dilated_6, dilated_12, dilated_18]) + x = layers.Conv2D(256, (1, 1), padding='same', + kernel_initializer=tf.keras.initializers.he_normal(), use_bias=False)(x) + x = layers.BatchNormalization()(x) + x = layers.ReLU()(x) + + return keras.Model(inputs=inputs, outputs=x) + + +def DeepLabV3Plus(img_size, n_classes): + inputs = keras.Input(shape=img_size + (3,)) + # inputs = keras.Input((None, None, 3)) + + low, high = ResNetAtrous(layer_num=[3, 4, 6, 3], dilations=[1, 2, 1])(inputs) + + # modify low level feature channel number + low = layers.Conv2D(48, (1, 1), padding='same', + kernel_initializer=keras.initializers.he_normal(), use_bias=False)(low) + low = layers.BatchNormalization()(low) + low = layers.ReLU()(low) # size (b, h/4, w/4, 48) + + # pass high level feature into ASPP module + high = ASPP(high.shape[-1])(high) # size (b, h/8, w/8, 256) + high = layers.Lambda(lambda x: tf.image.resize(x[0], (tf.shape(x[1])[1], tf.shape(x[1])[2]), + method = tf.image.ResizeMethod.BILINEAR))([high, low]); + # concate and modify channel + x = layers.Concatenate(axis=-1)([high, low]) # size (b, h/4, w/4, 304) + + x = layers.Conv2D(256, (3, 3), padding='same', activation='relu', + kernel_initializer=keras.initializers.he_normal(), use_bias=False)(x) + x = layers.BatchNormalization()(x) + x = layers.ReLU()(x) + + x = layers.Conv2D(256, (3, 3), padding='same', activation='relu', + kernel_initializer=keras.initializers.he_normal(), use_bias=False)(x) + x = layers.BatchNormalization()(x) + x = layers.ReLU()(x) + + # upsampling + x = layers.Lambda(lambda x: tf.image.resize(x[0], (tf.shape(x[1])[1], tf.shape(x[1])[2]), + method = tf.image.ResizeMethod.BILINEAR))([x, inputs]) + + + # full conv + x = layers.Conv2D(n_classes, (1,1), padding='same', activation=keras.activations.softmax, + name = 'full_conv')(x) + + # flatten + x = layers.Reshape((img_size[0] * img_size[1], n_classes))(x) + + + return keras.Model(inputs=inputs, outputs=x) + + +class CustomModel(keras.Model): + def train_step(self, data): + # Unpack the data. Its structure depends on your model and + # on what you pass to `fit()`. + if len(data) == 3: + x, y, sample_weight = data + else: + sample_weight = None + x, y = data + + with tf.GradientTape() as tape: + y_pred = self(x, training=True) # Forward pass + # Compute the loss value. + # The loss function is configured in `compile()`. 
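+            # sample_weight comes from OxfordPets.__getitem__: every pixel gets
+            # weight 1.0 except pixels carrying the largest class id present in the
+            # batch, which get weight 0.0, so those pixels contribute neither to the
+            # loss nor to the compiled metrics updated below.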
+ loss = self.compiled_loss( + y, + y_pred, + sample_weight=sample_weight, + regularization_losses=self.losses, + ) + + # Compute gradients + trainable_vars = self.trainable_variables + gradients = tape.gradient(loss, trainable_vars) + + # Update weights + self.optimizer.apply_gradients(zip(gradients, trainable_vars)) + + # Update the metrics. + # Metrics are configured in `compile()`. + self.compiled_metrics.update_state(y, y_pred, sample_weight=sample_weight) + + # Return a dict mapping metric names to current value. + # Note that it will include the loss (tracked in self.metrics). + return {m.name: m.result() for m in self.metrics} + + +logger = logging.getLogger(__name__) + + +class SaDeeplab(BaseModel): + ''' + train deeplab + ''' + @staticmethod + def get_knob_config(): + return { + # hyper parameters + "lr": FixedKnob(1e-4), + "batch_size": FixedKnob(2), + "epoch": FixedKnob(1), + + # application parameters + # "num_classes": FixedKnob(1), + "fine_size": FixedKnob(160), + "train_val_split_rate": FixedKnob(0.9), + + } + + + def __init__(self, **knobs): + super().__init__(**knobs) + self._knobs = knobs + + self.model = None + + self.fine_size = self._knobs.get("fine_size") + self.split_rate = self._knobs.get("train_val_split_rate") + + + def train(self, dataset_path, **kwargs): + # hyper parameters + self.batch_size = self._knobs.get("batch_size") + self.epoch = self._knobs.get("epoch") + + self.lr = self._knobs.get("lr") + + + logger.info("Training params: {}".format(json.dumps(kwargs))) + + + # extract uploaded zipfile + dataset_zipfile = zipfile.ZipFile(dataset_path, 'r') + + train_folder = tempfile.TemporaryDirectory() + folder_name = train_folder.name + dataset_zipfile.extractall(path=folder_name) + + # load train params from zipfile + with open(os.path.join(folder_name, 'param.json'),'r') as load_f: + load_dict = json.load(load_f) + self.num_classes = load_dict["num_classes"] if "num_classes" in list(load_dict.keys()) else 21 # default class number(21) is the same as voc2012 + print(f"total number of classes: {self.num_classes}") + logger.info(f"total number of classes: {self.num_classes}") + + # load images from zipfile + if os.path.isdir(os.path.join(folder_name, "image")): + print("split train/val subsets...") + logger.info("split train/val subsets...") + + # load image and mask seperately + input_img_paths = sorted( + [ + os.path.join(folder_name, "image", fname) + for fname in os.listdir(os.path.join(folder_name, "image")) + ] + ) + target_img_paths = sorted( + [ + os.path.join(folder_name, "mask", fname) + for fname in os.listdir(os.path.join(folder_name, "mask")) + ] + ) + self.num_image = len(input_img_paths) + print("Total image number: ", self.num_image) + logger.info(f"Total image number : {self.num_image}") + + # split train/val + val_samples = int((1 - self.split_rate) * self.num_image) + # random.Random(1337).shuffle(input_img_paths) + # random.Random(1337).shuffle(target_img_paths) + train_input_img_paths = input_img_paths[:-val_samples] + train_target_img_paths = target_img_paths[:-val_samples] + val_input_img_paths = input_img_paths[-val_samples:] + val_target_img_paths = target_img_paths[-val_samples:] + + print(f"train images: {len(train_input_img_paths)}, val images: {len(val_input_img_paths)}") + logger.info(f"train images: {len(train_input_img_paths)}, val images: {len(val_input_img_paths)}") + + elif os.path.isdir(os.path.join(folder_name, "train")): + print("directly load train/val datasets...") + logger.info("directly load train/val datasets...") + + # 
load image and mask separately + train_input_img_paths = sorted( + [ + os.path.join(folder_name, "train", "image", fname) + for fname in os.listdir(os.path.join(folder_name, "train", "image")) + ] + ) + train_target_img_paths = sorted( + [ + os.path.join(folder_name, "train", "mask", fname) + for fname in os.listdir(os.path.join(folder_name, "train", "mask")) + ] + ) + + val_input_img_paths = sorted( + [ + os.path.join(folder_name, "val", "image", fname) + for fname in os.listdir(os.path.join(folder_name, "val", "image")) + ] + ) + val_target_img_paths = sorted( + [ + os.path.join(folder_name, "val", "mask", fname) + for fname in os.listdir(os.path.join(folder_name, "val", "mask")) + ] + ) + self.num_image = len(train_input_img_paths) + len(val_input_img_paths) + print("Total image number: ", self.num_image) + logger.info(f"Total image number: {self.num_image}") + + print(f"train images: {len(train_input_img_paths)}, val images: {len(val_input_img_paths)}") + logger.info(f"train images: {len(train_input_img_paths)}, val images: {len(val_input_img_paths)}") + else: + print("unsupported dataset format!") + logger.info("unsupported dataset format!") + + # load dataset + train_gen = OxfordPets( + self.batch_size, self.fine_size, train_input_img_paths, train_target_img_paths + ) + val_gen = OxfordPets(self.batch_size, self.fine_size, val_input_img_paths, val_target_img_paths) + + logger.info("Training the model DeeplabV3+ using {}".format("cuda" if tf.test.is_gpu_available() else "cpu")) + print("Training the model DeeplabV3+ using {}".format("cuda" if tf.test.is_gpu_available() else "cpu")) + + # clear session buffer + keras.backend.clear_session() + + # get the model using our helper function + inputs = keras.Input(shape=(self.fine_size, self.fine_size) + (3,)) + outputs = DeepLabV3Plus((self.fine_size, self.fine_size), self.num_classes)(inputs) + self.model = CustomModel(inputs, outputs) + self.model.summary() + + # compile model with optimizer, loss and metrics
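A note on the data pipeline before the model is compiled and fitted below: `OxfordPets` is not shown in this hunk (it is presumably defined earlier in the file or imported) and is assumed to be a `keras.utils.Sequence` yielding (image, mask) batches at `fine_size` resolution. A purely illustrative stand-in for that assumed interface, not the author's implementation:

import numpy as np
from tensorflow import keras
from tensorflow.keras.preprocessing.image import img_to_array, load_img

class PairSequence(keras.utils.Sequence):
    """Hypothetical loader with the same constructor signature as OxfordPets above."""

    def __init__(self, batch_size, img_size, input_img_paths, target_img_paths):
        self.batch_size = batch_size
        self.img_size = (img_size, img_size)
        self.input_img_paths = input_img_paths
        self.target_img_paths = target_img_paths

    def __len__(self):
        return len(self.input_img_paths) // self.batch_size

    def __getitem__(self, idx):
        i = idx * self.batch_size
        xs = [img_to_array(load_img(p, target_size=self.img_size))
              for p in self.input_img_paths[i:i + self.batch_size]]
        # NOTE: the exact mask shape must line up with the reshaped model output above.
        ys = [img_to_array(load_img(p, target_size=self.img_size, color_mode="grayscale"))
              for p in self.target_img_paths[i:i + self.batch_size]]
        return np.stack(xs), np.stack(ys)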
+ self.model.compile(optimizer="rmsprop", loss="sparse_categorical_crossentropy", metrics=["sparse_categorical_accuracy"]) + callbacks = [ + # keras.callbacks.ModelCheckpoint("oxford_segmentation.h5", save_best_only=True), + keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5, min_lr=1e-6) + ] + + # start training + self.model.fit(train_gen, epochs=self.epoch, validation_data=val_gen, callbacks=callbacks) + + + def evaluate(self, val_dataset_path, **kwargs): + # extract validation datasets + dataset_zipfile = zipfile.ZipFile(val_dataset_path, 'r') + val_folder = tempfile.TemporaryDirectory() + dataset_zipfile.extractall(path=val_folder.name) + folder_name = val_folder.name + + if os.path.isdir(os.path.join(folder_name, "image")): + print("split train/val subsets...") + logger.info("split train/val subsets...") + + # load image and mask seperately + input_img_paths = sorted( + [ + os.path.join(folder_name, "image", fname) + for fname in os.listdir(os.path.join(folder_name, "image")) + ] + ) + target_img_paths = sorted( + [ + os.path.join(folder_name, "mask", fname) + for fname in os.listdir(os.path.join(folder_name, "mask")) + ] + ) + + # split train/val + val_samples = int((1 - self.split_rate) * self.num_image) + # random.Random(1337).shuffle(input_img_paths) + # random.Random(1337).shuffle(target_img_paths) + train_input_img_paths = input_img_paths[:-val_samples] + train_target_img_paths = target_img_paths[:-val_samples] + val_input_img_paths = input_img_paths[-val_samples:] + val_target_img_paths = target_img_paths[-val_samples:] + + elif os.path.isdir(os.path.join(folder_name, "train")): + print("directly load train/val datasets...") + logger.info("directly load train/val datasets...") + + # load image and mask seperately + train_input_img_paths = sorted( + [ + os.path.join(folder_name, "train", "image", fname) + for fname in os.listdir(os.path.join(folder_name, "train", "image")) + ] + ) + train_target_img_paths = sorted( + [ + os.path.join(folder_name, "train", "mask", fname) + for fname in os.listdir(os.path.join(folder_name, "train", "mask")) + ] + ) + + val_input_img_paths = sorted( + [ + os.path.join(folder_name, "val", "image", fname) + for fname in os.listdir(os.path.join(folder_name, "val", "image")) + ] + ) + val_target_img_paths = sorted( + [ + os.path.join(folder_name, "val", "mask", fname) + for fname in os.listdir(os.path.join(folder_name, "val", "mask")) + ] + ) + + else: + print("unsupported dataset format!") + logger.info("unsupported dataset format!") + + val_gen = OxfordPets(self.batch_size, self.fine_size, val_input_img_paths, val_target_img_paths) + + loss, accuracy = self.model.evaluate(val_gen) + + + return accuracy + + + def dump_parameters(self): + params = {} + with tempfile.NamedTemporaryFile(suffix=".h5") as tmp: + + # Save whole model to a tempfile + self.model.save_weights(tmp.name) + # Read from tempfile & encode it to base64 string + # with h5py.File(tmp.name, 'r') as f: + # if 'layer_names' not in f.attrs and 'model_weights' in f: + # weights_h5 = f['model_weights'] + with open(tmp.name, 'rb') as f: + weight_base64 = f.read() + params['weight_base64'] = base64.b64encode(weight_base64).decode('utf-8') + params['num_classes'] = self.num_classes + return params + + + def load_parameters(self, params): + weight_base64 = params['weight_base64'] + self.num_classes = params['num_classes'] + + weight_base64_bytes = base64.b64decode(weight_base64.encode('utf-8')) + + # state_dict = torch.load(BytesIO(weight_base64_bytes), map_location=self.device) + + 
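Aside on the parameter serialisation used in `dump_parameters` above and `load_parameters` here: the HDF5 weights file is simply passed through base64. A compact sketch of the same round-trip that stays on the public `save_weights`/`load_weights` API (helper names are illustrative, not part of the class); it avoids the `h5py`/`hdf5_format.load_weights_from_hdf5_group` call below, which reaches into a private Keras module:

import base64
import tempfile

def weights_to_b64(model):
    # Serialise Keras weights to an HDF5 tempfile, then base64-encode the raw bytes.
    with tempfile.NamedTemporaryFile(suffix=".h5") as tmp:
        model.save_weights(tmp.name)
        with open(tmp.name, "rb") as f:
            return base64.b64encode(f.read()).decode("utf-8")

def b64_to_weights(model, weight_b64):
    # Decode back to bytes, write them to a tempfile and let Keras read it.
    raw = base64.b64decode(weight_b64.encode("utf-8"))
    with tempfile.NamedTemporaryFile(suffix=".h5") as tmp:
        tmp.write(raw)
        tmp.flush()
        model.load_weights(tmp.name)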
inputs = keras.Input(shape=(self.fine_size, self.fine_size) + (3,)) + outputs = DeepLabV3Plus((self.fine_size, self.fine_size), self.num_classes)(inputs) + self.model = CustomModel(inputs, outputs) + + # weight_h5 = h5py.File(BytesIO(weight_base64_bytes)) + with h5py.File(BytesIO(weight_base64_bytes), 'r') as f: + hdf5_format.load_weights_from_hdf5_group(f, self.model.layers) + + # self.model.load_weights(weight_h5) + + + def _get_prediction(self, img): + + image = cv2.resize(img.astype('float32'), (self.fine_size, self.fine_size)) + print("+"*30) + print(image.shape) + image = np.expand_dims(image, axis=0) + predict = self.model.predict(image) + + mask = np.argmax(predict, axis=-1) + mask = np.expand_dims(mask, axis=-1) + mask = np.reshape(mask, (160, 160)) + + h, w = image.shape[:2] + + # transform result image into original size + mask_out = cv2.resize(mask.astype(np.uint8), (w, h), cv2.INTER_NEAREST) + + + return mask_out + + + def predict(self, queries: List[List]) -> List[dict]: + # print(len(queries)) + result = list() + + # depending on different input types, need different conditions + for idx, img in enumerate(queries): + logger.info(type(img)) + if isinstance(img, List): + print(len(img)) + img = np.array(img[0]) + print(img.shape) + img_file = img + # print(type(img_file)) + elif isinstance(img, Image.Image): + img_file = np.array(img) + else: + img_data = img + + # get prediction + res_raw = self._get_prediction(img_file) + + # add color palette (we follow the VOC2012 color map and the max num_class is 21) + res_raw = res_raw.astype(np.uint8) + res = Image.fromarray(res_raw) + palette = [] + for i in range(256): + palette.extend((i, i, i)) + palette[:3*21] = np.array([[0, 0, 0], + [128, 0, 0], + [0, 128, 0], + [128, 128, 0], + [0, 0, 128], + [128, 0, 128], + [0, 128, 128], + [128, 128, 128], + [64, 0, 0], + [192, 0, 0], + [64, 128, 0], + [192, 128, 0], + [64, 0, 128], + [192, 0, 128], + [64, 128, 128], + [192, 128, 128], + [0, 64, 0], + [128, 64, 0], + [0, 192, 0], + [128, 192, 0], + [0, 64, 128] + ], dtype='uint8').flatten() + res.putpalette(palette) + + name = f"./query_{idx}.png" + res.save(name) + full_name = os.path.abspath(name) + + buffered = BytesIO() + res.save(buffered, format="PNG") + img_str = base64.b64encode(buffered.getvalue()) + + + result.append(img_str.decode('utf-8')) + + # result.append(requests.get('http://192.168.100.203:36667/fetch').text) + + return result + +if __name__ == "__main__": + import argparse + + from singa_auto.model.dev import test_model_class + parser = argparse.ArgumentParser() + parser.add_argument('--train_path', + type=str, + default='./dataset/oxford_pets/datasets.zip', + help='Path to train dataset') + parser.add_argument('--val_path', + type=str, + default='./dataset/oxford_pets/datasets.zip', + help='Path to validation dataset') + + # parser.add_argument('--annotation_dataset_path', + # type=str, + # default='./dataset/voc2012/val2014.zip', + # help='Path to validation dataset') + + # parser.add_argument('--test_path', + # type=str, + # default='/hdd1/PennFudanPed.zip', + # help='Path to test dataset') + parser.add_argument( + '--query_path', + type=str, + default='/home/zhaozixiao/projects/singa_local/singa-auto/dataset/oxford_pets/Persian_120.jpg,/home/zhaozixiao/projects/singa_local/singa-auto/dataset/oxford_pets/pomeranian_159.jpg', + help='Path(s) to query image(s), delimited by commas' + ) + + (args, _) = parser.parse_known_args() + + # print(args.query_path.split(',')) + + imgs = 
utils.dataset.load_images(args.query_path.split(',')) + img_nps = [] + for i in imgs: + img = np.array(i) + img_nps.append(img) + + queries = img_nps + test_model_class(model_file_path=__file__, + model_class='SaDeeplab', + task='IMAGE_SEGMENTATION', + dependencies={ + "tensorflow": "2.3.0", + "opencv": "3.4.2.16", + }, + train_dataset_path=args.train_path, + val_dataset_path=args.val_path, + test_dataset_path=None, + train_args={"num_classes": 3}, + queries=img_nps) + + + + + + + + + + + + + + + diff --git a/examples/models/image_segmentation/SaUNetBorderLoss.py b/examples/models/image_segmentation/SaUNetBorderLoss.py new file mode 100644 index 00000000..aae6180b --- /dev/null +++ b/examples/models/image_segmentation/SaUNetBorderLoss.py @@ -0,0 +1,847 @@ +import os + +os.environ['CUDA_VISIBLE_DEVICES'] = "4, 5, 6, 7" + +import sys +sys.path.append(os.getcwd()) + +import base64 +import json +import logging +import os +import tempfile +import zipfile +from collections.abc import Sequence +from collections import defaultdict +from copy import deepcopy +from io import BytesIO +from typing import List +from glob import glob +from time import time +import requests + +import cv2 +import numpy as np +import PIL +import torch +import torch.nn as nn +import torchvision +from PIL import Image +from torch.utils.data import DataLoader, Dataset +from torch.utils.data.sampler import RandomSampler +from torchvision import models +from torchvision.transforms import functional as F +from torchvision.transforms.transforms import Pad, Resize +from tqdm import tqdm +from torch.nn import DataParallel + +from singa_auto.model import SegmentationModel, CategoricalKnob, FixedKnob, utils +from singa_auto.model.knob import BaseKnob +# from singa_auto.utils.metrics import do_kaggle_metric + +# from singa_auto.datasets.image_segmentation_dataset import * + + +# dataset fetch +def ImageFetch(img_folder, split_rate=0.9): + img_train = [] + mask_train = [] + img_val = [] + mask_val = [] + + image_folder = os.path.join(img_folder, "image") + mask_folder = os.path.join(img_folder, "mask") + + img_list = os.listdir(image_folder) + total_img_num = len(img_list) + print(f'Total number of images: {total_img_num}') + + train_num = int(total_img_num * split_rate) + for idx, image_name in tqdm(enumerate(img_list[:train_num]), total=train_num, desc="load train images......"): + image_path = os.path.join(image_folder, image_name) + mask_path = os.path.join(mask_folder, image_name.split('.')[0] + ".png") + + image = Image.open(image_path) + img_train.append(image) + + mask = Image.open(mask_path) + mask_train.append(mask) + for idx, image_name in tqdm(enumerate(img_list[train_num:]), total=(total_img_num - train_num), desc="load val images......"): + image_path = os.path.join(image_folder, image_name) + mask_path = os.path.join(mask_folder, image_name.split('.')[0] + ".png") + + image = Image.open(image_path) + img_val.append(image) + + mask = Image.open(mask_path) + mask_val.append(mask) + + return img_train, mask_train, img_val, mask_val + + +def trainImageFetch(train_folder): + image_train = [] + mask_train = [] + + # load images and masks from their folders + images_folder = os.path.join(train_folder, "image") + masks_folder = os.path.join(train_folder, "mask") + + image_list = os.listdir(images_folder) + for idx, image_name in tqdm(enumerate(image_list), total=len(image_list), desc="load train images......"): + image_path = os.path.join(images_folder, image_name) + mask_path = os.path.join(masks_folder, 
image_name.split('.')[0] + ".png") + + image = Image.open(image_path) + image_train.append(image) + + mask = Image.open(mask_path) + mask_train.append(mask) + + return image_train, mask_train + + +def valImageFetch(val_folder): + image_val = [] + mask_val = [] + + images_folder = os.path.join(val_folder, "image") + masks_folder = os.path.join(val_folder, "mask") + + image_list = os.listdir(images_folder) + for idx, image_name in tqdm(enumerate(image_list), total=len(image_list), desc="load validation images......"): + image_path = os.path.join(images_folder, image_name) + mask_path = os.path.join(masks_folder, image_name.split('.')[0] + ".png") + + image = Image.open(image_path) + image_val.append(image) + + mask = Image.open(mask_path) + mask_val.append(mask) + + return image_val, mask_val + + +class SegDataset(Dataset): + def __init__(self, image_list, mask_list, mode, transform_img, transform_mask, transform_border): + self.mode = mode + self.transform_img = transform_img + self.transform_mask = transform_mask + self.transform_border = transform_border + self.imagelist = image_list + self.masklist = mask_list + + + def __len__(self): + return len(self.imagelist) + + + def __getitem__(self, idx): + image = deepcopy(self.imagelist[idx]) + + if self.mode == 'train': + mask = deepcopy(self.masklist[idx]) + + mask_arr = np.array(mask) + border = cv2.Canny(mask_arr, 0, 0).astype(np.float) + border /= 255 + border = Image.fromarray(border.astype(np.uint8)) + border_img = self.transform_border(border) + border = torch.as_tensor(np.array(border_img), dtype=torch.int64) + # one_hot = torch.cat((torch.zeros_like(border).unsqueeze(0), torch.zeros_like(border).unsqueeze(0))).scatter_(0, border.unsqueeze(0), 1) + + image = self.transform_img(image) + + mask = self.transform_mask(mask) + mask = torch.as_tensor(np.array(mask), dtype=torch.int64) + # print(f'after transform mask max: {mask.max()}') + + # image = image.unsqueeze(0) + # mask = mask.unsqueeze(0) + + return image, mask, border + + elif self.mode == 'val': + mask = deepcopy(self.masklist[idx]) + + mask_arr = np.array(mask) + border = cv2.Canny(mask_arr, 0, 0).astype(np.float) + border /= 255 + border = Image.fromarray(border.astype(np.uint8)) + border_img = self.transform_border(border) + border = torch.as_tensor(np.array(border_img), dtype=torch.int64) + # one_hot = torch.cat((torch.zeros_like(border).unsqueeze(0), torch.zeros_like(border).unsqueeze(0))).scatter_(0, border.unsqueeze(0), 1) + + image = self.transform_img(image) + + mask = self.transform_mask(mask) + mask = torch.as_tensor(np.array(mask), dtype=torch.int64) + + # image = image.unsqueeze(0) + # mask = mask.unsqueeze(0) + + return image, mask, border + + +# define model +down_feature = defaultdict(list) +filter_list = [i for i in range(6, 9)] + + +class down_sampling(nn.Module): + def __init__(self, in_channel, out_channel): + super(down_sampling, self).__init__() + self.conv = nn.Sequential( + nn.Conv2d(in_channel, out_channel, 3, padding=1), + nn.BatchNorm2d(out_channel), + nn.ReLU(inplace=True), + nn.Conv2d(out_channel, out_channel, 3, padding=1), + nn.BatchNorm2d(out_channel), + nn.ReLU(inplace=True) + ) + self.pool = nn.MaxPool2d(2) + + + def forward(self, in_feat): + x = self.conv(in_feat) + down_feature[in_feat.device.index].append(x) + x = self.pool(x) + + return x + + +class up_sampling(nn.Module): + def __init__(self, in_channel, out_channel): + super(up_sampling, self).__init__() + self.up_conv = nn.ConvTranspose2d(in_channel, out_channel, 2, stride=2) + 
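Aside, looking back at `SegDataset` above: the border target is just a Canny edge map of the label mask rescaled to {0, 1} (note that `np.float` used there is deprecated in recent NumPy; plain `float` is the safe spelling). Standalone, with an illustrative mask:

import cv2
import numpy as np

mask = np.zeros((64, 64), dtype=np.uint8)
mask[16:48, 16:48] = 1                                   # one square "object"
border = (cv2.Canny(mask, 0, 0) / 255).astype(np.uint8)  # 1 on object boundaries, 0 elsewhere
print(border.sum())                                      # number of border pixels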
self.relu_conv = nn.Sequential( + nn.Conv2d(in_channel, out_channel, kernel_size=3, padding=1), + nn.BatchNorm2d(out_channel), + nn.ReLU(inplace=True), + nn.Conv2d(out_channel, out_channel, kernel_size=3, padding=1), + nn.BatchNorm2d(out_channel), + nn.ReLU(inplace=True) + ) + + + def forward(self, in_feat): + x = self.up_conv(in_feat) + down_map = down_feature[in_feat.device.index].pop() + x = torch.cat([x, down_map], dim=1) + x = self.relu_conv(x) + return x + + +class UNet(nn.Module): + def __init__(self, num_classes): + super(UNet, self).__init__() + self.input_conv = down_sampling(3, 64) + self.down_list = [down_sampling(2 ** i, 2 ** (i + 1)) for i in filter_list] + self.down = nn.Sequential(*self.down_list) + + self.last_layer = nn.Sequential( + nn.Conv2d(512, 1024, 3, padding=1), + nn.BatchNorm2d(1024), + nn.ReLU(inplace=True), + nn.Conv2d(1024, 1024, 3, padding=1), + nn.BatchNorm2d(1024), + nn.ReLU(inplace=True) + ) + + self.up_init = up_sampling(1024, 512) + self.up_list = [up_sampling(2 ** (i + 1), 2 ** i) for i in filter_list[::-1]] + self.up = nn.Sequential(*self.up_list) + + self.output = nn.Conv2d(64, num_classes, 1) + # self.classifier = nn.Softmax() + + + + def forward(self, in_feat): + x = self.input_conv(in_feat) + x = self.down(x) + x = self.last_layer(x) + x = self.up_init(x) + x = self.up(x) + out = self.output(x) + + + # out = self.classifier(x) + # return out + return out, x + + +class BorderUNet(nn.Module): + def __init__(self, n_class): + super().__init__() + + self.unet = UNet(n_class) + + self.border_extraction = nn.Conv2d(64, 2, kernel_size=1, padding=0) + + self.softmax = nn.Softmax2d() + + def forward(self, in_feat): + # regular cnn process + init_seg, unet_feature = self.unet(in_feat) + + # extract and enhance border + init_border = self.border_extraction(unet_feature) + + # output + out = self.softmax(init_seg) + + return out, init_border + + +# pre-process: resize image to the target scale keeping aspect ratio then pad to square +class ResizeSquarePad(Resize, Pad): + def __init__(self, target_length, interpolation_strategy): + if not isinstance(target_length, (int, Sequence)): + raise TypeError("Size should be int or sequence. 
Got {}".format(type(target_length))) + if isinstance(target_length, Sequence) and len(target_length) not in (1, 2): + raise ValueError("If size is a sequence, it should have 1 or 2 values") + + self.target_length = target_length + self.interpolation_strategy = interpolation_strategy + Resize.__init__(self, size=(320, 320), interpolation=self.interpolation_strategy) + Pad.__init__(self, padding=(0,0,0,0), fill=255, padding_mode="constant") + + + def __call__(self, img): + w, h = img.size + if w > h: + self.size = (int(np.round(self.target_length * (h / w))), self.target_length) + img = Resize.__call__(self, img) + + total_pad = self.size[1] - self.size[0] + half_pad = total_pad // 2 + self.padding = (0, half_pad, 0, total_pad - half_pad) + return Pad.__call__(self, img) + else: + self.size = (self.target_length, int(np.round(self.target_length * (w / h)))) + img = Resize.__call__(self, img) + + total_pad = self.size[0] - self.size[1] + half_pad = total_pad // 2 + self.padding = (half_pad, 0, total_pad - half_pad, 0) + return Pad.__call__(self, img) + + +# customized loss function +class DiceLoss(nn.Module): + def __init__(self): + super(DiceLoss, self).__init__() + + def forward(self, input, target): + N = target.size(0) + smooth = 1 + + input_flat = input.view(N, -1) + target_flat = target.view(N, -1) + + intersection = input_flat * target_flat + + loss = 2 * (intersection.sum(1) + smooth) / \ + (input_flat.sum(1) + target_flat.sum(1) + smooth) + loss = 1 - loss.sum() / N + + return loss + + +class MulticlassDiceLoss(nn.Module): + """ + requires input(prediction) dimension as [b, c, h, w] + target(ground truth mask) dimension as [b, 1, h, w] where dimension 2 refers to the class index + Can convert target to one_hot automatically and support ignore labels (should be in the form of list) + """ + + def __init__(self, ignore_labels=None): + super(MulticlassDiceLoss, self).__init__() + self.ignore_labels = ignore_labels + + def forward(self, input, target): + + num_ignore = 0 if self.ignore_labels == None else len(self.ignore_labels) + + n, _, h, w = target.shape[:] + + num_classes = input.shape[1] + + # initialize zeros for one_hot + zeros = torch.zeros((n, (num_classes + num_ignore), h, w)).to(target.device) + + # decrease ignore labels' indexes into successive integers(eg: convert 0, 1, 2, 255 into 0, 1, 2, 3) + for i in range(num_ignore): + target[target == self.ignore_labels[i]] = num_classes + i + + # scatter to one_hot + one_hot = zeros.scatter_(1, target, 1) + + dice = DiceLoss() + totalLoss = 0 + + # for indexes out of range, not compute corresponding loss + for i in range(num_classes): + diceLoss = dice(input[:, i], one_hot[:,i]) + totalLoss += diceLoss + + return totalLoss + + +logger = logging.getLogger(__name__) + + +# main process procedure +class SaUNetBorderLoss(SegmentationModel): + ''' + train UNet + ''' + @staticmethod + def get_knob_config(): + return { + # hyper parameters + "lr": FixedKnob(1e-3), + "momentum": FixedKnob(0.9), + + "ignore_index": FixedKnob(255), + "batch_size": FixedKnob(12), + "epoch": FixedKnob(1), + + # application parameters + # "num_classes": FixedKnob(1), + "fine_size": FixedKnob(512), + + } + + + def __init__(self, **knobs): + super().__init__(**knobs) + # load knobs + self._knobs = knobs + + # initiate hyper params + self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') + print("self.device", self.device) + logger.info(self.device) + + self.model = None + + self.fine_size = self._knobs.get("fine_size") + + 
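Aside on `ResizeSquarePad` above (and the `ignore_index` knob read just below): it resizes the longer side to `target_length`, keeps the aspect ratio, and pads the shorter side to make the output square; the pad fill of 255 matches `ignore_index`, so padded mask pixels are excluded from both loss terms. A quick sanity check, assuming torchvision and Pillow as imported at the top of this file (the 600x400 size is arbitrary):

from PIL import Image

pad_resize = ResizeSquarePad(512, Image.BILINEAR)  # class defined above
img = Image.new("RGB", (600, 400))                 # landscape dummy input
out = pad_resize(img)
print(out.size)  # (512, 512): resized to 512x341, then padded top and bottom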
self.ignore_index = self._knobs.get("ignore_index") + + self.batch_size = self._knobs.get("batch_size") + self.epoch = self._knobs.get("epoch") + + self.lr = self._knobs.get("lr") + self.momentum = self._knobs.get("momentum") + + # define preprocessing procedure + self.transform_img = torchvision.transforms.Compose([ + ResizeSquarePad(self.fine_size, Image.BILINEAR), + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225]) + ]) + + self.transform_mask = torchvision.transforms.Compose([ + ResizeSquarePad(self.fine_size, Image.NEAREST) + ]) + + self.transform_border = torchvision.transforms.Compose([ + ResizeSquarePad(self.fine_size, Image.NEAREST) + ]) + + + def train(self, dataset_path, **kwargs): + logger.info("Training params: {}".format(json.dumps(kwargs))) + + + # extract uploaded zipfile + dataset_zipfile = zipfile.ZipFile(dataset_path, 'r') + + train_folder = tempfile.TemporaryDirectory() + folder_name = train_folder.name + dataset_zipfile.extractall(path=folder_name) + + # load train params from zipfile + with open(os.path.join(folder_name, 'param.json'),'r') as load_f: + load_dict = json.load(load_f) + self.num_classes = load_dict["num_classes"] if "num_classes" in list(load_dict.keys()) else 21 # default class number(21) is the same as voc2012 + + # load images from zipfile + if os.path.isdir(os.path.join(folder_name, "image")): + print("split train/val subsets...") + logger.info("split train/val subsets...") + image_train, mask_train, image_val, mask_val = ImageFetch(folder_name) + self.num_image = len(image_train) + print("Total training images : ", self.num_image) + logger.info(f"Total training images : {self.num_image}") + elif os.path.isdir(os.path.join(folder_name, "train")): + print("directly load train/val datasets...") + logger.info("directly load train/val datasets...") + image_train, mask_train = trainImageFetch(folder_name) + image_val, mask_val = valImageFetch(folder_name) + self.num_image = len(image_train) + print("Total training images : ", self.num_image) + logger.info(f"Total training images : {self.num_image}") + else: + print("unsupported dataset format!") + logger.info("unsupported dataset format!") + + # load dataset + train_data = SegDataset(image_train, mask_train, 'train', self.transform_img, self.transform_mask, self.transform_border) + val_data = SegDataset(image_val, mask_val, 'val', self.transform_img, self.transform_mask, self.transform_border) + + logger.info("Training the model ResUNet using {}".format(self.device)) + print("Training the model ResUNet using {}".format(self.device)) + + # define training and validation data loaders + train_loader = DataLoader(train_data, + shuffle=RandomSampler(train_data), + batch_size=self.batch_size) + + val_loader = DataLoader(val_data, + shuffle=False, + batch_size=self.batch_size) + + # get the model using our helper function + self.model = BorderUNet(self.num_classes) + self.model = DataParallel(self.model) + self.model.to(self.device) + + self.criterion_ce = nn.CrossEntropyLoss(weight=torch.Tensor([1, 100]), ignore_index=self.ignore_index) + self.criterion_dice = MulticlassDiceLoss(ignore_labels=[255]) + + self.optimizer_ft = torch.optim.SGD(filter(lambda p: p.requires_grad, self.model.parameters()), lr=self.lr, momentum=self.momentum) + self.exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer_ft, step_size=20, gamma=0.1) + + # start training + for epoch_ in range(self.epoch): + train_loss = self._train_one_epoch(train_loader, 
self.model) + val_loss, accuracy = self._evaluate(val_loader, self.model) + self.exp_lr_scheduler.step() + + print('epoch: {} train_loss: {:.3f} val_loss: {:.3f} val_accuracy: {:.3f}'.format(epoch_ + 1, train_loss, val_loss, accuracy)) + logger.info('epoch: {} train_loss: {:.3f} val_loss: {:.3f} val_accuracy: {:.3f}'.format(epoch_ + 1, train_loss, val_loss, accuracy)) + + + def _train_one_epoch(self, train_loader, model): + ''' + consider as a sub-train function inside singa-auto framework + ''' + running_loss = 0.0 + data_size = len(train_loader) * self.batch_size + + model.train() + + for inputs, masks, borders in tqdm(train_loader): + inputs, masks, borders = inputs.to(self.device), masks.long().to(self.device), borders.long().to(self.device) + self.optimizer_ft.zero_grad() + + init_seg, init_border = model(inputs) + + self.criterion_ce.to(self.device) + loss_border = self.criterion_ce(init_border, borders) + loss_seg = self.criterion_dice(init_seg, masks.unsqueeze(1)) + + loss = loss_border + loss_seg + + loss.backward() + self.optimizer_ft.step() + + running_loss += loss.item() * self.batch_size + + epoch_loss = running_loss / data_size + return epoch_loss + + + def _evaluate(self, test_loader, model): + ''' + validation per epoch + ''' + running_loss = 0.0 + acc = 0.0 + data_size = len(test_loader) * self.batch_size + + model.eval() + + with torch.no_grad(): + for inputs, masks, borders in test_loader: + inputs, masks, borders = inputs.to(self.device), masks.long().to(self.device), borders.long().to(self.device) + + outputs, fine_border = self.model(inputs) + + predict = torch.argmax(nn.Softmax(dim=1)(outputs), dim=1) # extract argmax as the final prediction + + # we do not consider the ignore_index + pure_mask = masks.masked_select(masks.ne(self.ignore_index)) + pure_predict = predict.masked_select(masks.ne(self.ignore_index)) + + acc += pure_mask.cpu().eq(pure_predict.cpu()).sum().item()/len(pure_mask) # find the correct pixels + + self.criterion_ce.to(self.device) + loss_border = self.criterion_ce(fine_border, borders) + loss_seg = self.criterion_dice(outputs, masks.unsqueeze(1)) + + loss = loss_seg + loss_border + + running_loss += loss.item() * self.batch_size + + epoch_loss = running_loss / data_size + accuracy = acc / len(test_loader) + return epoch_loss, accuracy + + + def evaluate(self, val_dataset_path, **kwargs): + # extract validation datasets + dataset_zipfile = zipfile.ZipFile(val_dataset_path, 'r') + val_folder = tempfile.TemporaryDirectory() + dataset_zipfile.extractall(path=val_folder.name) + folder_name = val_folder.name + + if os.path.isdir(os.path.join(folder_name, "image")): + print("split train/val subsets...") + logger.info("split train/val subsets...") + image_train, mask_train, X_val, y_val = ImageFetch(folder_name) + self.num_image = len(X_val) + print("Total val images : ", self.num_image) + logger.info(f"Total val images : {self.num_image}") + elif os.path.isdir(os.path.join(folder_name, "train")): + print("directly load train/val datasets...") + logger.info("directly load train/val datasets...") + image_train, mask_train = trainImageFetch(folder_name) + X_val, y_val = valImageFetch(folder_name) + self.num_image = len(X_val) + print("Total val images : ", self.num_image) + logger.info(f"Total val images : {self.num_image}") + else: + print("unsupported dataset format!") + logger.info("unsupported dataset format!") + + val_data = SegDataset(X_val, y_val, 'val', self.transform_img, self.transform_mask, self.transform_border) + + val_loader = 
DataLoader(val_data, + shuffle=False, + batch_size=self.batch_size) + # compute MIoU metric(consider as accuracy) + temp_miou = {} + for i in range(self.num_classes): + temp_miou[i] = [0, 0.0] + + self.model.eval() + + with torch.no_grad(): + for inputs, masks, borders in val_loader: + inputs, masks, borders = inputs.to(self.device), masks.long().to(self.device), borders.long().to(self.device) + + outputs, fine_border = self.model(inputs) + + predict = torch.argmax(nn.Softmax(dim=1)(outputs), dim=1) + pure_mask = masks.masked_select(masks.ne(255)) + pure_predict = predict.masked_select(masks.ne(255)) + + for class_value in pure_mask.unique(): + valued_mask = pure_mask.masked_select(pure_mask.eq(class_value)) + real_len = len(valued_mask) + + valued_predict = pure_predict.masked_select(pure_mask.eq(class_value)) + cross_len = valued_mask.eq(valued_predict).sum().item() + + predict_len = len(pure_predict.masked_select(pure_predict.eq(class_value))) + + temp_miou[class_value.item()][1] += cross_len / (real_len + predict_len - cross_len) + temp_miou[class_value.item()][0] += 1 + + miou_overall = 0.0 + existed_classes = 0 + for key in temp_miou.keys(): + if temp_miou[key][0] != 0: + miou_overall += (temp_miou[key][1] / temp_miou[key][0]) + existed_classes += 1 + temp_miou['overall'] = [1, miou_overall / existed_classes] + + for key in temp_miou.keys(): + if temp_miou[key][0] != 0: + print(f"class {key} accuracy: {temp_miou[key][1] / temp_miou[key][0]}") + return temp_miou['overall'][1] + + + def dump_parameters(self): + params = {} + with tempfile.NamedTemporaryFile() as tmp: + + # Save whole model to a tempfile + torch.save(self.model.module.state_dict(), tmp.name) + # Read from tempfile & encode it to base64 string + with open(tmp.name, 'rb') as f: + weight_base64 = f.read() + params['weight_base64'] = base64.b64encode(weight_base64).decode('utf-8') + params['num_classes'] = self.num_classes + return params + + + def load_parameters(self, params): + weight_base64 = params['weight_base64'] + self.num_classes = params['num_classes'] + + weight_base64_bytes = base64.b64decode(weight_base64.encode('utf-8')) + + state_dict = torch.load(BytesIO(weight_base64_bytes), map_location=self.device) + + self.model = BorderUNet(self.num_classes) + self.model.load_state_dict(state_dict) + + self.model = DataParallel(self.model) + self.model.to(self.device) + self.model.eval() + + def _get_prediction(self, img): + + image = self.transform_img(img) + + image = image.to(self.device) + predict, _ = self.model(image.unsqueeze(0)) + + predict = predict.squeeze(0) + predict = nn.Softmax(dim=0)(predict) + predict = torch.argmax(predict, dim=0) + + # transform result image into original size + w, h = img.size + if w > h: + re_h = int(np.round(self.fine_size * (h / w))) + total_pad = self.fine_size - re_h + half_pad = total_pad // 2 + out = predict[half_pad : half_pad + re_h, :] + else: + re_w = int(np.round(self.fine_size * (w / h))) + total_pad = self.fine_size - re_w + half_pad = total_pad // 2 + out = predict[:, half_pad : half_pad + re_w] + + out = cv2.resize(out.cpu().numpy(), (w, h), interpolation=cv2.INTER_NEAREST) + + return out + + + + def predict(self, queries: List[List]) -> List[dict]: + # print(len(queries)) + result = list() + + # depending on different input types, need different conditions + for idx, img in enumerate(queries): + print("*" * 30) + print(type(img)) + if isinstance(img, List): + print(len(img)) + img = np.array(img[0]) + print(img.shape) + img_file = Image.fromarray(np.uint8(img)) + 
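Returning to the `evaluate()` method above: it accumulates a per-class IoU batch by batch and then averages. A dataset-level mIoU computed from a confusion matrix is a convenient cross-check (it aggregates counts over the whole set rather than averaging per-batch ratios, so the two can differ slightly); the helper below is illustrative and not part of the class, with `preds`/`targets` as flattened integer label arrays:

import numpy as np

def mean_iou(preds, targets, num_classes, ignore_index=255):
    keep = targets != ignore_index
    preds, targets = preds[keep], targets[keep]
    # Confusion matrix: conf[t, p] counts pixels with true label t predicted as p.
    conf = np.bincount(num_classes * targets + preds,
                       minlength=num_classes ** 2).reshape(num_classes, num_classes)
    inter = np.diag(conf)
    union = conf.sum(0) + conf.sum(1) - inter
    present = union > 0
    return (inter[present] / union[present]).mean()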
print(type(img_file)) + elif isinstance(img, np.ndarray): + img_file = Image.fromarray(img) + else: + img_file = img + + # get prediction + res_raw = self._get_prediction(img_file) + + # add color palette (we follow the VOC2012 color map and the max num_class is 21) + res_raw = res_raw.astype(np.uint8) + res = Image.fromarray(res_raw) + palette = [] + for i in range(256): + palette.extend((i, i, i)) + palette[:3*21] = np.array([[0, 0, 0], + [128, 0, 0], + [0, 128, 0], + [128, 128, 0], + [0, 0, 128], + [128, 0, 128], + [0, 128, 128], + [128, 128, 128], + [64, 0, 0], + [192, 0, 0], + [64, 128, 0], + [192, 128, 0], + [64, 0, 128], + [192, 0, 128], + [64, 128, 128], + [192, 128, 128], + [0, 64, 0], + [128, 64, 0], + [0, 192, 0], + [128, 192, 0], + [0, 64, 128] + ], dtype='uint8').flatten() + res.putpalette(palette) + + name = f"./query_{idx}.png" + res.save(name) + full_name = os.path.abspath(name) + + buffered = BytesIO() + res.save(buffered, format="PNG") + img_str = base64.b64encode(buffered.getvalue()) + + + result.append(img_str.decode('utf-8')) + + # result.append(requests.get('http://192.168.100.203:36667/fetch').text) + + return result + + +if __name__ == "__main__": + import argparse + + from singa_auto.model.dev import test_model_class + parser = argparse.ArgumentParser() + parser.add_argument('--train_path', + type=str, + default='/home/zhaozixiao/dataset/pets/datasets.zip', + help='Path to train dataset') + parser.add_argument('--val_path', + type=str, + default='/home/zhaozixiao/dataset/pets/datasets.zip', + help='Path to validation dataset') + + # parser.add_argument('--annotation_dataset_path', + # type=str, + # default='./dataset/voc2012/val2014.zip', + # help='Path to validation dataset') + + # parser.add_argument('--test_path', + # type=str, + # default='/hdd1/PennFudanPed.zip', + # help='Path to test dataset') + parser.add_argument('--query_path', + type=str, + default='/home/zhaozixiao/dataset/pets/Persian_120.jpg,/home/zhaozixiao/dataset/pets/pomeranian_159.jpg', + help='Path(s) to query image(s), delimited by commas') + + (args, _) = parser.parse_known_args() + + # print(args.query_path.split(',')) + + imgs = utils.dataset.load_images(args.query_path.split(',')) + img_nps = [] + for i in imgs: + img = np.array(i) + img_nps.append(img) + + queries = img_nps + test_model_class(model_file_path=__file__, + model_class='SaUNetBorderLoss', + task='IMAGE_SEGMENTATION', + dependencies={"torch": "1.6.0+cu101", + "torchvision": "0.7.0+cu101", + "opencv": "3.4.2", + "tqdm": "4.28.0"}, + train_dataset_path=args.train_path, + val_dataset_path=args.val_path, + test_dataset_path=None, + train_args={"num_classes": 3}, + queries=img_nps) diff --git a/examples/models/question_answering/onnx_bert/onnx_bert.py b/examples/models/question_answering/onnx_bert/onnx_bert.py index bc9bd0b1..a6bfc0f0 100644 --- a/examples/models/question_answering/onnx_bert/onnx_bert.py +++ b/examples/models/question_answering/onnx_bert/onnx_bert.py @@ -23,7 +23,7 @@ from singa_auto.model import BaseModel from singa_auto.constants import ModelDependency -from singa_auto.model.dev import make_predictions, _check_model_class, _print_header, _check_dependencies, inform_user +from singa_auto.model.dev import make_predictions_json, _check_model_class, _print_header, _check_dependencies, inform_user from singa_auto.model.utils import load_model_class from singa_auto.advisor.constants import Proposal, ParamsType @@ -204,7 +204,7 @@ def _postprocess(self, eval_examples, extra_data, all_results): proposal = 
Proposal(trial_no=0, knobs={}, params_type=ParamsType.LOCAL_RECENT) - (predictions, model_inst) = make_predictions(queries, task, + (predictions, model_inst) = make_predictions_json(queries, task, py_model_class, proposal, fine_tune_dataset_path=None, diff --git a/examples/models/tabular_classification/SVCClf.py b/examples/models/tabular_classification/SVCClf.py index 72675304..26ee49a3 100644 --- a/examples/models/tabular_classification/SVCClf.py +++ b/examples/models/tabular_classification/SVCClf.py @@ -1,200 +1,198 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -import numpy as np -import pandas as pd - -import json -import pickle -import base64 -from sklearn.preprocessing import StandardScaler -from sklearn.svm import SVC - -from singa_auto.model import TabularClfModel, IntegerKnob, CategoricalKnob, FloatKnob, logger -from singa_auto.model.dev import test_model_class -from singa_auto.constants import ModelDependency - - -class SVCClf(TabularClfModel): - ''' - Implements a C-Support Vector Classifier for classification task using Pima Indian Diabetes dataset. 
- ''' - - @staticmethod - def get_knob_config(): - return { - 'C': IntegerKnob(2, 3), - 'kernel': CategoricalKnob(['poly', 'rbf', 'linear']), - 'degree': IntegerKnob(2, 3), - 'gamma': CategoricalKnob(['scale', 'auto']), - 'coef0': FloatKnob(0.0, 0.1), - 'shrinking': CategoricalKnob([True, False]), - 'tol': FloatKnob(1e-03, 1e-01, is_exp=True), - 'decision_function_shape': CategoricalKnob(['ovo', 'ovr']), - 'probability': CategoricalKnob([True, False]), - } - - def __init__(self, **knobs): - self._knobs = knobs - self.__dict__.update(knobs) - self._clf = self._build_classifier( - self._knobs.get("C"), self._knobs.get("kernel"), - self._knobs.get("degree"), self._knobs.get("gamma"), - self._knobs.get("coef0"), self._knobs.get("shrinking"), - self._knobs.get("tol"), self._knobs.get("decision_function_shape"), - self._knobs.get("probability")) - - def train(self, dataset_path, features=None, target=None, **kwargs): - # Record features & target - self._features = features - self._target = target - - # Load CSV file as pandas dataframe - csv_path = dataset_path - data = pd.read_csv(csv_path) - - # Extract X & y from dataframe - (X, y) = self._extract_xy(data) - - X = self.prepare_X(X) - - self._clf.fit(X, y) - - # Compute train accuracy - score = self._clf.score(X, y) - logger.log('Train accuracy: {}'.format(score)) - - def evaluate(self, dataset_path, **kwargs): - # Load CSV file as pandas dataframe - csv_path = dataset_path - data = pd.read_csv(csv_path) - - # Extract X & y from dataframe - (X, y) = self._extract_xy(data) - - X = self.prepare_X(X) - - accuracy = self._clf.score(X, y) - return accuracy - - def predict(self, queries): - queries = pd.DataFrame.from_records(queries, index=[0]) - data = self.prepare_X(queries) - probs = self._clf.predict_proba(data) - return probs.tolist() - - - def destroy(self): - pass - - def dump_parameters(self): - params = {} - - # Put model parameters - clf_bytes = pickle.dumps(self._clf) - clf_base64 = base64.b64encode(clf_bytes).decode('utf-8') - params['clf_base64'] = clf_base64 - params['features'] = json.dumps(self._features) - if self._target: - params['target'] = self._target - - return params - - def load_parameters(self, params): - # Load model parameters - assert 'clf_base64' in params - clf_base64 = params['clf_base64'] - clf_bytes = base64.b64decode(clf_base64.encode('utf-8')) - - self._clf = pickle.loads(clf_bytes) - self._features = json.loads(params['features']) - if "target" in params: - self._target = params['target'] - else: - self._target = None - - def _extract_xy(self, data): - features = self._features - target = self._target - - if features is None: - X = data.iloc[:, :-1] - else: - X = data[features] - - if target is None: - y = data.iloc[:, -1] - else: - y = data[target] - - return (X, y) - - def median_dataset(self, df): - #replace zero values by median so that 0 will not affect median. 
- for col in df.columns: - df[col].replace(0, np.nan, inplace=True) - df[col].fillna(df[col].median(), inplace=True) - return df - - def prepare_X(self, df): - data = self.median_dataset(df) - X = StandardScaler().fit_transform(data) - return X - - - def _build_classifier(self, C, kernel, degree, gamma, coef0, shrinking, tol, - decision_function_shape, probability): - clf = SVC( - C=C, - kernel=kernel, - degree=degree, - gamma=gamma, - coef0=coef0, - shrinking=shrinking, - tol=tol, - decision_function_shape=decision_function_shape, - probability=probability, - ) - return clf - - -if __name__ == '__main__': - test_model_class(model_file_path=__file__, - model_class='SVCClf', - task='TABULAR_CLASSIFICATION', - dependencies={ModelDependency.SCIKIT_LEARN: '0.20.0'}, - train_dataset_path='data/diabetes_train.csv', - val_dataset_path='data/diabetes_val.csv', - train_args={ - 'features': [ - 'Pregnancies', 'Glucose', 'BloodPressure', - 'SkinThickness', 'Insulin', 'DiabetesPedigreeFunction','BMI', 'Age'], - 'target': 'Outcome' - }, - queries={ - 'Pregnancies': 3, - 'Glucose': 130, - 'BloodPressure': 92, - 'SkinThickness': 30, - 'Insulin': 90, - 'DiabetesPedigreeFunction': 1, - 'BMI': 30.4, - 'Age': 40 - }) - +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +import numpy as np +import pandas as pd + +import json +import pickle +import base64 +from sklearn.preprocessing import StandardScaler +from sklearn.svm import SVC + +from singa_auto.model import TabularClfModel, IntegerKnob, CategoricalKnob, FloatKnob, logger +from singa_auto.model.dev import test_model_class +from singa_auto.constants import ModelDependency + + +class SVCClf(TabularClfModel): + ''' + Implements a C-Support Vector Classifier for classification task using Pima Indian Diabetes dataset. 
+ ''' + + @staticmethod + def get_knob_config(): + return { + 'C': IntegerKnob(2, 3), + 'kernel': CategoricalKnob(['poly', 'rbf', 'linear']), + 'degree': IntegerKnob(2, 3), + 'gamma': CategoricalKnob(['scale', 'auto']), + 'coef0': FloatKnob(0.0, 0.1), + 'shrinking': CategoricalKnob([True, False]), + 'tol': FloatKnob(1e-03, 1e-01, is_exp=True), + 'decision_function_shape': CategoricalKnob(['ovo', 'ovr']), + 'probability': CategoricalKnob([True, False]), + } + + def __init__(self, **knobs): + self._knobs = knobs + self.__dict__.update(knobs) + self._clf = self._build_classifier( + self._knobs.get("C"), self._knobs.get("kernel"), + self._knobs.get("degree"), self._knobs.get("gamma"), + self._knobs.get("coef0"), self._knobs.get("shrinking"), + self._knobs.get("tol"), self._knobs.get("decision_function_shape"), + self._knobs.get("probability")) + + def train(self, dataset_path, features=None, target=None, **kwargs): + # Record features & target + self._features = features + self._target = target + + # Load CSV file as pandas dataframe + csv_path = dataset_path + data = pd.read_csv(csv_path) + + # Extract X & y from dataframe + (X, y) = self._extract_xy(data) + + X = self.prepare_X(X) + + self._clf.fit(X, y) + + # Compute train accuracy + score = self._clf.score(X, y) + logger.log('Train accuracy: {}'.format(score)) + + def evaluate(self, dataset_path, **kwargs): + # Load CSV file as pandas dataframe + csv_path = dataset_path + data = pd.read_csv(csv_path) + + # Extract X & y from dataframe + (X, y) = self._extract_xy(data) + + X = self.prepare_X(X) + + accuracy = self._clf.score(X, y) + return accuracy + + def predict(self, queries): + queries = pd.DataFrame.from_records(queries, index=[0]) + data = self.prepare_X(queries) + probs = self._clf.predict_proba(data) + return probs.tolist() + + def destroy(self): + pass + + def dump_parameters(self): + params = {} + + # Put model parameters + clf_bytes = pickle.dumps(self._clf) + clf_base64 = base64.b64encode(clf_bytes).decode('utf-8') + params['clf_base64'] = clf_base64 + params['features'] = json.dumps(self._features) + if self._target: + params['target'] = self._target + + return params + + def load_parameters(self, params): + # Load model parameters + assert 'clf_base64' in params + clf_base64 = params['clf_base64'] + clf_bytes = base64.b64decode(clf_base64.encode('utf-8')) + + self._clf = pickle.loads(clf_bytes) + self._features = json.loads(params['features']) + if "target" in params: + self._target = params['target'] + else: + self._target = None + + def _extract_xy(self, data): + features = self._features + target = self._target + + if features is None: + X = data.iloc[:, :-1] + else: + X = data[features] + + if target is None: + y = data.iloc[:, -1] + else: + y = data[target] + + return (X, y) + + def median_dataset(self, df): + #replace zero values by median so that 0 will not affect median. 
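A caveat on the `median_dataset`/`prepare_X` helpers around this point: `prepare_X` calls `StandardScaler().fit_transform` on whatever dataframe it receives, so the scaler is re-fitted at evaluation and prediction time (on a one-row query this maps every feature to 0), and no fitted scaler is persisted by `dump_parameters`. A common alternative, sketched here with illustrative helper names, is to fit the scaler once on the training data and only transform afterwards:

from sklearn.preprocessing import StandardScaler

def fit_scaler(train_df):
    # Fit once on the (median-imputed) training features ...
    return StandardScaler().fit(train_df)

def prepare_X(df, scaler):
    # ... and only transform at evaluate()/predict() time.
    return scaler.transform(df)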
+ for col in df.columns: + df[col].replace(0, np.nan, inplace=True) + df[col].fillna(df[col].median(), inplace=True) + return df + + def prepare_X(self, df): + data = self.median_dataset(df) + X = StandardScaler().fit_transform(data) + return X + + def _build_classifier(self, C, kernel, degree, gamma, coef0, shrinking, tol, + decision_function_shape, probability): + clf = SVC( + C=C, + kernel=kernel, + degree=degree, + gamma=gamma, + coef0=coef0, + shrinking=shrinking, + tol=tol, + decision_function_shape=decision_function_shape, + probability=probability, + ) + return clf + + +if __name__ == '__main__': + test_model_class(model_file_path=__file__, + model_class='SVCClf', + task='TABULAR_CLASSIFICATION', + dependencies={ModelDependency.SCIKIT_LEARN: '0.20.0'}, + train_dataset_path='data/diabetes_train.csv', + val_dataset_path='data/diabetes_val.csv', + train_args={ + 'features': [ + 'Pregnancies', 'Glucose', 'BloodPressure', + 'SkinThickness', 'Insulin', 'DiabetesPedigreeFunction','BMI', 'Age'], + 'target': 'Outcome' + }, + queries={ + 'Pregnancies': 3, + 'Glucose': 130, + 'BloodPressure': 92, + 'SkinThickness': 30, + 'Insulin': 90, + 'DiabetesPedigreeFunction': 1, + 'BMI': 30.4, + 'Age': 40 + }) + diff --git a/examples/models/tabular_regression/RidgeReg.py b/examples/models/tabular_regression/RidgeReg.py index 43de7aba..2fc0bc5b 100644 --- a/examples/models/tabular_regression/RidgeReg.py +++ b/examples/models/tabular_regression/RidgeReg.py @@ -1,224 +1,226 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -import pickle -import base64 -import numpy as np -import pandas as pd -import json - -from sklearn.linear_model import Ridge -from sklearn.metrics import mean_squared_error - -from singa_auto.model import BaseModel, IntegerKnob, FloatKnob, CategoricalKnob, logger -from singa_auto.model.dev import test_model_class -from singa_auto.constants import ModelDependency - - -class RidgeReg(BaseModel): - ''' - Implements a Linear Ridge Regressor for regression task using bodyfat dataset. 
- ''' - - @staticmethod - def get_knob_config(): - return { - 'alpha': FloatKnob(0.001, 0.01), - 'normalize': CategoricalKnob([True, False]), - 'copy_X': CategoricalKnob([True, False]), - 'tol': FloatKnob(1e-05, 1e-04), - 'solver': CategoricalKnob(['svd', 'sag']), - 'random_state': IntegerKnob(1, 123) - } - - def __init__(self, **knobs): - self._knobs = knobs - self.__dict__.update(knobs) - self._regressor = self._build_regressor(self._knobs.get("alpha"), - self._knobs.get("normalize"), - self._knobs.get("copy_X"), - self._knobs.get("tol"), - self._knobs.get("solver"), - self._knobs.get("random_state")) - - def train(self, dataset_path, features=None, target=None, **kwargs): - # Record features & target - self._features = features - self._target = target - - # Load CSV file as pandas dataframe - csv_path = dataset_path - data = pd.read_csv(csv_path) - - # Extract X & y from dataframe - (X, y) = self._extract_xy(data) - - # Encode categorical features - X = self._encoding_categorical_type(X) - - self._regressor.fit(X, y) - - # Compute train root mean square error - preds = self._regressor.predict(X) - rmse = np.sqrt(mean_squared_error(y, preds)) - logger.log('Train RMSE: {}'.format(rmse)) - - - def evaluate(self, dataset_path, **kwargs): - # Load CSV file as pandas dataframe - csv_path = dataset_path - data = pd.read_csv(csv_path) - - # Extract X & y from dataframe - (X, y) = self._extract_xy(data) - - # Encode categorical features - X = self._encoding_categorical_type(X) - - preds = self._regressor.predict(X) - rmse = np.sqrt(mean_squared_error(y, preds)) - return 1 / rmse - - def predict(self, queries): - queries = [pd.DataFrame(query, index=[0]) for query in queries] - results = [ - self._regressor.predict(self._features_mapping(query)).tolist()[0] - for query in queries - ] - return results - - - def destroy(self): - pass - - def dump_parameters(self): - params = {} - - # Put model parameters - regressor_bytes = pickle.dumps(self._regressor) - regressor_base64 = base64.b64encode(regressor_bytes).decode('utf-8') - params['regressor_base64'] = regressor_base64 - params['encoding_dict'] = json.dumps(self._encoding_dict) - params['features'] = json.dumps(self._features) - params['target'] = self._target - - return params - - - def load_parameters(self, params): - # Load model parameters - assert 'regressor_base64' in params - regressor_base64 = params['regressor_base64'] - regressor_bytes = base64.b64decode(regressor_base64.encode('utf-8')) - self._regressor = pickle.loads(regressor_bytes) - - self._encoding_dict = json.loads(params['encoding_dict']) - self._features = json.loads(params['features']) - self._target = params['target'] - - - def _extract_xy(self, data): - features = self._features - target = self._target - - if features is None: - X = data.iloc[:, :-1] - else: - X = data[features] - - if target is None: - y = data.iloc[:, -1] - else: - y = data[target] - - return (X, y) - - - def _encoding_categorical_type(self, cols): - # Apply label encoding for those categorical columns - cat_cols = list( - filter(lambda x: cols[x].dtype == 'object', cols.columns)) - encoded_cols = pd.DataFrame({col: cols[col].astype('category').cat.codes \ - if cols[col].dtype == 'object' else cols[col] for col in cols}, index=cols.index) - - # Recover the missing elements (Use XGBoost to automatically handle them) - encoded_cols = encoded_cols.replace(to_replace=-1, value=np.nan) - - # Generate the dict that maps categorical features to numerical - encoding_dict = {col: {cat: n for n, cat in 
enumerate(cols[col].astype('category'). \ - cat.categories)} for col in cat_cols} - self._encoding_dict = encoding_dict - - return encoded_cols - - - def _features_mapping(self, df): - # Encode the categorical features with pre saved encoding dict - cat_cols = list(filter(lambda x: df[x].dtype == 'object', df.columns)) - df_temp = df.copy() - for col in cat_cols: - df_temp[col] = df[col].map(self._encoding_dict[col]) - df = df_temp - return df - - - def _build_regressor(self, alpha, normalize, copy_X, tol, solver, - random_state): - regressor = Ridge( - alpha=alpha, - normalize=normalize, - copy_X=copy_X, - tol=tol, - solver=solver, - random_state=random_state, - ) - return regressor - - -if __name__ == '__main__': - test_model_class(model_file_path=__file__, - model_class='RidgeReg', - task='TABULAR_REGRESSION', - dependencies={ModelDependency.SCIKIT_LEARN: '0.20.0'}, - train_dataset_path='data/bodyfat_train.csv', - val_dataset_path='data/bodyfat_val.csv', - train_args={ - 'features': [ - 'density', 'age', 'weight', 'height', 'neck', - 'chest', 'abdomen', 'hip', 'thigh', 'knee', - 'ankle', 'biceps', 'forearm', 'wrist' - ], - 'target': 'bodyfat' - }, - queries=[{ - 'density': 1.0207, - 'age': 65, - 'weight': 224.5, - 'height': 68.25, - 'neck': 38.8, - 'chest': 119.6, - 'abdomen': 118.0, - 'hip': 114.3, - 'thigh': 61.3, - 'knee': 42.1, - 'ankle': 23.4, - 'biceps': 34.9, - 'forearm': 30.1, - 'wrist': 19.4 - }]) +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +import pickle +import base64 +import numpy as np +import pandas as pd +import json + +from sklearn.linear_model import Ridge +from sklearn.metrics import mean_squared_error + +from singa_auto.model import BaseModel, IntegerKnob, FloatKnob, CategoricalKnob, logger +from singa_auto.model.dev import test_model_class +from singa_auto.constants import ModelDependency + + +class RidgeReg(BaseModel): + ''' + Implements a Linear Ridge Regressor for regression task using bodyfat dataset. 
+ ''' + + @staticmethod + def get_knob_config(): + return { + 'alpha': FloatKnob(0.001, 0.01), + 'normalize': CategoricalKnob([True, False]), + 'copy_X': CategoricalKnob([True, False]), + 'tol': FloatKnob(1e-05, 1e-04), + 'solver': CategoricalKnob(['svd', 'sag']), + 'random_state': IntegerKnob(1, 123) + } + + def __init__(self, **knobs): + self._knobs = knobs + self.__dict__.update(knobs) + self._regressor = self._build_regressor(self._knobs.get("alpha"), + self._knobs.get("normalize"), + self._knobs.get("copy_X"), + self._knobs.get("tol"), + self._knobs.get("solver"), + self._knobs.get("random_state")) + + def train(self, dataset_path, features=None, target=None, **kwargs): + # Record features & target + self._features = features + self._target = target + + # Load CSV file as pandas dataframe + csv_path = dataset_path + data = pd.read_csv(csv_path) + + # Extract X & y from dataframe + (X, y) = self._extract_xy(data) + + # Encode categorical features + X = self._encoding_categorical_type(X) + + self._regressor.fit(X, y) + + # Compute train root mean square error + preds = self._regressor.predict(X) + + rmse = np.sqrt(mean_squared_error(y, preds)) + logger.log('Train RMSE: {}'.format(rmse)) + + def evaluate(self, dataset_path, **kwargs): + # Load CSV file as pandas dataframe + csv_path = dataset_path + data = pd.read_csv(csv_path) + + # Extract X & y from dataframe + (X, y) = self._extract_xy(data) + + # Encode categorical features + X = self._encoding_categorical_type(X) + + preds = self._regressor.predict(X) + + rmse = np.sqrt(mean_squared_error(y, preds)) + + return 1 / rmse + + def predict(self, queries): + queries = [pd.DataFrame(query, index=[0]) for query in queries] + results = [ + self._regressor.predict(self._features_mapping(query)).tolist()[0] + for query in queries + ] + return results + + + def destroy(self): + pass + + def dump_parameters(self): + params = {} + + # Put model parameters + regressor_bytes = pickle.dumps(self._regressor) + regressor_base64 = base64.b64encode(regressor_bytes).decode('utf-8') + params['regressor_base64'] = regressor_base64 + params['encoding_dict'] = json.dumps(self._encoding_dict) + params['features'] = json.dumps(self._features) + params['target'] = self._target + + return params + + def load_parameters(self, params): + # Load model parameters + assert 'regressor_base64' in params + regressor_base64 = params['regressor_base64'] + regressor_bytes = base64.b64decode(regressor_base64.encode('utf-8')) + + self._regressor = pickle.loads(regressor_bytes) + + self._encoding_dict = json.loads(params['encoding_dict']) + self._features = json.loads(params['features']) + self._target = params['target'] + + + def _extract_xy(self, data): + features = self._features + target = self._target + + if features is None: + X = data.iloc[:, :-1] + else: + X = data[features] + + if target is None: + y = data.iloc[:, -1] + else: + y = data[target] + + return (X, y) + + + def _encoding_categorical_type(self, cols): + # Apply label encoding for those categorical columns + cat_cols = list( + filter(lambda x: cols[x].dtype == 'object', cols.columns)) + encoded_cols = pd.DataFrame({col: cols[col].astype('category').cat.codes \ + if cols[col].dtype == 'object' else cols[col] for col in cols}, index=cols.index) + + # Recover the missing elements (Use XGBoost to automatically handle them) + encoded_cols = encoded_cols.replace(to_replace=-1, value=np.nan) + + # Generate the dict that maps categorical features to numerical + encoding_dict = {col: {cat: n for n, cat in 
enumerate(cols[col].astype('category'). \ + cat.categories)} for col in cat_cols} + self._encoding_dict = encoding_dict + + return encoded_cols + + + def _features_mapping(self, df): + # Encode the categorical features with pre saved encoding dict + cat_cols = list(filter(lambda x: df[x].dtype == 'object', df.columns)) + df_temp = df.copy() + for col in cat_cols: + df_temp[col] = df[col].map(self._encoding_dict[col]) + df = df_temp + return df + + + def _build_regressor(self, alpha, normalize, copy_X, tol, solver, + random_state): + regressor = Ridge( + alpha=alpha, + normalize=normalize, + copy_X=copy_X, + tol=tol, + solver=solver, + random_state=random_state, + ) + return regressor + + +if __name__ == '__main__': + test_model_class(model_file_path=__file__, + model_class='RidgeReg', + task='TABULAR_REGRESSION', + dependencies={ModelDependency.SCIKIT_LEARN: '0.20.0'}, + train_dataset_path='data/bodyfat_train.csv', + val_dataset_path='data/bodyfat_val.csv', + train_args={ + 'features': [ + 'density', 'age', 'weight', 'height', 'neck', + 'chest', 'abdomen', 'hip', 'thigh', 'knee', + 'ankle', 'biceps', 'forearm', 'wrist' + ], + 'target': 'bodyfat' + }, + queries=[{ + 'density': 1.0207, + 'age': 65, + 'weight': 224.5, + 'height': 68.25, + 'neck': 38.8, + 'chest': 119.6, + 'abdomen': 118.0, + 'hip': 114.3, + 'thigh': 61.3, + 'knee': 42.1, + 'ankle': 23.4, + 'biceps': 34.9, + 'forearm': 30.1, + 'wrist': 19.4 + }]) diff --git a/examples/models/tabular_regression/TreeReg.py b/examples/models/tabular_regression/TreeReg.py index 9a810417..18343cee 100644 --- a/examples/models/tabular_regression/TreeReg.py +++ b/examples/models/tabular_regression/TreeReg.py @@ -1,224 +1,226 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -import pickle -import base64 -import numpy as np -import pandas as pd -import json - -from sklearn.tree import DecisionTreeRegressor -from sklearn.metrics import mean_squared_error - -from singa_auto.model import BaseModel, IntegerKnob, FloatKnob, CategoricalKnob, logger -from singa_auto.model.dev import test_model_class -from singa_auto.constants import ModelDependency - - -class TreeReg(BaseModel): - ''' - Implements a Decision Tree Regressor for regression task using bodyfat dataset. 
- ''' - - @staticmethod - def get_knob_config(): - return { - 'criterion': CategoricalKnob(['mse', 'mae']), - 'splitter': CategoricalKnob(['best', 'random']), - 'min_samples_split': IntegerKnob(2, 5), - 'max_features': CategoricalKnob(['auto', 'sqrt']), - 'random_state': IntegerKnob(1, 123), - 'min_impurity_decrease': FloatKnob(0.0, 0.2), - 'min_impurity_split': FloatKnob(1e-07, 1e-03) - } - - def __init__(self, **knobs): - self._knobs = knobs - self.__dict__.update(knobs) - self._regressor = self._build_regressor( - self._knobs.get("criterion"), self._knobs.get("splitter"), - self._knobs.get("min_samples_split"), - self._knobs.get("max_features"), self._knobs.get("random_state"), - self._knobs.get("min_impurity_decrease"), - self._knobs.get("min_impurity_split")) - - - def train(self, dataset_path, features=None, target=None, **kwargs): - # Record features & target - self._features = features - self._target = target - - # Load CSV file as pandas dataframe - csv_path = dataset_path - data = pd.read_csv(csv_path) - - # Extract X & y from dataframe - (X, y) = self._extract_xy(data) - - # Encode categorical features - X = self._encoding_categorical_type(X) - - self._regressor.fit(X, y) - - # Compute train root mean square error - preds = self._regressor.predict(X) - rmse = np.sqrt(mean_squared_error(y, preds)) - logger.log('Train RMSE: {}'.format(rmse)) - - def evaluate(self, dataset_path, **kwargs): - # Load CSV file as pandas dataframe - csv_path = dataset_path - data = pd.read_csv(csv_path) - - # Extract X & y from dataframe - (X, y) = self._extract_xy(data) - - # Encode categorical features - X = self._encoding_categorical_type(X) - - preds = self._regressor.predict(X) - rmse = np.sqrt(mean_squared_error(y, preds)) - return 1 / rmse - - def predict(self, queries): - queries = [pd.DataFrame(query, index=[0]) for query in queries] - results = [ - self._regressor.predict(self._features_mapping(query)).tolist()[0] - for query in queries - ] - return results - - - def destroy(self): - pass - - def dump_parameters(self): - params = {} - - # Put model parameters - regressor_bytes = pickle.dumps(self._regressor) - regressor_base64 = base64.b64encode(regressor_bytes).decode('utf-8') - params['regressor_base64'] = regressor_base64 - params['encoding_dict'] = json.dumps(self._encoding_dict) - params['features'] = json.dumps(self._features) - params['target'] = self._target - - return params - - def load_parameters(self, params): - # Load model parameters - assert 'regressor_base64' in params - regressor_base64 = params['regressor_base64'] - regressor_bytes = base64.b64decode(regressor_base64.encode('utf-8')) - self._regressor = pickle.loads(regressor_bytes) - - self._encoding_dict = json.loads(params['encoding_dict']) - self._features = json.loads(params['features']) - self._target = params['target'] - - def _extract_xy(self, data): - features = self._features - target = self._target - - if features is None: - X = data.iloc[:, :-1] - else: - X = data[features] - - if target is None: - y = data.iloc[:, -1] - else: - y = data[target] - - return (X, y) - - - def _encoding_categorical_type(self, cols): - # Apply label encoding for those categorical columns - cat_cols = list( - filter(lambda x: cols[x].dtype == 'object', cols.columns)) - encoded_cols = pd.DataFrame({col: cols[col].astype('category').cat.codes \ - if cols[col].dtype == 'object' else cols[col] for col in cols}, index=cols.index) - - # Recover the missing elements (Use XGBoost to automatically handle them) - encoded_cols = 
encoded_cols.replace(to_replace=-1, value=np.nan) - - # Generate the dict that maps categorical features to numerical - encoding_dict = {col: {cat: n for n, cat in enumerate(cols[col].astype('category'). \ - cat.categories)} for col in cat_cols} - self._encoding_dict = encoding_dict - - return encoded_cols - - def _features_mapping(self, df): - # Encode the categorical features with pre saved encoding dict - cat_cols = list(filter(lambda x: df[x].dtype == 'object', df.columns)) - df_temp = df.copy() - for col in cat_cols: - df_temp[col] = df[col].map(self._encoding_dict[col]) - df = df_temp - return df - - - def _build_regressor(self, criterion, splitter, min_samples_split, - max_features, random_state, min_impurity_decrease, - min_impurity_split): - regressor = DecisionTreeRegressor( - criterion=criterion, - splitter=splitter, - min_samples_split=min_samples_split, - max_features=max_features, - random_state=random_state, - min_impurity_decrease=min_impurity_decrease, - min_impurity_split=min_impurity_split, - ) - return regressor - - -if __name__ == '__main__': - test_model_class(model_file_path=__file__, - model_class='TreeReg', - task='TABULAR_REGRESSION', - dependencies={ModelDependency.SCIKIT_LEARN: '0.20.0'}, - train_dataset_path='data/bodyfat_train.csv', - val_dataset_path='data/bodyfat_val.csv', - train_args={ - 'features': [ - 'density', 'age', 'weight', 'height', 'neck', - 'chest', 'abdomen', 'hip', 'thigh', 'knee', - 'ankle', 'biceps', 'forearm', 'wrist' - ], - 'target': 'bodyfat' - }, - queries=[{ - 'density': 1.0207, - 'age': 65, - 'weight': 224.5, - 'height': 68.25, - 'neck': 38.8, - 'chest': 119.6, - 'abdomen': 118.0, - 'hip': 114.3, - 'thigh': 61.3, - 'knee': 42.1, - 'ankle': 23.4, - 'biceps': 34.9, - 'forearm': 30.1, - 'wrist': 19.4 - }]) +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +import pickle +import base64 +import numpy as np +import pandas as pd +import json + +from sklearn.tree import DecisionTreeRegressor +from sklearn.metrics import mean_squared_error + +from singa_auto.model import BaseModel, IntegerKnob, FloatKnob, CategoricalKnob, logger +from singa_auto.model.dev import test_model_class +from singa_auto.constants import ModelDependency + + +class TreeReg(BaseModel): + ''' + Implements a Decision Tree Regressor for regression task using bodyfat dataset. 
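+
+    The knobs map onto `sklearn.tree.DecisionTreeRegressor` constructor
+    arguments (see `_build_regressor`). Note that `min_impurity_split` is
+    deprecated in favour of `min_impurity_decrease` in newer scikit-learn
+    releases; this example pins scikit-learn 0.20.0 (see the `__main__` block),
+    where the knob is still accepted. Categorical features are label-encoded
+    with the same scheme as in the RidgeReg example.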
+ ''' + + @staticmethod + def get_knob_config(): + return { + 'criterion': CategoricalKnob(['mse', 'mae']), + 'splitter': CategoricalKnob(['best', 'random']), + 'min_samples_split': IntegerKnob(2, 5), + 'max_features': CategoricalKnob(['auto', 'sqrt']), + 'random_state': IntegerKnob(1, 123), + 'min_impurity_decrease': FloatKnob(0.0, 0.2), + 'min_impurity_split': FloatKnob(1e-07, 1e-03) + } + + def __init__(self, **knobs): + self._knobs = knobs + self.__dict__.update(knobs) + self._regressor = self._build_regressor( + self._knobs.get("criterion"), self._knobs.get("splitter"), + self._knobs.get("min_samples_split"), + self._knobs.get("max_features"), self._knobs.get("random_state"), + self._knobs.get("min_impurity_decrease"), + self._knobs.get("min_impurity_split")) + + def train(self, dataset_path, features=None, target=None, **kwargs): + # Record features & target + self._features = features + self._target = target + + # Load CSV file as pandas dataframe + csv_path = dataset_path + data = pd.read_csv(csv_path) + + # Extract X & y from dataframe + (X, y) = self._extract_xy(data) + + # Encode categorical features + X = self._encoding_categorical_type(X) + + self._regressor.fit(X, y) + + # Compute train root mean square error + preds = self._regressor.predict(X) + + rmse = np.sqrt(mean_squared_error(y, preds)) + logger.log('Train RMSE: {}'.format(rmse)) + + def evaluate(self, dataset_path, **kwargs): + # Load CSV file as pandas dataframe + csv_path = dataset_path + data = pd.read_csv(csv_path) + + # Extract X & y from dataframe + (X, y) = self._extract_xy(data) + + # Encode categorical features + X = self._encoding_categorical_type(X) + + preds = self._regressor.predict(X) + + rmse = np.sqrt(mean_squared_error(y, preds)) + return 1 / rmse + + def predict(self, queries): + queries = [pd.DataFrame(query, index=[0]) for query in queries] + results = [ + self._regressor.predict(self._features_mapping(query)).tolist()[0] + for query in queries + ] + return results + + + def destroy(self): + pass + + def dump_parameters(self): + params = {} + + # Put model parameters + regressor_bytes = pickle.dumps(self._regressor) + regressor_base64 = base64.b64encode(regressor_bytes).decode('utf-8') + params['regressor_base64'] = regressor_base64 + params['encoding_dict'] = json.dumps(self._encoding_dict) + params['features'] = json.dumps(self._features) + params['target'] = self._target + + return params + + def load_parameters(self, params): + # Load model parameters + assert 'regressor_base64' in params + regressor_base64 = params['regressor_base64'] + regressor_bytes = base64.b64decode(regressor_base64.encode('utf-8')) + + self._regressor = pickle.loads(regressor_bytes) + + self._encoding_dict = json.loads(params['encoding_dict']) + self._features = json.loads(params['features']) + self._target = params['target'] + + def _extract_xy(self, data): + features = self._features + target = self._target + + if features is None: + X = data.iloc[:, :-1] + else: + X = data[features] + + if target is None: + y = data.iloc[:, -1] + else: + y = data[target] + + return (X, y) + + + def _encoding_categorical_type(self, cols): + # Apply label encoding for those categorical columns + cat_cols = list( + filter(lambda x: cols[x].dtype == 'object', cols.columns)) + encoded_cols = pd.DataFrame({col: cols[col].astype('category').cat.codes \ + if cols[col].dtype == 'object' else cols[col] for col in cols}, index=cols.index) + + # Recover the missing elements (Use XGBoost to automatically handle them) + encoded_cols = 
encoded_cols.replace(to_replace=-1, value=np.nan) + + # Generate the dict that maps categorical features to numerical + encoding_dict = {col: {cat: n for n, cat in enumerate(cols[col].astype('category'). \ + cat.categories)} for col in cat_cols} + self._encoding_dict = encoding_dict + + return encoded_cols + + def _features_mapping(self, df): + # Encode the categorical features with pre saved encoding dict + cat_cols = list(filter(lambda x: df[x].dtype == 'object', df.columns)) + df_temp = df.copy() + for col in cat_cols: + df_temp[col] = df[col].map(self._encoding_dict[col]) + df = df_temp + return df + + + def _build_regressor(self, criterion, splitter, min_samples_split, + max_features, random_state, min_impurity_decrease, + min_impurity_split): + regressor = DecisionTreeRegressor( + criterion=criterion, + splitter=splitter, + min_samples_split=min_samples_split, + max_features=max_features, + random_state=random_state, + min_impurity_decrease=min_impurity_decrease, + min_impurity_split=min_impurity_split, + ) + return regressor + + +if __name__ == '__main__': + test_model_class(model_file_path=__file__, + model_class='TreeReg', + task='TABULAR_REGRESSION', + dependencies={ModelDependency.SCIKIT_LEARN: '0.20.0'}, + train_dataset_path='data/bodyfat_train.csv', + val_dataset_path='data/bodyfat_val.csv', + train_args={ + 'features': [ + 'density', 'age', 'weight', 'height', 'neck', + 'chest', 'abdomen', 'hip', 'thigh', 'knee', + 'ankle', 'biceps', 'forearm', 'wrist' + ], + 'target': 'bodyfat' + }, + queries=[{ + 'density': 1.0207, + 'age': 65, + 'weight': 224.5, + 'height': 68.25, + 'neck': 38.8, + 'chest': 119.6, + 'abdomen': 118.0, + 'hip': 114.3, + 'thigh': 61.3, + 'knee': 42.1, + 'ankle': 23.4, + 'biceps': 34.9, + 'forearm': 30.1, + 'wrist': 19.4 + }]) diff --git a/examples/models/text_generation/onnx_gpt2.py b/examples/models/text_generation/onnx_gpt2.py index edeeec00..56cd5ed4 100644 --- a/examples/models/text_generation/onnx_gpt2.py +++ b/examples/models/text_generation/onnx_gpt2.py @@ -22,7 +22,7 @@ from singa_auto.model import BaseModel from singa_auto.constants import ModelDependency -from singa_auto.model.dev import make_predictions, _check_model_class, _print_header, _check_dependencies, inform_user +from singa_auto.model.dev import make_predictions_json, _check_model_class, _print_header, _check_dependencies, inform_user from singa_auto.model.utils import load_model_class from singa_auto.advisor.constants import Proposal, ParamsType @@ -150,7 +150,7 @@ def _postprocess(self, out: List[int]): proposal = Proposal(trial_no=0, knobs={}, params_type=ParamsType.LOCAL_RECENT) - (predictions, model_inst) = make_predictions(queries, task, + (predictions, model_inst) = make_predictions_json(queries, task, py_model_class, proposal, fine_tune_dataset_path=None, diff --git a/examples/scripts/quickstart.py b/examples/scripts/quickstart.py index 2c6ab0cf..8440fb95 100644 --- a/examples/scripts/quickstart.py +++ b/examples/scripts/quickstart.py @@ -43,7 +43,22 @@ def get_predictor_host(client, app): time.sleep(10) -def make_predictions(client, predictor_host, queries): +def make_predictions_image(client, predictor_host, queries): + predictions = [] + + for query in queries: + res = requests.post(url='http://{}/predict'.format(predictor_host), + files={'img': open(query, 'rb')}) + + if res.status_code != 200: + raise Exception(res.text) + + predictions.append(res.text) + + return predictions + + +def make_predictions_json(client, predictor_host, queries): predictions = [] for query in 
queries: @@ -131,7 +146,7 @@ def quickstart(client, train_dataset_path, val_dataset_path, gpus, hours, print('Making predictions for query images:') print(query_paths) queries = utils.dataset.load_images(query_paths) - predictions = make_predictions(client, predictor_host, queries) + predictions = make_predictions_json(client, predictor_host, queries) print('Predictions are:') print(predictions) diff --git a/examples/scripts/run_image_segmentation.py b/examples/scripts/run_image_segmentation.py new file mode 100644 index 00000000..78e4e3d4 --- /dev/null +++ b/examples/scripts/run_image_segmentation.py @@ -0,0 +1,180 @@ +from __future__ import absolute_import + +import os +import sys +sys.path.append(os.getcwd()) + +import argparse +import base64 + +from pprint import pprint + +from singa_auto.client import Client +# from singa_auto.config import SUPERADMIN_EMAIL +from singa_auto.constants import BudgetOption +from singa_auto.constants import InferenceBudgetOption +from singa_auto.constants import ModelDependency + + +from examples.scripts.quickstart import gen_id +from examples.scripts.quickstart import get_predictor_host +from examples.scripts.quickstart import make_predictions_image +from examples.scripts.quickstart import wait_until_train_job_has_stopped + +SINGA_AUTO_IMAGE_NAME = f"singa_auto/singa_auto_worker" +SINGA_AUTO_VERSION = os.environ.get('SINGA_AUTO_VERSION', '0.2.0') +IMAGE_SEGMENTATION_NAME = f'{SINGA_AUTO_IMAGE_NAME}:{SINGA_AUTO_VERSION}' + + +def run_image_segmentation(client, dataset_path, gpus, hours, **kwargs): + ''' + Conducts training with the `YoloV3` model for the task ``OBJECT_DETECTION`. + ''' + + task = 'IMAGE_SEGMENTATION' + + import time + + if "dataset" in kwargs: + dataset = kwargs["dataset"] + else: + print('Creating & uploading train dataset onto SINGA-Auto...') + curr_time = time.strftime("%m%d_%H%M", time.localtime(time.time())) + dataset = client.create_dataset('oxford_pets_{}'.format(curr_time), task, dataset_path) + pprint(dataset) + + if "model" in kwargs: + model = kwargs["model"] + else: + curr_time = time.strftime("%m%d_%H%M", time.localtime(time.time())) + model_name = 'deeplab_{}_iter_{}'.format(curr_time, 1) + print('Adding models "{}" to SINGA-Auto...'.format(model_name)) + model = client.create_model( + model_name, + task, + 'examples/models/image_segmentation/SaDeeplab.py', + 'SaDeeplab', + docker_image=IMAGE_SEGMENTATION_NAME, + dependencies={ + "opencv-python":"4.4.0.46", + "tensorflow": "2.3.0", + } + ) + pprint(model) + + # generate app & model names by time to avoid naming conflicts + curr_time = time.strftime("%m%d_%H%M", time.localtime(time.time())) + app = 'deeplab_{}_gpu_{}'.format(curr_time, gpus) + + print('Creating train job for app "{}" on SINGA-Auto...'.format(app)) + budget = {BudgetOption.TIME_HOURS: hours, BudgetOption.GPU_COUNT: gpus} + train_job = client.create_train_job( + app, + task, + dataset['id'], + dataset['id'], + budget, + models=[model['id']] + ) + pprint(train_job) + + print('Waiting for train job to complete...') + print('This might take a few minutes') + wait_until_train_job_has_stopped(client, app) + print('Train job has been stopped') + + # app = "deeplab_0519_1026_gpu_4" + + print('Listing best trials of latest train job for app "{}"...'.format(app)) + pprint(client.get_best_trials_of_train_job(app)) + + print('Creating inference job for app "{}" on SINGA-Auto...'.format(app)) + budget = {InferenceBudgetOption.GPU_COUNT: 1} + pprint(client.create_inference_job(app, budget=budget)) + predictor_host = 
get_predictor_host(client, app) + if not predictor_host: + raise Exception('Inference job has errored or stopped') + print('Inference job is running!') + + print('Making predictions for queries:') + queries = ['./examples/data/image_segmentaion/Persian_120.jpg'] + print(queries) + predictions = make_predictions_image(client, predictor_host, queries) + print('Predictions are:') + print(predictions) + + print('Stopping inference job...') + pprint(client.stop_inference_job(app)) + + +if "__main__" == __name__: + parser = argparse.ArgumentParser() + parser.add_argument( + '--email', + type=str, + default="superadmin@singaauto", + help='Email of user', + ) + parser.add_argument( + '--password', + type=str, + default="singa_auto", + help='Password of user', + ) + parser.add_argument( + '--gpus', + type=int, + default=1, + help='How many GPUs to use', + ) + parser.add_argument( + '--hours', + type=float, + default=1, + help='How long the train job should run for (in hours)', + ) + parser.add_argument( + '--use_old', + type=bool, + default=True, + help='whether use existing dataset and model', + ) + (args, _) = parser.parse_known_args() + + # Initialize client + client = Client() + client.login(email=args.email, password=args.password) + + print('Preprocessing dataset...') + data_dir = '/home/taomingyang/dataset/package' + dataset_path = os.path.join(data_dir, 'oxford_pets.zip') + + if args.use_old: + dataset = { + 'id': '0e6723fa-7e3e-4942-9808-07b9873b2244', + 'name': 'oxford_pets_0518_1643', + 'owner_id': 'cabd4ec6-3911-4439-b88b-660eaa7d7ad8', + 'size_bytes': 401767917, + 'stat': {}, + 'store_dataset_id': '4edfa4cc-5d5e-431b-a893-0bcebf653fd0.data', + 'task': 'IMAGE_SEGMENTATION' + } + model = { + 'id': '0a3a6bc9-a3ab-4ec7-8b4c-585af2fec948', + 'name': 'deeplab_0519_1331_iter_1', + 'user_id': 'cabd4ec6-3911-4439-b88b-660eaa7d7ad8' + } + # model = { + # 'id': '6302dbe8-22c2-4b39-bd09-dd29ffed254d', + # 'name': 'yolo_0427_1404_iter_10', + # 'user_id': '8e29b96b-ea16-4595-a1fd-86decddbab6b' + # } + + run_image_segmentation( + client, dataset_path, args.gpus, args.hours, + dataset=dataset, model=model, + ) + else: + run_image_segmentation(client, dataset_path, args.gpus, args.hours) + + print(args) diff --git a/examples/scripts/run_object_detection.py b/examples/scripts/run_object_detection.py new file mode 100644 index 00000000..60ae02aa --- /dev/null +++ b/examples/scripts/run_object_detection.py @@ -0,0 +1,207 @@ +from __future__ import absolute_import + +import os +import sys +sys.path.append(os.getcwd()) + +import argparse +import base64 + +from pprint import pprint + +from singa_auto.client import Client +# from singa_auto.config import SUPERADMIN_EMAIL +from singa_auto.constants import BudgetOption +from singa_auto.constants import InferenceBudgetOption +from singa_auto.constants import ModelDependency + + +from examples.scripts.quickstart import gen_id +from examples.scripts.quickstart import get_predictor_host +from examples.scripts.quickstart import make_predictions_image +from examples.scripts.quickstart import wait_until_train_job_has_stopped + +SINGA_AUTO_IMAGE_NAME = f"singa_auto/singa_auto_worker" +SINGA_AUTO_VERSION = os.environ.get('SINGA_AUTO_VERSION', '0.2.0') +IMAGE_OBJECT_DETECTION_NAME = f'{SINGA_AUTO_IMAGE_NAME}:{SINGA_AUTO_VERSION}' + + +def run_object_detection(client, train_dataset_path, val_dataset_path, gpus, hours, **kwargs): + ''' + Conducts training with the `YoloV3` model for the task ``OBJECT_DETECTION`. 
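+
+    A typical invocation, assuming the repository root as the working directory
+    (the dataset paths hard-coded in `__main__` below are machine-specific and
+    only illustrative):
+
+        python examples/scripts/run_object_detection.py --gpus 1 --hours 1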
+ ''' + + task = 'OBJECT_DETECTION' + + import time + + if "train_dataset" in kwargs: + train_dataset = kwargs["train_dataset"] + else: + print('Creating & uploading train dataset onto SINGA-Auto...') + curr_time = time.strftime("%m%d_%H%M", time.localtime(time.time())) + train_dataset = client.create_dataset('yolo_train_{}'.format(curr_time), task, train_dataset_path) + pprint(train_dataset) + + if "val_dataset" in kwargs: + val_dataset = kwargs["val_dataset"] + else: + print('Creating & uploading val dataset onto SINGA-Auto...') + curr_time = time.strftime("%m%d_%H%M", time.localtime(time.time())) + val_dataset = client.create_dataset('yolo_val_{}'.format(curr_time), task, val_dataset_path) + pprint(val_dataset) + + if "train_model" in kwargs: + train_model = kwargs["train_model"] + else: + curr_time = time.strftime("%m%d_%H%M", time.localtime(time.time())) + model_name = 'yolo_{}_iter_2'.format(curr_time) + print('Adding models "{}" to SINGA-Auto...'.format(model_name)) + train_model = client.create_model( + model_name, + task, + 'examples/models/image_object_detection/SaYolo.py', + 'SaYolo', + docker_image=IMAGE_OBJECT_DETECTION_NAME, + dependencies={ + "opencv-python":"4.4.0.46", + "terminaltables":"3.1.0", + "torch":"1.6.0", + "torchvision":"0.7.0", + "tqdm":"4.53.0", + "wget":"3.2", + "pycocotools":"2.0.2", + }) + pprint(train_model) + + # generate app & model names by time to avoid naming conflicts + curr_time = time.strftime("%m%d_%H%M", time.localtime(time.time())) + app = 'yolo_{}_gpu_{}'.format(curr_time, gpus) + + print('Creating train job for app "{}" on SINGA-Auto...'.format(app)) + budget = {BudgetOption.TIME_HOURS: hours, BudgetOption.GPU_COUNT: gpus} + train_job = client.create_train_job( + app, + task, + train_dataset['id'], + val_dataset['id'], + budget, + models=[train_model['id']] + ) + pprint(train_job) + + print('Waiting for train job to complete...') + print('This might take a few minutes') + wait_until_train_job_has_stopped(client, app) + print('Train job has been stopped') + + # app = "yolo_0601_0935_gpu_1" + + print('Listing best trials of latest train job for app "{}"...'.format(app)) + pprint(client.get_best_trials_of_train_job(app)) + + print('Creating inference job for app "{}" on SINGA-Auto...'.format(app)) + budget = {InferenceBudgetOption.GPU_COUNT: gpus} + pprint(client.create_inference_job(app, budget=budget)) + predictor_host = get_predictor_host(client, app) + if not predictor_host: + raise Exception('Inference job has errored or stopped') + print('Inference job is running!') + + print('Making predictions for queries:') + queries = ['./examples/data/object_detection/cat.jpg'] + print(queries) + predictions = make_predictions_image(client, predictor_host, queries) + print('Predictions are:') + print(predictions) + + print('Stopping inference job...') + pprint(client.stop_inference_job(app)) + + +if "__main__" == __name__: + parser = argparse.ArgumentParser() + parser.add_argument( + '--email', + type=str, + default="superadmin@singaauto", + help='Email of user', + ) + parser.add_argument( + '--password', + type=str, + default="singa_auto", + help='Password of user', + ) + parser.add_argument( + '--gpus', + type=int, + default=1, + help='How many GPUs to use', + ) + parser.add_argument( + '--hours', + type=float, + default=1, + help='How long the train job should run for (in hours)', + ) + parser.add_argument( + '--use_old', + type=bool, + default=True, + help='whether use existing dataset and model', + ) + (args, _) = parser.parse_known_args() + + # 
Initialize client + client = Client() + client.login(email=args.email, password=args.password) + + print('Preprocessing dataset...') + data_dir = '/home/taomingyang/dataset/package' + train_dataset_path = os.path.join(data_dir, 'coco_cat.zip') + val_dataset_path = os.path.join(data_dir, 'coco_mini.zip') + + if args.use_old: + train_dataset = { + 'id': 'a5181e1f-74d1-4916-a853-4ab75afa81d5', + 'name': 'yolo_train_0531_1650', + 'owner_id': '6d37f19f-9063-4b47-a73f-5cb6577f4f85', + 'size_bytes': 573851732, + 'stat': {}, + 'store_dataset_id': '039e05a7-917a-4441-aaff-110cf7552c73.data', + 'task': 'OBJECT_DETECTION' + } + val_dataset = { + 'id': '3bb9113f-2339-4cd4-9c22-d99ffcdd27b9', + 'name': 'yolo_val_0531_1650', + 'owner_id': '6d37f19f-9063-4b47-a73f-5cb6577f4f85', + 'size_bytes': 24435329, + 'stat': {}, + 'store_dataset_id': '93f4c5d2-b9d8-4585-b193-c1d19bb8026d.data', + 'task': 'OBJECT_DETECTION' + } + # train_model = { # using server dataset + # 'id': '48cd0413-ec4b-4f9b-8364-bbb51db52a45', + # 'name': 'yolo_0512_1055_iter_1', + # 'user_id': 'dd703056-e2f2-4e30-9e44-ecd1f7ccee7d' + # } + train_model = { # using local dataset, mini train + 'id': 'dcd9e8a0-e74f-4ca1-880c-864405a91f92', + 'name': 'yolo_0531_1650_iter_2', + 'user_id': '6d37f19f-9063-4b47-a73f-5cb6577f4f85' + } + # train_model = { # using local dataset, cat train + # 'id': '4ac4b8bc-7225-46bd-8f50-786434cf0d3e', + # 'name': 'yolo_0520_0855_iter_2', + # 'user_id': 'dd703056-e2f2-4e30-9e44-ecd1f7ccee7d' + # } + + run_object_detection( + client, train_dataset_path, val_dataset_path, args.gpus, args.hours, + train_dataset=train_dataset, val_dataset=val_dataset, train_model=train_model, + ) + else: + run_object_detection(client, train_dataset_path, val_dataset_path, args.gpus, args.hours) + + print(args) diff --git a/examples/scripts/run_pos_tagging.py b/examples/scripts/run_pos_tagging.py index 7afb14cd..9dda7ba6 100644 --- a/examples/scripts/run_pos_tagging.py +++ b/examples/scripts/run_pos_tagging.py @@ -26,7 +26,7 @@ from singa_auto.constants import BudgetOption, ModelDependency from examples.scripts.quickstart import get_predictor_host, \ - wait_until_train_job_has_stopped, make_predictions, gen_id + wait_until_train_job_has_stopped, make_predictions_json, gen_id from examples.datasets.corpus.load_sample_ptb import load_sample_ptb @@ -102,7 +102,7 @@ def run_pos_tagging(client, train_dataset_path, val_dataset_path, gpus, hours): '1,214', 'cars', 'in', 'the', 'U.S.' ]] print(queries) - predictions = make_predictions(client, predictor_host, queries) + predictions = make_predictions_json(client, predictor_host, queries) print('Predictions are:') print(predictions) diff --git a/examples/scripts/run_speech_recognition.py b/examples/scripts/run_speech_recognition.py index e7f8c7a1..8efadd43 100644 --- a/examples/scripts/run_speech_recognition.py +++ b/examples/scripts/run_speech_recognition.py @@ -17,9 +17,12 @@ # under the License. 
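+# This script prepends the current working directory to sys.path (see the
+# `sys.path.append(os.getcwd())` line below) so that the
+# `examples.scripts.quickstart` imports resolve when it is run from the
+# repository root, e.g.:
+#
+#     python examples/scripts/run_speech_recognition.py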
# +import os +import sys +sys.path.append(os.getcwd()) + from pprint import pprint import argparse -import os import base64 from singa_auto.client import Client @@ -27,7 +30,7 @@ from singa_auto.constants import BudgetOption, ModelDependency from examples.scripts.quickstart import get_predictor_host, \ - wait_until_train_job_has_stopped, make_predictions, gen_id + wait_until_train_job_has_stopped, make_predictions_json, gen_id from examples.datasets.audio_files.load_librispeech import load_librispeech diff --git a/examples/scripts/run_tabular_regression.py b/examples/scripts/run_tabular_regression.py index 36e5df66..42d9f9b4 100644 --- a/examples/scripts/run_tabular_regression.py +++ b/examples/scripts/run_tabular_regression.py @@ -26,7 +26,7 @@ from singa_auto.constants import BudgetOption, ModelDependency from examples.scripts.quickstart import get_predictor_host, \ - wait_until_train_job_has_stopped, make_predictions, gen_id + wait_until_train_job_has_stopped, make_predictions_json, gen_id from examples.datasets.tabular.csv_file import load @@ -99,7 +99,7 @@ def run_tabular_regression(client, if queries is not None: print('Making predictions for queries:') print(queries) - predictions = make_predictions(client, predictor_host, queries) + predictions = make_predictions_json(client, predictor_host, queries) print('Predictions are:') print(predictions) diff --git a/scripts/.base_env.sh b/scripts/.base_env.sh index bf3b0ef0..3ee6b3eb 100644 --- a/scripts/.base_env.sh +++ b/scripts/.base_env.sh @@ -77,6 +77,7 @@ export KIBANA_EXT_PORT=31009 export DOCKER_WORKDIR_PATH=/root export DB_DIR_ROOT=db export DB_DIR_PATH=db/data +export DB_PATH_ON_MASTER=/data0/singa_auto_data export DATA_DIR_PATH=data # Shares a data folder with containers, relative to workdir export LOGS_DIR_PATH=logs # Shares a folder with containers that stores components' logs, relative to workdir export PARAMS_DIR_PATH=params # Shares a folder with containers that stores model parameters, relative to workdir @@ -97,14 +98,17 @@ export ES_DOCKER_WORKDIR_PATH=/usr/share/elasticsearch # Docker images for SINGA-Auto's custom components -export SINGA_AUTO_IMAGE_ADMIN=singaauto/singa_auto_admin -export SINGA_AUTO_IMAGE_WEB_ADMIN=singaauto/singa_auto_admin_web -export SINGA_AUTO_IMAGE_WORKER=singaauto/singa_auto_worker -export SINGA_AUTO_IMAGE_PREDICTOR=singaauto/singa_auto_predictor -export SINGA_AUTO_IMAGE_LOGSTASH=singaauto/singa_auto_logstash -export SINGA_AUTO_IMAGE_ES=singaauto/singa_auto_es - -export SINGA_AUTO_IMAGE_TEST=singaauto/singa_auto_test +export SINGA_AUTO_IMAGE_ADMIN=singa_auto/singa_auto_admin +export SINGA_AUTO_IMAGE_WEB_ADMIN=singa_auto/singa_auto_admin_web +export SINGA_AUTO_IMAGE_WORKER=singa_auto/singa_auto_worker +export SINGA_AUTO_IMAGE_WORKER_CU90=singa_auto/singa_auto_worker_cu90 +export SINGA_AUTO_IMAGE_WORKER_CU100=singa_auto/singa_auto_worker_cu100 +export SINGA_AUTO_IMAGE_WORKER_CU101=singa_auto/singa_auto_worker_cu101 +export SINGA_AUTO_IMAGE_WORKER_CU110=singa_auto/singa_auto_worker_cu110 +export SINGA_AUTO_IMAGE_PREDICTOR=singa_auto/singa_auto_predictor +export SINGA_AUTO_IMAGE_LOGSTASH=singa_auto/singa_auto_logstash +export SINGA_AUTO_IMAGE_ES=singa_auto/singa_auto_es +export SINGA_AUTO_IMAGE_TEST=singa_auto/singa_auto_test # Docker images for dependent services export IMAGE_POSTGRES=postgres:10.5-alpine diff --git a/scripts/base_build_image.sh b/scripts/base_build_image.sh index eca15499..d667d649 100644 --- a/scripts/base_build_image.sh +++ b/scripts/base_build_image.sh @@ -35,10 +35,32 @@ title 
"Building SINGA-Auto Admin's image..." docker build -t $SINGA_AUTO_IMAGE_ADMIN:$SINGA_AUTO_VERSION -f ./dockerfiles/admin.Dockerfile \ --build-arg DOCKER_WORKDIR_PATH=$DOCKER_WORKDIR_PATH \ --build-arg CONDA_ENVIORNMENT=$CONDA_ENVIORNMENT $PWD || exit 1 + title "Building SINGA-Auto Worker's image..." docker build -t $SINGA_AUTO_IMAGE_WORKER:$SINGA_AUTO_VERSION -f ./dockerfiles/worker.Dockerfile \ --build-arg DOCKER_WORKDIR_PATH=$DOCKER_WORKDIR_PATH \ --build-arg CONDA_ENVIORNMENT=$CONDA_ENVIORNMENT $PWD || exit 1 + +title "Building SINGA-Auto Worker's image with cu90..." +docker build -t $SINGA_AUTO_IMAGE_WORKER_CU90:$SINGA_AUTO_VERSION -f ./dockerfiles/worker_cu90.Dockerfile \ + --build-arg DOCKER_WORKDIR_PATH=$DOCKER_WORKDIR_PATH \ + --build-arg CONDA_ENVIORNMENT=$CONDA_ENVIORNMENT $PWD || exit 1 + +title "Building SINGA-Auto Worker's image with cu100..." +docker build -t $SINGA_AUTO_IMAGE_WORKER_CU100:$SINGA_AUTO_VERSION -f ./dockerfiles/worker_cu100.Dockerfile \ + --build-arg DOCKER_WORKDIR_PATH=$DOCKER_WORKDIR_PATH \ + --build-arg CONDA_ENVIORNMENT=$CONDA_ENVIORNMENT $PWD || exit 1 + +title "Building SINGA-Auto Worker's image with cu101..." +docker build -t $SINGA_AUTO_IMAGE_WORKER_CU101:$SINGA_AUTO_VERSION -f ./dockerfiles/worker_cu101.Dockerfile \ + --build-arg DOCKER_WORKDIR_PATH=$DOCKER_WORKDIR_PATH \ + --build-arg CONDA_ENVIORNMENT=$CONDA_ENVIORNMENT $PWD || exit 1 + +title "Building SINGA-Auto Worker's image with cu110..." +docker build -t $SINGA_AUTO_IMAGE_WORKER_CU110:$SINGA_AUTO_VERSION -f ./dockerfiles/worker_cu110.Dockerfile \ + --build-arg DOCKER_WORKDIR_PATH=$DOCKER_WORKDIR_PATH \ + --build-arg CONDA_ENVIORNMENT=$CONDA_ENVIORNMENT $PWD || exit 1 + title "Building SINGA-Auto Predictor's image..." docker build -t $SINGA_AUTO_IMAGE_PREDICTOR:$SINGA_AUTO_VERSION -f ./dockerfiles/predictor.Dockerfile \ --build-arg DOCKER_WORKDIR_PATH=$DOCKER_WORKDIR_PATH \ diff --git a/scripts/docker_swarm/test/start_monitor.sh b/scripts/docker_swarm/test/start_monitor.sh index 2c5cef01..5949f307 100644 --- a/scripts/docker_swarm/test/start_monitor.sh +++ b/scripts/docker_swarm/test/start_monitor.sh @@ -37,7 +37,7 @@ title "Starting SINGA-Auto's Monitor..." 
-e KAFKA_HOST=$KAFKA_HOST \ -p $LOGSTASH_PORT:$LOGSTASH_PORT \ -v $HOST_WORKDIR_PATH/$LOGS_DIR_PATH:$LOGSTASH_DOCKER_WORKDIR_PATH/$LOGS_DIR_PATH \ - -v $HOST_WORKDIR_PATH/scripts/config/logstash.conf:$LOGSTASH_DOCKER_WORKDIR_PATH/logstash.conf \ + -v $HOST_WORKDIR_PATH/log_minitor/config/logstash.conf:$LOGSTASH_DOCKER_WORKDIR_PATH/logstash.conf \ -d $SINGA_AUTO_IMAGE_LOGSTASH:$SINGA_AUTO_VERSION \ &> LOGSTADH_LOG_FILE_PATH) & diff --git a/scripts/kubernetes/.env.sh b/scripts/kubernetes/.env.sh index dd7d71ea..72ea1026 100644 --- a/scripts/kubernetes/.env.sh +++ b/scripts/kubernetes/.env.sh @@ -49,11 +49,11 @@ export CONTAINER_MODE=K8S # Cluster Mode for SINGA-auto export CLUSTER_MODE=SINGLE # CLUSTER or SINGLE +source $HOST_WORKDIR_PATH/scripts/.base_env.sh $IP_ADRESS $SINGA_AUTO_VERSION || exit 1 + if [ "$CLUSTER_MODE" = "CLUSTER" ]; then export POSTGRES_HOST=stolon-proxy-service export NFS_HOST_IP=$IP_ADRESS # NFS Host IP - if used nfs as pv for database storage export RUN_DIR_PATH=run # Shares a folder with containers that stores components' running info, relative to workdir fi -source $HOST_WORKDIR_PATH/scripts/.base_env.sh $IP_ADRESS $SINGA_AUTO_VERSION || exit 1 - diff --git a/scripts/kubernetes/create_config.py b/scripts/kubernetes/create_config.py index 26ddba39..bbd38f5f 100644 --- a/scripts/kubernetes/create_config.py +++ b/scripts/kubernetes/create_config.py @@ -101,6 +101,7 @@ SINGA_AUTO_IMAGE_SPARKAPP = sys.argv[68] SPAEK_DOCKER_JARS_PATH = sys.argv[69] ES_DOCKER_WORKDIR_PATH = sys.argv[70] + DB_PATH_ON_MASTER = sys.argv[71] #zk service content = {} @@ -373,6 +374,8 @@ env.append({'name': 'CONTAINER_MODE', 'value': CONTAINER_MODE}) env.append({'name': 'INGRESS_NAME', 'value': INGRESS_NAME}) env.append({'name': 'INGRESS_EXT_PORT', 'value': INGRESS_EXT_PORT}) + env.append({'name': 'KUBERNETES_ADVERTISE_ADDR', 'value': KUBERNETES_ADVERTISE_ADDR}) + env.append({'name': 'DB_PATH_ON_MASTER', 'value': DB_PATH_ON_MASTER}) container.setdefault('env', env) with open('{}/scripts/kubernetes/start_admin_deployment.json'.format(PYTHONPATH), 'w') as f: f.write(json.dumps(content, indent=4)) @@ -474,7 +477,7 @@ {'name': 'log-path', 'mountPath': '{}/{}'.format(LOGSTASH_DOCKER_WORKDIR_PATH, LOGS_DIR_PATH)}, \ {'name': 'docker-path', 'mountPath': '/var/run/docker.sock'}]) template['spec']['volumes'] = [ - {'name': 'conf-path', 'hostPath': {'path': '{}/scripts/config/logstash.conf'.format(HOST_WORKDIR_PATH)}}, \ + {'name': 'conf-path', 'hostPath': {'path': '{}/log_minitor/config/logstash.conf'.format(HOST_WORKDIR_PATH)}}, \ {'name': 'log-path', 'hostPath': {'path': '{}/{}'.format(HOST_WORKDIR_PATH, LOGS_DIR_PATH)}}, \ {'name': 'docker-path', 'hostPath': {'path': '/var/run/docker.sock'}}] @@ -569,7 +572,7 @@ [{'name': 'conf-path', 'mountPath': '{}/config/elasticsearch.yml'.format(LOGSTASH_DOCKER_WORKDIR_PATH)},\ {'name': 'docker-path', 'mountPath': '/var/run/docker.sock'}]) template['spec']['volumes'] = [ - {'name': 'conf-path', 'hostPath': {'path': '{}/scripts/config/elasticsearch.yml'.format(HOST_WORKDIR_PATH)}}, \ + {'name': 'conf-path', 'hostPath': {'path': '{}/log_minitor/config/elasticsearch.yml'.format(HOST_WORKDIR_PATH)}}, \ {'name': 'docker-path', 'hostPath': {'path': '/var/run/docker.sock'}}] with open('{}/scripts/kubernetes/start_es_deployment.json'.format(PYTHONPATH), 'w') as f: diff --git a/scripts/kubernetes/create_nfs_pv.sh b/scripts/kubernetes/create_nfs_pv.sh index 5f31ddce..834495ef 100755 --- a/scripts/kubernetes/create_nfs_pv.sh +++ b/scripts/kubernetes/create_nfs_pv.sh @@ -6,7 
+6,7 @@ if [ $# -lt 8 ]; then fi TMP_NFS_PV_YAML=$HOST_WORKDIR_PATH/scripts/kubernetes/yaml/tmp-nfs-pv.yaml -cp $HOST_WORKDIR_PATH/scripts/kubernetes//yaml/nfs-pv.yaml.template $TMP_NFS_PV_YAML +cp $HOST_WORKDIR_PATH/scripts/kubernetes/yaml/nfs-pv.yaml.template $TMP_NFS_PV_YAML sed -ri "s/PV_NAME/$1/g" $TMP_NFS_PV_YAML sed -ri "s/PV_IP/$2/g" $TMP_NFS_PV_YAML sed -ri "s#PV_PATH#$3/#" $TMP_NFS_PV_YAML diff --git a/scripts/kubernetes/generate_config.sh b/scripts/kubernetes/generate_config.sh index d2669573..e61f2ed8 100644 --- a/scripts/kubernetes/generate_config.sh +++ b/scripts/kubernetes/generate_config.sh @@ -95,4 +95,5 @@ $SINGA_AUTO_IMAGE_ES \ $KIBANA_EXT_PORT \ $SINGA_AUTO_IMAGE_SPARKAPP \ $SPAEK_DOCKER_JARS_PATH \ -$ES_DOCKER_WORKDIR_PATH +$ES_DOCKER_WORKDIR_PATH \ +$DB_PATH_ON_MASTER \ No newline at end of file diff --git a/scripts/kubernetes/start_stolon.sh b/scripts/kubernetes/start_stolon.sh index 9b32bf8e..d8364c57 100644 --- a/scripts/kubernetes/start_stolon.sh +++ b/scripts/kubernetes/start_stolon.sh @@ -31,8 +31,18 @@ fi echo "Create PV..." # With stolon, we use some default parameters to make nfs as pv, if your have another choice or want to change the default parameters, # your should modify this script -bash $HOST_WORKDIR_PATH/scripts/kubernetes/create_nfs_pv.sh database-pv-0 $NFS_HOST_IP /home/singa_auto/database/db0 100Gi ReadWriteOnce Retain pv database-pv-0 -bash $HOST_WORKDIR_PATH/scripts/kubernetes/create_nfs_pv.sh database-pv-1 $NFS_HOST_IP /home/singa_auto/database/db1 100Gi ReadWriteOnce Retain pv database-pv-1 + +if [ ! -d $DB_PATH_ON_MASTER ]; then + echo "create database folder" + mkdir -p $DB_PATH_ON_MASTER/database/db0 + mkdir -p $DB_PATH_ON_MASTER/database/db1 + mkdir -p $DB_PATH_ON_MASTER/$DATA_DIR_PATH + mkdir -p $DB_PATH_ON_MASTER/$LOGS_DIR_PATH + mkdir -p $DB_PATH_ON_MASTER/$PARAMS_DIR_PATH +fi + +bash $HOST_WORKDIR_PATH/scripts/kubernetes/create_nfs_pv.sh database-pv-0 $NFS_HOST_IP $DB_PATH_ON_MASTER/database/db0 100Gi ReadWriteOnce Retain pv database-pv-0 +bash $HOST_WORKDIR_PATH/scripts/kubernetes/create_nfs_pv.sh database-pv-1 $NFS_HOST_IP $DB_PATH_ON_MASTER/database/db1 100Gi ReadWriteOnce Retain pv database-pv-1 echo "Create PVC..." 
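+# The positional arguments to create_nfs_pvc.sh below appear to be, in order:
+# PVC name, requested capacity, access mode, storage class, and the PV to bind
+# to (mirroring the create_nfs_pv.sh calls above); check those scripts before
+# changing the order.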
# PVC Name is Fixed bash $HOST_WORKDIR_PATH/scripts/kubernetes/create_nfs_pvc.sh database-stolon-keeper-0 100Gi ReadWriteOnce pv database-pv-0 diff --git a/scripts/kubernetes/stolon/generate_stolon_yaml.sh b/scripts/kubernetes/stolon/generate_stolon_yaml.sh index f98f6c76..f557cfc1 100644 --- a/scripts/kubernetes/stolon/generate_stolon_yaml.sh +++ b/scripts/kubernetes/stolon/generate_stolon_yaml.sh @@ -1,22 +1,22 @@ -STOLON_PATH=$HOST_WORKDIR_PATH/scripts/kubernetes/stolon -cp -f $STOLON_PATH/secret.yaml.template $STOLON_PATH/secret.yaml -sed -ri "s/STOLON_PASSWD/$POSTGRES_STOLON_PASSWD/g" $STOLON_PATH/secret.yaml - -# replace config for stolon keeper -cp -f $STOLON_PATH/stolon-keeper.yaml.template $STOLON_PATH/stolon-keeper.yaml -sed -ri "s#SINGA_AUTO_IMAGE_STOLON#$SINGA_AUTO_IMAGE_STOLON#" $STOLON_PATH/stolon-keeper.yaml -sed -ri "s/POSTGRES_PORT/$POSTGRES_PORT/g" $STOLON_PATH/stolon-keeper.yaml - -# replace config for stolon proxy -cp -f $STOLON_PATH/stolon-proxy.yaml.template $STOLON_PATH/stolon-proxy.yaml -sed -ri "s#SINGA_AUTO_IMAGE_STOLON#$SINGA_AUTO_IMAGE_STOLON#" $STOLON_PATH/stolon-proxy.yaml -sed -ri "s/POSTGRES_PORT/$POSTGRES_PORT/g" $STOLON_PATH/stolon-proxy.yaml - -# replace config for stolon sentinel -cp -f $STOLON_PATH/stolon-sentinel.yaml.template $STOLON_PATH/stolon-sentinel.yaml -sed -ri "s#RAFIKI_IMAGE_STOLON#$RAFIKI_IMAGE_STOLON#" $STOLON_PATH/stolon-sentinel.yaml - -# replace config for stolon proxy service -cp -f $STOLON_PATH/stolon-proxy-service.yaml.template $STOLON_PATH/stolon-proxy-service.yaml -sed -ri "s/POSTGRES_EXT_PORT/$POSTGRES_EXT_PORT/g" $STOLON_PATH/stolon-proxy-service.yaml -sed -ri "s/POSTGRES_PORT/$POSTGRES_PORT/g" $STOLON_PATH/stolon-proxy-service.yaml +STOLON_PATH=scripts/kubernetes/stolon +cp -f $STOLON_PATH/secret.yaml.template $STOLON_PATH/secret.yaml +sed -ri "s/STOLON_PASSWD/$POSTGRES_STOLON_PASSWD/g" $STOLON_PATH/secret.yaml + +# replace config for stolon keeper +cp -f $STOLON_PATH/stolon-keeper.yaml.template $STOLON_PATH/stolon-keeper.yaml +sed -ri "s#SINGA_AUTO_IMAGE_STOLON#$SINGA_AUTO_IMAGE_STOLON#" $STOLON_PATH/stolon-keeper.yaml +sed -ri "s/POSTGRES_PORT/$POSTGRES_PORT/g" $STOLON_PATH/stolon-keeper.yaml + +# replace config for stolon proxy +cp -f $STOLON_PATH/stolon-proxy.yaml.template $STOLON_PATH/stolon-proxy.yaml +sed -ri "s#SINGA_AUTO_IMAGE_STOLON#$SINGA_AUTO_IMAGE_STOLON#" $STOLON_PATH/stolon-proxy.yaml +sed -ri "s/POSTGRES_PORT/$POSTGRES_PORT/g" $STOLON_PATH/stolon-proxy.yaml + +# replace config for stolon sentinel +cp -f $STOLON_PATH/stolon-sentinel.yaml.template $STOLON_PATH/stolon-sentinel.yaml +sed -ri "s#SINGA_AUTO_IMAGE_STOLON#$SINGA_AUTO_IMAGE_STOLON#" $STOLON_PATH/stolon-sentinel.yaml + +# replace config for stolon proxy service +cp -f $STOLON_PATH/stolon-proxy-service.yaml.template $STOLON_PATH/stolon-proxy-service.yaml +sed -ri "s/POSTGRES_EXT_PORT/$POSTGRES_EXT_PORT/g" $STOLON_PATH/stolon-proxy-service.yaml +sed -ri "s/POSTGRES_PORT/$POSTGRES_PORT/g" $STOLON_PATH/stolon-proxy-service.yaml diff --git a/scripts/kubernetes/stolon/stolon-keeper.yaml.template b/scripts/kubernetes/stolon/stolon-keeper.yaml.template index 004f4c29..ddac6f7d 100644 --- a/scripts/kubernetes/stolon/stolon-keeper.yaml.template +++ b/scripts/kubernetes/stolon/stolon-keeper.yaml.template @@ -21,7 +21,7 @@ spec: terminationGracePeriodSeconds: 10 containers: - name: stolon-keeper - image: RAFIKI_IMAGE_STOLON + image: SINGA_AUTO_IMAGE_STOLON command: - "/bin/bash" - "-ec" diff --git a/scripts/kubernetes/stolon/stolon-sentinel.yaml.template 
b/scripts/kubernetes/stolon/stolon-sentinel.yaml.template index cd3e9ff1..fdaace30 100644 --- a/scripts/kubernetes/stolon/stolon-sentinel.yaml.template +++ b/scripts/kubernetes/stolon/stolon-sentinel.yaml.template @@ -15,7 +15,7 @@ spec: spec: containers: - name: stolon-sentinel - image: RAFIKI_IMAGE_STOLON + image: SINGA_AUTO_IMAGE_STOLON command: - "/bin/bash" - "-ec" diff --git a/scripts/kubernetes/stop.sh b/scripts/kubernetes/stop.sh index 2ffe8d72..6270f58b 100644 --- a/scripts/kubernetes/stop.sh +++ b/scripts/kubernetes/stop.sh @@ -83,7 +83,7 @@ then else -# kubectl delete -f $HOST_WORKDIR_PATH/scripts/kubernetes/nvidia-device-plugin.yml +# kubectl delete -f $HOST_WORKDIR_PATH/scripts/kubernetes/yaml/nvidia-device-plugin.yml title "Stopping any existing jobs..." python $HOST_WORKDIR_PATH/scripts/stop_all_jobs.py diff --git a/scripts/start_worker.py b/scripts/start_worker.py index a1f51493..fec1930e 100644 --- a/scripts/start_worker.py +++ b/scripts/start_worker.py @@ -1,71 +1,70 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -import os - - -# Run install command -install_command = os.environ.get('WORKER_INSTALL_COMMAND', '') - -for i in range(10): - exit_code = os.system(install_command) - if exit_code != 0: - print('Install command gave non-zero exit code: "{}"'.format(install_command)) - import time - time.sleep(3) - else: - break -else: - raise Exception( - 'Install command gave non-zero exit code: "{}"'.format(install_command)) - -worker = None - -from singa_auto.constants import ServiceType -from singa_auto.utils.service import run_worker -from singa_auto.meta_store import MetaStore - - -def start_worker(service_id, service_type, container_id): - global worker - - if service_type == ServiceType.TRAIN: - from singa_auto.worker.train import TrainWorker - worker = TrainWorker(service_id, container_id) - worker.start() - elif service_type == ServiceType.INFERENCE: - from singa_auto.worker.inference import InferenceWorker - worker = InferenceWorker(service_id, container_id) - worker.start() - elif service_type == ServiceType.ADVISOR: - from singa_auto.worker.advisor import AdvisorWorker - worker = AdvisorWorker(service_id) - worker.start() - else: - raise Exception('Invalid service type: {}'.format(service_type)) - - -def stop_worker(): - global worker - if worker is not None: - worker.stop() - - -meta_store = MetaStore() -run_worker(meta_store, start_worker, stop_worker) +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +import os + +from singa_auto.constants import ServiceType +from singa_auto.utils.service import run_worker +from singa_auto.meta_store import MetaStore + +# Run install command +install_command = os.environ.get('WORKER_INSTALL_COMMAND', '') + +for i in range(10): + exit_code = os.system(install_command) + if exit_code != 0: + print('Install command gave non-zero exit code: "{}"'.format(install_command)) + import time + time.sleep(3) + else: + break +else: + raise Exception( + 'Install command gave non-zero exit code: "{}"'.format(install_command)) + +worker = None + + +def start_worker(service_id, service_type, container_id): + global worker + + if service_type == ServiceType.TRAIN: + from singa_auto.worker.train import TrainWorker + worker = TrainWorker(service_id, container_id) + worker.start() + elif service_type == ServiceType.INFERENCE: + from singa_auto.worker.inference import InferenceWorker + worker = InferenceWorker(service_id, container_id) + worker.start() + elif service_type == ServiceType.ADVISOR: + from singa_auto.worker.advisor import AdvisorWorker + worker = AdvisorWorker(service_id) + worker.start() + else: + raise Exception('Invalid service type: {}'.format(service_type)) + + +def stop_worker(): + global worker + if worker is not None: + worker.stop() + + +meta_store = MetaStore() +run_worker(meta_store, start_worker, stop_worker) diff --git a/singa_auto/admin/requirements.txt b/singa_auto/admin/requirements.txt index 81a833c7..2e868415 100644 --- a/singa_auto/admin/requirements.txt +++ b/singa_auto/admin/requirements.txt @@ -1,4 +1,4 @@ bcrypt>=3.1.4 Flask==1.0.2 Flask-Cors==3.0.6 -kubernetes==10.0.0 +kubernetes==10.0.1 diff --git a/singa_auto/admin/services_manager.py b/singa_auto/admin/services_manager.py index 5f32530c..8b07637c 100644 --- a/singa_auto/admin/services_manager.py +++ b/singa_auto/admin/services_manager.py @@ -16,6 +16,7 @@ # specific language governing permissions and limitations # under the License. 
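+# When a service is given GPUs, the worker Docker image is now picked from the
+# model's declared framework dependency (see _get_docker_image_by_dependency
+# below): torch 0.4/1.0/1.1 or tensorflow 1.5-1.12 map to the *_cu90 image,
+# torch 1.2-1.4 or tensorflow 1.13-2.0 to *_cu100, torch 1.5-1.8 or
+# tensorflow 2.1-2.3 to *_cu101, and tensorflow 2.4 to *_cu110; CPU-only
+# services and models without a torch/tensorflow dependency keep the model's
+# original docker_image.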
# + import json import os import logging @@ -45,9 +46,10 @@ class ServiceDeploymentError(Exception): 'SUPERADMIN_PASSWORD', 'REDIS_HOST', 'REDIS_PORT', 'REDIS_PASSWORD', 'ADMIN_HOST', 'ADMIN_PORT', - 'DATA_DIR_PATH', 'LOGS_DIR_PATH', 'PARAMS_DIR_PATH', + 'DATA_DIR_PATH', 'LOGS_DIR_PATH', 'PARAMS_DIR_PATH', 'DB_PATH_ON_MASTER', 'KAFKA_HOST', 'KAFKA_PORT', - + 'CONTAINER_MODE', 'KUBERNETES_ADVERTISE_ADDR', + 'KUBERNETES_INNER_NETWORK_RANGE' ] DEFAULT_TRAIN_GPU_COUNT = 0 @@ -84,6 +86,7 @@ def __init__(self, self._data_dir_path = os.environ['DATA_DIR_PATH'] self._logs_dir_path = os.environ['LOGS_DIR_PATH'] self._params_dir_path = os.environ['PARAMS_DIR_PATH'] + self._params_root_path = os.environ['DB_PATH_ON_MASTER'] self._host_workdir_path = os.environ['HOST_WORKDIR_PATH'] self._docker_workdir_path = os.environ['DOCKER_WORKDIR_PATH'] self._predictor_image = f"{os.environ['SINGA_AUTO_IMAGE_PREDICTOR']}:{version}" @@ -204,13 +207,17 @@ def create_train_services(self, train_job_id): # Create advisor self._create_advisor(sub_train_job) - # 1 GPU per worker - for _ in range(gpus): - self._create_train_job_worker(sub_train_job, dist_workers=dist_workers) + # # 1 GPU per worker + # for gpu_idx in range(gpus): + # self._create_train_job_worker(sub_train_job, dist_workers=dist_workers) + # logger.info("gpu idx {} created".format(gpu_idx)) - # CPU workers - for _ in range(cpus): - self._create_train_job_worker(sub_train_job, dist_workers=dist_workers, gpus=0) + # # CPU workers + # for cpu_idx in range(cpus): + # self._create_train_job_worker(sub_train_job, dist_workers=dist_workers, gpus=0) + # logger.info("cpu idx {} created".format(cpu_idx)) + + self._create_train_job_worker(sub_train_job, dist_workers=dist_workers, gpus=gpus) return train_job @@ -380,7 +387,7 @@ def _create_inference_job_worker(self, } service = self._create_service(service_type=service_type, - docker_image=model.docker_image, + docker_image=self._get_docker_image_by_dependency(model.docker_image, gpus, model.dependencies), environment_vars=environment_vars, gpus=gpus) @@ -411,6 +418,33 @@ def _create_predictor(self, inference_job, inferenceAppName: str): return service + def _get_docker_image_by_dependency(self, docker_image_name, gpus, dependencies): + logger.info("docker_image_name is {} with type {}".format(docker_image_name, type(docker_image_name))) + if gpus == 0: + selected_image_name = docker_image_name + elif "torch" in dependencies: + torch_dependency = '.'.join(dependencies["torch"].split('.')[:2]) + if torch_dependency in ["0.4", "1.0", "1.1"]: + selected_image_name = docker_image_name.replace("singa_auto_worker", "singa_auto_worker_cu90") + elif torch_dependency in ["1.2", "1.3", "1.4"]: + selected_image_name = docker_image_name.replace("singa_auto_worker", "singa_auto_worker_cu100") + elif torch_dependency in ["1.5", "1.6", "1.7", "1.8"]: + selected_image_name = docker_image_name.replace("singa_auto_worker", "singa_auto_worker_cu101") + elif "tensorflow" in dependencies: + tf_dependency = '.'.join(dependencies["tensorflow"].split('.')[:2]) + if tf_dependency in ["1.5", "1.6", "1.7", "1.8", "1.9", "1.10", "1.11", "1.12"]: + selected_image_name = docker_image_name.replace("singa_auto_worker", "singa_auto_worker_cu90") + elif tf_dependency in ["1.13", "1.14", "1.15", "2.0"]: + selected_image_name = docker_image_name.replace("singa_auto_worker", "singa_auto_worker_cu100") + elif tf_dependency in ["2.1", "2.2", "2.3"]: + selected_image_name = docker_image_name.replace("singa_auto_worker", "singa_auto_worker_cu101") + elif 
tf_dependency in ["2.4"]: + selected_image_name = docker_image_name.replace("singa_auto_worker", "singa_auto_worker_cu110") + else: + selected_image_name = docker_image_name + + return selected_image_name + def _create_train_job_worker(self, sub_train_job, dist_workers=0, gpus=1): model = self._meta_store.get_model(sub_train_job.model_id) service_type = ServiceType.TRAIN @@ -421,7 +455,7 @@ def _create_train_job_worker(self, sub_train_job, dist_workers=0, gpus=1): } service = self._create_service(service_type=service_type, - docker_image=model.docker_image, + docker_image=self._get_docker_image_by_dependency(model.docker_image, gpus, model.dependencies), environment_vars=environment_vars, gpus=gpus, dist_workers=dist_workers) @@ -507,20 +541,22 @@ def _create_service(self, } if self._app_mode == 'DEV': - # Mount whole root directory - mounts = {self._host_workdir_path: self._docker_workdir_path} - else: - # Mount only data, logs and params folders to containers' work directories mounts = { os.path.join(self._host_workdir_path, self._data_dir_path): - os.path.join(self._docker_workdir_path, - self._data_dir_path), + os.path.join(self._docker_workdir_path, self._data_dir_path), os.path.join(self._host_workdir_path, self._logs_dir_path): - os.path.join(self._docker_workdir_path, - self._logs_dir_path), + os.path.join(self._docker_workdir_path, self._logs_dir_path), os.path.join(self._host_workdir_path, self._params_dir_path): - os.path.join(self._docker_workdir_path, - self._params_dir_path) + os.path.join(self._docker_workdir_path, self._params_dir_path) + } + else: + mounts = { + os.path.join(self._params_root_path, self._data_dir_path): + os.path.join(self._docker_workdir_path, self._data_dir_path), + os.path.join(self._params_root_path, self._logs_dir_path): + os.path.join(self._docker_workdir_path, self._logs_dir_path), + os.path.join(self._params_root_path, self._params_dir_path): + os.path.join(self._docker_workdir_path, self._params_dir_path) } # Expose container port if it exists @@ -541,8 +577,32 @@ def _create_service(self, container_service_name = '{}-{}-{}'.format( service_app_name, service_type.lower(), service.id.split('-')[0]) + if service_type in ["TRAIN", "INFERENCE"]: + gpu_allocated = dict() + gpu_in_use_by_train = self._meta_store.get_services(service_type = "TRAIN") + for service_info in gpu_in_use_by_train: + if service_info.status in ["RUNNING", "DEPLOYING"] and "default" != service_info.container_service_info["node_id"]: + gpu_node_name = service_info.container_service_info["node_id"] + if gpu_node_name not in gpu_allocated: + gpu_allocated[gpu_node_name] = [] + gpu_list = (service_info.container_service_info["gpu_list"] if "gpu_list" in service_info.container_service_info else "").split(',') + for gpu_idx in gpu_list: + gpu_allocated[gpu_node_name].append(gpu_idx.strip()) + + gpu_in_use_by_inference = self._meta_store.get_services(service_type = "INFERENCE") + for service_info in gpu_in_use_by_inference: + if service_info.status in ["RUNNING", "DEPLOYING"] and "default" != service_info.container_service_info["node_id"]: + gpu_node_name = service_info.container_service_info["node_id"] + if gpu_node_name not in gpu_allocated: + gpu_allocated[gpu_node_name] = [] + gpu_list = (service_info.container_service_info["gpu_list"] if "gpu_list" in service_info.container_service_info else "").split(',') + gpu_allocated[gpu_node_name].append(gpu_list) + else: + gpu_allocated=None + container_service = self._container_manager.create_service( service_name=container_service_name, + 
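+            # `gpu_allocated` (assembled above) maps a node id to the GPU indices
+            # already claimed by RUNNING/DEPLOYING train and inference services on
+            # that node, e.g. {'node-1': ['0', '1']} (illustrative values); it is
+            # None for service types other than TRAIN/INFERENCE. It is passed to
+            # the container manager below, presumably so newly scheduled services
+            # avoid double-allocating those devices.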
service_type=service_type, docker_image=docker_image, replicas=replicas, args=args, @@ -550,7 +610,8 @@ def _create_service(self, mounts=mounts, publish_port=publish_port, gpus=gpus, - dist_workers=dist_workers) + dist_workers=dist_workers, + gpu_allocated=gpu_allocated) self._meta_store.mark_service_as_deploying( service, @@ -573,8 +634,7 @@ def _create_service(self, service_port=int(self._predictor_port)) self._container_manager.update_ingress(ingress_name=_ingress_name, - ingress_body=json.loads(ingress_info.ingress_body) - ) + ingress_body=json.loads(ingress_info.ingress_body)) self._meta_store.commit() diff --git a/singa_auto/advisor/requirements.txt b/singa_auto/advisor/requirements.txt index bdfb7dfa..88c51e2c 100644 --- a/singa_auto/advisor/requirements.txt +++ b/singa_auto/advisor/requirements.txt @@ -1,3 +1,2 @@ scikit-learn==0.22 scikit-optimize==0.5.2 -tensorflow==1.15.5 diff --git a/singa_auto/client/client.py b/singa_auto/client/client.py index c5adf77d..b46d6970 100644 --- a/singa_auto/client/client.py +++ b/singa_auto/client/client.py @@ -1,862 +1,862 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -import requests -import json -import pickle -import os -from functools import wraps -from typing import Type, Dict, List, Any - -from singa_auto.constants import ModelAccessRight, ModelDependencies, Budget, BudgetOption, \ - InferenceBudget, InferenceBudgetOption, UserType, ModelType -from singa_auto.model import Params, BaseModel -from singa_auto.error_code import generate_error - - -class SingaAutoConnectionError(ConnectionError): - pass - - -DOCS_URL = 'https://nginyc.github.io/rafiki/docs/latest/docs/src/python/rafiki.client.Client.html' - -def rafiki_response_handler(resp): - # if isinstance(resp, dict): - # if resp.get('success', 0) == 0: - # return resp['data'] - # else: - # raise generate_error(resp.get('error_code', 500)) - # else: - # return resp - return resp - -# Returns a decorator that warns user about the method being deprecated -def _deprecated(msg=None): - - def deco(func): - nonlocal msg - msg = msg or f'`{func.__name__}` has been deprecated.' - - @wraps(func) - def deprecated_func(*args, **kwargs): - _warn(f'{msg}\n' \ - f'Refer to the updated documentation at {DOCS_URL}') - return func(*args, **kwargs) - - return deprecated_func - - return deco - - -class Client: - ''' - Initializes the Client to connect to a running - SINGA-Auto Admin instance that the Client connects to. 
- - :param admin_host: Host of SINGA-Auto Admin - :param admin_port: Port of SINGA-Auto Admin - ''' - - def __init__(self, - admin_host: str = os.environ.get('SINGA_AUTO_ADDR', - 'localhost'), - admin_port: int = os.environ.get('ADMIN_EXT_PORT', 3000)): - self._admin_host = admin_host - self._admin_port = admin_port - self._token = None - self._user = None - - def login(self, email: str, password: str) -> Dict[str, Any]: - ''' - Creates a login session as a SINGA-Auto user. You will have to be logged in to perform any actions. - - App developers can create, list and stop train and inference jobs, as well as list models. - Model developers can create and list models. - - The login session (the session token) expires in 1 hour. - - :param email: User's email - :param password: User's password - - :returns: Logged-in user as dictionary - ''' - data = self._post('/tokens', - json={ - 'email': email, - 'password': password - }) - self._token = data['token'] - - # Save user's data - self._user = {'id': data['user_id'], 'user_type': data['user_type']} - - return self._user - - def get_current_user(self) -> Dict[str, Any]: - ''' - Gets currently logged in user's data. - - :returns: Current user as dictionary, or ``None`` if client is not logged in - ''' - return self._user - - def logout(self): - ''' - Clears the current login session. - ''' - self._token = None - self._user = None - - #################################### - # User - #################################### - - def create_user(self, email: str, password: str, - user_type: UserType) -> Dict[str, Any]: - ''' - Creates a SINGA-Auto user. - - Only admins can create users (except for admins). - Only superadmins can create admins. - - :param email: The new user's email - :param password: The new user's password - :param user_type: The new user's type - - :returns: Created user as dictionary - ''' - data = self._post('/users', - json={ - 'email': email, - 'password': password, - 'user_type': user_type - }) - return data - - @_deprecated('`create_users` has been removed') - def create_users(self, *args, **kwargs): - pass - - def get_users(self) -> List[Dict[str, Any]]: - ''' - Lists all SINGA-Auto users. - - Only admins can list all users. - - :returns: List of users as list of dictionaries - ''' - data = self._get('/users') - return data - - def ban_user(self, email: str) -> Dict[str, Any]: - ''' - Bans a SINGA-Auto user, disallowing logins. - - This action is irrevisible. - Only admins can ban users (except for admins). - Only superadmins can ban admins. - - :param email: The user's email - - :returns: Banned user as dictionary - ''' - data = self._delete('/users', json={'email': email}) - return data - - #################################### - # Datasets - #################################### - - def create_dataset(self, - name: str, - task: str, - dataset_path: str = None, - dataset_url: str = None) -> Dict[str, Any]: - ''' - Creates a dataset on SINGA-Auto, either by uploading the dataset file from your filesystem or specifying a URL where the dataset file can be downloaded. - The dataset should be in a format specified by the task - Either `dataset_url` or `dataset_path` should be specified. - - Only admins, model developers and app developers can manage their own datasets. 
- - :param name: Name for the dataset, does not need to be unique - :param task: Task associated to the dataset - :param dataset_path: Path to the dataset file to upload from the local filesystem - :param dataset_url: Publicly accessible URL where the dataset file can be downloaded - :returns: Created dataset as dictionary - ''' - - dataset = dict() - - form_data = {'name': name, 'task': task, 'dataset_url': dataset_url} - - if dataset_path is not None: - dataset = { - 'dataset': ('dataset', open(dataset_path, - 'rb'), 'application/zip') - } - else: - print( - 'Waiting for server finish downloading the dataset from URL...') - - data = self._post_stream(path='/datasets', - files=dataset, - form_data=form_data) - - return data - - def get_datasets(self, task: str = None) -> List[Dict[str, Any]]: - ''' - Lists all datasets owned by the current user, optionally filtering by task. - - :param task: Task name - :returns: List of datasets as list of dictionaries - ''' - data = self._get('/datasets', params={'task': task}) - return data - - #################################### - # Models - #################################### - - def create_model(self, - name: str, - task: str, - model_file_path: str, - model_class: str, - model_preload_file_path: str = None, - dependencies: ModelDependencies = None, - access_right: ModelAccessRight = ModelAccessRight.PRIVATE, - docker_image: str = None, - model_type: str = ModelType.PYTHON_FILE, - model_file_name: str = None, - model_description: str = None) -> Dict[str, Any]: - ''' - Creates a model on SINGA-Auto. - - Only admins & model developers can manage models. - - :param name: Name of the model, which must be unique across all models added by the current user - :param task: Task associated with the model, where the model must adhere to the specification of the task - :param model_file_path: Path to a single Python file that contains the definition for the model class - :param model_class: The name of the model class inside the Python file. This class should implement :class:`singa_auto.model.BaseModel` - :param dependencies: List of Python dependencies & their versions - :param access_right: Model access right - :param model_preload_file_path: pretrained mdoel file - :param docker_image: A custom Docker image that extends ``singa_auto/singa_auto_worker``, publicly available on Docker Hub. - :returns: Created model as dictionary - - Refer to :ref:`model-development` for more details on how to write & test models for SINGA-Auto. - - ``model_file_path`` should point to a *single* file that contains all necessary Python code for the model's implementation. - If the Python file imports any external Python modules, you should list it in ``dependencies`` or create a custom - ``docker_image``. - - If a model's ``access_right`` is set to ``PUBLIC``, this model will be publicly available to all other users on SINGA-Auto for training - and inference. By default, a model's access is ``PRIVATE``. - - ``dependencies`` should be a dictionary of ``{ : }``, where - ```` corresponds to the name of the Python Package Index (PyPI) package (e.g. ``tensorflow``) - and ```` corresponds to the version of the PyPI package (e.g. ``1.12.0``). - Refer to :ref:`configuring-model-environment` to understand more about this option. 
- ''' - - model_files = { - 'model_file_bytes': (model_file_path, open(model_file_path, 'rb'), - 'application/octet-stream') - } - pretrained_files = {} - - if model_preload_file_path is not None: - pretrained_files = {'checkpoint_id': ( - model_preload_file_path, - open(model_preload_file_path, 'rb'), - 'application/octet-stream')} - - files = {**model_files, **pretrained_files} - - form_data = { - 'name': name, - 'task': task, - 'dependencies': json.dumps(dependencies), - 'docker_image': docker_image, - 'model_class': model_class, - 'access_right': access_right, - 'model_type': model_type, - 'model_file_name': model_file_name, - 'model_description': model_description - } - - data = self._post_stream(path='/models', - files=files, - form_data=form_data) - - return data - - def get_model(self, model_id: str) -> Dict[str, Any]: - ''' - Retrieves details of a single model. - - Model developers can only view their own models. - - :param model_id: ID of model - :returns: Model as dictionary - ''' - _note('`get_model` now requires `model_id` instead of `name`') - - data = self._get('/models/{}'.format(model_id)) - return data - - def download_model_file(self, model_id: str, - out_model_file_path: str) -> Dict[str, any]: - ''' - Downloads the Python model class file for the SINGA-Auto model. - - Model developers can only download their own models. - - :param model_id: ID of model - :param out_model_file_path: Absolute/relative path to save model class file to - :returns: Model as dictionary - ''' - _note('`download_model_file` now requires `model_id` instead of `name`') - - model_file_bytes = self._get('/models/{}/model_file'.format(model_id)) - - with open(out_model_file_path, 'wb') as f: - f.write(model_file_bytes) - - data = self.get_model(model_id) - dependencies = data.get('dependencies') - model_class = data.get('model_class') - - print('Model file downloaded to "{}"!'.format( - os.path.join(os.getcwd(), out_model_file_path))) - - if dependencies: - print( - 'You\'ll need to install the following model dependencies locally: {}' - .format(dependencies)) - - print('From the file, import the model class `{}`.'.format(model_class)) - - return data - - @_deprecated( - '`get_models` & `get_models_of_task` have been combined into `get_available_models`' - ) - def get_models(self, *args, **kwargs): - pass - - @_deprecated( - '`get_models` & `get_models_of_task` have been combined into `get_available_models`' - ) - def get_models_of_task(self, *args, **kwargs): - pass - - def get_available_models(self, task: str = None) -> List[Dict[str, Any]]: - ''' - Lists all SINGA-Auto models available to the current user, optionally filtering by task. - - :param task: Task name - :returns: Available models as list of dictionaries - ''' - data = self._get('/models/available', params={'task': task}) - return data - - def delete_model(self, model_id: str) -> Dict[str, Any]: - ''' - Deletes a single model. Models that have been used in train jobs cannot be deleted. - - Model developers can only delete their own models. 
- - :param str model_id: ID of model - :returns: Deleted model as dictionary - ''' - data = self._delete('/models/{}'.format(model_id)) - return data - - #################################### - # Train Jobs - #################################### - - def create_train_job(self, - app: str, - task: str, - train_dataset_id: str, - val_dataset_id: str, - budget: Budget, - annotation_dataset_id: str = None, - models: List[str] = None, - train_args: Dict[str, any] = None) -> Dict[str, Any]: - ''' - Creates and starts a train job on SINGA-Auto. - - A train job is uniquely identified by user, its associated app, and the app version (returned in output). - - Only admins, model developers & app developers can manage train jobs. Model developers & app developers can only manage their own train jobs. - - :param app: Name of the app associated with the train job - :param task: Task associated with the train job, - the train job will train models associated with the task - :param train_dataset_id: ID of the train dataset, previously created on SINGA-Auto - :param val_dataset_id: ID of the validation dataset, previously created on SINGA-Auto - :param budget: Budget for train job - :param models: List of IDs of model to use for train job. Defaults to all available models - :param train_args: Additional arguments to pass to models during training, if any. - Refer to the task's specification for appropriate arguments - :returns: Created train job as dictionary - - If ``models`` is unspecified, all models accessible to the user for the specified task will be used. - - ``budget`` should be a dictionary of ``{ : }``, where - ```` is one of :class:`singa_auto.constants.BudgetOption` and - ```` specifies the amount for the associated budget option. - - The following describes the budget options available: - - ===================== ===================== - **Budget Option** **Description** - --------------------- --------------------- - ``TIME_HOURS`` Max no. of hours to train (soft target). Defaults to 0.1. - ``GPU_COUNT`` No. of GPUs to allocate for training, across all models. Defaults to 0. - ``MODEL_TRIAL_COUNT`` Max no. of trials to conduct for each model (soft target). -1 for unlimited. Defaults to -1. - ===================== ===================== - ''' - _note( - '`create_train_job` now requires `models` as a list of model IDs instead of a list of model names' - ) - - if 'ENABLE_GPU' in budget: - _warn('The `ENABLE_GPU` option has been changed to `GPU_COUNT`') - - # Have defaults for budget - budget = { - BudgetOption.TIME_HOURS: 0.1, - BudgetOption.GPU_COUNT: 0, - **budget - } - - postJSON = { - 'app': app, - 'task': task, - 'train_dataset_id': train_dataset_id, - 'val_dataset_id': val_dataset_id, - 'budget': budget, - } - if train_args: - postJSON['train_args'] = train_args - if models: - postJSON['model_ids'] = models - if annotation_dataset_id: - postJSON['annotation_dataset_id'] = annotation_dataset_id - - print("postJSON: ", postJSON) - # print will show up in docker exec terminal - - data = self._post('/train_jobs', json=postJSON) - return data - - def get_train_jobs_by_user(self, user_id: str) -> List[Dict[str, Any]]: - ''' - Lists all of user's train jobs on SINGA-Auto. - - :param user_id: ID of the user - :returns: Train jobs as list of dictionaries - ''' - data = self._get('/train_jobs', params={'user_id': user_id}) - return data - - def get_train_jobs_of_app(self, app: str) -> List[Dict[str, Any]]: - ''' - Lists all of current user's train jobs associated to the app name on SINGA-Auto. 
- - :param app: Name of the app - :returns: Train jobs as list of dictionaries - ''' - data = self._get('/train_jobs/app', params={'app': app}) - return data - - def get_train_job(self, app: str, app_version: int = -1) -> Dict[str, Any]: - ''' - Retrieves details of the current user's train job identified by an app and an app version, - including workers' details. - - :param app: Name of the app - :param app_version: Version of the app (-1 for latest version) - :returns: Train job as dictionary - ''' - data = self._get('/train_jobs/app/app_version', params={'app': app, 'app_version': app_version}) - return data - - def stop_train_job(self, app: str, app_version: int = -1) -> Dict[str, Any]: - ''' - Prematurely stops the current user's train job identified by an app and an app version. - Otherwise, the train job should stop by itself when its budget is reached. - - :param app: Name of the app - :param app_version: Version of the app (-1 for latest version) - :returns: Stopped train job as dictionary - ''' - data = self._post('/train_jobs/app/app_version/stop', json={'app': app, 'app_version': app_version}) - return data - - #################################### - # Trials - #################################### - - def get_trial(self, trial_id: str) -> Dict[str, Any]: - ''' - Gets a specific trial. - - :param trial_id: ID of trial - :returns: Trial as dictionary - ''' - data = self._get('/trials/{}'.format(trial_id)) - return data - - def get_best_trials_of_train_job( - self, - app: str, - app_version: int = -1, - max_count: int = 2) -> List[Dict[str, Any]]: - ''' - Lists the best scoring trials of the current user's train job identified by an app and an app version, - ordered by descending score. - - :param app: Name of the app - :param app_version: Version of the app (-1 for latest version) - :param max_count: Maximum number of trials to return - :returns: Trials as list of dictionaries - ''' - data = self._get('/train_jobs/{}/{}/trials'.format(app, app_version), - params={ - 'type': 'best', - 'max_count': max_count - }) - return data - - def get_trials_of_train_job(self, - app: str, - app_version: int = -1) -> List[Dict[str, Any]]: - ''' - Lists all trials of the current user's train job identified by an app and an app version, - ordered by when the trial started. - - :param app: Name of the app - :param app_version: Version of the app (-1 for latest version) - :returns: Trials as list of dictionaries - ''' - data = self._get('/train_jobs/app/app_version/trials', params={'app': app, 'app_version': app_version}) - return data - - def get_trial_logs(self, trial_id: str) -> Dict[str, Any]: - ''' - Gets the logs for a specific trial. - - :param trial_id: ID of trial - :returns: Logs of trial as dictionary - ''' - data = self._get('/trials/{}/logs'.format(trial_id)) - return data - - def get_trial_parameters(self, trial_id: str) -> Params: - ''' - Gets parameters of the model associated with the trial. The trial's model parameters must have been saved. - - :param trial_id: ID of trial - :returns: Parameters of the *trained* model associated with the trial - ''' - data = self._get('/trials/{}/parameters'.format(trial_id)) - parameters = pickle.loads(data) - return parameters - - def load_trial_model(self, trial_id: str, - ModelClass: Type[BaseModel]) -> BaseModel: - ''' - Loads an instance of a trial's model with the trial's knobs & parameters. 
- - Before this, you must have the trial's model class file already in your local filesystem, - the dependencies of the model must have been installed separately, and the model class must have been - imported and passed into this method. - - Wraps :meth:`get_trial_parameters` and :meth:`get_trial`. - - :param trial_id: ID of trial - :param ModelClass: model class that conincides with the trial's model class - :returns: A *trained* model instance of ``ModelClass``, loaded with the trial's knobs and parameters - ''' - data = self.get_trial(trial_id) - assert 'proposal' in data - knobs = data['proposal']['knobs'] - parameters = self.get_trial_parameters(trial_id) - model_inst = ModelClass(**knobs) - model_inst.load_parameters(parameters) - return model_inst - - #################################### - # Inference Jobs - #################################### - - def create_inference_job(self, - app: str, - app_version: int = -1, - budget: InferenceBudget = None, - description: str = None) -> Dict[str, Any]: - ''' - Creates and starts a inference job on SINGA-Auto with the best-scoring trials of the associated train job. - The train job must have the status of ``STOPPED``.The inference job would be tagged with the train job's app and app version. - Throws an error if an inference job of the same train job is already running. - - In this method's response, `predictor_host` is this inference job's predictor's host. - - Only admins, model developers & app developers can manage inference jobs. Model developers & app developers can only manage their own inference jobs. - - :param app: Name of the app identifying the train job to use - :param app_version: Version of the app identifying the train job to use - :param budget: Budget for inference job - :returns: Created inference job as dictionary - - ``budget`` should be a dictionary of ``{ : }``, where - ```` is one of :class:`singa_auto.constants.InferenceBudgetOption` and - ```` specifies the amount for the associated budget option. - - The following describes the budget options available: - - ===================== ===================== - **Budget Option** **Description** - --------------------- --------------------- - ``GPU_COUNT`` No. of GPUs to allocate for inference, across all trials. Defaults to 0. - ===================== ===================== - ''' - - # Have defaults for budget - budget = {InferenceBudgetOption.GPU_COUNT: 0, **(budget or {})} - - data = self._post('/inference_jobs', - json={ - 'app': app, - 'app_version': app_version, - 'budget': budget, - 'description': description - }) - return data - - def create_inference_job_by_checkpoint(self, - model_name: str, - budget: InferenceBudget = None, - description: str = None) -> Dict[str, Any]: - ''' - Creates and starts a inference job on SINGA-Auto with the best-scoring trials of the associated train job. - The train job must have the status of ``STOPPED``.The inference job would be tagged with the train job's app and app version. - Throws an error if an inference job of the same train job is already running. - - In this method's response, `predictor_host` is this inference job's predictor's host. - - Only admins, model developers & app developers can manage inference jobs. Model developers & app developers can only manage their own inference jobs. 
- - :param app: Name of the app identifying the train job to use - :param app_version: Version of the app identifying the train job to use - :param budget: Budget for inference job - :returns: Created inference job as dictionary - - ``budget`` should be a dictionary of ``{ : }``, where - ```` is one of :class:`singa_auto.constants.InferenceBudgetOption` and - ```` specifies the amount for the associated budget option. - - The following describes the budget options available: - - ===================== ===================== - **Budget Option** **Description** - --------------------- --------------------- - ``GPU_COUNT`` No. of GPUs to allocate for inference, across all trials. Defaults to 0. - ===================== ===================== - ''' - - # Have defaults for budget - budget = { - InferenceBudgetOption.GPU_COUNT: 0, - **(budget or {}) - } - - data = self._post('/inference_jobs/checkpoint', json={ - 'model_name': model_name, - 'budget': budget, - 'description': description - }) - return data - - def get_inference_jobs_by_user(self, user_id: str) -> List[Dict[str, Any]]: - ''' - Lists all of user's inference jobs on SINGA-Auto. - - :param user_id: ID of the user - :returns: Inference jobs as list of dictionaries - ''' - data = self._get('/inference_jobs', params={'user_id': user_id}) - return data - - def get_inference_jobs_of_app(self, app: str) -> List[Dict[str, Any]]: - ''' - Lists all inference jobs associated to an app on SINGA-Auto. - - :param app: Name of the app - :returns: Inference jobs as list of dictionaries - ''' - data = self._get('/inference_jobs/app', params={'app': app}) - return data - - def get_running_inference_job(self, - app: str, - app_version: int = -1) -> Dict[str, Any]: - ''' - Retrieves details of the *running* inference job identified by an app and an app version, - including workers' details. - - :param app: Name of the app - :param app_version: Version of the app (-1 for latest version) - :returns: Inference job as dictionary - ''' - data = self._get('/inference_jobs/app/app_version', params={'app': app, 'app_version': app_version}) - return data - - def stop_inference_job(self, - app: str, - app_version: int = -1) -> Dict[str, Any]: - ''' - Stops the inference job identified by an app and an app version. - - :param app: Name of the app - :param app_version: Version of the app (-1 for latest version) - :returns: Stopped inference job as dictionary - ''' - data = self._post('/inference_jobs/app/app_version/stop', json={'app': app, 'app_version': app_version}) - return data - - # TODO: Add predict method? - - #################################### - # Administrative - #################################### - - def stop_all_jobs(self): - ''' - Stops all train and inference jobs on SINGA-Auto. - - Only the superadmin can call this. 
- ''' - data = self._post('/actions/stop_all_jobs') - return data - - #################################### - # SINGA-Auto Internal - #################################### - - def send_event(self, name, **params): - data = self._post('/event/{}'.format(name), json=params) - return data - - #################################### - # Private - #################################### - - def _get(self, path, params=None): - url = self._make_url(path) - headers = self._get_headers() - res = requests.get(url, headers=headers, params=params or {}) - return self._parse_response(res) - - def _post(self, path, params=None, files=None, form_data=None, json=None): - url = self._make_url(path) - headers = self._get_headers() - res = requests.post(url, - headers=headers, - params=params or {}, - data=form_data, - json=json, - files=files or {}) - return self._parse_response(res) - - def _post_stream(self, path, files=None, form_data=None): - from requests_toolbelt import MultipartEncoder, MultipartEncoderMonitor - - def my_callback(monitor): - progress = (monitor.bytes_read / monitor.len) * 100 - print("\r uploading...:%d%%(%d/%d)" % - (progress, monitor.bytes_read, monitor.len), - end=" ") - - url = self._make_url(path) - headers = self._get_headers() - m = MultipartEncoderMonitor( - MultipartEncoder(fields={ - **files, - **form_data - }), my_callback) - res = requests.post(url, - data=m, - headers={ - **{ - 'Content-Type': m.content_type - }, - **headers - }) - return self._parse_response(res) - - def _delete(self, path, params=None, files=None, form_data=None, json=None): - url = self._make_url(path) - headers = self._get_headers() - res = requests.delete(url, - headers=headers, - params=params or {}, - data=form_data or {}, - json=json, - files=files) - return self._parse_response(res) - - def _make_url(self, path): - url = 'http://{}:{}{}'.format(self._admin_host, self._admin_port, path) - return url - - def _parse_response(self, res): - if res.status_code != 200 and res.status_code != 400: - raise SingaAutoConnectionError(res.text) - - content_type = res.headers.get('content-type') - if content_type == 'application/json': - res = rafiki_response_handler(res.json()) - return res - elif content_type == 'application/octet-stream': - return res.content - else: - raise SingaAutoConnectionError( - 'Invalid response content type: {}'.format(content_type)) - - def _get_headers(self): - if self._token is not None: - return {'Authorization': 'Bearer ' + self._token} - else: - return {} - - -def _warn(msg): - print(f'\033[93mWARNING: {msg}\033[0m') - - -def _note(msg): - print(f'\033[94m{msg}\033[0m') +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +import requests +import json +import pickle +import os +from functools import wraps +from typing import Type, Dict, List, Any + +from singa_auto.constants import ModelAccessRight, ModelDependencies, Budget, BudgetOption, \ + InferenceBudget, InferenceBudgetOption, UserType, ModelType +from singa_auto.model import Params, BaseModel +from singa_auto.error_code import generate_error + + +class SingaAutoConnectionError(ConnectionError): + pass + + +DOCS_URL = 'https://nginyc.github.io/rafiki/docs/latest/docs/src/python/rafiki.client.Client.html' + +def singa_auto_response_handler(resp): + # if isinstance(resp, dict): + # if resp.get('success', 0) == 0: + # return resp['data'] + # else: + # raise generate_error(resp.get('error_code', 500)) + # else: + # return resp + return resp + +# Returns a decorator that warns user about the method being deprecated +def _deprecated(msg=None): + + def deco(func): + nonlocal msg + msg = msg or f'`{func.__name__}` has been deprecated.' + + @wraps(func) + def deprecated_func(*args, **kwargs): + _warn(f'{msg}\n' \ + f'Refer to the updated documentation at {DOCS_URL}') + return func(*args, **kwargs) + + return deprecated_func + + return deco + + +class Client: + ''' + Initializes the Client to connect to a running + SINGA-Auto Admin instance that the Client connects to. + + :param admin_host: Host of SINGA-Auto Admin + :param admin_port: Port of SINGA-Auto Admin + ''' + + def __init__(self, + admin_host: str = os.environ.get('SINGA_AUTO_ADDR', + 'localhost'), + admin_port: int = os.environ.get('ADMIN_EXT_PORT', 3000)): + self._admin_host = admin_host + self._admin_port = admin_port + self._token = None + self._user = None + + def login(self, email: str, password: str) -> Dict[str, Any]: + ''' + Creates a login session as a SINGA-Auto user. You will have to be logged in to perform any actions. + + App developers can create, list and stop train and inference jobs, as well as list models. + Model developers can create and list models. + + The login session (the session token) expires in 1 hour. + + :param email: User's email + :param password: User's password + + :returns: Logged-in user as dictionary + ''' + data = self._post('/tokens', + json={ + 'email': email, + 'password': password + }) + self._token = data['token'] + + # Save user's data + self._user = {'id': data['user_id'], 'user_type': data['user_type']} + + return self._user + + def get_current_user(self) -> Dict[str, Any]: + ''' + Gets currently logged in user's data. + + :returns: Current user as dictionary, or ``None`` if client is not logged in + ''' + return self._user + + def logout(self): + ''' + Clears the current login session. + ''' + self._token = None + self._user = None + + #################################### + # User + #################################### + + def create_user(self, email: str, password: str, + user_type: UserType) -> Dict[str, Any]: + ''' + Creates a SINGA-Auto user. + + Only admins can create users (except for admins). + Only superadmins can create admins. + + :param email: The new user's email + :param password: The new user's password + :param user_type: The new user's type + + :returns: Created user as dictionary + ''' + data = self._post('/users', + json={ + 'email': email, + 'password': password, + 'user_type': user_type + }) + return data + + @_deprecated('`create_users` has been removed') + def create_users(self, *args, **kwargs): + pass + + def get_users(self) -> List[Dict[str, Any]]: + ''' + Lists all SINGA-Auto users. + + Only admins can list all users. 
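For orientation, a minimal usage sketch of the constructor and `login` defined above; the import path, host, port and credentials are placeholders rather than values taken from this patch:

    from singa_auto.client.client import Client   # assumed import path for this module

    client = Client(admin_host='localhost', admin_port=3000)   # defaults follow SINGA_AUTO_ADDR / ADMIN_EXT_PORT
    user = client.login(email='admin@example.com', password='change-me')   # placeholder credentials
    print(user)   # expected to contain the user's 'id' and 'user_type'
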
+ + :returns: List of users as list of dictionaries + ''' + data = self._get('/users') + return data + + def ban_user(self, email: str) -> Dict[str, Any]: + ''' + Bans a SINGA-Auto user, disallowing logins. + + This action is irrevisible. + Only admins can ban users (except for admins). + Only superadmins can ban admins. + + :param email: The user's email + + :returns: Banned user as dictionary + ''' + data = self._delete('/users', json={'email': email}) + return data + + #################################### + # Datasets + #################################### + + def create_dataset(self, + name: str, + task: str, + dataset_path: str = None, + dataset_url: str = None) -> Dict[str, Any]: + ''' + Creates a dataset on SINGA-Auto, either by uploading the dataset file from your filesystem or specifying a URL where the dataset file can be downloaded. + The dataset should be in a format specified by the task + Either `dataset_url` or `dataset_path` should be specified. + + Only admins, model developers and app developers can manage their own datasets. + + :param name: Name for the dataset, does not need to be unique + :param task: Task associated to the dataset + :param dataset_path: Path to the dataset file to upload from the local filesystem + :param dataset_url: Publicly accessible URL where the dataset file can be downloaded + :returns: Created dataset as dictionary + ''' + + dataset = dict() + + form_data = {'name': name, 'task': task, 'dataset_url': dataset_url} + + if dataset_path is not None: + dataset = { + 'dataset': ('dataset', open(dataset_path, + 'rb'), 'application/zip') + } + else: + print( + 'Waiting for server finish downloading the dataset from URL...') + + data = self._post_stream(path='/datasets', + files=dataset, + form_data=form_data) + + return data + + def get_datasets(self, task: str = None) -> List[Dict[str, Any]]: + ''' + Lists all datasets owned by the current user, optionally filtering by task. + + :param task: Task name + :returns: List of datasets as list of dictionaries + ''' + data = self._get('/datasets', params={'task': task}) + return data + + #################################### + # Models + #################################### + + def create_model(self, + name: str, + task: str, + model_file_path: str, + model_class: str, + model_preload_file_path: str = None, + dependencies: ModelDependencies = None, + access_right: ModelAccessRight = ModelAccessRight.PRIVATE, + docker_image: str = None, + model_type: str = ModelType.PYTHON_FILE, + model_file_name: str = None, + model_description: str = None) -> Dict[str, Any]: + ''' + Creates a model on SINGA-Auto. + + Only admins & model developers can manage models. + + :param name: Name of the model, which must be unique across all models added by the current user + :param task: Task associated with the model, where the model must adhere to the specification of the task + :param model_file_path: Path to a single Python file that contains the definition for the model class + :param model_class: The name of the model class inside the Python file. This class should implement :class:`singa_auto.model.BaseModel` + :param dependencies: List of Python dependencies & their versions + :param access_right: Model access right + :param model_preload_file_path: pretrained mdoel file + :param docker_image: A custom Docker image that extends ``singa_auto/singa_auto_worker``, publicly available on Docker Hub. 
+ :returns: Created model as dictionary + + Refer to :ref:`model-development` for more details on how to write & test models for SINGA-Auto. + + ``model_file_path`` should point to a *single* file that contains all necessary Python code for the model's implementation. + If the Python file imports any external Python modules, you should list it in ``dependencies`` or create a custom + ``docker_image``. + + If a model's ``access_right`` is set to ``PUBLIC``, this model will be publicly available to all other users on SINGA-Auto for training + and inference. By default, a model's access is ``PRIVATE``. + + ``dependencies`` should be a dictionary of ``{ : }``, where + ```` corresponds to the name of the Python Package Index (PyPI) package (e.g. ``tensorflow``) + and ```` corresponds to the version of the PyPI package (e.g. ``1.12.0``). + Refer to :ref:`configuring-model-environment` to understand more about this option. + ''' + + model_files = { + 'model_file_bytes': (model_file_path, open(model_file_path, 'rb'), + 'application/octet-stream') + } + pretrained_files = {} + + if model_preload_file_path is not None: + pretrained_files = {'checkpoint_id': ( + model_preload_file_path, + open(model_preload_file_path, 'rb'), + 'application/octet-stream')} + + files = {**model_files, **pretrained_files} + + form_data = { + 'name': name, + 'task': task, + 'dependencies': json.dumps(dependencies), + 'docker_image': docker_image, + 'model_class': model_class, + 'access_right': access_right, + 'model_type': model_type, + 'model_file_name': model_file_name, + 'model_description': model_description + } + + data = self._post_stream(path='/models', + files=files, + form_data=form_data) + + return data + + def get_model(self, model_id: str) -> Dict[str, Any]: + ''' + Retrieves details of a single model. + + Model developers can only view their own models. + + :param model_id: ID of model + :returns: Model as dictionary + ''' + _note('`get_model` now requires `model_id` instead of `name`') + + data = self._get('/models/{}'.format(model_id)) + return data + + def download_model_file(self, model_id: str, + out_model_file_path: str) -> Dict[str, any]: + ''' + Downloads the Python model class file for the SINGA-Auto model. + + Model developers can only download their own models. 
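A hedged sketch of `create_model` as documented above, pinning a single PyPI dependency; the file path, class name, task and version are illustrative, not part of this change:

    model = client.create_model(
        name='MyCnn',
        task='IMAGE_CLASSIFICATION',                  # illustrative task name
        model_file_path='examples/models/my_cnn.py',  # single Python file containing the class
        model_class='MyCnn',                          # must implement singa_auto.model.BaseModel
        dependencies={'tensorflow': '1.12.0'})        # {<pypi-package-name>: <version>}
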
+ + :param model_id: ID of model + :param out_model_file_path: Absolute/relative path to save model class file to + :returns: Model as dictionary + ''' + _note('`download_model_file` now requires `model_id` instead of `name`') + + model_file_bytes = self._get('/models/{}/model_file'.format(model_id)) + + with open(out_model_file_path, 'wb') as f: + f.write(model_file_bytes) + + data = self.get_model(model_id) + dependencies = data.get('dependencies') + model_class = data.get('model_class') + + print('Model file downloaded to "{}"!'.format( + os.path.join(os.getcwd(), out_model_file_path))) + + if dependencies: + print( + 'You\'ll need to install the following model dependencies locally: {}' + .format(dependencies)) + + print('From the file, import the model class `{}`.'.format(model_class)) + + return data + + @_deprecated( + '`get_models` & `get_models_of_task` have been combined into `get_available_models`' + ) + def get_models(self, *args, **kwargs): + pass + + @_deprecated( + '`get_models` & `get_models_of_task` have been combined into `get_available_models`' + ) + def get_models_of_task(self, *args, **kwargs): + pass + + def get_available_models(self, task: str = None) -> List[Dict[str, Any]]: + ''' + Lists all SINGA-Auto models available to the current user, optionally filtering by task. + + :param task: Task name + :returns: Available models as list of dictionaries + ''' + data = self._get('/models/available', params={'task': task}) + return data + + def delete_model(self, model_id: str) -> Dict[str, Any]: + ''' + Deletes a single model. Models that have been used in train jobs cannot be deleted. + + Model developers can only delete their own models. + + :param str model_id: ID of model + :returns: Deleted model as dictionary + ''' + data = self._delete('/models/{}'.format(model_id)) + return data + + #################################### + # Train Jobs + #################################### + + def create_train_job(self, + app: str, + task: str, + train_dataset_id: str, + val_dataset_id: str, + budget: Budget, + annotation_dataset_id: str = None, + models: List[str] = None, + train_args: Dict[str, any] = None) -> Dict[str, Any]: + ''' + Creates and starts a train job on SINGA-Auto. + + A train job is uniquely identified by user, its associated app, and the app version (returned in output). + + Only admins, model developers & app developers can manage train jobs. Model developers & app developers can only manage their own train jobs. + + :param app: Name of the app associated with the train job + :param task: Task associated with the train job, + the train job will train models associated with the task + :param train_dataset_id: ID of the train dataset, previously created on SINGA-Auto + :param val_dataset_id: ID of the validation dataset, previously created on SINGA-Auto + :param budget: Budget for train job + :param models: List of IDs of model to use for train job. Defaults to all available models + :param train_args: Additional arguments to pass to models during training, if any. + Refer to the task's specification for appropriate arguments + :returns: Created train job as dictionary + + If ``models`` is unspecified, all models accessible to the user for the specified task will be used. + + ``budget`` should be a dictionary of ``{ : }``, where + ```` is one of :class:`singa_auto.constants.BudgetOption` and + ```` specifies the amount for the associated budget option. 
+ + The following describes the budget options available: + + ===================== ===================== + **Budget Option** **Description** + --------------------- --------------------- + ``TIME_HOURS`` Max no. of hours to train (soft target). Defaults to 0.1. + ``GPU_COUNT`` No. of GPUs to allocate for training, across all models. Defaults to 0. + ``MODEL_TRIAL_COUNT`` Max no. of trials to conduct for each model (soft target). -1 for unlimited. Defaults to -1. + ===================== ===================== + ''' + _note( + '`create_train_job` now requires `models` as a list of model IDs instead of a list of model names' + ) + + if 'ENABLE_GPU' in budget: + _warn('The `ENABLE_GPU` option has been changed to `GPU_COUNT`') + + # Have defaults for budget + budget = { + BudgetOption.TIME_HOURS: 0.1, + BudgetOption.GPU_COUNT: 0, + **budget + } + + postJSON = { + 'app': app, + 'task': task, + 'train_dataset_id': train_dataset_id, + 'val_dataset_id': val_dataset_id, + 'budget': budget, + } + if train_args: + postJSON['train_args'] = train_args + if models: + postJSON['model_ids'] = models + if annotation_dataset_id: + postJSON['annotation_dataset_id'] = annotation_dataset_id + + print("postJSON: ", postJSON) + # print will show up in docker exec terminal + + data = self._post('/train_jobs', json=postJSON) + return data + + def get_train_jobs_by_user(self, user_id: str) -> List[Dict[str, Any]]: + ''' + Lists all of user's train jobs on SINGA-Auto. + + :param user_id: ID of the user + :returns: Train jobs as list of dictionaries + ''' + data = self._get('/train_jobs', params={'user_id': user_id}) + return data + + def get_train_jobs_of_app(self, app: str) -> List[Dict[str, Any]]: + ''' + Lists all of current user's train jobs associated to the app name on SINGA-Auto. + + :param app: Name of the app + :returns: Train jobs as list of dictionaries + ''' + data = self._get('/train_jobs/app', params={'app': app}) + return data + + def get_train_job(self, app: str, app_version: int = -1) -> Dict[str, Any]: + ''' + Retrieves details of the current user's train job identified by an app and an app version, + including workers' details. + + :param app: Name of the app + :param app_version: Version of the app (-1 for latest version) + :returns: Train job as dictionary + ''' + data = self._get('/train_jobs/app/app_version', params={'app': app, 'app_version': app_version}) + return data + + def stop_train_job(self, app: str, app_version: int = -1) -> Dict[str, Any]: + ''' + Prematurely stops the current user's train job identified by an app and an app version. + Otherwise, the train job should stop by itself when its budget is reached. + + :param app: Name of the app + :param app_version: Version of the app (-1 for latest version) + :returns: Stopped train job as dictionary + ''' + data = self._post('/train_jobs/app/app_version/stop', json={'app': app, 'app_version': app_version}) + return data + + #################################### + # Trials + #################################### + + def get_trial(self, trial_id: str) -> Dict[str, Any]: + ''' + Gets a specific trial. + + :param trial_id: ID of trial + :returns: Trial as dictionary + ''' + data = self._get('/trials/{}'.format(trial_id)) + return data + + def get_best_trials_of_train_job( + self, + app: str, + app_version: int = -1, + max_count: int = 2) -> List[Dict[str, Any]]: + ''' + Lists the best scoring trials of the current user's train job identified by an app and an app version, + ordered by descending score. 
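To make the budget options above concrete, a sketch that uploads datasets and starts a train job; the task name and zip paths are placeholders, and it assumes the returned dictionaries expose an 'id' field. `BudgetOption` is the constants class this module already imports:

    from singa_auto.constants import BudgetOption

    train_ds = client.create_dataset('my_train_data', 'IMAGE_CLASSIFICATION', dataset_path='data/train.zip')
    val_ds = client.create_dataset('my_val_data', 'IMAGE_CLASSIFICATION', dataset_path='data/val.zip')

    job = client.create_train_job(
        app='my_app',
        task='IMAGE_CLASSIFICATION',
        train_dataset_id=train_ds['id'],
        val_dataset_id=val_ds['id'],
        models=[model['id']],
        budget={BudgetOption.TIME_HOURS: 0.5,          # soft cap on training time
                BudgetOption.GPU_COUNT: 1,             # GPUs shared across all models
                BudgetOption.MODEL_TRIAL_COUNT: -1})   # unlimited trials per model
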
+ + :param app: Name of the app + :param app_version: Version of the app (-1 for latest version) + :param max_count: Maximum number of trials to return + :returns: Trials as list of dictionaries + ''' + data = self._get('/train_jobs/{}/{}/trials'.format(app, app_version), + params={ + 'type': 'best', + 'max_count': max_count + }) + return data + + def get_trials_of_train_job(self, + app: str, + app_version: int = -1) -> List[Dict[str, Any]]: + ''' + Lists all trials of the current user's train job identified by an app and an app version, + ordered by when the trial started. + + :param app: Name of the app + :param app_version: Version of the app (-1 for latest version) + :returns: Trials as list of dictionaries + ''' + data = self._get('/train_jobs/app/app_version/trials', params={'app': app, 'app_version': app_version}) + return data + + def get_trial_logs(self, trial_id: str) -> Dict[str, Any]: + ''' + Gets the logs for a specific trial. + + :param trial_id: ID of trial + :returns: Logs of trial as dictionary + ''' + data = self._get('/trials/{}/logs'.format(trial_id)) + return data + + def get_trial_parameters(self, trial_id: str) -> Params: + ''' + Gets parameters of the model associated with the trial. The trial's model parameters must have been saved. + + :param trial_id: ID of trial + :returns: Parameters of the *trained* model associated with the trial + ''' + data = self._get('/trials/{}/parameters'.format(trial_id)) + parameters = pickle.loads(data) + return parameters + + def load_trial_model(self, trial_id: str, + ModelClass: Type[BaseModel]) -> BaseModel: + ''' + Loads an instance of a trial's model with the trial's knobs & parameters. + + Before this, you must have the trial's model class file already in your local filesystem, + the dependencies of the model must have been installed separately, and the model class must have been + imported and passed into this method. + + Wraps :meth:`get_trial_parameters` and :meth:`get_trial`. + + :param trial_id: ID of trial + :param ModelClass: model class that conincides with the trial's model class + :returns: A *trained* model instance of ``ModelClass``, loaded with the trial's knobs and parameters + ''' + data = self.get_trial(trial_id) + assert 'proposal' in data + knobs = data['proposal']['knobs'] + parameters = self.get_trial_parameters(trial_id) + model_inst = ModelClass(**knobs) + model_inst.load_parameters(parameters) + return model_inst + + #################################### + # Inference Jobs + #################################### + + def create_inference_job(self, + app: str, + app_version: int = -1, + budget: InferenceBudget = None, + description: str = None) -> Dict[str, Any]: + ''' + Creates and starts a inference job on SINGA-Auto with the best-scoring trials of the associated train job. + The train job must have the status of ``STOPPED``.The inference job would be tagged with the train job's app and app version. + Throws an error if an inference job of the same train job is already running. + + In this method's response, `predictor_host` is this inference job's predictor's host. + + Only admins, model developers & app developers can manage inference jobs. Model developers & app developers can only manage their own inference jobs. 
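The local trial-loading flow described above, sketched end to end; the module `my_cnn` and the `'id'` key on the trial dictionaries are assumptions for illustration:

    from my_cnn import MyCnn   # hypothetical: the downloaded model class file, with its dependencies installed locally

    best_trials = client.get_best_trials_of_train_job(app='my_app', max_count=1)
    trial_id = best_trials[0]['id']                    # assumed key on the trial dictionary
    model_inst = client.load_trial_model(trial_id, MyCnn)
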
+ + :param app: Name of the app identifying the train job to use + :param app_version: Version of the app identifying the train job to use + :param budget: Budget for inference job + :returns: Created inference job as dictionary + + ``budget`` should be a dictionary of ``{ : }``, where + ```` is one of :class:`singa_auto.constants.InferenceBudgetOption` and + ```` specifies the amount for the associated budget option. + + The following describes the budget options available: + + ===================== ===================== + **Budget Option** **Description** + --------------------- --------------------- + ``GPU_COUNT`` No. of GPUs to allocate for inference, across all trials. Defaults to 0. + ===================== ===================== + ''' + + # Have defaults for budget + budget = {InferenceBudgetOption.GPU_COUNT: 0, **(budget or {})} + + data = self._post('/inference_jobs', + json={ + 'app': app, + 'app_version': app_version, + 'budget': budget, + 'description': description + }) + return data + + def create_inference_job_by_checkpoint(self, + model_name: str, + budget: InferenceBudget = None, + description: str = None) -> Dict[str, Any]: + ''' + Creates and starts a inference job on SINGA-Auto with the best-scoring trials of the associated train job. + The train job must have the status of ``STOPPED``.The inference job would be tagged with the train job's app and app version. + Throws an error if an inference job of the same train job is already running. + + In this method's response, `predictor_host` is this inference job's predictor's host. + + Only admins, model developers & app developers can manage inference jobs. Model developers & app developers can only manage their own inference jobs. + + :param app: Name of the app identifying the train job to use + :param app_version: Version of the app identifying the train job to use + :param budget: Budget for inference job + :returns: Created inference job as dictionary + + ``budget`` should be a dictionary of ``{ : }``, where + ```` is one of :class:`singa_auto.constants.InferenceBudgetOption` and + ```` specifies the amount for the associated budget option. + + The following describes the budget options available: + + ===================== ===================== + **Budget Option** **Description** + --------------------- --------------------- + ``GPU_COUNT`` No. of GPUs to allocate for inference, across all trials. Defaults to 0. + ===================== ===================== + ''' + + # Have defaults for budget + budget = { + InferenceBudgetOption.GPU_COUNT: 0, + **(budget or {}) + } + + data = self._post('/inference_jobs/checkpoint', json={ + 'model_name': model_name, + 'budget': budget, + 'description': description + }) + return data + + def get_inference_jobs_by_user(self, user_id: str) -> List[Dict[str, Any]]: + ''' + Lists all of user's inference jobs on SINGA-Auto. + + :param user_id: ID of the user + :returns: Inference jobs as list of dictionaries + ''' + data = self._get('/inference_jobs', params={'user_id': user_id}) + return data + + def get_inference_jobs_of_app(self, app: str) -> List[Dict[str, Any]]: + ''' + Lists all inference jobs associated to an app on SINGA-Auto. 
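A minimal sketch of starting inference once the train job has stopped, using the `InferenceBudgetOption` constants imported at the top of this file; the app name is a placeholder and `predictor_host` is the response key named in the docstring above:

    from singa_auto.constants import InferenceBudgetOption

    inference_job = client.create_inference_job(
        app='my_app',
        budget={InferenceBudgetOption.GPU_COUNT: 1})
    print(inference_job.get('predictor_host'))   # host of this inference job's predictor
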
+ + :param app: Name of the app + :returns: Inference jobs as list of dictionaries + ''' + data = self._get('/inference_jobs/app', params={'app': app}) + return data + + def get_running_inference_job(self, + app: str, + app_version: int = -1) -> Dict[str, Any]: + ''' + Retrieves details of the *running* inference job identified by an app and an app version, + including workers' details. + + :param app: Name of the app + :param app_version: Version of the app (-1 for latest version) + :returns: Inference job as dictionary + ''' + data = self._get('/inference_jobs/app/app_version', params={'app': app, 'app_version': app_version}) + return data + + def stop_inference_job(self, + app: str, + app_version: int = -1) -> Dict[str, Any]: + ''' + Stops the inference job identified by an app and an app version. + + :param app: Name of the app + :param app_version: Version of the app (-1 for latest version) + :returns: Stopped inference job as dictionary + ''' + data = self._post('/inference_jobs/app/app_version/stop', json={'app': app, 'app_version': app_version}) + return data + + # TODO: Add predict method? + + #################################### + # Administrative + #################################### + + def stop_all_jobs(self): + ''' + Stops all train and inference jobs on SINGA-Auto. + + Only the superadmin can call this. + ''' + data = self._post('/actions/stop_all_jobs') + return data + + #################################### + # SINGA-Auto Internal + #################################### + + def send_event(self, name, **params): + data = self._post('/event/{}'.format(name), json=params) + return data + + #################################### + # Private + #################################### + + def _get(self, path, params=None): + url = self._make_url(path) + headers = self._get_headers() + res = requests.get(url, headers=headers, params=params or {}) + return self._parse_response(res) + + def _post(self, path, params=None, files=None, form_data=None, json=None): + url = self._make_url(path) + headers = self._get_headers() + res = requests.post(url, + headers=headers, + params=params or {}, + data=form_data, + json=json, + files=files or {}) + return self._parse_response(res) + + def _post_stream(self, path, files=None, form_data=None): + from requests_toolbelt import MultipartEncoder, MultipartEncoderMonitor + + def my_callback(monitor): + progress = (monitor.bytes_read / monitor.len) * 100 + print("\r uploading...:%d%%(%d/%d)" % + (progress, monitor.bytes_read, monitor.len), + end=" ") + + url = self._make_url(path) + headers = self._get_headers() + m = MultipartEncoderMonitor( + MultipartEncoder(fields={ + **files, + **form_data + }), my_callback) + res = requests.post(url, + data=m, + headers={ + **{ + 'Content-Type': m.content_type + }, + **headers + }) + return self._parse_response(res) + + def _delete(self, path, params=None, files=None, form_data=None, json=None): + url = self._make_url(path) + headers = self._get_headers() + res = requests.delete(url, + headers=headers, + params=params or {}, + data=form_data or {}, + json=json, + files=files) + return self._parse_response(res) + + def _make_url(self, path): + url = 'http://{}:{}{}'.format(self._admin_host, self._admin_port, path) + return url + + def _parse_response(self, res): + if res.status_code != 200 and res.status_code != 400: + raise SingaAutoConnectionError(res.text) + + content_type = res.headers.get('content-type') + if content_type == 'application/json': + res = singa_auto_response_handler(res.json()) + return res + elif 
content_type == 'application/octet-stream': + return res.content + else: + raise SingaAutoConnectionError( + 'Invalid response content type: {}'.format(content_type)) + + def _get_headers(self): + if self._token is not None: + return {'Authorization': 'Bearer ' + self._token} + else: + return {} + + +def _warn(msg): + print(f'\033[93mWARNING: {msg}\033[0m') + + +def _note(msg): + print(f'\033[94m{msg}\033[0m') diff --git a/singa_auto/container/docker_swarm.py b/singa_auto/container/docker_swarm.py index 582b94a6..8bcd63be 100644 --- a/singa_auto/container/docker_swarm.py +++ b/singa_auto/container/docker_swarm.py @@ -56,6 +56,7 @@ def __init__( def create_service(self, service_name, + service_type, docker_image, replicas, args, @@ -63,7 +64,8 @@ def create_service(self, mounts=None, publish_port=None, gpus=0, - dist_workers=0) -> ContainerService: + dist_workers=0, + gpu_allocated=None) -> ContainerService: if mounts is None: mounts = {} diff --git a/singa_auto/container/kubernetes_operation.py b/singa_auto/container/kubernetes_operation.py index 1c9bbcce..59949da6 100644 --- a/singa_auto/container/kubernetes_operation.py +++ b/singa_auto/container/kubernetes_operation.py @@ -34,6 +34,10 @@ logger = logging.getLogger(__name__) +ENVIRONMENT_VARIABLES_AUTOFORWARD = [ + 'KUBERNETES_ADVERTISE_ADDR',# 'DB_PATH_ON_MASTER', +] + class KubernetesContainerManager(ContainerManager): @@ -50,6 +54,9 @@ def __init__(self, **kwargs): os.getenv('KUBERNETES_SERVICE_HOST'), os.getenv('KUBERNETES_SERVICE_PORT')) + # self._params_root_path = os.environ['DB_PATH_ON_MASTER'] + self._kubernetes_advertise_addr = os.environ['KUBERNETES_ADVERTISE_ADDR'] + # Security part. # In this simple example we are not going to verify the SSL certificate of # the remote cluster (for simplicity reason) @@ -67,6 +74,7 @@ def __init__(self, **kwargs): self._client_deployment = client.AppsV1Api(aApiClient) self._client_service = client.CoreV1Api(aApiClient) self.api_instance = client.NetworkingV1beta1Api(aApiClient) + self._client_networkpolicy = client.NetworkingV1Api(aApiClient) def update_ingress(self, ingress_name: str, ingress_body: dict): paths = self._update_ingress_paths(ingress_body) @@ -125,11 +133,24 @@ def _update_ingress_paths(self, ingress_body: dict) -> list: return paths def destroy_service(self, service: ContainerService): - self._client_deployment.delete_namespaced_deployment(service.id, namespace='default') - self._client_service.delete_namespaced_service(service.id, namespace='default') + try: + self._client_deployment.delete_namespaced_deployment(service.id, namespace='default') + except(Exception): + logger.error('Error while stopping kubernetes deployment {}.'.format(service.id)) + + try: + self._client_networkpolicy.delete_namespaced_network_policy(name=service.id, namespace='default') + except(Exception): + logger.error('Error while stopping kubernetes network policy {}.'.format(service.id)) + + try: + self._client_service.delete_namespaced_service(service.id, namespace='default') + except(Exception): + logger.error('Error while stopping kubernetes service {}.'.format(service.id)) def create_service(self, service_name, + service_type, docker_image, replicas, args, @@ -137,10 +158,14 @@ def create_service(self, mounts=None, publish_port=None, gpus=0, - dist_workers=0) -> ContainerService: + dist_workers=0, + gpu_allocated=None) -> ContainerService: if mounts is None: mounts = {} hostname = service_name + node_name = 'default' + gpu_list = "" + if publish_port is not None: service_config = 
self._create_service_config(service_name, docker_image, replicas, args, environment_vars, mounts, publish_port, @@ -167,7 +192,7 @@ def create_service(self, if gpus > 0: # run the scheduler algorithm, choose the gpu and node for few pods. - node_gpuid = self._get_top_gpus(dist_workers) + node_gpuid = self._get_dist_top_gpus(dist_workers) for index in range(dist_workers): environment_vars["RANK"] = str(index) @@ -209,19 +234,24 @@ def create_service(self, environment_vars, mounts, select_gpu, select_node_name) print("pod_config", pod_config) _retry(self._client_service.create_namespaced_pod)(namespace='default', body=pod_config) - else: - deployment_config = self._create_deployment_config(service_name, docker_image, replicas, - environment_vars, mounts, gpus - ) + list_hostname = [] + list_gpu_selected = [] + deployment_config = self._create_deployment_config(hostname, service_name, service_type, docker_image, replicas, + environment_vars, mounts, gpus, gpu_allocated, list_gpu_selected, list_hostname) + if len(list_hostname) > 0: + node_name = list_hostname[0] + if len(list_gpu_selected) > 0: + gpu_list = list_gpu_selected[0] _retry(self._client_deployment.create_namespaced_deployment)(namespace='default', body=deployment_config) info = { - 'node_id': 'default', + 'node_id': node_name, 'gpu_nos': gpus, 'service_name': service_name, - 'replicas': replicas + 'replicas': replicas, + 'gpu_list': gpu_list, } service = ContainerService( @@ -247,7 +277,8 @@ def _create_pod_config(self, }) volumes.append({ 'name': 'v' + str(mounts_count), - 'hostPath': { + 'nfs':{ + 'server': self._kubernetes_advertise_addr, 'path': k } }) @@ -297,12 +328,17 @@ def _create_pod_config(self, return content def _create_deployment_config(self, + hostname, service_name, + service_type, docker_image, replicas, environment_vars, mounts, - gpus=0 + gpus=0, + gpu_allocated=None, + list_gpu_selected=[], + list_hostname=[] ): content = {} content.setdefault('apiVersion', 'apps/v1') @@ -330,38 +366,50 @@ def _create_deployment_config(self, }) volumes.append({ 'name': 'v' + str(mounts_count), - 'hostPath': { + 'nfs':{ + 'server': self._kubernetes_advertise_addr, 'path': k } }) mounts_count += 1 - template.setdefault('spec', { - 'containers': [container], - 'volumes': volumes - }) env = [{'name': k, 'value': v} for (k, v) in environment_vars.items()] + select_node_name = hostname + if gpus > 0: - node_gpuid = self._get_top_gpus(1) - if node_gpuid and node_gpuid[0]: - select_node_name, select_gpu = node_gpuid[0]["nodeName"], node_gpuid[0]["GPUID"] - # nodeSelector can be used to bind a pod to a node - nodeSelector = {NodeLabes.NodeName: select_node_name} - template["spec"]["nodeSelector"] = nodeSelector - - # NVIDIA_VISIBLE_DEVICES is used to expose a specific gpu to this pod - env.append({"name": "NVIDIA_VISIBLE_DEVICES", "value": select_gpu}) - - container.setdefault('resources', - {'limits': { - 'nvidia.com/gpu': gpus - }}) + node_gpuid = self._get_gpus_on_node(gpus, gpu_allocated) + + if node_gpuid and "max_min_free_node" in node_gpuid and "max_gpu_free_ratio" in node_gpuid: + select_node_name = node_gpuid["max_gpu_free_ratio"] + + list_hostname.append(select_node_name) + + env.append({"name": "NVIDIA_VISIBLE_DEVICES", "value": ', '.join(node_gpuid[select_node_name]["gpu_id"])}) + list_gpu_selected.append(', '.join(node_gpuid[select_node_name]["gpu_id"])) + container.setdefault('env', env) - return content - def _get_top_gpus(self, n) -> List[dict]: + if gpus > 0: + template.setdefault('spec', { + 'nodeName': 
select_node_name, + 'containers': [container], + 'volumes': volumes + }) + container.setdefault('resources', { + 'limits': { + 'nvidia.com/gpu': gpus + }, + }) + else: + template.setdefault('spec', { + # 'nodeName': select_node_name, + 'containers': [container], + 'volumes': volumes + }) + return content + def _get_dist_top_gpus(self, n) -> List[dict]: """ This method is used to find the top n gpus, the one with most free memory nodeInfo is format of following: @@ -476,6 +524,59 @@ def _get_top_gpus(self, n) -> List[dict]: print("node_gpuid: ", node_gpuid) return node_gpuid + def _get_gpus_on_node(self, n, gpu_allocated=None) -> List[dict]: + node_infos = self._client_service.list_node() + + node_gpuid = dict() + + max_min_free_memory = 0 + max_gpu_used_ratio = 0 + + + for node_info in node_infos.items: + # if the node doesnt have gpu label or gpu is false, skip this node + if NodeLabes.Gpu not in node_info.metadata.labels or not node_info.metadata.labels[NodeLabes.Gpu]: + continue + + gpu_summary = node_info.metadata.labels[NodeLabes.GpuSummary] + + node_name = node_info.metadata.labels[NodeLabes.NodeName] + + num_gpu = int(node_info.status.allocatable["nvidia.com/gpu"]) + if num_gpu < 1: + continue + + gpu_used_on_node = [] + if node_name in gpu_allocated: + gpu_used_on_node = gpu_allocated[node_name] + + node_gpus = dict() + for gpu_info in gpu_summary.split("."): + gpu_device_id = gpu_info.split("_")[0] + if gpu_device_id not in gpu_used_on_node: + free_memory = gpu_info.split("_")[1] + node_gpus[gpu_device_id] = free_memory + + if len(node_gpus) < n: + continue + + top_n = sorted(node_gpus.items(), key=lambda d: d[1], reverse=False)[:n] + logger.info("top_n: {}".format(top_n)) + node_gpuid[node_name] = { + "gpu_id": [ele[0] for ele in top_n], + "min_free_memory": top_n[n-1][1], + "gpu_free_ratio": len(node_gpus) / num_gpu, + } + node_min_free_memory = int(node_gpuid[node_name]["min_free_memory"]) + if max_min_free_memory < node_min_free_memory: + max_min_free_memory = node_min_free_memory + node_gpuid["max_min_free_node"] = node_name + node_gpu_used_ratio = len(node_gpus) / num_gpu + if max_gpu_used_ratio < node_gpu_used_ratio: + max_gpu_used_ratio = node_gpu_used_ratio + node_gpuid["max_gpu_free_ratio"] = node_name + return node_gpuid + def _create_clusterip_service_config(self, service_name, publish_port): content = \ {'apiVersion': 'v1', diff --git a/singa_auto/container/requirements.txt b/singa_auto/container/requirements.txt index 8f21ead2..10095770 100644 --- a/singa_auto/container/requirements.txt +++ b/singa_auto/container/requirements.txt @@ -1 +1,2 @@ -docker==3.5.0 +docker==4.4.0 +kubernetes==10.0.1 \ No newline at end of file diff --git a/singa_auto/darknet/model.py b/singa_auto/darknet/model.py new file mode 100644 index 00000000..22ac19d3 --- /dev/null +++ b/singa_auto/darknet/model.py @@ -0,0 +1,375 @@ + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from singa_auto.darknet.utils import build_targets, to_cpu + + +class Upsample(nn.Module): + """ nn.Upsample is deprecated """ + + def __init__(self, scale_factor, mode="nearest"): + super(Upsample, self).__init__() + self.scale_factor = scale_factor + self.mode = mode + + def forward(self, x): + x = F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode) + return x + + +class EmptyLayer(nn.Module): + """Placeholder for 'route' and 'shortcut' layers""" + + def __init__(self): + super(EmptyLayer, self).__init__() + + +class YOLOLayer(nn.Module): + """Detection layer""" + 
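+    # Descriptive note on this layer: per grid cell, the sigmoid-activated centre
+    # offsets are added to that cell's (grid_x, grid_y) coordinates, widths/heights
+    # come from exp(w) * anchor_w and exp(h) * anchor_h with anchors pre-scaled by
+    # the stride, and the decoded boxes are multiplied back by the stride. When
+    # targets are supplied, forward() additionally returns the YOLOv3 loss
+    # (MSE on the box terms, BCE on objectness and class scores).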
+ def __init__(self, anchors, num_classes, img_dim=416): + super(YOLOLayer, self).__init__() + self.anchors = anchors + self.num_anchors = len(anchors) + self.num_classes = num_classes + self.ignore_thresh = 0.5 + self.mse_loss = nn.MSELoss() + self.bce_loss = nn.BCELoss() + self.obj_scale = 1 + self.noobj_scale = 100 + self.metrics = {} + self.img_dim = img_dim + self.grid_size = 0 # grid size + + def compute_grid_offsets(self, grid_size, cuda=True): + self.grid_size = grid_size + g = self.grid_size + FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor + self.stride = self.img_dim / self.grid_size + # Calculate offsets for each grid + self.grid_x = torch.arange(g).repeat(g, 1).view([1, 1, g, g]).type(FloatTensor) + self.grid_y = torch.arange(g).repeat(g, 1).t().view([1, 1, g, g]).type(FloatTensor) + self.scaled_anchors = FloatTensor([(a_w / self.stride, a_h / self.stride) for a_w, a_h in self.anchors]) + self.anchor_w = self.scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1)) + self.anchor_h = self.scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1)) + + def forward(self, x, targets=None, img_dim=None): + + # Tensors for cuda support + FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor + LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor + ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor + + self.img_dim = img_dim + num_samples = x.size(0) + grid_size = x.size(2) + + prediction = ( + x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size) + .permute(0, 1, 3, 4, 2) + .contiguous() + ) + + # Get outputs + x = torch.sigmoid(prediction[..., 0]) # Center x + y = torch.sigmoid(prediction[..., 1]) # Center y + w = prediction[..., 2] # Width + h = prediction[..., 3] # Height + pred_conf = torch.sigmoid(prediction[..., 4]) # Conf + pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred. + + # If grid size does not match current we compute new offsets + if grid_size != self.grid_size: + self.compute_grid_offsets(grid_size, cuda=x.is_cuda) + + # Add offset and scale with anchors + pred_boxes = FloatTensor(prediction[..., :4].shape) + pred_boxes[..., 0] = x.data + self.grid_x + pred_boxes[..., 1] = y.data + self.grid_y + pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w + pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h + + output = torch.cat( + ( + pred_boxes.view(num_samples, -1, 4) * self.stride, + pred_conf.view(num_samples, -1, 1), + pred_cls.view(num_samples, -1, self.num_classes), + ), + -1, + ) + + if targets is None: + return output, 0 + else: + iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets( + pred_boxes=pred_boxes, + pred_cls=pred_cls, + target=targets, + anchors=self.scaled_anchors, + ignore_thresh=self.ignore_thresh, + ) + + # Loss : Mask outputs to ignore non-existing objects (except with conf. 
loss) + loss_x = self.mse_loss(x[obj_mask], tx[obj_mask]) + loss_y = self.mse_loss(y[obj_mask], ty[obj_mask]) + loss_w = self.mse_loss(w[obj_mask], tw[obj_mask]) + loss_h = self.mse_loss(h[obj_mask], th[obj_mask]) + loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask]) + loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask]) + loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj + loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask]) + total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls + + # Metrics + cls_acc = 100 * class_mask[obj_mask].mean() + conf_obj = pred_conf[obj_mask].mean() + conf_noobj = pred_conf[noobj_mask].mean() + conf50 = (pred_conf > 0.5).float() + iou50 = (iou_scores > 0.5).float() + iou75 = (iou_scores > 0.75).float() + detected_mask = conf50 * class_mask * tconf + precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16) + recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16) + recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16) + + self.metrics = { + "loss": to_cpu(total_loss).item(), + "x": to_cpu(loss_x).item(), + "y": to_cpu(loss_y).item(), + "w": to_cpu(loss_w).item(), + "h": to_cpu(loss_h).item(), + "conf": to_cpu(loss_conf).item(), + "cls": to_cpu(loss_cls).item(), + "cls_acc": to_cpu(cls_acc).item(), + "recall50": to_cpu(recall50).item(), + "recall75": to_cpu(recall75).item(), + "precision": to_cpu(precision).item(), + "conf_obj": to_cpu(conf_obj).item(), + "conf_noobj": to_cpu(conf_noobj).item(), + "grid_size": grid_size, + } + + return output, total_loss + +class DarkNet(nn.Module): + """YOLOv3 object detection model""" + + def __init__(self, config_path=None, model_cfg=None, img_size=416): + """ + one of config_path and model_cfg is valid, not both + """ + super(DarkNet, self).__init__() + self._model_cfg = None + + self.module_defs = self.parse_model_config(path=config_path, model_cfg=model_cfg) + self.hyperparams, self.module_list = self.create_modules(self.module_defs) + self.yolo_layers = [layer[0] for layer in self.module_list if hasattr(layer[0], "metrics")] + self.img_size = img_size + self.seen = 0 + self.header_info = np.array([0, 0, 0, self.seen, 0], dtype=np.int32) + + @property + def model_cfg(self): + return self._model_cfg + + def create_modules(self, module_defs): + """ + Constructs module list of layer blocks from module configuration in module_defs + """ + + hyperparams = module_defs.pop(0) + output_filters = [int(hyperparams["channels"])] + module_list = nn.ModuleList() + + for module_i, module_def in enumerate(module_defs): + modules = nn.Sequential() + + if module_def["type"] == "convolutional": + bn = int(module_def["batch_normalize"]) + filters = int(module_def["filters"]) + kernel_size = int(module_def["size"]) + pad = (kernel_size - 1) // 2 + modules.add_module( + f"conv_{module_i}", + nn.Conv2d( + in_channels=output_filters[-1], + out_channels=filters, + kernel_size=kernel_size, + stride=int(module_def["stride"]), + padding=pad, + bias=not bn, + ), + ) + if bn: + modules.add_module(f"batch_norm_{module_i}", nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5)) + if module_def["activation"] == "leaky": + modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1)) + + elif module_def["type"] == "maxpool": + kernel_size = int(module_def["size"]) + stride = int(module_def["stride"]) + if kernel_size == 2 and stride == 1: + modules.add_module(f"_debug_padding_{module_i}", nn.ZeroPad2d((0, 1, 0, 1))) + maxpool = 
nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2)) + modules.add_module(f"maxpool_{module_i}", maxpool) + + elif module_def["type"] == "upsample": + upsample = Upsample(scale_factor=int(module_def["stride"]), mode="nearest") + modules.add_module(f"upsample_{module_i}", upsample) + + elif module_def["type"] == "route": + layers = [int(x) for x in module_def["layers"].split(",")] + filters = sum([output_filters[1:][i] for i in layers]) + modules.add_module(f"route_{module_i}", EmptyLayer()) + + elif module_def["type"] == "shortcut": + filters = output_filters[1:][int(module_def["from"])] + modules.add_module(f"shortcut_{module_i}", EmptyLayer()) + + elif module_def["type"] == "yolo": + anchor_idxs = [int(x) for x in module_def["mask"].split(",")] + # Extract anchors + anchors = [int(x) for x in module_def["anchors"].split(",")] + anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)] + anchors = [anchors[i] for i in anchor_idxs] + num_classes = int(module_def["classes"]) + img_size = int(hyperparams["height"]) + # Define detection layer + yolo_layer = YOLOLayer(anchors, num_classes, img_size) + modules.add_module(f"yolo_{module_i}", yolo_layer) + # Register module list and number of output filters + module_list.append(modules) + output_filters.append(filters) + + return hyperparams, module_list + + def forward(self, x, targets=None): + img_dim = x.shape[2] + loss = 0 + layer_outputs, yolo_outputs = [], [] + for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)): + if module_def["type"] in ["convolutional", "upsample", "maxpool"]: + x = module(x) + elif module_def["type"] == "route": + x = torch.cat([layer_outputs[int(layer_i)] for layer_i in module_def["layers"].split(",")], 1) + elif module_def["type"] == "shortcut": + layer_i = int(module_def["from"]) + x = layer_outputs[-1] + layer_outputs[layer_i] + elif module_def["type"] == "yolo": + x, layer_loss = module[0](x, targets, img_dim) + loss += layer_loss + yolo_outputs.append(x) + layer_outputs.append(x) + yolo_outputs = to_cpu(torch.cat(yolo_outputs, 1)) + return yolo_outputs if targets is None else (loss, yolo_outputs) + + def load_darknet_weights(self, weights_path): + """Parses and loads the weights stored in 'weights_path'""" + + # Open the weights file + with open(weights_path, "rb") as f: + header = np.fromfile(f, dtype=np.int32, count=5) # First five are header values + self.header_info = header # Needed to write header when saving weights + self.seen = header[3] # number of images seen during training + weights = np.fromfile(f, dtype=np.float32) # The rest are weights + + # Establish cutoff for loading backbone weights + cutoff = None + if "darknet53.conv.74" in weights_path: + cutoff = 75 + + ptr = 0 + for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)): + if i == cutoff: + break + if module_def["type"] == "convolutional": + conv_layer = module[0] + if module_def["batch_normalize"]: + # Load BN bias, weights, running mean and running variance + bn_layer = module[1] + num_b = bn_layer.bias.numel() # Number of biases + # Bias + bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.bias) + bn_layer.bias.data.copy_(bn_b) + ptr += num_b + # Weight + bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.weight) + bn_layer.weight.data.copy_(bn_w) + ptr += num_b + # Running Mean + bn_rm = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_mean) + 
bn_layer.running_mean.data.copy_(bn_rm) + ptr += num_b + # Running Var + bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_var) + bn_layer.running_var.data.copy_(bn_rv) + ptr += num_b + else: + # Load conv. bias + num_b = conv_layer.bias.numel() + conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(conv_layer.bias) + conv_layer.bias.data.copy_(conv_b) + ptr += num_b + # Load conv. weights + num_w = conv_layer.weight.numel() + conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(conv_layer.weight) + conv_layer.weight.data.copy_(conv_w) + ptr += num_w + + def parse_model_config(self, path=None, model_cfg=None): + """Parses the yolo-v3 layer configuration file and returns module definitions""" + if model_cfg is None: + if path is None: + raise ValueError("path and model_cfg should not both be None") + + file = open(path, 'r') + lines = file.read().split('\n') + lines = [x for x in lines if x and not x.startswith('#')] + self._model_cfg = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces + else: + self._model_cfg = model_cfg + + module_defs = [] + for line in self._model_cfg: + if line.startswith('['): # This marks the start of a new block + module_defs.append({}) + module_defs[-1]['type'] = line[1:-1].rstrip() + if module_defs[-1]['type'] == 'convolutional': + module_defs[-1]['batch_normalize'] = 0 + else: + key, value = line.split("=") + value = value.strip() + module_defs[-1][key.rstrip()] = value.strip() + + return module_defs + + def save_darknet_weights(self, path, cutoff=-1): + """ + @:param path - path of the new weights file + @:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved) + """ + fp = open(path, "wb") + self.header_info[3] = self.seen + self.header_info.tofile(fp) + + # Iterate through layers + for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): + if module_def["type"] == "convolutional": + conv_layer = module[0] + # If batch norm, load bn first + if module_def["batch_normalize"]: + bn_layer = module[1] + bn_layer.bias.data.cpu().numpy().tofile(fp) + bn_layer.weight.data.cpu().numpy().tofile(fp) + bn_layer.running_mean.data.cpu().numpy().tofile(fp) + bn_layer.running_var.data.cpu().numpy().tofile(fp) + # Load conv bias + else: + conv_layer.bias.data.cpu().numpy().tofile(fp) + # Load conv weights + conv_layer.weight.data.cpu().numpy().tofile(fp) + + fp.close() \ No newline at end of file diff --git a/singa_auto/darknet/utils.py b/singa_auto/darknet/utils.py new file mode 100644 index 00000000..9a2ef11c --- /dev/null +++ b/singa_auto/darknet/utils.py @@ -0,0 +1,352 @@ +import numpy as np +import torch +import torch.nn.functional as F + + +def ap_per_class(tp, conf, pred_cls, target_cls): + """ Compute the average precision, given the recall and precision curves. + Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. + # Arguments + tp: True positives (list). + conf: Objectness value from 0-1 (list). + pred_cls: Predicted object classes (list). + target_cls: True object classes (list). + # Returns + The average precision as computed in py-faster-rcnn. 
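+        Predictions are first sorted by descending objectness; for each class, cumulative
+        TP/FP counts yield the recall and precision curves, AP is computed from that curve,
+        and F1 is the harmonic mean of the final precision and recall.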
+ """ + + # Sort by objectness + i = np.argsort(-conf) + tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] + + # Find unique classes + unique_classes = np.unique(target_cls) + + # Create Precision-Recall curve and compute AP for each class + ap, p, r = [], [], [] + for c in unique_classes: + i = pred_cls == c + n_gt = (target_cls == c).sum() # Number of ground truth objects + n_p = i.sum() # Number of predicted objects + + if n_p == 0 and n_gt == 0: + continue + elif n_p == 0 or n_gt == 0: + ap.append(0) + r.append(0) + p.append(0) + else: + # Accumulate FPs and TPs + fpc = (1 - tp[i]).cumsum() + tpc = (tp[i]).cumsum() + + # Recall + recall_curve = tpc / (n_gt + 1e-16) + r.append(recall_curve[-1]) + + # Precision + precision_curve = tpc / (tpc + fpc) + p.append(precision_curve[-1]) + + # AP from recall-precision curve + ap.append(compute_ap(recall_curve, precision_curve)) + + # Compute F1 score (harmonic mean of precision and recall) + p, r, ap = np.array(p), np.array(r), np.array(ap) + f1 = 2 * p * r / (p + r + 1e-16) + + return p, r, ap, f1, unique_classes.astype("int32") + + +def bbox_iou(box1, box2, x1y1x2y2=True): + """ + Returns the IoU of two bounding boxes + """ + if not x1y1x2y2: + # Transform from center and width to exact coordinates + b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2 + b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2 + b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2 + b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2 + else: + # Get the coordinates of bounding boxes + b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3] + + # get the corrdinates of the intersection rectangle + inter_rect_x1 = torch.max(b1_x1, b2_x1) + inter_rect_y1 = torch.max(b1_y1, b2_y1) + inter_rect_x2 = torch.min(b1_x2, b2_x2) + inter_rect_y2 = torch.min(b1_y2, b2_y2) + + # Intersection area + inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp( + inter_rect_y2 - inter_rect_y1 + 1, min=0 + ) + + # Union Area + b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1) + b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1) + + iou = inter_area / (b1_area + b2_area - inter_area + 1e-16) + + return iou + + +def bbox_wh_iou(wh1, wh2): + wh2 = wh2.t() + w1, h1 = wh1[0], wh1[1] + w2, h2 = wh2[0], wh2[1] + inter_area = torch.min(w1, w2) * torch.min(h1, h2) + union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area + return inter_area / union_area + + +def build_targets(pred_boxes, pred_cls, target, anchors, ignore_thresh): + + # ByteTensor = torch.cuda.ByteTensor if pred_boxes.is_cuda else torch.ByteTensor + ByteTensor = torch.cuda.BoolTensor if pred_boxes.is_cuda else torch.BoolTensor + FloatTensor = torch.cuda.FloatTensor if pred_boxes.is_cuda else torch.FloatTensor + + nB = pred_boxes.size(0) + nA = pred_boxes.size(1) + nC = pred_cls.size(-1) + nG = pred_boxes.size(2) + + # Output tensors + obj_mask = ByteTensor(nB, nA, nG, nG).fill_(0) + noobj_mask = ByteTensor(nB, nA, nG, nG).fill_(1) + class_mask = FloatTensor(nB, nA, nG, nG).fill_(0) + iou_scores = FloatTensor(nB, nA, nG, nG).fill_(0) + tx = FloatTensor(nB, nA, nG, nG).fill_(0) + ty = FloatTensor(nB, nA, nG, nG).fill_(0) + tw = FloatTensor(nB, nA, nG, nG).fill_(0) + th = FloatTensor(nB, nA, nG, nG).fill_(0) + tcls = FloatTensor(nB, nA, nG, nG, nC).fill_(0) + + + # # note: solver multi gpu problem + # target = target[target.sum(dim=1) 
!= 0] + + # Convert to position relative to box + target_boxes = target[:, 2:6] * nG + gxy = target_boxes[:, :2] + gwh = target_boxes[:, 2:] + # Get anchors with best iou + ious = torch.stack([bbox_wh_iou(anchor, gwh) for anchor in anchors]) + best_ious, best_n = ious.max(0) + # Separate target values + b, target_labels = target[:, :2].long().t() + gx, gy = gxy.t() + gw, gh = gwh.t() + gi, gj = gxy.long().t() + + # prevent index out of boundary + gi = gi.clamp(0, nG - 1) + gj = gj.clamp(0, nG - 1) + + # Set masks + obj_mask[b, best_n, gj, gi] = 1 + noobj_mask[b, best_n, gj, gi] = 0 + + # Set noobj mask to zero where iou exceeds ignore threshold + for i, anchor_ious in enumerate(ious.t()): + noobj_mask[b[i], anchor_ious > ignore_thresh, gj[i], gi[i]] = 0 + + # Coordinates + tx[b, best_n, gj, gi] = gx - gx.floor() + ty[b, best_n, gj, gi] = gy - gy.floor() + # Width and height + tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16) + th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16) + # One-hot encoding of label + tcls[b, best_n, gj, gi, target_labels] = 1 + # Compute label correctness and iou at best anchor + class_mask[b, best_n, gj, gi] = (pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels).float() + iou_scores[b, best_n, gj, gi] = bbox_iou(pred_boxes[b, best_n, gj, gi], target_boxes, x1y1x2y2=False) + + tconf = obj_mask.float() + return iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf + + +def compute_ap(recall, precision): + """ + Compute the average precision, given the recall and precision curves. + Code originally from https://github.com/rbgirshick/py-faster-rcnn. + + # Arguments + recall: The recall curve (list). + precision: The precision curve (list). + # Returns + The average precision as computed in py-faster-rcnn. 
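+        AP is computed with all-point interpolation: the precision curve is first made
+        monotonically non-increasing (its upper envelope), then AP is the sum of
+        (delta recall) * precision over the points where recall changes.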
+ """ + # correct AP calculation + # first append sentinel values at the end + mrec = np.concatenate(([0.0], recall, [1.0])) + mpre = np.concatenate(([0.0], precision, [0.0])) + + # compute the precision envelope + for i in range(mpre.size - 1, 0, -1): + mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + i = np.where(mrec[1:] != mrec[:-1])[0] + + # and sum (\Delta recall) * prec + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) + return ap + + +def get_batch_statistics(outputs, targets, iou_thresh): + """ Compute true positives, predicted scores and predicted labels per sample """ + batch_metrics = [] + for sample_i in range(len(outputs)): + + if outputs[sample_i] is None: + continue + + output = outputs[sample_i] + pred_boxes = output[:, :4] + pred_scores = output[:, 4] + pred_labels = output[:, -1] + + true_positives = np.zeros(pred_boxes.shape[0]) + + annotations = targets[targets[:, 0] == sample_i][:, 1:] + target_labels = annotations[:, 0] if len(annotations) else [] + if len(annotations): + detected_boxes = [] + target_boxes = annotations[:, 1:] + + for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)): + + # If targets are found break + if len(detected_boxes) == len(annotations): + break + + # Ignore if label is not one of the target labels + if pred_label not in target_labels: + continue + + iou, box_index = bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0) + if iou >= iou_thresh and box_index not in detected_boxes: + true_positives[pred_i] = 1 + detected_boxes += [box_index] + batch_metrics.append([true_positives, pred_scores, pred_labels]) + return batch_metrics + + +def non_max_suppression(prediction, conf_thresh=0.5, nms_thresh=0.4): + """ + Removes detections with lower object confidence score than 'conf_thresh' and performs + Non-Maximum Suppression to further filter detections. 
+ Returns detections with shape: + (x1, y1, x2, y2, object_conf, class_score, class_pred) + """ + + # From (center x, center y, width, height) to (x1, y1, x2, y2) + prediction[..., :4] = xywh2xyxy(prediction[..., :4]) + output = [None for _ in range(len(prediction))] + for image_i, image_pred in enumerate(prediction): + # Filter out confidence scores below threshold + image_pred = image_pred[image_pred[:, 4] >= conf_thresh] + + # If none are remaining => process next image + if not image_pred.size(0): + continue + + # Object confidence times class confidence + score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0] + + # Sort by it + image_pred = image_pred[(-score).argsort()] + class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True) + detections = torch.cat((image_pred[:, :5], class_confs.float(), class_preds.float()), 1) + + # Perform non-maximum suppression + keep_boxes = [] + try_round = 1000 # avoiding infinite loop + while detections.size(0) and try_round >= 0: + try_round -= 1 + + large_overlap = bbox_iou(detections[0, :4].unsqueeze(0), detections[:, :4]) > nms_thresh + label_match = detections[0, -1] == detections[:, -1] + + # Indices of boxes with lower confidence scores, large IOUs and matching labels + invalid = large_overlap & label_match + weights = detections[invalid, 4:5] + + # Merge overlapping bboxes by order of confidence + detections[0, :4] = (weights * detections[invalid, :4]).sum(0) / weights.sum() + keep_boxes += [detections[0]] + detections = detections[~invalid] + + if keep_boxes: + output[image_i] = torch.stack(keep_boxes) + + return output + + +def pad_to_square(img, pad_value): + c, h, w = img.shape + dim_diff = np.abs(h - w) + + # (upper / left) padding and (lower / right) padding + pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2 + + # Determine padding + pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0) + + # Add padding + img = F.pad(img, pad, "constant", value=pad_value) + + return img, pad + + +def rescale_boxes(boxes, current_dim, original_shape): + """ Rescales bounding boxes to the original shape """ + orig_h, orig_w = original_shape + + # The amount of padding that was added + pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape)) + pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape)) + + # Image height and width after padding is removed + unpad_h = current_dim - pad_y + unpad_w = current_dim - pad_x + + # Rescale bounding boxes to dimension of original image + boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w + boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h + boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w + boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h + + return boxes + + +def resize(image, size): + image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0) + return image + + +def to_cpu(tensor): + return tensor.detach().cpu() + + +def weights_init_normal(m): + classname = m.__class__.__name__ + if classname.find("Conv") != -1: + torch.nn.init.normal_(m.weight.data, 0.0, 0.02) + elif classname.find("BatchNorm2d") != -1: + torch.nn.init.normal_(m.weight.data, 1.0, 0.02) + torch.nn.init.constant_(m.bias.data, 0.0) + + +def xywh2xyxy(x): + y = x.new(x.shape) + y[..., 0] = x[..., 0] - x[..., 2] / 2 + y[..., 1] = x[..., 1] - x[..., 3] / 2 + y[..., 2] = x[..., 0] + x[..., 2] / 2 + y[..., 3] = x[..., 1] + x[..., 3] / 2 + return y \ No newline at end of file diff --git a/singa_auto/darknet/yolov3.cfg 
b/singa_auto/darknet/yolov3.cfg new file mode 100644 index 00000000..946e0154 --- /dev/null +++ b/singa_auto/darknet/yolov3.cfg @@ -0,0 +1,788 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=16 +subdivisions=1 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 6,7,8 +anchors = 
10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 3,4,5 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=255 +activation=linear + + +[yolo] +mask = 0,1,2 +anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 +classes=80 +num=9 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 diff --git a/singa_auto/datasets/dataset.py b/singa_auto/datasets/dataset.py index 5155a42c..38669207 100644 --- a/singa_auto/datasets/dataset.py +++ b/singa_auto/datasets/dataset.py @@ -202,6 +202,7 @@ def _(image_bytes): pil_images = [] for image_path in image_paths: pil_images.append(load(image_path)) + images = np.array([np.asarray(x) for x in pil_images]) return pil_images @@ -220,6 +221,7 @@ class CorpusDataset(ModelDataset): ''' def __init__(self, dataset_path, tags, split_by): + super().__init__(dataset_path) self.tags = tags (self.size, self.tag_num_classes, self.max_token_len, self.max_sent_len, self._sents) = \ self._load(dataset_path, self.tags, split_by) @@ -289,10 +291,13 @@ class AudioFilesDataset(ModelDataset): ''' def __init__(self, dataset_path, dataset_dir): + super().__init__(dataset_path) self._dataset_dir = dataset_dir self.df = self._load(dataset_path) + def __getitem__(self, idx): return self.df.iloc[idx] + def _load(self, dataset_path): ''' Loading the dataset into a pandas dataframe. Called in the class __init__ method. 
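For reference, a minimal sketch of how each [yolo] block's "mask" selects its anchor pairs. It mirrors the yolo branch of DarkNet.create_modules in singa_auto/darknet/model.py above, with the mask/anchors values copied from the first [yolo] block of yolov3.cfg; it is only an illustration, not part of the patch.

    # One [yolo] block as create_modules would see it after parse_model_config.
    module_def = {
        "mask": "6,7,8",
        "anchors": "10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326",
        "classes": "80",
    }

    # Same parsing steps as the yolo branch of DarkNet.create_modules:
    anchor_idxs = [int(x) for x in module_def["mask"].split(",")]        # [6, 7, 8]
    anchors = [int(x) for x in module_def["anchors"].split(",")]
    anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
    anchors = [anchors[i] for i in anchor_idxs]

    print(anchors)  # [(116, 90), (156, 198), (373, 326)] -- the largest anchors, used by the coarsest grid

Each of the three detection heads therefore works with its own three anchor pairs, selected from the shared nine-anchor list by its mask.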
diff --git a/singa_auto/datasets/dataset_base.py b/singa_auto/datasets/dataset_base.py index 4df12f41..ec1b7f63 100644 --- a/singa_auto/datasets/dataset_base.py +++ b/singa_auto/datasets/dataset_base.py @@ -12,8 +12,8 @@ def _load_pil_image(image_path, mode='RGB'): try: with open(image_path, 'rb') as f: - encoded = io.BytesIO(f.read()) - pil_image = Image.open(encoded).convert(mode) + #encoded = io.BytesIO(f.read()) + pil_image = Image.open(image_path).convert(mode) except: print('error accurs when handling : ', image_path) raise diff --git a/singa_auto/datasets/image_classification_dataset.py b/singa_auto/datasets/image_classification_dataset.py index cf72e779..95a3d03f 100644 --- a/singa_auto/datasets/image_classification_dataset.py +++ b/singa_auto/datasets/image_classification_dataset.py @@ -9,6 +9,9 @@ import numpy as np from singa_auto.datasets.dataset_base import _load_pil_image, ClfModelDataset import pandas as pd +import logging +logger = logging.getLogger(__name__) + class ImageDataset4Clf(ClfModelDataset): @@ -29,10 +32,12 @@ def __init__(self, self.mode = mode self.path = dataset_path self.dataset_zipfile = None + self.label_mapper = dict() (self._image_names, self._image_classes, self.size, self.classes) = self._extract_zip(self.path) + self.min_image_size = min_image_size self.max_image_size = max_image_size - self.label_mapper = dict() + self.image_size = None if if_shuffle: (self._image_names, @@ -44,6 +49,7 @@ def __getitem__(self, index): raise StopIteration try: pil_image = self._extract_item(item_path=self._image_names[index]) + (image, image_size) = self._preprocess(pil_image, self.min_image_size, self.max_image_size) @@ -53,6 +59,8 @@ def __getitem__(self, index): return (image, image_class) except: + logging.error('getitem') + logging.error(self._image_names[index]) raise def _preprocess(self, pil_image, min_image_size, max_image_size): @@ -78,46 +86,69 @@ def _extract_item(self, item_path): with tempfile.TemporaryDirectory() as d: extracted_item_path = self.dataset_zipfile.extract(item_path, path=d) + + pil_image = _load_pil_image(extracted_item_path, mode=self.mode) return pil_image def _extract_zip(self, dataset_path): + + flag=0 self.dataset_zipfile = zipfile.ZipFile(dataset_path, 'r') - if 'images.csv' in self.dataset_zipfile.namelist(): + print(self.dataset_zipfile.namelist()) + with tempfile.TemporaryDirectory() as d: + for fileName in self.dataset_zipfile.namelist(): + if fileName.endswith('class_name.csv'): + class_csv_path = self.dataset_zipfile.extract(fileName, + path=d) + + csv = pd.read_csv(class_csv_path) + name = csv[csv.columns[1]] + label = csv[csv.columns[0]] + for single_name,single_label in zip(name,label): + self.label_mapper[str(single_label)]=single_name + print('label_mapper') + print(self.label_mapper) + + for fileName in self.dataset_zipfile.namelist(): + if fileName.endswith('images.csv'): + flag=1 # Create temp directory to unzip to extract paths/classes/numbers only, # no actual images would be extracted - with tempfile.TemporaryDirectory() as d: - # obtain csv file - for fileName in self.dataset_zipfile.namelist(): - if fileName.endswith('.csv'): - # Extract a single csv file from zip - images_csv_path = self.dataset_zipfile.extract(fileName, - path=d) - break - try: - csv = pd.read_csv(images_csv_path) - image_classes = csv[csv.columns[1:]] - image_paths = csv[csv.columns[0]] - except: - traceback.print_stack() - raise - num_classes = len(csv[csv.columns[1]].unique()) - num_labeled_samples = len(csv[csv.columns[0]].unique()) - 
image_classes = tuple(np.array(image_classes).squeeze().tolist()) - image_paths = tuple(image_paths) - - else: - # make image name list and remove dir from list - image_paths = [ - x for x in self.dataset_zipfile.namelist() - if x.endswith('/') == False - ] - num_labeled_samples = len(image_paths) - str_labels = [os.path.dirname(x) for x in image_paths] - self.str_labels_set = list(set(str_labels)) - num_classes = len(self.str_labels_set) - image_classes = [self.str_labels_set.index(x) for x in str_labels] + if flag==1: + with tempfile.TemporaryDirectory() as d: + # obtain csv file + for fileName in self.dataset_zipfile.namelist(): + if fileName.endswith('images.csv'): + # Extract a single csv file from zip + images_csv_path = self.dataset_zipfile.extract(fileName, + path=d) + break + try: + csv = pd.read_csv(images_csv_path) + image_classes = csv[csv.columns[1]] + image_paths = csv[csv.columns[0]] + print(image_classes) + except: + traceback.print_stack() + raise + num_classes = len(csv[csv.columns[1]].unique()) + num_labeled_samples = len(csv[csv.columns[0]].unique()) + image_classes = tuple(np.array(image_classes).squeeze().tolist()) + image_paths = tuple(image_paths) + + else: + # make image name list and remove dir from list + image_paths = [ + x for x in self.dataset_zipfile.namelist() + if x.endswith('/') == False + ] + num_labeled_samples = len(image_paths) + str_labels = [os.path.dirname(x) for x in image_paths] + self.str_labels_set = list(set(str_labels)) + num_classes = len(self.str_labels_set) + image_classes = [self.str_labels_set.index(x) for x in str_labels] return (image_paths, image_classes, num_labeled_samples, num_classes) def _shuffle(self, images, classes): diff --git a/singa_auto/datasets/image_detection_dataset.py b/singa_auto/datasets/image_detection_dataset.py index 5a700f24..9b20c67b 100644 --- a/singa_auto/datasets/image_detection_dataset.py +++ b/singa_auto/datasets/image_detection_dataset.py @@ -1,15 +1,29 @@ import copy +import cv2 +import itertools +import json import numpy as np import os -import zipfile +import random import tempfile -from singa_auto.datasets.dataset_base import DetectionModelDataset +import time import torch import torch.utils.data +import zipfile + from PIL import Image +from PIL import ImageFile +ImageFile.LOAD_TRUNCATED_IMAGES = True + +from collections import defaultdict from pycocotools.coco import COCO +from torchvision.transforms import transforms + +from singa_auto.darknet.utils import pad_to_square, resize +from singa_auto.datasets.dataset_base import DetectionModelDataset from singa_auto.datasets.torch_utils import get_transform + COCO_INSTANCE_CATEGORY_NAMES = [ '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign', @@ -26,6 +40,487 @@ ] +def fetch_from_train_set(root_path, split_ratio=0.8): + image_train_folder = os.path.join(root_path, "train", "image") + image_val_folder = os.path.join(root_path, "val", "image") + annotation_train_folder = os.path.join(root_path, "train", "annotation") + annotation_val_folder = os.path.join(root_path, "val", "annotation") + + os.makedirs(image_val_folder, exist_ok=True) + os.makedirs(annotation_val_folder, exist_ok=True) + + list_image = list(sorted(os.listdir(image_train_folder))) + list_annotation = list(sorted(os.listdir(annotation_train_folder))) + + union_list = [] + for image_name in list_image: + base_name, _ = os.path.splitext(image_name) + + if base_name + ".json" in 
list_annotation: + union_list.append(image_name) + + disordered_index = np.random.permutation(range(len(union_list))) + val_list = disordered_index[np.int(len(union_list) * split_ratio):] + import shutil + + for image_idx in val_list: + image_name = union_list[image_idx] + annotation_name = os.path.splitext(image_name)[0] + ".json" + + shutil.move(os.path.join(image_train_folder, image_name), os.path.join(image_val_folder, image_name)) + shutil.move(os.path.join(annotation_train_folder, annotation_name), os.path.join(annotation_val_folder, annotation_name)) + + +def split_dataset(root_path, split_ratio=0.8): + image_path = os.path.join(root_path, "image") + annotation_path = os.path.join(root_path, "annotation") + + image_train_folder = os.path.join(root_path, "train", "image") + image_val_folder = os.path.join(root_path, "val", "image") + annotation_train_folder = os.path.join(root_path, "train", "annotation") + annotation_val_folder = os.path.join(root_path, "val", "annotation") + + os.makedirs(image_train_folder, exist_ok=True) + os.makedirs(image_val_folder, exist_ok=True) + os.makedirs(annotation_train_folder, exist_ok=True) + os.makedirs(annotation_val_folder, exist_ok=True) + + list_image = list(sorted(os.listdir(image_path))) + list_annotation = list(sorted(os.listdir(annotation_path))) + + union_list = [] + for image_name in list_image: + base_name, _ = os.path.splitext(image_name) + + if base_name + ".json" in list_annotation: + union_list.append(image_name) + + disordered_index = np.random.permutation(range(len(union_list))) + train_list = disordered_index[:np.int(len(union_list) * split_ratio)] + val_list = disordered_index[np.int(len(union_list) * split_ratio):] + + import shutil + for image_idx, image_name in enumerate(union_list): + annotation_name = os.path.splitext(image_name)[0] + ".json" + + if image_idx in train_list: + shutil.copy(os.path.join(image_path, image_name), os.path.join(image_train_folder, image_name)) + shutil.copy(os.path.join(annotation_path, annotation_name), os.path.join(annotation_train_folder, annotation_name)) + else: + shutil.copy(os.path.join(image_path, image_name), os.path.join(image_val_folder, image_name)) + shutil.copy(os.path.join(annotation_path, annotation_name), os.path.join(annotation_val_folder, annotation_name)) + + +class YoloCoco(object): + def __init__(self, annotation_path=None, is_single_json_file=False): + """ + dataset for YOLO, according with coco + @ annotation_path: annotation path, filename if a single json, folder path is multiple jsons + """ + self.dataset, self.anns, self.cats, self.imgs = dict(), dict(), dict(), dict() + self.img_to_ann, self.cat_to_img = defaultdict(list), defaultdict(list) + + if annotation_path is not None: + print("loading annotations into memory") + tic = time.time() + + if is_single_json_file: + # load annotations from single json + with open(annotation_path, 'r') as f: + dataset = json.load(f) + else: + # load annotations from json files + dataset = self.load_scattered_json(annotation_path) + + assert type(dataset)==dict, "annotation file format {} not supported".format(type(dataset)) + print("Done (t={:0.2f}s)".format(time.time()- tic)) + self.dataset = dataset + else: + raise ValueError("annotation_path should not be None") + + self.create_index() + + def _is_array_like(self, obj): + return hasattr(obj, '__iter__') and hasattr(obj, '__len__') + + def load_scattered_json(self, annotation_path): + """ + merge annotation into a dataset, in accordancy with pycocotool + """ + list_annotation = 
list(sorted(os.listdir(annotation_path))) + + dataset = { + "images": list(), + "annotations": list(), + "categories": list(), + } + + dict_category = dict() + dict_image = dict() + last_category_id = 0 + last_annotation_id = 0 + last_image_id = 0 + + # for all json files + for annotation_idx, annotation_filename in enumerate(list_annotation): + with open(os.path.join(annotation_path, annotation_filename), 'r') as f: + json_info = json.load(f) + + # process image info + image_id = int(json_info["imagePath"][15:-4]) + if image_id not in dict_image: + dict_image[image_id] = last_image_id + last_image_id += 1 + + image_info = { + "file_name": json_info["imagePath"], + "height": json_info["imageHeight"], + "width": json_info["imageWidth"], + "id": image_id, + } + + dataset["images"].append(image_info) + + # process bounding box information + for bounding_box_info in json_info["shapes"]: + if bounding_box_info["label"] not in dict_category: + dict_category[bounding_box_info["label"]] = last_category_id + + category_info = { + "id": last_category_id, + "name":bounding_box_info["label"], + } + + dataset["categories"].append(category_info) + last_category_id += 1 + + annotation_info = { + "image_id": image_id, + "bbox": list(np.array(np.concatenate((bounding_box_info["points"][0], bounding_box_info["points"][1]), axis=0), dtype=np.int)), + "category_id": dict_category[bounding_box_info["label"]], + "id": last_annotation_id, + } + last_annotation_id += 1 + + dataset["annotations"].append(annotation_info) + return dataset + + def create_index(self): + print("creating index") + anns, cats, imgs = dict(), dict(), dict() + img_to_ann, cat_to_img = defaultdict(list), defaultdict(list) + + if "annotations" in self.dataset: + for ann in self.dataset["annotations"]: + img_to_ann[ann["image_id"]].append(ann) + anns[ann["id"]] = ann + + if "images" in self.dataset: + for img in self.dataset["images"]: + imgs[img["id"]] = img + + if "categories" in self.dataset: + for cat in self.dataset["categories"]: + cats[cat["id"]] = cat + + if "annotations" in self.dataset and "categories" in self.dataset: + for ann in self.dataset["annotations"]: + cat_to_img[ann["category_id"]].append(ann["image_id"]) + + print("index created") + + # create class member + self.anns = anns + self.cats = cats + self.imgs = imgs + self.cat_to_img = cat_to_img + self.img_to_ann = img_to_ann + + def info(self): + """ + Print information about the annotation file. + :return: + """ + for key, value in self.dataset['info'].items(): + print('{}: {}'.format(key, value)) + + def get_ann_id(self, img_id=[], cat_id=[], area_rng=[], is_crowd=None): + """ + Get ann ids that satisfy given filter conditions. default skips that filter + :param: img_id (int array) get anns for given imgs + :param: cat_id (int array) get anns for given cats + :param: area_rng (float array) get anns for given area range (e.g. 
[0 inf]) + :param: is_crowd (boolean) get anns for given crowd label (False or True) + :return: ids (int array) integer array of ann ids + """ + img_id = img_id if self._is_array_like(img_id) else [img_id] + cat_id = cat_id if self._is_array_like(cat_id) else [cat_id] + + if len(img_id) == len(cat_id) == len(area_rng) == 0: + anns = self.dataset['annotations'] + else: + if not len(img_id) == 0: + lists = [self.img_to_ann[imgId] for imgId in img_id if imgId in self.img_to_ann] + anns = list(itertools.chain.from_iterable(lists)) + else: + anns = self.dataset['annotations'] + anns = anns if len(cat_id) == 0 else [ann for ann in anns if ann['category_id'] in cat_id] + anns = anns if len(area_rng) == 0 else [ann for ann in anns if ann['area'] > area_rng[0] and ann['area'] < area_rng[1]] + if not is_crowd is None: + ids = [ann['id'] for ann in anns if ann['is_crowd'] == is_crowd] + else: + ids = [ann['id'] for ann in anns] + return ids + + def get_cat_id(self, cat_nms=[], sup_nms=[], cat_id=[]): + """ + filtering parameters. default skips that filter. + :param: cat_nms (str array) : get cats for given cat names + :param: sup_nms (str array) : get cats for given supercategory names + :param: cat_id (int array) : get cats for given cat ids + :return: ids (int array) : integer array of cat ids + """ + cat_nms = cat_nms if self._is_array_like(cat_nms) else [cat_nms] + sup_nms = sup_nms if self._is_array_like(sup_nms) else [sup_nms] + cat_id = cat_id if self._is_array_like(cat_id) else [cat_id] + + if len(cat_nms) == len(sup_nms) == len(cat_id) == 0: + cats = self.dataset['categories'] + else: + cats = self.dataset['categories'] + cats = cats if len(cat_nms) == 0 else [cat for cat in cats if cat['name'] in cat_nms] + cats = cats if len(sup_nms) == 0 else [cat for cat in cats if cat['supercategory'] in sup_nms] + cats = cats if len(cat_id) == 0 else [cat for cat in cats if cat['id'] in cat_id] + ids = [cat['id'] for cat in cats] + return ids + + def get_img_id(self, img_id=[], cat_id=[]): + """ + Get img ids that satisfy given filter conditions. + :param: img_id (int array) get imgs for given ids + :param: cat_id (int array) : get imgs with all given cats + :return: ids (int array) : integer array of img ids + """ + img_id = img_id if self._is_array_like(img_id) else [img_id] + cat_id = cat_id if self._is_array_like(cat_id) else [cat_id] + + if len(img_id) == len(cat_id) == 0: + ids = self.imgs.keys() + else: + ids = set(img_id) + for i, cat_id in enumerate(cat_id): + if i == 0 and len(ids) == 0: + ids = set(self.cat_to_img[cat_id]) + else: + # original &=, but should be |= + ids |= set(self.cat_to_img[cat_id]) + return list(ids) + + def load_ann(self, ids=[]): + """ + Load anns with the specified ids. + :param: ids (int array) : integer ids specifying anns + :return: anns (object array) : loaded ann objects + """ + if self._is_array_like(ids): + return [self.anns[id] for id in ids] + elif type(ids) == int: + return [self.anns[ids]] + + def load_cat(self, ids=[]): + """ + Load cats with the specified ids. + :param: ids (int array) : integer ids specifying cats + :return: cats (object array) : loaded cat objects + """ + if self._is_array_like(ids): + return [self.cats[id] for id in ids] + elif type(ids) == int: + return [self.cats[ids]] + + def load_imgs(self, ids=[]): + """ + Load anns with the specified ids. 
+ :param: ids (int array) : integer ids specifying img + :return: imgs (object array) : loaded img objects + """ + if self._is_array_like(ids): + return [self.imgs[id] for id in ids] + elif type(ids) == int: + return [self.imgs[ids]] + + def load_numpy_annotation(self, data): + """ + Convert result data from a numpy array [Nx7] where each row contains {imageID,x1,y1,w,h,score,class} + :param: data (numpy.ndarray) + :return: annotations (python nested list) + """ + print('Converting ndarray to lists...') + assert(type(data) == np.ndarray) + print(data.shape) + assert(data.shape[1] == 7) + N = data.shape[0] + ann = [] + for i in range(N): + if i % 1000000 == 0: + print('{}/{}'.format(i,N)) + ann += [{ + 'image_id' : int(data[i, 0]), + 'bbox' : [ data[i, 1], data[i, 2], data[i, 3], data[i, 4] ], + 'score' : data[i, 5], + 'category_id': int(data[i, 6]), + }] + return ann + + + +class YoloDataset(DetectionModelDataset, torch.utils.data.Dataset): + """ + dataset of yolo + """ + def __init__(self, image_path, annotation_path, is_single_json_file, filter_classes, is_train, img_size=416, augment=True, multiscale=True, normalized_labels=True): + self.root_path = image_path + self.imgs = list(sorted(os.listdir(image_path))) + self.annotation_path = annotation_path + self.coco = YoloCoco(self.annotation_path, is_single_json_file=is_single_json_file) + # eg: filter_classes: ['person', 'dog'] + self.cat_ids = self.coco.get_cat_id(cat_nms=filter_classes) + self.ids = self.coco.get_img_id(cat_id=self.cat_ids) + + self.cat_to_label = {v: key+1 for key, v in enumerate(self.cat_ids)} + self.label_to_cat = {key+1: v for key, v in enumerate(self.cat_ids)} + + self.img_size = img_size + self.max_objects = 100 + self.augment = augment if is_train else False + self.multiscale = multiscale if is_train else False + self.normalized_labels = normalized_labels + self.min_size = self.img_size - 3 * 32 + self.max_size = self.img_size + 3 * 32 + self.batch_count = 0 + + # if os.path.exists(r"./rectangle_images/"): + # import shutil + # shutil.rmtree(r"./rectangle_images/") + # os.makedirs(r"./rectangle_images/", exist_ok=True) + + def __getitem__(self, index): + img_id = self.ids[index % len(self.ids)] + ann_id = self.coco.get_ann_id(img_id=img_id) + + img_path = os.path.join(self.root_path, self.coco.load_imgs(img_id)[0]["file_name"]) + + # Extract image as PyTorch tensor + img = transforms.ToTensor()(Image.open(img_path).convert('RGB')) + + # Handle images with less than three channels + if len(img.shape) != 3: + img = img.unsqueeze(0) + img = img.expand((3, img.shape[1:])) + + _, h, w = img.shape + h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1) + # Pad to square resolution + img, pad = pad_to_square(img, 0) + _, padded_h, padded_w = img.shape + + # --------- + # Label + # --------- + coco_annotation = self.coco.load_ann(ann_id) + + tmp_label = [] + box_info = [] + for ann in coco_annotation: + if ann["category_id"] not in self.cat_ids: + continue + boxes = torch.zeros((1, 6), dtype=torch.float32) + x1 = round(max(ann['bbox'][0], 0)) + y1 = round(max(ann['bbox'][1], 0)) + x2 = round(min(x1 + ann['bbox'][2], w - 1)) + y2 = round(min(y1 + ann['bbox'][3], h - 1)) + + # Adjust for added padding + x1 += pad[0] + y1 += pad[2] + x2 += pad[1] + y2 += pad[3] + + box_info.append(((x1, y1), (x2, y2))) + + # print(x1, x2, y1, y2, padded_h, padded_w) + # Returns (x, y, w, h) + boxes[0, 2] = (x2 + x1) / 2 / padded_w + boxes[0, 3] = (y2 + y1) / 2 / padded_h + boxes[0, 4] = (x2 - x1) / padded_w + boxes[0, 5] = (y2 - 
y1) / padded_h + boxes[0, 1] = self.cat_to_label[ann["category_id"]] + tmp_label.append(boxes) + + # self.get_bounding_box(img, os.path.basename(img_path), box_info) + + # targets from list to tensor + targets = torch.cat(tmp_label, dim=0) + + # Apply augmentations + if self.augment: + if np.random.random() < 0.5: + img, targets = self.horisontal_flip(img, targets) + + return img_path, img, targets + + def __len__(self): + return len(self.ids) + + def _extract_zip(self, dataset_path, annotation_path): + dataset_zipfile = zipfile.ZipFile(dataset_path, 'r') + annotation_zipfile = zipfile.ZipFile(annotation_path, 'r') + + # create temp dir + self.root_path = tempfile.TemporaryDirectory() + + # extract images and annotations + dataset_zipfile.extractall(path=self.root_path.name) + annotation_zipfile.extractall(path=self.root_path.name) + imgs = list(sorted(os.listdir(os.path.join(self.root_path.name, self.img_folder_name)))) + annotation_file = os.path.join(self.root_path.name, "annotations", self.annotation_file_name) + + return imgs, annotation_file + + def collate_fn(self, batch): + paths, imgs, targets = list(zip(*batch)) + # Remove empty placeholder targets + targets = [boxes for boxes in targets if boxes is not None] + # Add sample index to targets + for i, boxes in enumerate(targets): + boxes[:, 0] = i + targets = torch.cat(targets, 0) + # Selects new image size every tenth batch + if self.multiscale and self.batch_count % 10 == 0: + self.img_size = random.choice(range(self.min_size, self.max_size + 1, 32)) + # Resize images to input shape + imgs = torch.stack([resize(img, self.img_size) for img in imgs]) + self.batch_count += 1 + return paths, imgs, targets + + # def get_bounding_box(self, img, basename, boxes, rect_th=3): + # """ + # draw the bounding box on img + # """ + # tmp = img.squeeze().detach().permute((1, 2, 0)).mul(255).clamp(0, 255).numpy() + # tmp = cv2.cvtColor(tmp, cv2.COLOR_RGB2BGR) + # + # for rect_info in boxes: + # cv2.rectangle(tmp, rect_info[0], rect_info[1], (0, 255, 0), rect_th) + # + # cv2.imwrite('./rectangle_images/{}'.format(basename), tmp) + + def horisontal_flip(self, images, targets): + images = torch.flip(images, [-1]) + targets[:, 2] = 1 - targets[:, 2] + return images, targets + + class PennFudanDataset(DetectionModelDataset, torch.utils.data.Dataset): def __init__(self, dataset_path, is_train): self.root_path = None diff --git a/singa_auto/datasets/image_segmentation_dataset.py b/singa_auto/datasets/image_segmentation_dataset.py new file mode 100644 index 00000000..45065657 --- /dev/null +++ b/singa_auto/datasets/image_segmentation_dataset.py @@ -0,0 +1,140 @@ +from torch.utils.data import Dataset +import numpy as np +from tqdm import tqdm +import os +from copy import deepcopy +from PIL import Image +import torch +from glob import glob + + +def ImageFetch(train_data_path, split_rate=0.9): + ''' + load image as PIL.Image into a list for dataloader, split train/val subsets automatically + train_data_path: already unzipped dataset folder path + split_rate: ratio of train/val data + ''' + folder_name = train_data_path + + image_train = [] + mask_train = [] + image_val = [] + mask_val = [] + + # split train and val subsets + images_folder = os.path.join(folder_name, "image") + masks_folder = os.path.join(folder_name, "mask") + + if not os.path.isdir(images_folder) or not os.path.isdir(masks_folder): + print("imges folder or mask folder does not exist, please check the upload file") + + image_list = sorted(glob(os.path.join(images_folder, '*'))) # use sorted 
list to control train/val split + num_img = len(image_list) + + train_num = int(num_img * split_rate) + train_list = image_list[0:train_num] + val_list = image_list[train_num:] + + # load images and masks from their folders + for idx, image_name in tqdm(enumerate(train_list), total=len(train_list), desc="load train images......"): + image_name = image_name.split('/')[-1] + + image_path = os.path.join(images_folder, image_name) + mask_path = os.path.join(masks_folder, os.path.splitext(image_name)[0] + ".png") # use image name to find the corresponding mask + + image = Image.open(image_path) + image_train.append(image) + + mask = Image.open(mask_path) + mask_train.append(mask) + + for idx, image_name in tqdm(enumerate(val_list), total=len(val_list), desc="load validation images......"): + image_name = image_name.split('/')[-1] + + image_path = os.path.join(images_folder, image_name) + mask_path = os.path.join(masks_folder, os.path.splitext(image_name)[0] + ".png") + + image = Image.open(image_path) + image_val.append(image) + + mask = Image.open(mask_path) + mask_val.append(mask) + + return image_train, mask_train, image_val, mask_val + + +def trainImageFetch(folder_name): + ''' + load train image as PIL.Image into a list for dataloader, need train/val subsets split before execution + folder_name: already unzipped train dataset folder path + ''' + image_train = [] + mask_train = [] + + # load images and masks from their folders + images_folder = os.path.join(folder_name, "train", "image") + masks_folder = os.path.join(folder_name, "train", "mask") + image_list = os.listdir(images_folder) + for idx, image_name in tqdm(enumerate(image_list), total=len(image_list), desc="load train images......"): + image_path = os.path.join(images_folder, image_name) + mask_path = os.path.join(masks_folder, os.path.splitext(image_name)[0] + ".png") + + image = Image.open(image_path) + image_train.append(image) + + mask = Image.open(mask_path) + mask_train.append(mask) + + return image_train, mask_train + + +def valImageFetch(folder_name): + ''' + load validation image as PIL.Image into a list for dataloader, need train/val subsets split before execution + folder_name: already unzipped validation dataset folder path + ''' + image_val = [] + mask_val = [] + + images_folder = os.path.join(folder_name, "val", "image") + masks_folder = os.path.join(folder_name, "val", "mask") + + image_list = os.listdir(images_folder) + for idx, image_name in tqdm(enumerate(image_list), total=len(image_list), desc="load validation images......"): + image_path = os.path.join(images_folder, image_name) + mask_path = os.path.join(masks_folder, os.path.splitext(image_name)[0] + ".png") + + image = Image.open(image_path) + image_val.append(image) + + mask = Image.open(mask_path) + mask_val.append(mask) + + return image_val, mask_val + + +class SegDataset(Dataset): + ''' + prepare image dataset with certain transforms + ''' + def __init__(self, image_list, mask_list, transform_img, transform_mask): + self.transform_img = transform_img + self.transform_mask = transform_mask + self.imagelist = image_list + self.masklist = mask_list + + + def __len__(self): + return len(self.imagelist) + + + def __getitem__(self, idx): + image = deepcopy(self.imagelist[idx]) + mask = deepcopy(self.masklist[idx]) + + image = self.transform_img(image) # apply transform + + mask = self.transform_mask(mask) + mask = torch.as_tensor(np.array(mask), dtype=torch.int64) # mask transform does not contain to_tensor function + + return image, mask \ No newline at end 
of file diff --git a/singa_auto/meta_store/meta_store.py b/singa_auto/meta_store/meta_store.py index 1f84729a..91be3c78 100644 --- a/singa_auto/meta_store/meta_store.py +++ b/singa_auto/meta_store/meta_store.py @@ -422,13 +422,16 @@ def get_service(self, service_id): service = self._session.query(Service).get(service_id) return service - def get_services(self, status=None): + def get_services(self, status=None, service_type=None): query = self._session.query(Service) if status is not None: # pylint: disable=E1111 query = query.filter(Service.status == status) - + + if service_type is not None: + query = query.filter(Service.service_type == service_type) + services = query.all() return services diff --git a/singa_auto/meta_store/requirements.txt b/singa_auto/meta_store/requirements.txt index 0bb6f98e..760235e8 100644 --- a/singa_auto/meta_store/requirements.txt +++ b/singa_auto/meta_store/requirements.txt @@ -1,2 +1,2 @@ SQLAlchemy==1.3.0 -psycopg2==2.7.5 \ No newline at end of file +psycopg2-binary==2.8.6 \ No newline at end of file diff --git a/singa_auto/model/__init__.py b/singa_auto/model/__init__.py index 172baed3..0d644c3d 100644 --- a/singa_auto/model/__init__.py +++ b/singa_auto/model/__init__.py @@ -20,6 +20,7 @@ from .image_classification import ImageClfBase from .model import BaseModel, Params, KnobConfig, Knobs from .object_detection import ObjtDetModel +from .image_segmentation import SegmentationModel from .post_tagging import PosTagModel from .tabular_classification import TabularClfModel from .log import LoggerUtils diff --git a/singa_auto/model/dev.py b/singa_auto/model/dev.py index ec16955c..bb483e85 100644 --- a/singa_auto/model/dev.py +++ b/singa_auto/model/dev.py @@ -209,8 +209,9 @@ def tune_model( return (best_proposal, best_model_test_score, best_params) -def make_predictions(queries: List[Any], task: str, - py_model_class: Type[BaseModel], proposal: Proposal, fine_tune_dataset_path, +def make_predictions_json(queries: List[Any], task: str, + py_model_class: Type[BaseModel], proposal: Proposal, + fine_tune_dataset_path, params: Params) -> List[Any]: inference_cache: InferenceCache = InferenceCache() worker_id = 'local' @@ -249,8 +250,8 @@ def make_predictions(queries: List[Any], task: str, _print_header('Making predictions with trained model...') predictions = model_inst.predict([x.query for x in queries_at_worker]) - - predictions = [Prediction(x, query.id, worker_id) + predictions = [ + Prediction(x, query.id, worker_id) for (x, query) in zip(predictions, queries_at_worker) ] @@ -265,6 +266,7 @@ def make_predictions(queries: List[Any], task: str, assert prediction is not None predictions_at_predictor.append(prediction) + # Predictor ensembles predictions ensemble_method = get_ensemble_method(task) print(f'Ensemble method: {ensemble_method}') out_predictions = [] @@ -341,7 +343,7 @@ def test_model_class(model_file_path: str, model_inst = None predictions = None if best_proposal is not None and best_params is not None and queries is not None: - (predictions, model_inst) = make_predictions(queries, task, + (predictions, model_inst) = make_predictions_json(queries, task, py_model_class, best_proposal, fine_tune_dataset_path, best_params) @@ -546,4 +548,4 @@ class DeprecatedModelUtils(): def _print_header(msg): print('-' * (len(msg) + 4)) print('| {} |'.format(msg)) - print('-' * (len(msg) + 4)) \ No newline at end of file + print('-' * (len(msg) + 4)) diff --git a/singa_auto/model/image_segmentation.py b/singa_auto/model/image_segmentation.py new file mode 100644 index 
00000000..c235f8b9 --- /dev/null +++ b/singa_auto/model/image_segmentation.py @@ -0,0 +1,26 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +from .model import BaseModel + + +class SegmentationModel(BaseModel): + # TODO Find some thing in common for this task, and and abstract them to here + pass + diff --git a/singa_auto/model/model.py b/singa_auto/model/model.py index 47f62187..cb157fb3 100644 --- a/singa_auto/model/model.py +++ b/singa_auto/model/model.py @@ -1,158 +1,157 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -import abc -import numpy as np -from typing import Union, Dict, Optional, Any, List - -from .knob import BaseKnob - -KnobConfig = Dict[str, BaseKnob] -Knobs = Dict[str, Any] -Params = Dict[str, Union[str, int, float, np.ndarray]] - - -class BaseModel(abc.ABC): - ''' - SINGA-Auto's base model class that SINGA-Auto models must extend. - - SINGA-Auto models must implement all abstract methods below, according to the specification of its associated task (see :ref:`tasks`). - They configure how this model template will be trained, evaluated, tuned, serialized and served on SINGA-Auto. - - In the model's ``__init__`` method, call ``super().__init__(**knobs)`` as the first line, - followed by the model's initialization logic. The model should be initialize itself with ``knobs``, - a set of generated knob values for the created model instance. - - These knob values are chosen by SINGA-Auto based on the model's knob configuration (defined in :meth:`singa_auto.model.BaseModel.get_knob_config`). - - For example: - - :: - - def __init__(self, **knobs): - self.__dict__.update(knobs) - ... - self._build_model(self.knob1, self.knob2) - - :param knobs: Dictionary mapping knob names to knob values - :type knobs: :obj:`singa_auto.model.Knobs` - ''' - - def __init__(self, **knobs: Knobs): - pass - - - @staticmethod - @abc.abstractmethod - def get_knob_config() -> KnobConfig: - ''' - Return a dictionary that defines the search space for this model template's knobs - (i.e. 
knobs' names, their types & their ranges). - - Over the course of training, your model will be initialized with different values of knobs within this search space - to maximize this model’s performance. - - Refer to :ref:`model-tuning` to understand more about how this works. - - :returns: Dictionary mapping knob names to knob specifications - ''' - raise NotImplementedError() - - @abc.abstractmethod - def train(self, - dataset_path: str, - shared_params: Optional[Params] = None, - **train_args): - ''' - Train this model instance with the given traing dataset and initialized knob values. - Additional keyword arguments could be passed depending on the task's specification. - - Additionally, trained parameters shared from previous trials could be passed, - as part of the ``SHARE_PARAMS`` policy (see :ref:`model-policies`). - - Subsequently, the model is considered *trained*. - - :param dataset_path: File path of the train dataset file in the *local filesystem*, in a format specified by the task - :param shared_params: Dictionary mapping parameter names to values, as produced by your model's :meth:`singa_auto.model.BaseModel.dump_parameters`. - ''' - raise NotImplementedError() - - @abc.abstractmethod - def evaluate(self, dataset_path: str, **kargs) -> float: - ''' - Evaluate this model instance with the given validation dataset after training. - - This will be called only when model is *trained*. - - :param dataset_path: File path of the validation dataset file in the *local filesystem*, in a format specified by the task - :returns: A score associated with the validation performance for the trained model instance, the higher the better e.g. classification accuracy. - ''' - raise NotImplementedError() - - @abc.abstractmethod - def predict(self, queries: List[Any]) -> List[Any]: - ''' - Make predictions on a batch of queries after training. - - This will be called only when model is *trained*. - - :param queries: List of queries, where a query is in the format specified by the task - :returns: List of predictions, in an order corresponding to the queries, where a prediction is in the format specified by the task - ''' - raise NotImplementedError() - - @abc.abstractmethod - def dump_parameters(self) -> Params: - ''' - Returns a dictionary of model parameters that *fully define the trained state of the model*. - This dictionary must conform to the format :obj:`singa_auto.model.Params`. - This will be used to save the trained model in SINGA-Auto. - - Additionally, trained parameters produced by this method could be shared with future trials, as - part of the ``SHARE_PARAMS`` policy (see :ref:`model-policies`). - - This will be called only when model is *trained*. - - :returns: Dictionary mapping parameter names to values - ''' - raise NotImplementedError() - - @abc.abstractmethod - def load_parameters(self, params: Params): - ''' - Loads this model instance with previously trained model parameters produced by your model's :meth:`singa_auto.model.BaseModel.dump_parameters`. - *This model instance's initialized knob values will match those during training*. - - Subsequently, the model is considered *trained*. - ''' - raise NotImplementedError() - - def destroy(self): - ''' - Destroy this model instance, freeing any resources held by this model instance. - No other instance methods will be called subsequently. - ''' - pass - - @staticmethod - def teardown(): - ''' - Runs class-wide teardown logic (e.g. close a training session shared across trials). 
-    '''
-    pass
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+import abc
+import numpy as np
+from typing import Union, Dict, Optional, Any, List
+
+from .knob import BaseKnob
+
+KnobConfig = Dict[str, BaseKnob]
+Knobs = Dict[str, Any]
+Params = Dict[str, Union[str, int, float, np.ndarray]]
+
+
+class BaseModel(abc.ABC):
+    '''
+    SINGA-Auto's base model class that SINGA-Auto models must extend.
+
+    SINGA-Auto models must implement all abstract methods below, according to the specification of its associated task (see :ref:`tasks`).
+    They configure how this model template will be trained, evaluated, tuned, serialized and served on SINGA-Auto.
+
+    In the model's ``__init__`` method, call ``super().__init__(**knobs)`` as the first line,
+    followed by the model's initialization logic. The model should initialize itself with ``knobs``,
+    a set of generated knob values for the created model instance.
+
+    These knob values are chosen by SINGA-Auto based on the model's knob configuration (defined in :meth:`singa_auto.model.BaseModel.get_knob_config`).
+
+    For example:
+
+    ::
+
+        def __init__(self, **knobs):
+            self.__dict__.update(knobs)
+            ...
+            self._build_model(self.knob1, self.knob2)
+
+    :param knobs: Dictionary mapping knob names to knob values
+    :type knobs: :obj:`singa_auto.model.Knobs`
+    '''
+
+    def __init__(self, **knobs: Knobs):
+        pass
+
+    @staticmethod
+    @abc.abstractmethod
+    def get_knob_config() -> KnobConfig:
+        '''
+        Return a dictionary that defines the search space for this model template's knobs
+        (i.e. knobs' names, their types & their ranges).
+
+        Over the course of training, your model will be initialized with different values of knobs within this search space
+        to maximize this model's performance.
+
+        Refer to :ref:`model-tuning` to understand more about how this works.
+
+        :returns: Dictionary mapping knob names to knob specifications
+        '''
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def train(self,
+              dataset_path: str,
+              shared_params: Optional[Params] = None,
+              **train_args):
+        '''
+        Train this model instance with the given training dataset and initialized knob values.
+        Additional keyword arguments could be passed depending on the task's specification.
+
+        Additionally, trained parameters shared from previous trials could be passed,
+        as part of the ``SHARE_PARAMS`` policy (see :ref:`model-policies`).
+
+        Subsequently, the model is considered *trained*.
+
+        :param dataset_path: File path of the train dataset file in the *local filesystem*, in a format specified by the task
+        :param shared_params: Dictionary mapping parameter names to values, as produced by your model's :meth:`singa_auto.model.BaseModel.dump_parameters`.
+        '''
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def evaluate(self, dataset_path: str, **kargs) -> float:
+        '''
+        Evaluate this model instance with the given validation dataset after training.
+
+        This will be called only when model is *trained*.
+
+        :param dataset_path: File path of the validation dataset file in the *local filesystem*, in a format specified by the task
+        :returns: A score associated with the validation performance for the trained model instance, the higher the better, e.g. classification accuracy.
+        '''
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def predict(self, queries: List[Any]) -> List[Any]:
+        '''
+        Make predictions on a batch of queries after training.
+
+        This will be called only when model is *trained*.
+
+        :param queries: List of queries, where a query is in the format specified by the task
+        :returns: List of predictions, in an order corresponding to the queries, where a prediction is in the format specified by the task
+        '''
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def dump_parameters(self) -> Params:
+        '''
+        Returns a dictionary of model parameters that *fully define the trained state of the model*.
+        This dictionary must conform to the format :obj:`singa_auto.model.Params`.
+        This will be used to save the trained model in SINGA-Auto.
+
+        Additionally, trained parameters produced by this method could be shared with future trials, as
+        part of the ``SHARE_PARAMS`` policy (see :ref:`model-policies`).
+
+        This will be called only when model is *trained*.
+
+        :returns: Dictionary mapping parameter names to values
+        '''
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def load_parameters(self, params: Params):
+        '''
+        Loads this model instance with previously trained model parameters produced by your model's :meth:`singa_auto.model.BaseModel.dump_parameters`.
+        *This model instance's initialized knob values will match those during training*.
+
+        Subsequently, the model is considered *trained*.
+        '''
+        raise NotImplementedError()
+
+    def destroy(self):
+        '''
+        Destroy this model instance, freeing any resources held by this model instance.
+        No other instance methods will be called subsequently.
+        '''
+        pass
+
+    @staticmethod
+    def teardown():
+        '''
+        Runs class-wide teardown logic (e.g. close a training session shared across trials).
+        '''
+        pass
diff --git a/singa_auto/model/utils.py b/singa_auto/model/utils.py
index 914e249b..5e6014d5 100644
--- a/singa_auto/model/utils.py
+++ b/singa_auto/model/utils.py
@@ -98,7 +98,7 @@ def parse_model_install_command(dependencies, enable_gpu=False):
             commands.append(
                 'pip --no-cache-dir install scikit-learn=={}'.format(ver))
         elif dep == ModelDependency.TENSORFLOW:
-            if enable_gpu:
+            if enable_gpu and ver.split('.')[0] == '1':
                 commands.append(
                     'pip --no-cache-dir install tensorflow-gpu=={}'.format(ver))
             else:
diff --git a/singa_auto/predictor/app.py b/singa_auto/predictor/app.py
index 896bdc15..515a54d8 100644
--- a/singa_auto/predictor/app.py
+++ b/singa_auto/predictor/app.py
@@ -1,74 +1,74 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -import os -import logging -from typing import Any, List -from flask import Flask, jsonify, g, request -from flask_cors import CORS -from .predictor import Predictor -from singa_auto.model import utils -import traceback - -service_id = os.environ['SINGA_AUTO_SERVICE_ID'] - -logger = logging.getLogger(__name__) -app = Flask(__name__) -CORS(app) - - -def get_predictor() -> Predictor: - # Allow multiple threads to each have their own instance of predictor - if not hasattr(g, 'predictor'): - g.predictor = Predictor(service_id) - - return g.predictor - - -@app.route('/') -def index(): - return 'Predictor is up.' - - -@app.route('/', methods=['POST']) -def predict(): - try: - if request.files.getlist('img'): - img_stores = request.files.getlist('img') - img_bytes = [ - img for img in [img_store.read() for img_store in img_stores] if img - ] - if not img_bytes: - return jsonify({'ErrorMsg': 'No image provided'}), 400 - queries = utils.dataset.load_images(img_bytes) - print("img_bytes_first 10 bytes", img_bytes[0][:10]) - print("queries_sizes", len(queries)) - elif request.get_json(): - data = request.get_json() - queries = [data] - else: - return jsonify({'ErrorMsg': 'data should be either at files (set "img" as key) or json payload'}), 400 - predictor = get_predictor() - predictions: List[Any] = predictor.predict(queries) - return jsonify(predictions), 200 - except: - # for debug,print the error - traceback.print_exc() - logging.error(traceback.format_exc()) - return jsonify({'ErrorMsg': 'Server Error:{}'.format(traceback.format_exc())} - ), 500 +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +import os +import logging +from typing import Any, List +from flask import Flask, jsonify, g, request +from flask_cors import CORS +from .predictor import Predictor +from singa_auto.model import utils +import traceback + +service_id = os.environ['SINGA_AUTO_SERVICE_ID'] + +logger = logging.getLogger(__name__) +app = Flask(__name__) +CORS(app) + + +def get_predictor() -> Predictor: + # Allow multiple threads to each have their own instance of predictor + if not hasattr(g, 'predictor'): + g.predictor = Predictor(service_id) + + return g.predictor + + +@app.route('/') +def index(): + return 'Predictor is up.' 
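+
+# Note: the POST handler below accepts either multipart file uploads under the
+# key 'img' (one or more images) or a single JSON payload; the parsed queries
+# are forwarded to the per-thread Predictor instance returned by get_predictor().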
+
+
+@app.route('/', methods=['POST'])
+def predict():
+    try:
+        if request.files.getlist('img'):
+            img_stores = request.files.getlist('img')
+            img_bytes = [
+                img for img in [img_store.read() for img_store in img_stores] if img
+            ]
+            if not img_bytes:
+                return jsonify({'ErrorMsg': 'No image provided'}), 400
+            queries = utils.dataset.load_images(img_bytes)
+            print("img_bytes_first 10 bytes", img_bytes[0][:10])
+            print("queries_sizes", len(queries))
+        elif request.get_json():
+            data = request.get_json()
+            queries = [data]
+        else:
+            return jsonify({'ErrorMsg': 'Data should be provided either as files (with "img" as the key) or as a JSON payload'}), 400
+
+        predictor = get_predictor()
+        predictions: List[Any] = predictor.predict(queries)
+        return jsonify(predictions), 200
+    except:
+        # For debugging, print the error
+        traceback.print_exc()
+        logging.error(traceback.format_exc())
+        return jsonify({'ErrorMsg': 'Server Error:{}'.format(traceback.format_exc())}), 500
diff --git a/singa_auto/worker/inference.py b/singa_auto/worker/inference.py
index 271a3127..f0430fd1 100644
--- a/singa_auto/worker/inference.py
+++ b/singa_auto/worker/inference.py
@@ -188,6 +188,14 @@ def _predict(self, queries: List[Query]) -> List[Prediction]:
         try:
             predictions = self._model_inst.predict([x.query for x in queries])
         except:
+            logger.error('queries')
+            logger.error(queries)
+            logger.error('x.query')
+            query_list = [x.query for x in queries]
+            logger.error(query_list)
+            logger.error(len(queries))
+            logger.error(type(query_list[0]))
+            logger.error(len(query_list[0]))
             logger.error('Error while making predictions:')
             logger.error(traceback.format_exc())
             predictions = [None for x in range(len(queries))]
diff --git a/singa_auto_scheduler/deploy/singa-auto-monitor.yaml b/singa_auto_scheduler/deploy/singa-auto-monitor.yaml
index 28559fc6..6d01d388 100644
--- a/singa_auto_scheduler/deploy/singa-auto-monitor.yaml
+++ b/singa_auto_scheduler/deploy/singa-auto-monitor.yaml
@@ -58,7 +58,7 @@ spec:
       serviceAccountName: sasche-sa
       containers:
       - name: sasche
-        image: singaauto/singa_auto_nodegpumonitor:dev
+        image: singa_auto/singa_auto_nodegpumonitor:dev
         imagePullPolicy: Always
         env:
         - name: NODE_NAME
diff --git a/singa_auto_scheduler/deploy/singa-auto-scheduler.yaml b/singa_auto_scheduler/deploy/singa-auto-scheduler.yaml
index e7b56731..049c9f9e 100644
--- a/singa_auto_scheduler/deploy/singa-auto-scheduler.yaml
+++ b/singa_auto_scheduler/deploy/singa-auto-scheduler.yaml
@@ -54,7 +54,7 @@ spec:
         name: scheduler-config
      containers:
      - name: sascheduler
-        image: singaauto/singa_auto_scheduler:dev
+        image: singa_auto/singa_auto_scheduler:dev
         imagePullPolicy: Always
         args:
         - sascheduler
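For reference, a minimal sketch of how a client might exercise the predictor route added in singa_auto/predictor/app.py above. The endpoint path ('/'), the multipart key 'img', and the JSON fallback follow the route's code; the host/port in PREDICTOR_URL and the file name example.jpg are hypothetical, and the requests library is assumed to be installed.

import requests

# Hypothetical predictor address; substitute the host/port of your deployed predictor service
PREDICTOR_URL = 'http://localhost:3005/'

# Option 1: upload one or more images as multipart files under the key 'img'
with open('example.jpg', 'rb') as f:
    resp = requests.post(PREDICTOR_URL, files=[('img', f)])
print(resp.status_code, resp.json())

# Option 2: send a single JSON query instead of image files
resp = requests.post(PREDICTOR_URL, json={'query': 'example payload'})
print(resp.status_code, resp.json())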