From 3ed2732740a2657fe4771e8818ad8c9ff30554c3 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Wed, 19 Oct 2022 10:39:41 -0700 Subject: [PATCH] [SPARK-40833][K8S] Cleanup apt lists cache ### What changes were proposed in this pull request? Remove unused apt lists cache ### Why are the changes needed? Clean the cache to reduce the Docker image size. This is also [recommended](https://docs.docker.com/develop/develop-images/dockerfile_best-practices/#run) by the Docker community: ``` $ docker run --user 0:0 -ti apache/spark bash root@5d1ca347279e:/opt/spark/work-dir# ls /var/lib/apt/lists/ auxfiles lock deb.debian.org_debian_dists_bullseye-updates_InRelease partial deb.debian.org_debian_dists_bullseye-updates_main_binary-arm64_Packages.lz4 security.debian.org_debian-security_dists_bullseye-security_InRelease deb.debian.org_debian_dists_bullseye_InRelease security.debian.org_debian-security_dists_bullseye-security_main_binary-arm64_Packages.lz4 deb.debian.org_debian_dists_bullseye_main_binary-arm64_Packages.lz4 root@5d1ca347279e:/opt/spark/work-dir# du --max-depth=1 -h /var/lib/apt/lists/ 4.0K /var/lib/apt/lists/partial 4.0K /var/lib/apt/lists/auxfiles 17M /var/lib/apt/lists/ ``` ### Does this PR introduce _any_ user-facing change? Yes, to some extent: the image size is reduced. ### How was this patch tested? K8s CI passed Closes #38298 from Yikun/SPARK-40513. 
Authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun --- .../kubernetes/docker/src/main/dockerfiles/spark/Dockerfile | 2 +- .../docker/src/main/dockerfiles/spark/Dockerfile.java17 | 2 +- .../docker/src/main/dockerfiles/spark/bindings/R/Dockerfile | 2 +- .../src/main/dockerfiles/spark/bindings/python/Dockerfile | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile index fc529afb1be82..30338b6f91c73 100644 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile @@ -39,7 +39,7 @@ RUN set -ex && \ ln -sv /bin/bash /bin/sh && \ echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \ chgrp root /etc/passwd && chmod ug+rw /etc/passwd && \ - rm -rf /var/cache/apt/* + rm -rf /var/cache/apt/* && rm -rf /var/lib/apt/lists/* COPY jars /opt/spark/jars COPY bin /opt/spark/bin diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17 b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17 index 5a360682071b9..194242996ca7f 100644 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17 +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17 @@ -39,7 +39,7 @@ RUN set -ex && \ ln -sv /bin/bash /bin/sh && \ echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \ chgrp root /etc/passwd && chmod ug+rw /etc/passwd && \ - rm -rf /var/cache/apt/* + rm -rf /var/cache/apt/* && rm -rf /var/lib/apt/lists/* COPY jars /opt/spark/jars COPY bin /opt/spark/bin diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile index 03e4210623080..e4d62cf45f5d7 100644 --- 
a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile @@ -29,7 +29,7 @@ RUN mkdir ${SPARK_HOME}/R RUN \ apt-get update && \ apt install -y r-base r-base-dev && \ - rm -rf /var/cache/apt/* + rm -rf /var/cache/apt/* && rm -rf /var/lib/apt/lists/* COPY R ${SPARK_HOME}/R ENV R_HOME /usr/lib/R diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile index 85d92e39ed278..740aa7f0c43dc 100644 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile @@ -28,7 +28,7 @@ RUN apt-get update && \ apt install -y python3 python3-pip && \ pip3 install --upgrade pip setuptools && \ # Removed the .cache to save space - rm -rf /root/.cache && rm -rf /var/cache/apt/* + rm -rf /root/.cache && rm -rf /var/cache/apt/* && rm -rf /var/lib/apt/lists/* COPY python/pyspark ${SPARK_HOME}/python/pyspark COPY python/lib ${SPARK_HOME}/python/lib