Skip to content

Commit

Permalink
Sync repo with internal repo (#15)
Browse files Browse the repository at this point in the history
Co-authored-by: Tal Cohen <[email protected]>
  • Loading branch information
snirkop89 and Tal Cohen authored Mar 10, 2024
1 parent 2f461fb commit 76a7ccc
Show file tree
Hide file tree
Showing 12 changed files with 338 additions and 387 deletions.
61 changes: 39 additions & 22 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019, HabanaLabs Ltd. All rights reserved.
# Copyright (c) 2022, HabanaLabs Ltd. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -12,52 +12,69 @@
# See the License for the specific language governing permissions and
# limitations under the License.

ARG BASE_IMAGE
FROM ${BASE_IMAGE} as builder

ENV GOLANG_VERSION 1.20
RUN wget -nv -O - https://dl.google.com/go/go${GOLANG_VERSION}.linux-amd64.tar.gz \
ARG VERSION=1.14.0
ARG MINOR_VERSION=493
ARG DIST=ubuntu22.04
ARG REGISTRY=vault.habana.ai

FROM ${REGISTRY}/gaudi-docker/${VERSION}/${DIST}/habanalabs/pytorch-installer-2.1.1:${VERSION}-${MINOR_VERSION} as builder

RUN apt-get update && \
apt-get install -y wget make git gcc \
&& \
rm -rf /var/lib/apt/lists/*

ARG GOLANG_VERSION=1.21.5
# Install the Go toolchain into /usr/local.
# The machine name reported by `uname -m` is mapped to the architecture suffix
# used in the Go release tarball name (amd64, ppc64le or arm64); any other value
# prints "unsupported architecture" and fails the build. The tarball for
# GOLANG_VERSION is streamed from wget straight into tar, so no archive is left
# on disk.
RUN set -eux; \
\
arch="$(uname -m)"; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64) ARCH='arm64' ;; \
*) echo "unsupported architecture" ; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
| tar -C /usr/local -xz


ENV GOPATH /opt/habanalabs/go
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH

# go-hlml must be downloaded before building the image, since it is hosted in Gerrit
# and our Gerrit version doesn't support Go modules. It is then copied in as
# a sibling of the device plugin folder.
WORKDIR /opt/habanalabs/go/src/go-hlml
COPY go-hlml/ .

WORKDIR /opt/habanalabs/go/src/habanalabs-device-plugin
COPY go.mod go.sum ./
RUN go mod download

COPY . .
RUN go mod tidy

RUN go build -buildvcs=false -o bin/habanalabs-device-plugin .


FROM artifactory-kfs.habana-labs.com/docker-developers/base/ubuntu:focal
ARG BUILD_DATE
ARG BUILD_REF

FROM ${REGISTRY}/gaudi-docker/${VERSION}/${DIST}/habanalabs/pytorch-installer-2.1.1:${VERSION}-${MINOR_VERSION}

# Remove the Habana packages (compat libs etc.) that ship with the base image.
# NOTE(review): the original comment said these are dropped "in favor of libs
# installed by the NVIDIA driver", which looks like a copy-paste slip for the
# Habana driver — confirm the intended wording.
# The pattern is quoted so the shell hands it to apt-get verbatim instead of
# glob-expanding it against files in the working directory.
RUN apt-get --purge -y autoremove 'habana*'

# Install pciutils. apt-get is used rather than apt because apt's command-line
# interface is not intended for scripted use. The package lists are deleted in
# the same layer so they do not end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    pciutils && \
    rm -rf /var/lib/apt/lists/*

COPY --from=builder /usr/lib/habanalabs /usr/lib/habanalabs
COPY --from=builder /usr/include/habanalabs /usr/include/habanalabs
COPY --from=builder /opt/habanalabs/go/src/habanalabs-device-plugin/bin/habanalabs-device-plugin /usr/bin/habanalabs-device-plugin

RUN echo "/usr/lib/habanalabs/" >> /etc/ld.so.conf.d/habanalabs.conf
RUN ldconfig

COPY --from=builder /opt/habanalabs/go/src/habanalabs-device-plugin/bin/habanalabs-device-plugin /usr/bin/habanalabs-device-plugin
CMD ["habanalabs-device-plugin"]


LABEL image.created="${BUILD_DATE}" \
LABEL io.k8s.display-name="HABANA Device Plugin" \
vendor="HABANA" \
version=${VERSION} \
image.git-commit="${GIT_COMMIT}" \
image.created="${BUILD_DATE}" \
image.revision="${BUILD_REF}" \
image.title="habana-device-plugin" \
image.author="Habana Labs Ltd"
summary="HABANA device plugin for Kubernetes" \
description="See summary"

CMD ["habanalabs-device-plugin"]
59 changes: 43 additions & 16 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,19 +1,46 @@
image ?= "artifactory-kfs.habana-labs.com/k8s-docker-dev/device_plugin/habana-device-plugin"
version ?= "test"
base_image ?= "artifactory-kfs.habana-labs.com/docker-local/1.13.0/ubuntu20.04/habanalabs/base-installer:1.13.0-10"


## build: build docker image in ci-cd process
.PHONY: build
build:
docker build \
-t $(image):$(version) \
--build-arg BASE_IMAGE=$(base_image) \
--build-arg BUILD_REF=$(version) \
# Copyright (c) 2020-2022, HabanaLabs Ltd. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Container CLI used by the recipes in this file; override on the command line
# to use a different binary.
DOCKER ?= docker

# NOTE(review): versions.mk is not visible here; presumably it defines REGISTRY,
# APP_NAME, VERSION, MINOR_VERSION, DIST and GOLANG_VERSION — confirm.
include $(CURDIR)/versions.mk


# Fall back to <REGISTRY>/<APP_NAME> when IMAGE_NAME is unset *or* set to empty
# (unlike `?=`, which would keep an explicitly empty value).
ifeq ($(IMAGE_NAME),)
IMAGE_NAME := $(REGISTRY)/$(APP_NAME)
endif

# Tag defaults to <VERSION>-<MINOR_VERSION>; IMAGE is the full name:tag reference
# and is expanded lazily, so later overrides of IMAGE_NAME/IMAGE_TAG are honoured.
IMAGE_TAG ?= $(VERSION)-$(MINOR_VERSION)
IMAGE = $(IMAGE_NAME):$(IMAGE_TAG)


.PHONY: build push

## build: build docker image
# Builds $(IMAGE) from the Dockerfile in the current directory.
# Every value handed to the shell is double-quoted so that a value containing
# whitespace or shell metacharacters is still passed as a single argument.
# BUILD_DATE is the current UTC time, computed by the shell when the recipe
# runs (hence the doubled `$$`, which Make reduces to a literal `$`).
build:
	$(DOCKER) build \
		-t "$(IMAGE)" \
		--build-arg BUILD_REF="$(IMAGE_TAG)" \
		--build-arg REGISTRY="$(REGISTRY)" \
		--build-arg VERSION="$(VERSION)" \
		--build-arg MINOR_VERSION="$(MINOR_VERSION)" \
		--build-arg DIST="$(DIST)" \
		--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
		--build-arg GIT_COMMIT="$(GIT_COMMIT)" \
		--build-arg BUILD_DATE="$$(date -u +'%Y-%m-%dT%H:%M:%SZ')" \
		.

## push-image: push the image to the registry
.PHONY: push-image
push-image:
docker image push $(image):$(version)
## push: push the image to the registry
# Pushes the reference held in IMAGE using the DOCKER CLI. The target has no
# prerequisite on `build`, so the image must already exist locally.
push:
$(DOCKER) image push $(IMAGE)
144 changes: 29 additions & 115 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,141 +1,55 @@
# HABANA device plugin for Kubernetes
# Habana Device Plugin for Kubernetes

## Table of Contents
The Habana device plugin for Kubernetes, operating as a DaemonSet, enables the automatic registration of
Habana devices within your Kubernetes cluster, while also monitoring the health status of these devices.
This integration ensures seamless management and monitoring of Habana devices within the Kubernetes ecosystem,
enhancing operational efficiency and reliability.

This repository contains Habana official implementation of the [Kubernetes device plugin](https://kubernetes.io/docs/concepts/extend-kubernetes/compute-storage-net/device-plugins/).

- [HABANA device plugin for Kubernetes](#habana-device-plugin-for-kubernetes)
## Table of Contents
- [Habana Device Plugin for Kubernetes](#habana-device-plugin-for-kubernetes)
- [Table of Contents](#table-of-contents)
- [Introduction](#introduction)
- [Prerequisites](#prerequisites)
- [The below sections detail existing plugins](#the-below-sections-detail-existing-plugins)
- [Goya device plugin](#goya-device-plugin)
- [Running Jobs](#running-jobs)
- [Gaudi device plugin](#gaudi-device-plugin)
- [With Docker](#with-docker)
- [Build](#build)
- [Build in CD](#build-in-cd)
- [Deploy as Daemon Set:](#deploy-as-daemon-set)
- [Changelog](#changelog)
- [Version 0.9.1](#version-091)
- [Version 0.8.1-beta1](#version-081-beta1)
- [Issues](#issues)


## Introduction

The HABANA device plugin for Kubernetes is a DaemonSet that allows you to automatically:
- Register HABANA devices in your Kubernetes cluster.
- Keep track of the health of your devices.

## Prerequisites
The list of prerequisites for running the HABANA device plugin is described below:
- HABANA drivers
- Kubernetes version >= 1.10

### The below sections detail existing plugins
- [Gaudi Device Registration](#gaudi-device-registration)
- [Building and Running Locally Using Docker](#building-and-running-locally-using-docker)

#### Goya device plugin

Once you have enabled this option on *all* the nodes you wish to use,
you can then enable support in your cluster by deploying the following Daemonset:
## Prerequisites

```shell
$ kubectl create -f habanalabs-device-plugin.yaml
```
The following prerequisites are required to run the Habana device plugin:
- Habana Drivers
- Kubernetes version >= 1.19
- [Habana-container-runtime](https://github.com/HabanaAI/habana-container-runtime)

##### Running Jobs

Devices can now be consumed via container-level resource requirements using the resource name `habana.ai/goya`:
```yaml
apiVersion: v1
kind: Pod
metadata:
name: habanalabs-goya-demo0
spec:
nodeSelector:
accelerator: habanalabs
containers:
- name: habana-ai-base-container
image: habanai/goya-demo:0.9.1-43-debian9.8
workingDir: /home/user1
securityContext:
capabilities:
add: ["SYS_RAWIO"]
command: ["sleep"]
args: ["10000"]
resources:
limits:
habana.ai/goya: 1
imagePullSecrets:
- name: regcred
```

#### Gaudi device plugin
## Gaudi Device Registration

Once you have enabled this option on *all* the nodes you wish to use,
you can then enable support in your cluster by deploying the following Daemonset:
Once the prerequisites listed above are in place on the nodes,
you can enable support in your cluster by deploying the DaemonSet:

```shell
$ kubectl create -f habanalabs-device-plugin-gaudi.yaml
```

### With Docker

#### Build
Option 1, pull the prebuilt image from [Docker Hub](https://hub.docker.com/r/habanai/k8s-device-plugin):
```shell
$ docker pull habanai/k8s-device-plugin:0.9.1
```
## Building and Running Locally Using Docker

Option 2, build without cloning the repository:
```shell
$ docker build --network=host --no-cache -t habanai/k8s-device-plugin:0.9.1 habanalabs-k8s-device-plugin
```
To build and run using Docker, choose the option below that matches your scenario:

Option 3, if you want to modify the code:
- To pull the prebuilt image, run:
```shell
https://github.com/HabDevops/habanalabs-k8s-device-plugin
$ git clone https://github.com/HabDevops/habanalabs-k8s-device-plugin.git && cd habanalabs-k8s-device-plugin
$ git checkout v0.9.1
$ docker build -t habanai/k8s-device-plugin:0.9.1 .
$ docker pull vault.habana.ai/docker-k8s-device-plugin/docker-k8s-device-plugin:1.14.0
```

### Build in CD
Requirements:
- The go-hlml repo must first be downloaded from Gerrit into the habanalabs-device-plugin repo.
It is copied by the Dockerfile into the image during the build process.
_(this is due to a lack of Go modules support in Gerrit v2)_


To build the image in the CD process, use the `make build` target.
It accepts the following parameters:
- `base_image` - Image to use as the builder for the application
- `image` - Final full image name to deploy
- `version` - Image's tag

Full example showing usage of the current Jenkins variables (or parameters):
```
make build base_image=$baseDockerImage image=$pluginDockerImage version="${release_version}-${release_build_id}"
- To build without cloning the repository, run:
```shell
$ docker build --network=host --no-cache -t habanai/k8s-device-plugin:1.14.0 habanalabs-k8s-device-plugin
```

#### Deploy as Daemon Set:
- To modify the code, run:
```shell
$ kubectl create -f habanalabs-device-plugin.yaml
$ git clone https://github.com/HabanaAI/habanalabs-k8s-device-plugin.git && cd habanalabs-k8s-device-plugin
$ docker build -t habanai/k8s-device-plugin:devel .
```

## Changelog

### Version 0.9.1
- New HLML SW 0.9.1-43 debian9.8

### Version 0.8.1-beta1
- Support k8s plugin for Gaudi
- New HLML SW 0.8.1-55 debian9.8
- Add new resource namespace e.g: goya/gaudi
- Refactor device plugin to eventually handle multiple resource types
- Move plugin error retry to event loop so we can exit with a signal

# Issues
* You can report a bug by [filing a new issue](https://github.com/HabDevops/habanalabs-k8s-device-plugin/issues/new)
* You can contribute by opening a [pull request](https://help.github.com/articles/using-pull-requests/)


10 changes: 1 addition & 9 deletions examples/hl-smi-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,11 @@ kind: Pod
metadata:
name: hl-smi
spec:
hostNetwork: true
containers:
- name: app
image: artifactory.habana-labs.com/docker-developers/base/u18/u18-base:bionic-20190307
image: vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:1.14.0-493
command: ["/bin/bash"]
args: ["-c", "hl-smi"]
resources:
limits:
habana.ai/gaudi: 1
volumeMounts:
- name: usr
mountPath: /usr/bin
volumes:
- name: usr
hostPath:
path: /usr/bin
9 changes: 4 additions & 5 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
module github.com/HabanaAI/habanalabs-k8s-device-plugin

go 1.20
go 1.21

toolchain go1.21.5

require (
github.com/HabanaAI/gohlml v1.3.0
github.com/HabanaAI/gohlml v1.14.0
github.com/fsnotify/fsnotify v1.4.9
google.golang.org/grpc v1.35.0
k8s.io/kubelet v0.19.7
Expand All @@ -18,6 +20,3 @@ require (
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 // indirect
google.golang.org/protobuf v1.25.0 // indirect
)

// uncomment below if developing with a local copy of gohlml
replace github.com/HabanaAI/gohlml v1.3.0 => ../go-hlml
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
Expand Down Expand Up @@ -350,6 +351,7 @@ gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo=
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw=
gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
Expand Down
Loading

0 comments on commit 76a7ccc

Please sign in to comment.