It runs on the CPU by default, but a DEVICE environment variable can be passed in to use CUDA (see the GPU example under Run below).
Build:
docker build . --tag nb-gpt-j
Run:
docker run --rm -it -p 8080:8080 -e MODEL_NAME=NbAiLab/nb-gpt-j-6b -e HF_AUTH_TOKEN=<api_token> -v $(pwd)/streamlitcache:/home/streamlitapp/.cache/huggingface nb-gpt-j
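To run on a GPU instead, the same command can be extended with GPU access and a DEVICE value. This is a sketch: it assumes the host has the NVIDIA Container Toolkit installed and that the app accepts cuda as the DEVICE value (the exact string the app expects is not specified here):
docker run --rm -it -p 8080:8080 --gpus all -e DEVICE=cuda -e MODEL_NAME=NbAiLab/nb-gpt-j-6b -e HF_AUTH_TOKEN=<api_token> -v $(pwd)/streamlitcache:/home/streamlitapp/.cache/huggingface nb-gpt-j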
Register:
gcloud auth configure-docker
docker tag nb-gpt-j gcr.io/<project>/nb-gpt-j
docker push gcr.io/<project>/nb-gpt-j
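As an optional check, the pushed tags can be listed with the standard gcloud command:
gcloud container images list-tags gcr.io/<project>/nb-gpt-j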