Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for round-robin allocation of CUDA cards to workers #36

Merged
merged 1 commit into from
Oct 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import os
def post_fork(server, worker):
    """Gunicorn ``post_fork`` hook: round-robin CUDA device assignment.

    Reads ``APP_CUDA_DEVICE_COUNT`` from the environment; when it is a
    positive integer, pins the newly forked worker to a single CUDA card by
    setting ``CUDA_VISIBLE_DEVICES``. This must run before any model-loading
    code executes in the worker process, which is why it lives in the
    gunicorn config rather than in application code.

    :param server: gunicorn arbiter (master) instance; used for logging.
    :param worker: gunicorn worker instance; ``worker.age`` (the 1-based
        spawn counter of the worker) drives the round-robin selection.
    """
    server.log.info("Worker spawned (pid: %s)", worker.pid)
    # Default of -1 (no CUDA) when the variable is unset; int() also accepts
    # the -1 int default directly, so missing and invalid-negative behave alike.
    cuda_device_count = int(os.getenv("APP_CUDA_DEVICE_COUNT", -1))

    if cuda_device_count > 0:
        # Set variables for CUDA resource allocation.
        # Needs to be done before loading models.
        # The number of devices to use should be set via
        # APP_CUDA_DEVICE_COUNT in env_app, and the docker compose
        # file should allocate the cards to the container.
        cuda_id = worker.age % cuda_device_count
        # Lazy %-style args keep formatting consistent with the log call above
        # and defer string building until the record is actually emitted.
        worker.log.info("Setting cuda device %s", cuda_id)
        os.environ["CUDA_VISIBLE_DEVICES"] = str(cuda_id)
    else:
        worker.log.info("APP_CUDA_DEVICE_COUNT device variables not set")


3 changes: 3 additions & 0 deletions medcat_service/nlp_processor/medcat_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def __init__(self):
self.app_model = os.getenv("APP_MODEL_NAME", "unknown")
self.entity_output_mode = os.getenv("ANNOTATIONS_ENTITY_OUTPUT_MODE", "dict").lower()


self.cat = self._create_cat()
self.cat.train = os.getenv("APP_TRAINING_MODE", False)

Expand All @@ -70,11 +71,13 @@ def __init__(self):
# this is available to constrain torch threads when there
# isn't a GPU
# You probably want to set to 1
# Not sure what happens if torch is using a cuda device
if self.torch_threads > 0:
import torch
torch.set_num_threads(self.torch_threads)
self.log.info("Torch threads set to " + str(self.torch_threads))


self.log.info("MedCAT processor is ready")

def get_app_info(self):
Expand Down
3 changes: 2 additions & 1 deletion start-service-prod.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,6 @@ SERVER_ACCESS_LOG_FORMAT="%(t)s [ACCESSS] %(h)s \"%(r)s\" %(s)s \"%(f)s\" \"%(a)
#
echo "Starting up Flask app using gunicorn server ..."
gunicorn --bind $SERVER_HOST:$SERVER_PORT --workers=$SERVER_WORKERS --threads=$SERVER_THREADS --timeout=$SERVER_WORKER_TIMEOUT \
--access-logformat="$SERVER_ACCESS_LOG_FORMAT" --access-logfile=- --log-file=- --log-level info \
--access-logformat="$SERVER_ACCESS_LOG_FORMAT" --access-logfile=- --log-file=- --log-level info \
--config /cat/config.py \
wsgi
Loading