diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 22053c95..fb2582be 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -18,11 +18,12 @@ services: - api-media:/home/api/media - api-static:/home/api/static - api-db:/home/api/db + - api-db-backup:/home/api/db-backup env_file: - ./envs/env environment: - MCT_VERSION=latest - command: /home/run.sh + command: /home/scripts/run.sh nginx: build: @@ -53,4 +54,5 @@ volumes: api-media: api-static: api-db: + api-db-backup: solr-data: diff --git a/docker-compose-prod.yml b/docker-compose-prod.yml index b82672f1..afcecd51 100644 --- a/docker-compose-prod.yml +++ b/docker-compose-prod.yml @@ -13,13 +13,29 @@ services: - api-media:/home/api/media - api-static:/home/api/static - api-db:/home/api/db + - api-db-backup:/home/api/db-backup expose: - "8000" env_file: - ./envs/env-prod environment: - MCT_VERSION=v2.11.2 - command: /home/run.sh + command: /home/scripts/run.sh + + # cron container: runs the scheduled db backup (see webapp/scripts/crontab); uses the same env file as the prod app so both agree on DB_PATH and DB_BACKUP_DIR + medcattrainer-db-backup: + image: cogstacksystems/medcat-trainer:v2.11.2 + restart: always + volumes: + - ./configs:/home/configs + - api-media:/home/api/media + - api-static:/home/api/static + - api-db:/home/api/db + - api-db-backup:/home/api/db-backup + env_file: + - ./envs/env-prod + entrypoint: /home/scripts/entry.sh + command: cron -f -l 2 nginx: container_name: medcattrainer_nginx @@ -48,4 +64,5 @@ volumes: api-media: api-static: api-db: + api-db-backup: solr-data: diff --git a/docker-compose.yml b/docker-compose.yml index afb07f8d..a289e734 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,11 +12,27 @@ services: - api-media:/home/api/media - api-static:/home/api/static - api-db:/home/api/db + - api-db-backup:/home/api/db-backup env_file: - ./envs/env environment: - MCT_VERSION=v2.11.2 - command: /home/run.sh + command: /home/scripts/run.sh + + # cron container: runs the scheduled db backup (see webapp/scripts/crontab) + medcattrainer-db-backup: + image: cogstacksystems/medcat-trainer:v2.11.2 + restart: always + volumes: + - 
./configs:/home/configs + - api-media:/home/api/media + - api-static:/home/api/static + - api-db:/home/api/db + - api-db-backup:/home/api/db-backup + env_file: + - ./envs/env + entrypoint: /home/scripts/entry.sh + command: cron -f -l 2 nginx: image: cogstacksystems/medcat-trainer-nginx:v2.11.2 @@ -45,4 +61,6 @@ volumes: api-media: api-static: api-db: + api-db-backup: solr-data: + diff --git a/docs/index.rst b/docs/index.rst index 41c6c38d..dd941e75 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -18,6 +18,7 @@ Welcome to MedCATtrainer's documentation! annotator_guide.md meta_annotations.md advanced_usage.md + maintanence.md Indices and tables diff --git a/docs/installation.md b/docs/installation.md index 2f96366d..b27f4e2d 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -4,9 +4,9 @@ MedCATtrainer is a docker-compose packaged Django application. ## Download from Dockerhub Clone the repo, run the default docker-compose file and default env var: ```shell -git clone https://github.com/CogStack/MedCATtrainer -cd MedCATtrainer -docker-compose up +$ git clone https://github.com/CogStack/MedCATtrainer +$ cd MedCATtrainer +$ docker-compose up ``` This will use the pre-built docker images available on DockerHub. If your internal firewall does on permit access to DockerHub, you can build @@ -16,7 +16,7 @@ directly from source. If you have MedCAT v0.x models, and want to use the trainer please use the following docker-compose file: This refences the latest built image for the trainer that is still compatible with [MedCAT v0.x.](https://pypi.org/project/medcat/0.4.0.6/) and under. 
```shell -docker-compose -f docker-compose-mc0x.yml up +$ docker-compose -f docker-compose-mc0x.yml up ``` ## Build images from source @@ -27,7 +27,7 @@ $ docker-compose -f docker-compose-dev.yml up To change environment variables, such as the exposed host ports and language of spaCy model, use: ```shell -cp .env-example .env +$ cp .env-example .env # Set local configuration in .env ``` diff --git a/docs/maintanence.md b/docs/maintanence.md new file mode 100644 index 00000000..f3349ef0 --- /dev/null +++ b/docs/maintanence.md @@ -0,0 +1,61 @@ +# Maintenance + +MedCATtrainer is actively maintained. To ensure you receive the latest +security patches of the software and its dependencies you should regularly +be upgrading to the latest release. + +The latest stable releases update the `docker-compose.yml` and `docker-compose-prod.yml` files. + +To update these docker compose files, either copy them directly from the [repo](https://github.com/CogStack/MedCATtrainer) +or update the cloned files via: + +```shell +$ cd MedCATtrainer +$ git pull +$ docker-compose up +# alternatively for prod releases use: +$ docker-compose -f docker-compose-prod.yml up +``` + +MedCATtrainer follows [Semver](https://semver.org/), so patch and minor releases should always be backwards compatible, +whereas major releases, e.g. v1.x vs v2.x versions, signify breaking changes. + +Necessary Django DB migrations will automatically be applied between releases, which should largely be invisible to an end admin +or annotation user. Nevertheless, migrating ORM / DB models, then rolling back a release can cause issues if values are defaulted +or removed from a later version. + +## Backup and Restore + +### Backup +Before updating to a new release, a backup will be created in the `DB_BACKUP_DIR`, as configured in `envs/env`. +A further crontab runs the same backup script at 10pm every night. 
This does not cause any downtime and will look like +this in the logs: +```shell +medcattrainer-medcattrainer-db-backup-1 | Found backup dir location: /home/api/db-backup and DB_PATH: /home/api/db/db.sqlite3 +medcattrainer-medcattrainer-db-backup-1 | Backed up existing DB to /home/api/db-backup/db-backup-2023-09-26__23-26-01.sqlite3 +medcattrainer-medcattrainer-db-backup-1 | To restore this backup use $ /home/scripts/restore_db.sh db-backup-2023-09-26__23-26-01.sqlite3 +``` + +A backup is also automatically performed each time the service starts, and any migrations are performed, in the event of a new release +introducing a breaking change and corrupting a DB. + +### Restore +If a DB is corrupted or needs to be restored to an existing backed up db use the following commands, whilst the service is running: + +```shell +$ docker ps +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +a2489b0c681b cogstacksystems/medcat-trainer-nginx:v2.11.2 "/docker-entrypoint.…" 4 days ago Up 4 days 80/tcp, 0.0.0.0:8001->8000/tcp, :::8001->8000/tcp medcattrainer-nginx-1 +20fed153d798 solr:8 "docker-entrypoint.s…" 4 days ago Up 4 days 0.0.0.0:8983->8983/tcp, :::8983->8983/tcp mct_solr +2b250a0975fe cogstacksystems/medcat-trainer:v2.11.2 "/home/scripts/run.sh" 4 days ago Up 4 days medcattrainer-medcattrainer-1 +$ docker exec -it 2b250a0975fe bash +root@2b250a0975fe:/home/api# cd .. +$ ./scripts/restore_db.sh db-backup-2023-09-25__23-21-39.sqlite3 # run the restore_db.sh script +Found backup dir location: /home/api/db-backup, found db path: /home/api/db/db.sqlite3 +DB file to restore: db-backup-2023-09-25__23-21-39.sqlite3 +Found db-backup-2023-09-25__23-21-39.sqlite3 - y to confirm backup: y # you'll need to confirm this is the correct file to restore. +Restored db-backup-2023-09-25__23-21-39.sqlite3 to /home/api/db/db.sqlite3 +``` + +The `restore_db.sh` script will automatically restore the latest db file, if no file is specified. 
+ diff --git a/envs/env b/envs/env index c828aef6..11c0d850 100644 --- a/envs/env +++ b/envs/env @@ -20,3 +20,11 @@ MAX_DATASET_SIZE=10000 ### Solr Concept Search Conf ### CONCEPT_SEARCH_SERVICE_HOST=solr CONCEPT_SEARCH_SERVICE_PORT=8983 + +### DB backup dir ### +# directory holding the DB inside the container (the api-db volume is mounted here); the host-side volume location is managed by docker and might be different in /etc/docker/daemon.json +DB_DIR=/home/api/db +# full path of the DB file that is backed up and restored; currently only supports sqlite3 dbs +DB_PATH=${DB_DIR}/db.sqlite3 +DB_BACKUP_DIR=/home/api/db-backup + diff --git a/envs/env-prod b/envs/env-prod index d9aeee10..dcfa81dc 100644 --- a/envs/env-prod +++ b/envs/env-prod @@ -21,3 +21,11 @@ MAX_DATASET_SIZE=10000 ### Solr Concept Search Conf ### CONCEPT_SEARCH_SERVICE_HOST=solr CONCEPT_SEARCH_SERVICE_PORT=8983 + +### DB backup dir - should ideally be set to a mounted / backed up drive ### +# directory holding the DB inside the container (the api-db volume is mounted here); the host-side volume location is managed by docker and might be different in /etc/docker/daemon.json +DB_DIR=/home/api/db +# full path of the DB file that is backed up and restored; currently only supports sqlite3 dbs +DB_PATH=${DB_DIR}/db.sqlite3 +DB_BACKUP_DIR=/home/api/db-backup + diff --git a/webapp/Dockerfile b/webapp/Dockerfile index 28058915..323494ad 100644 --- a/webapp/Dockerfile +++ b/webapp/Dockerfile @@ -7,6 +7,9 @@ RUN apt-get update -y && \ # install vim as its annoying not to have an editor RUN apt-get install -y vim +# install cron (used for the scheduled db backup) and remove the default jobs under /etc/cron.*/ +RUN apt-get install -y cron && which cron && rm -rf /etc/cron.*/* + # Get node and npm RUN apt install -y nodejs && apt install -y npm @@ -18,6 +21,9 @@ ENV PATH="/root/.cargo/bin:${PATH}" WORKDIR /home COPY ./ . 
+# copy backup crontab and chmod scripts +RUN chmod u+x /home/scripts/entry.sh && chmod u+x /home/scripts/crontab && cp /home/scripts/crontab /etc/crontab + # Build frontend WORKDIR /home/frontend RUN npm install && npm run build @@ -30,4 +36,4 @@ ARG SPACY_MODELS="en_core_web_md" RUN for SPACY_MODEL in ${SPACY_MODELS}; do python -m spacy download ${SPACY_MODEL}; done WORKDIR /home/api/ -RUN chmod a+x /home/run.sh +RUN chmod a+x /home/scripts/run.sh diff --git a/webapp/api/db-backup/.keep b/webapp/api/db-backup/.keep new file mode 100644 index 00000000..e69de29b diff --git a/webapp/scripts/backup_db.sh b/webapp/scripts/backup_db.sh new file mode 100755 index 00000000..685452c5 --- /dev/null +++ b/webapp/scripts/backup_db.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# Copies the sqlite DB at DB_PATH into DB_BACKUP_DIR under a timestamped name and prunes backups older than 90 days; does nothing if either env var is missing. +if [ -n "${DB_BACKUP_DIR}" ] && [ -f "${DB_PATH}" ]; then + echo "Found backup dir location: ${DB_BACKUP_DIR} and DB_PATH: ${DB_PATH}" + if [ ! -d "${DB_BACKUP_DIR}" ]; then + mkdir -p "${DB_BACKUP_DIR}" + else + # remove backups older than 90 days (only files named like the backups written below, so other files such as .keep survive). + echo "Checking age of current backups - removing any older than 90 days.." + find "${DB_BACKUP_DIR}" -mtime +90 -type f -name 'db-backup-*.sqlite3' -delete + fi + BACKUP_NAME=db-backup-$(date +"%Y-%m-%d__%H-%M-%S").sqlite3 + cp -- "${DB_PATH}" "${DB_BACKUP_DIR}/${BACKUP_NAME}" || exit 1 + echo "Backed up existing DB to ${DB_BACKUP_DIR}/${BACKUP_NAME}" + echo "To restore this backup use $ /home/scripts/restore_db.sh ${BACKUP_NAME}" +else + echo "No DB_BACKUP_DIR env var found or DB_PATH . This should be set in env vars. 
No backups will be created" + exit 0 +fi + diff --git a/webapp/scripts/crontab b/webapp/scripts/crontab new file mode 100644 index 00000000..a0083c59 --- /dev/null +++ b/webapp/scripts/crontab @@ -0,0 +1,3 @@ +SHELL=/bin/bash +BASH_ENV=/etc/environment +0 22 * * * root /home/scripts/backup_db.sh > /proc/1/fd/1 2>/proc/1/fd/2 diff --git a/webapp/scripts/entry.sh b/webapp/scripts/entry.sh new file mode 100644 index 00000000..60097910 --- /dev/null +++ b/webapp/scripts/entry.sh @@ -0,0 +1,6 @@ +#!/bin/bash +env >> /etc/environment +# the container env is dumped above so that cron jobs can load it through BASH_ENV (see the crontab) +# log and then execute CMD, replacing this shell +echo "$@" +exec "$@" diff --git a/webapp/load_examples.py b/webapp/scripts/load_examples.py similarity index 100% rename from webapp/load_examples.py rename to webapp/scripts/load_examples.py diff --git a/webapp/scripts/restore_db.sh b/webapp/scripts/restore_db.sh new file mode 100755 index 00000000..300cca91 --- /dev/null +++ b/webapp/scripts/restore_db.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# Usage: restore_db.sh [backup-file] (bash is required for read -p). Copies the named backup from DB_BACKUP_DIR over DB_PATH after confirmation; defaults to the newest backup. +DB_RESTORE_FILE=$1 +BACKUP_DIR=${DB_BACKUP_DIR} + +if [ -n "$BACKUP_DIR" ] && [ -n "$DB_PATH" ]; then + echo "Found backup dir location: ${BACKUP_DIR}, found db path: ${DB_PATH}" + if [ -z "$DB_RESTORE_FILE" ]; then + echo "No specific backup specified. Restoring latest backup in $BACKUP_DIR" + DB_RESTORE_FILE=$(find "${BACKUP_DIR}" -maxdepth 1 -type f -name 'db-backup-*.sqlite3' -printf '%f\n' | sort | tail -n 1) + fi + echo "DB file to restore: ${DB_RESTORE_FILE}" + read -r -p "Found $DB_RESTORE_FILE - y to confirm backup: " choice + case "$choice" in + y|Y ) + cp -- "${BACKUP_DIR}/${DB_RESTORE_FILE##*/}" "${DB_PATH}" || exit 1 + echo "Restored $DB_RESTORE_FILE to $DB_PATH" + ;; + n|N ) + echo " - exiting";; + * ) + echo "Invalid choice - exiting";; + esac + else + echo "No DB_BACKUP_DIR and DB_PATH found. Set to location of the backups and the location of DB sqlite3 file to be restored." 
+fi diff --git a/webapp/run.sh b/webapp/scripts/run.sh similarity index 86% rename from webapp/run.sh rename to webapp/scripts/run.sh index 6fb91d7c..a682c41e 100755 --- a/webapp/run.sh +++ b/webapp/scripts/run.sh @@ -1,5 +1,8 @@ #!/bin/sh +# back up the existing DB before the collectstatic and migration steps below run +/home/scripts/backup_db.sh + # Collect static files and migrate if needed python /home/api/manage.py collectstatic --noinput python /home/api/manage.py makemigrations --noinput @@ -17,7 +20,7 @@ if User.objects.count() == 0: " | python manage.py shell if [ $LOAD_EXAMPLES ]; then - python /home/load_examples.py & + python /home/scripts/load_examples.py & fi uwsgi --http-timeout 360s --http :8000 --master --chdir /home/api/ --module core.wsgi