diff --git a/.cicd/agent-python.yaml b/.cicd/agent-python.yaml index a4b77ea5c..c0abd5a32 100644 --- a/.cicd/agent-python.yaml +++ b/.cicd/agent-python.yaml @@ -9,8 +9,14 @@ spec: volumeMounts: - { name: tmp, mountPath: /tmp } - { name: docker, mountPath: /var/run/docker.sock } + - { name: kaniko-secret, mountPath: /secret } + env: + - { name: GOOGLE_APPLICATION_CREDENTIALS, value: /secret/kaniko-secret.json } volumes: - name: tmp hostPath: { path: /tmp, type: Directory } - name: docker hostPath: { path: /var/run/docker.sock, type: File } + - name: kaniko-secret + secret: + secretName: kaniko-secret diff --git a/Jenkinsfile b/Jenkinsfile index 0653b390b..b177c9826 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -28,6 +28,8 @@ pipeline { steps { sh "apt update" + sh "apt install curl && mkdir -p /tmp/download && curl -L https://download.docker.com/linux/static/stable/x86_64/docker-18.06.3-ce.tgz | tar -xz -C /tmp/download && mv /tmp/download/docker/docker /usr/local/bin/" + sh "docker login -u _json_key --password-stdin https://eu.gcr.io/substra-208412/ < /secret/kaniko-secret.json" sh "apt install -y python3-pip python3-dev build-essential gfortran musl-dev postgresql-contrib git curl netcat" dir("substrabac") { diff --git a/README.md b/README.md index 3da97e0d8..4be9757c2 100644 --- a/README.md +++ b/README.md @@ -80,7 +80,7 @@ It will drop the databases if they are already created, then create them and gra - With django migrations ```shell SUBSTRABAC_ORG=owkin SUBSTRABAC_DEFAULT_PORT=8000 python substrabac/manage.py migrate --settings=substrabac.settings.dev -SUBSTRABAC_ORG=chu-nantes SUBSTRABAC_DEFAULT_PORT=8001 python substrabac/manage.py migrate --settings=substrabac.settings.dev``` +SUBSTRABAC_ORG=chu-nantes SUBSTRABAC_DEFAULT_PORT=8001 python substrabac/manage.py migrate --settings=substrabac.settings.dev ``` ###### With fixtures (fixtures container has been run from substra-network, old behavior for testing) @@ -143,7 +143,7 @@ Execute this command in the `substrabac/substrabac` folder. Note the use of the development settings. 
```shell -DJANGO_SETTINGS_MODULE=substrabac.settings.dev SUBSTRABAC_ORG=owkin SUBSTRABAC_DEFAULT_PORT=8000 celery -E -A substrabac worker -l info -B -n owkin -Q owkin,scheduler, celery --hostname owkin.scheduler +DJANGO_SETTINGS_MODULE=substrabac.settings.dev SUBSTRABAC_ORG=owkin SUBSTRABAC_DEFAULT_PORT=8000 celery -E -A substrabac worker -l info -B -n owkin -Q owkin,scheduler,celery --hostname owkin.scheduler DJANGO_SETTINGS_MODULE=substrabac.settings.dev SUBSTRABAC_ORG=owkin SUBSTRABAC_DEFAULT_PORT=8000 celery -E -A substrabac worker -l info -B -n owkin -Q owkin,owkin.worker,celery --hostname owkin.worker DJANGO_SETTINGS_MODULE=substrabac.settings.dev SUBSTRABAC_ORG=owkin SUBSTRABAC_DEFAULT_PORT=8000 celery -E -A substrabac worker -l info -B -n owkin -Q owkin,owkin.dryrunner,celery --hostname owkin.dryrunner DJANGO_SETTINGS_MODULE=substrabac.settings.dev SUBSTRABAC_ORG=chu-nantes SUBSTRABAC_DEFAULT_PORT=8001 celery -E -A substrabac worker -l info -B -n chunantes -Q chu-nantes,scheduler,celery --hostname chu-nantes.scheduler diff --git a/bootstrap.sh b/bootstrap.sh index 293812acd..c622836bc 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -3,11 +3,11 @@ BASEDIR=$(dirname "$0") # if version not passed in, default to latest released version -export VERSION=1.3.0 +export VERSION=1.4.1 # if ca version not passed in, default to latest released version export CA_VERSION=$VERSION # current version of thirdparty images (couchdb, kafka and zookeeper) released -export THIRDPARTY_IMAGE_VERSION=0.4.10 +export THIRDPARTY_IMAGE_VERSION=0.4.15 export ARCH=$(echo "$(uname -s|tr '[:upper:]' '[:lower:]'|sed 's/mingw64_nt.*/windows/')-$(uname -m | sed 's/x86_64/amd64/g')") export MARCH=$(uname -m) diff --git a/docker/celerybeat/Dockerfile b/docker/celerybeat/Dockerfile index 023732885..144cdad4f 100644 --- a/docker/celerybeat/Dockerfile +++ b/docker/celerybeat/Dockerfile @@ -14,6 +14,3 @@ RUN pip3 install -r requirements.txt COPY ./substrabac/libs /usr/src/app/libs COPY ./substrabac/substrapp /usr/src/app/substrapp COPY ./substrabac/substrabac /usr/src/app/substrabac - -RUN sed -i 's/localhost/rabbit/g' /usr/src/app/substrabac/celery.py - diff --git a/docker/celeryworker/Dockerfile b/docker/celeryworker/Dockerfile index 0322d7966..6bf39448d 100644 --- a/docker/celeryworker/Dockerfile +++ b/docker/celeryworker/Dockerfile @@ -20,5 +20,3 @@ COPY ./substrabac/fake_metrics /usr/src/app/fake_metrics COPY ./substrabac/fake_data_sample /usr/src/app/fake_data_sample COPY ./substrabac/substrapp /usr/src/app/substrapp COPY ./substrabac/substrabac /usr/src/app/substrabac - -RUN sed -i 's/localhost/rabbit/g' /usr/src/app/substrabac/celery.py diff --git a/docker/postgresql/init.sh b/docker/postgresql/init.sh index 9032cd387..92590b661 100644 --- a/docker/postgresql/init.sh +++ b/docker/postgresql/init.sh @@ -5,3 +5,7 @@ psql -U ${USER} -d substrabac_owkin -c "GRANT ALL PRIVILEGES ON DATABASE substra createdb -U ${USER} -E UTF8 substrabac_chunantes psql -U ${USER} -d substrabac_chunantes -c "GRANT ALL PRIVILEGES ON DATABASE substrabac_chunantes to substrabac;ALTER ROLE substrabac WITH SUPERUSER CREATEROLE CREATEDB;" + + +createdb -U ${USER} -E UTF8 substrabac_clb +psql -U ${USER} -d substrabac_clb -c "GRANT ALL PRIVILEGES ON DATABASE substrabac_clb to substrabac;ALTER ROLE substrabac WITH SUPERUSER CREATEROLE CREATEDB;" diff --git a/docker/start.py b/docker/start.py index 95d1e2f9a..998381faa 100644 --- a/docker/start.py +++ b/docker/start.py @@ -10,8 +10,35 @@ raven_worker_url = 
"https://76abd6b5d11e48ea8a118831c86fc615@sentry.io/1402762" raven_scheduler_url = raven_worker_url +FABRIC_LOGGING_SPEC = "debug" + + +BACKEND_PORT = { + 'owkin': 8000, + 'chunantes': 8001, + 'clb': 8002 +} + def generate_docker_compose_file(conf, launch_settings): + + # POSTGRES + POSTGRES_USER = 'substrabac' + USER = 'substrabac' + POSTGRES_PASSWORD = 'substrabac' + POSTGRES_DB = 'substrabac' + + # RABBITMQ + RABBITMQ_DEFAULT_USER = 'guest' + RABBITMQ_DEFAULT_PASS = 'guest' + RABBITMQ_HOSTNAME = 'rabbitmq' + RABBITMQ_NODENAME = 'rabbitmq' + RABBITMQ_DOMAIN = 'rabbit' + RABBITMQ_PORT = '5672' + + # CELERY + CELERY_BROKER_URL = f'amqp://{RABBITMQ_DEFAULT_USER}:{RABBITMQ_DEFAULT_PASS}@{RABBITMQ_DOMAIN}:{RABBITMQ_PORT}//' + try: from ruamel import yaml except ImportError: @@ -22,10 +49,11 @@ def generate_docker_compose_file(conf, launch_settings): 'substrabac_tools': {'postgresql': {'container_name': 'postgresql', 'image': 'library/postgres:10.5', 'restart': 'unless-stopped', - 'environment': ['POSTGRES_USER=substrabac', - 'USER=substrabac', - 'POSTGRES_PASSWORD=substrabac', - 'POSTGRES_DB=substrabac'], + 'logging': {'driver': 'json-file', 'options': {'max-size': '20m', 'max-file': '5'}}, + 'environment': [f'POSTGRES_USER={POSTGRES_USER}', + f'USER={USER}', + f'POSTGRES_PASSWORD={POSTGRES_PASSWORD}', + f'POSTGRES_DB={POSTGRES_DB}'], 'volumes': [ '/substra/backup/postgres-data:/var/lib/postgresql/data', f'{dir_path}/postgresql/init.sh:/docker-entrypoint-initdb.d/init.sh'], @@ -34,9 +62,10 @@ def generate_docker_compose_file(conf, launch_settings): 'hostname': 'celerybeat', 'image': 'substra/celerybeat', 'restart': 'unless-stopped', - 'command': '/bin/bash -c "while ! { nc -z rabbit 5672 2>&1; }; do sleep 1; done; while ! { nc -z postgresql 5432 2>&1; }; do sleep 1; done; celery -A substrabac beat -l info -b rabbit"', + 'command': '/bin/bash -c "while ! { nc -z rabbit 5672 2>&1; }; do sleep 1; done; while ! 
{ nc -z postgresql 5432 2>&1; }; do sleep 1; done; celery -A substrabac beat -l info"', 'logging': {'driver': 'json-file', 'options': {'max-size': '20m', 'max-file': '5'}}, 'environment': ['PYTHONUNBUFFERED=1', + f'CELERY_BROKER_URL={CELERY_BROKER_URL}', f'DJANGO_SETTINGS_MODULE=substrabac.settings.common'], 'depends_on': ['postgresql', 'rabbit'] }, @@ -44,10 +73,11 @@ def generate_docker_compose_file(conf, launch_settings): 'hostname': 'rabbitmq', # Must be set to be able to recover from volume 'restart': 'unless-stopped', 'image': 'rabbitmq:3', - 'environment': ['RABBITMQ_DEFAULT_USER=guest', - 'RABBITMQ_DEFAULT_PASS=guest', - 'HOSTNAME=rabbitmq', - 'RABBITMQ_NODENAME=rabbitmq'], + 'logging': {'driver': 'json-file', 'options': {'max-size': '20m', 'max-file': '5'}}, + 'environment': [f'RABBITMQ_DEFAULT_USER={RABBITMQ_DEFAULT_USER}', + f'RABBITMQ_DEFAULT_PASS={RABBITMQ_DEFAULT_PASS}', + f'HOSTNAME={RABBITMQ_HOSTNAME}', + f'RABBITMQ_NODENAME={RABBITMQ_NODENAME}'], 'volumes': ['/substra/backup/rabbit-data:/var/lib/rabbitmq'] }, }, @@ -55,124 +85,110 @@ def generate_docker_compose_file(conf, launch_settings): for org in conf: org_name = org['name'] - orderer = org['orderer']['name'] + orderer_ca = org['orderer']['ca'] peer = org['peer']['name'] + tls_peer_dir = f'/substra/data/orgs/{org_name}/tls/{peer}' + org_name_stripped = org_name.replace('-', '') - # Dirty port assign - port = 8000 - if org_name_stripped == 'chunantes': - port = 8001 + port = BACKEND_PORT[org_name_stripped] + + cpu_count = os.cpu_count() + processes = 2 * int(cpu_count) + 1 + + if launch_settings == 'prod': + django_server = f'python3 manage.py collectstatic --noinput; uwsgi --http :{port} --module substrabac.wsgi --static-map /static=/usr/src/app/substrabac/statics --master --processes {processes} --threads 2' + else: + + django_server = f'python3 manage.py runserver 0.0.0.0:{port}' + + backend_global_env = [ + f'ORG={org_name_stripped}', + f'SUBSTRABAC_ORG={org_name}', + f'SUBSTRABAC_DEFAULT_PORT={port}', + 'SUBSTRABAC_PEER_PORT=internal', + + 'PYTHONUNBUFFERED=1', + 'DATABASE_HOST=postgresql', + + f'CELERY_BROKER_URL={CELERY_BROKER_URL}', + f'DJANGO_SETTINGS_MODULE=substrabac.settings.{launch_settings}', + + # Basic auth + f"BACK_AUTH_USER={os.environ.get('BACK_AUTH_USER', '')}", + f"BACK_AUTH_PASSWORD={os.environ.get('BACK_AUTH_PASSWORD', '')}", + f"SITE_HOST={os.environ.get('SITE_HOST', 'localhost')}", + f"SITE_PORT={os.environ.get('BACK_PORT', 9000)}", + + # HLF overwrite config from core.yaml + f"FABRIC_CFG_PATH_ENV={org['peer']['docker_core_dir']}", + f"FABRIC_LOGGING_SPEC={FABRIC_LOGGING_SPEC}", + f"CORE_PEER_ADDRESS_ENV={org['peer']['host']}:{org['peer']['port']['internal']}", + f"CORE_PEER_MSPCONFIGPATH={org['core_peer_mspconfigpath']}", + f"CORE_PEER_TLS_CERT_FILE={tls_peer_dir}/server/server.crt", + f"CORE_PEER_TLS_KEY_FILE={tls_peer_dir}/server/server.key", + f"CORE_PEER_TLS_ROOTCERT_FILE={tls_peer_dir}/server/server.pem", + f"CORE_PEER_TLS_CLIENTCERT_FILE={tls_peer_dir}/client/client.crt", + f"CORE_PEER_TLS_CLIENTKEY_FILE={tls_peer_dir}/client/client.key", + f"CORE_PEER_TLS_CLIENTROOTCAS_FILES={tls_peer_dir}/client/client.pem", + ] + + hlf_volumes = [ + # config (core.yaml + substrabac/conf.json) + f'/substra/conf/{org_name}:/substra/conf/{org_name}:ro', + + # HLF files + f'{orderer_ca}:{orderer_ca}:ro', + f'{tls_peer_dir}:{tls_peer_dir}:ro', + f'{org["core_peer_mspconfigpath"]}:{org["core_peer_mspconfigpath"]}:ro', + ] backend = {'container_name': f'{org_name_stripped}.substrabac', 'image': 
'substra/substrabac', 'restart': 'unless-stopped', 'ports': [f'{port}:{port}'], - 'command': f'/bin/bash -c "while ! {{ nc -z postgresql 5432 2>&1; }}; do sleep 1; done; yes | python manage.py migrate --settings=substrabac.settings.{launch_settings}; python3 manage.py collectstatic --noinput; python3 manage.py runserver 0.0.0.0:{port}"', + 'command': f'/bin/bash -c "while ! {{ nc -z postgresql 5432 2>&1; }}; do sleep 1; done; yes | python manage.py migrate; {django_server}"', 'logging': {'driver': 'json-file', 'options': {'max-size': '20m', 'max-file': '5'}}, - 'environment': ['DATABASE_HOST=postgresql', - f'SUBSTRABAC_ORG={org_name}', - f'SUBSTRABAC_DEFAULT_PORT={port}', - f'DJANGO_SETTINGS_MODULE=substrabac.settings.{launch_settings}', - 'PYTHONUNBUFFERED=1', - f"BACK_AUTH_USER={os.environ.get('BACK_AUTH_USER', '')}", - f"BACK_AUTH_PASSWORD={os.environ.get('BACK_AUTH_PASSWORD', '')}", - f"SITE_HOST={os.environ.get('SITE_HOST', 'localhost')}", - f"SITE_PORT={os.environ.get('BACK_PORT', 9000)}", - f"FABRIC_CFG_PATH_ENV={org['peer']['docker_core_dir']}", - f"CORE_PEER_ADDRESS_ENV={org['peer']['host']}:{org['peer']['docker_port']}"], + 'environment': backend_global_env.copy(), 'volumes': ['/substra/medias:/substra/medias', '/substra/dryrun:/substra/dryrun', - '/substra/static:/usr/src/app/substrabac/statics', - f'/substra/conf/{org_name}:/substra/conf/{org_name}', - f'/substra/data/orgs/{orderer}/ca-cert.pem:/substra/data/orgs/{orderer}/ca-cert.pem', - f'/substra/data/orgs/{org_name}/ca-cert.pem:/substra/data/orgs/{org_name}/ca-cert.pem', - f'/substra/data/orgs/{org_name}/user/msp:/substra/data/orgs/{org_name}/user/msp', - f'/substra/data/orgs/{org_name}/tls/{peer}:/substra/data/orgs/{org_name}/tls/{peer}', - f'/substra/data/orgs/{org_name}/user/msp:/opt/gopath/src/github.com/hyperledger/fabric/peer/msp'], + '/substra/servermedias:/substra/servermedias', + '/substra/static:/usr/src/app/substrabac/statics'] + hlf_volumes, 'depends_on': ['postgresql', 'rabbit']} scheduler = {'container_name': f'{org_name_stripped}.scheduler', 'hostname': f'{org_name}.scheduler', 'image': 'substra/celeryworker', 'restart': 'unless-stopped', - 'command': f'/bin/bash -c "while ! {{ nc -z rabbit 5672 2>&1; }}; do sleep 1; done; while ! {{ nc -z postgresql 5432 2>&1; }}; do sleep 1; done; celery -A substrabac worker -l info -n {org_name_stripped} -Q {org_name},scheduler,celery -b rabbit --hostname {org_name}.scheduler"', + 'command': f'/bin/bash -c "while ! {{ nc -z rabbit 5672 2>&1; }}; do sleep 1; done; while ! 
{{ nc -z postgresql 5432 2>&1; }}; do sleep 1; done; celery -A substrabac worker -l info -n {org_name_stripped} -Q {org_name},scheduler,celery --hostname {org_name}.scheduler"', 'logging': {'driver': 'json-file', 'options': {'max-size': '20m', 'max-file': '5'}}, - 'environment': [f'ORG={org_name_stripped}', - f'SUBSTRABAC_ORG={org_name}', - f'SUBSTRABAC_DEFAULT_PORT={port}', - f'DJANGO_SETTINGS_MODULE=substrabac.settings.{launch_settings}', - 'PYTHONUNBUFFERED=1', - f"BACK_AUTH_USER={os.environ.get('BACK_AUTH_USER', '')}", - f"BACK_AUTH_PASSWORD={os.environ.get('BACK_AUTH_PASSWORD', '')}", - f"SITE_HOST={os.environ.get('SITE_HOST', 'localhost')}", - f"SITE_PORT={os.environ.get('BACK_PORT', 9000)}", - 'DATABASE_HOST=postgresql', - f"FABRIC_CFG_PATH_ENV={org['peer']['docker_core_dir']}", - f"CORE_PEER_ADDRESS_ENV={org['peer']['host']}:{org['peer']['docker_port']}"], - 'volumes': [f'/substra/conf/{org_name}:/substra/conf/{org_name}', - f'/substra/data/orgs/{orderer}/ca-cert.pem:/substra/data/orgs/{orderer}/ca-cert.pem', - f'/substra/data/orgs/{org_name}/ca-cert.pem:/substra/data/orgs/{org_name}/ca-cert.pem', - f'/substra/data/orgs/{org_name}/user/msp:/substra/data/orgs/{org_name}/user/msp', - f'/substra/data/orgs/{org_name}/tls/{peer}:/substra/data/orgs/{org_name}/tls/{peer}', - f'/substra/data/orgs/{org_name}/user/msp:/opt/gopath/src/github.com/hyperledger/fabric/peer/msp'], + 'environment': backend_global_env.copy(), + 'volumes': hlf_volumes, 'depends_on': [f'substrabac{org_name_stripped}', 'postgresql', 'rabbit']} worker = {'container_name': f'{org_name_stripped}.worker', 'hostname': f'{org_name}.worker', 'image': 'substra/celeryworker', 'restart': 'unless-stopped', - 'command': f'/bin/bash -c "while ! {{ nc -z rabbit 5672 2>&1; }}; do sleep 1; done; while ! {{ nc -z postgresql 5432 2>&1; }}; do sleep 1; done; celery -A substrabac worker -l info -n {org_name_stripped} -Q {org_name},{org_name}.worker,celery -b rabbit --hostname {org_name}.worker"', + 'command': f'/bin/bash -c "while ! {{ nc -z rabbit 5672 2>&1; }}; do sleep 1; done; while ! 
{{ nc -z postgresql 5432 2>&1; }}; do sleep 1; done; celery -A substrabac worker -l info -n {org_name_stripped} -Q {org_name},{org_name}.worker,celery --hostname {org_name}.worker"', 'logging': {'driver': 'json-file', 'options': {'max-size': '20m', 'max-file': '5'}}, - 'environment': [f'ORG={org_name_stripped}', - f'SUBSTRABAC_ORG={org_name}', - f'SUBSTRABAC_DEFAULT_PORT={port}', - f'DJANGO_SETTINGS_MODULE=substrabac.settings.{launch_settings}', - 'PYTHONUNBUFFERED=1', - f"BACK_AUTH_USER={os.environ.get('BACK_AUTH_USER', '')}", - f"BACK_AUTH_PASSWORD={os.environ.get('BACK_AUTH_PASSWORD', '')}", - f"SITE_HOST={os.environ.get('SITE_HOST', 'localhost')}", - f"SITE_PORT={os.environ.get('BACK_PORT', 9000)}", - 'DATABASE_HOST=postgresql', - f"FABRIC_CFG_PATH_ENV={org['peer']['docker_core_dir']}", - f"CORE_PEER_ADDRESS_ENV={org['peer']['host']}:{org['peer']['docker_port']}"], + 'environment': backend_global_env.copy(), 'volumes': ['/var/run/docker.sock:/var/run/docker.sock', '/substra/medias:/substra/medias', - f'/substra/conf/{org_name}:/substra/conf/{org_name}', - f'/substra/data/orgs/{orderer}/ca-cert.pem:/substra/data/orgs/{orderer}/ca-cert.pem', - f'/substra/data/orgs/{org_name}/ca-cert.pem:/substra/data/orgs/{org_name}/ca-cert.pem', - f'/substra/data/orgs/{org_name}/user/msp:/substra/data/orgs/{org_name}/user/msp', - f'/substra/data/orgs/{org_name}/tls/{peer}:/substra/data/orgs/{org_name}/tls/{peer}', - f'/substra/data/orgs/{org_name}/user/msp:/opt/gopath/src/github.com/hyperledger/fabric/peer/msp'], + '/substra/servermedias:/substra/servermedias'] + hlf_volumes, 'depends_on': [f'substrabac{org_name_stripped}', 'rabbit']} dryrunner = {'container_name': f'{org_name_stripped}.dryrunner', 'hostname': f'{org_name}.dryrunner', 'image': 'substra/celeryworker', 'restart': 'unless-stopped', - 'command': f'/bin/bash -c "while ! {{ nc -z rabbit 5672 2>&1; }}; do sleep 1; done; while ! {{ nc -z postgresql 5432 2>&1; }}; do sleep 1; done; celery -A substrabac worker -l info -n {org_name_stripped} -Q {org_name},{org_name}.dryrunner,celery -b rabbit --hostname {org_name}.dryrunner"', + 'command': f'/bin/bash -c "while ! {{ nc -z rabbit 5672 2>&1; }}; do sleep 1; done; while ! 
{{ nc -z postgresql 5432 2>&1; }}; do sleep 1; done; celery -A substrabac worker -l info -n {org_name_stripped} -Q {org_name},{org_name}.dryrunner,celery --hostname {org_name}.dryrunner"', 'logging': {'driver': 'json-file', 'options': {'max-size': '20m', 'max-file': '5'}}, - 'environment': [f'ORG={org_name_stripped}', - f'SUBSTRABAC_ORG={org_name}', - f'SUBSTRABAC_DEFAULT_PORT={port}', - f'DJANGO_SETTINGS_MODULE=substrabac.settings.{launch_settings}', - 'PYTHONUNBUFFERED=1', - f"BACK_AUTH_USER={os.environ.get('BACK_AUTH_USER', '')}", - f"BACK_AUTH_PASSWORD={os.environ.get('BACK_AUTH_PASSWORD', '')}", - f"SITE_HOST={os.environ.get('SITE_HOST', 'localhost')}", - f"SITE_PORT={os.environ.get('BACK_PORT', 9000)}", - 'DATABASE_HOST=postgresql', - f"FABRIC_CFG_PATH_ENV={org['peer']['docker_core_dir']}", - f"CORE_PEER_ADDRESS_ENV={org['peer']['host']}:{org['peer']['docker_port']}"], + 'environment': backend_global_env.copy(), 'volumes': ['/var/run/docker.sock:/var/run/docker.sock', - '/substra/medias:/substra/medias', '/substra/dryrun:/substra/dryrun', - f'/substra/conf/{org_name}:/substra/conf/{org_name}', - f'/substra/data/orgs/{orderer}/ca-cert.pem:/substra/data/orgs/{orderer}/ca-cert.pem', - f'/substra/data/orgs/{org_name}/ca-cert.pem:/substra/data/orgs/{org_name}/ca-cert.pem', - f'/substra/data/orgs/{org_name}/user/msp:/substra/data/orgs/{org_name}/user/msp', - f'/substra/data/orgs/{org_name}/tls/{peer}:/substra/data/orgs/{org_name}/tls/{peer}', - f'/substra/data/orgs/{org_name}/user/msp:/opt/gopath/src/github.com/hyperledger/fabric/peer/msp'], + '/substra/medias:/substra/medias', + '/substra/servermedias:/substra/servermedias'] + hlf_volumes, 'depends_on': [f'substrabac{org_name_stripped}', 'rabbit']} # Check if we have nvidia docker @@ -190,7 +206,13 @@ def generate_docker_compose_file(conf, launch_settings): dryrunner['environment'].append(dryrun_root) backend['environment'].append(dryrun_root) else: - scheduler['environment'].append(f"RAVEN_URL={raven_scheduler_url}",) + default_domain = os.environ.get('SUBSTRABAC_DEFAULT_DOMAIN', '') + if default_domain: + backend['environment'].append(f"DEFAULT_DOMAIN={default_domain}") + worker['environment'].append(f"DEFAULT_DOMAIN={default_domain}") + scheduler['environment'].append(f"DEFAULT_DOMAIN={default_domain}") + dryrunner['environment'].append(f"DEFAULT_DOMAIN={default_domain}") + scheduler['environment'].append(f"RAVEN_URL={raven_scheduler_url}") worker['environment'].append(f"RAVEN_URL={raven_worker_url}") dryrunner['environment'].append(f"RAVEN_URL={raven_dryrunner_url}") @@ -242,7 +264,7 @@ def start(conf, launch_settings, no_backup): print('start docker-compose', flush=True) call(['docker-compose', '-f', docker_compose['path'], '--project-directory', os.path.join(dir_path, '../'), 'up', '-d', '--remove-orphans', '--build']) - call(['docker', 'ps', '-a']) + call(['docker', 'ps', '-a', '--format', 'table {{.ID}}\t{{.Names}}\t{{.Status}}\t{{.Ports}}']) if __name__ == "__main__": diff --git a/docker/substrabac/Dockerfile b/docker/substrabac/Dockerfile index 4d0ae185c..053c45257 100644 --- a/docker/substrabac/Dockerfile +++ b/docker/substrabac/Dockerfile @@ -18,5 +18,3 @@ COPY ./substrabac/manage.py /usr/src/app/manage.py COPY ./substrabac/libs /usr/src/app/libs COPY ./substrabac/substrapp /usr/src/app/substrapp COPY ./substrabac/substrabac /usr/src/app/substrabac - -RUN sed -i 's/localhost/rabbit/g' /usr/src/app/substrabac/celery.py diff --git a/fabric-sdk-py_tests/fabric-sdk-py-discover.py b/fabric-sdk-py_tests/fabric-sdk-py-discover.py new 
file mode 100644 index 000000000..0bf3127d5 --- /dev/null +++ b/fabric-sdk-py_tests/fabric-sdk-py-discover.py @@ -0,0 +1,88 @@ +from hfc.fabric import Client +from hfc.fabric.channel.channel import Channel +from hfc.fabric.block_decoder import decode_fabric_MSP_config, decode_fabric_peers_info, decode_fabric_endpoints +from hfc.fabric.peer import create_peer +from hfc.fabric.user import create_user +from hfc.util.crypto.crypto import ecies +from hfc.util.keyvaluestore import FileKeyValueStore + +import pprint +import glob + +peer_config = {'clientKey': {'path': '/substra/data/orgs/owkin/tls/peer1/cli-client.key'}, + 'clientServer': {'path': '/substra/data/orgs/owkin/tls/peer1/cli-client.crt'}, + 'eventUrl': 'peer1-owkin:7053', + 'grpcOptions': {'grpc.http2.keepalive_time': 15, + 'grpc.ssl_target_name_override': 'peer1-owkin'}, + 'tlsCACerts': { + 'path': '/substra/data/orgs/owkin/ca-cert.pem'}, + 'url': 'peer1-owkin:7051'} + +peer1_owkin = create_peer(endpoint=peer_config['url'], + tls_cacerts=peer_config['tlsCACerts']['path'], + client_key=peer_config['clientKey']['path'], + client_cert=peer_config['clientServer']['path'], + opts=[(k, v) for k, v in peer_config['grpcOptions'].items()]) + +key_path = glob.glob('/substra/data/orgs/owkin/admin/msp/keystore/*')[0] +cert_path = '/substra/data/orgs/owkin/admin/msp/signcerts/cert.pem' + +admin_owkin = create_user(name='admin', + org='owkin', + state_store=FileKeyValueStore('/tmp/kvs/'), + msp_id='owkinMSP', + key_path=key_path, + cert_path=cert_path) + + +client = Client() + +print(client.query_peers(admin_owkin, peer1_owkin)) +print(client.query_peers(admin_owkin, peer1_owkin, channel='mychannel', local=False)) + +client.init_with_discovery(admin_owkin, peer1_owkin, + 'mychannel') + +response = Channel('', '')._discovery(admin_owkin, peer1_owkin, config=False, local=True) + +response = Channel('mychannel', '')._discovery(admin_owkin, peer1_owkin, config=True, local=False) + + +def process_config_result(config_result): + + results = {'msps': {}, + 'orderers': {}} + + for msp_name in config_result.msps: + results['msps'][msp_name] = decode_fabric_MSP_config(config_result.msps[msp_name].SerializeToString()) + + for orderer_msp in config_result.orderers: + results['orderers'][orderer_msp] = decode_fabric_endpoints(config_result.orderers[orderer_msp].endpoint) + + return results + + +def process_cc_query_res(cc_query_res): + pass + + +def process_members(members): + peers = [] + for msp_name in members.peers_by_org: + peers.append(decode_fabric_peers_info(members.peers_by_org[msp_name].peers)) + return peers + + +results = {} +for res in response.results: + # print(res) + print('-' * 100) + print('Error') + pprint.pprint(res.error) + print('-' * 50) + print('Config result') + pprint.pprint(process_config_result(res.config_result), indent=2) + # print(f'Chaincode Query result : {res.cc_query_res}') + print('Members') + pprint.pprint(process_members(res.members), indent=2) + print('#' * 100) diff --git a/fabric-sdk-py_tests/fabric-sdk-py-query-invoke.py b/fabric-sdk-py_tests/fabric-sdk-py-query-invoke.py index 20c4eb7a7..5bef66917 100644 --- a/fabric-sdk-py_tests/fabric-sdk-py-query-invoke.py +++ b/fabric-sdk-py_tests/fabric-sdk-py-query-invoke.py @@ -1,4 +1,5 @@ import os +import asyncio import subprocess from hfc.fabric import Client @@ -12,6 +13,8 @@ cli.new_channel('mychannel') +loop = asyncio.get_event_loop() + from hfc.fabric_ca.caservice import ca_service cacli = ca_service(target="https://rca-owkin:7054", @@ -56,42 +59,42 @@ finally: 
print(data) - response = cli.chaincode_query( + response = loop.run_until_complete(cli.chaincode_query( requestor=admin_owkin, channel_name='mychannel', - peer_names=['peer1-owkin'], + peers=['peer1-owkin'], args=[], cc_name='mycc', cc_version='1.0', fcn='queryDataManagers' - ) + )) print(response) - response = cli.query_installed_chaincodes( + response = loop.run_until_complete(cli.query_installed_chaincodes( requestor=admin_owkin, - peer_names=['peer1-owkin'] - ) + peers=['peer1-owkin'] + )) print(response) - response = cli.query_channels( + response = loop.run_until_complete(cli.query_channels( requestor=admin_owkin, - peer_names=['peer1-owkin'] - ) + peers=['peer1-owkin'] + )) print(response) - response = cli.query_info( + response = loop.run_until_complete(cli.query_info( requestor=admin_owkin, channel_name='mychannel', - peer_names=['peer1-owkin'] - ) + peers=['peer1-owkin'] + )) print(response) dir_path = os.path.dirname(os.path.realpath(__file__)) - response = cli.chaincode_invoke( + response = loop.run_until_complete(cli.chaincode_invoke( requestor=admin_owkin, channel_name='mychannel', - peer_names=['peer1-owkin'], + peers=['peer1-owkin'], args=['ISIC 2018', '59300f1fec4f5cdd3a236c7260ed72bdd24691efdec63b7910ea84136123cecd', 'http://chunantes.substrabac:8001/media/data_managers/59300f1fec4f5cdd3a236c7260ed72bdd24691efdec63b7910ea84136123cecd/opener.py', @@ -106,5 +109,5 @@ fcn='registerDataManager', wait_for_event=True, wait_for_event_timeout=5 - ) + )) print(response) diff --git a/substrabac/base_metrics/Dockerfile b/substrabac/base_metrics/Dockerfile index ce69293d4..679578a93 100644 --- a/substrabac/base_metrics/Dockerfile +++ b/substrabac/base_metrics/Dockerfile @@ -1,13 +1,9 @@ -FROM nvidia/cuda:9.0-base - -RUN apt-get update; apt-get install -y build-essential libssl-dev python3 python3-dev python3-pip -RUN pip3 install --upgrade pip -RUN pip3 install pillow numpy sklearn pandas +FROM eu.gcr.io/substra-208412/substratools +RUN mkdir -p /sandbox +RUN mkdir -p /sandbox/opener RUN mkdir -p /sandbox/metrics - WORKDIR /sandbox -ADD ./calc_metrics.py . 
- -ENTRYPOINT ["python3", "calc_metrics.py"] +ENTRYPOINT ["python3"] +CMD ["-c", "import substratools as tools; tools.metrics.execute()"] diff --git a/substrabac/base_metrics/calc_metrics.py b/substrabac/base_metrics/calc_metrics.py deleted file mode 100644 index 89533d9c8..000000000 --- a/substrabac/base_metrics/calc_metrics.py +++ /dev/null @@ -1,17 +0,0 @@ -import json -import metrics -import opener - - -def calc_perf(folder_true="./data", folder_pred="./pred"): - """compute performances using the imported metrics.score function""" - # get true and pred values - y_true = opener.get_y(folder_true) - y_pred = opener.get_pred(folder_pred) - return {'all': metrics.score(y_true, y_pred)} - - -if __name__ == "__main__": - perf = calc_perf() - with open('./pred/perf.json', 'w') as outfile: - json.dump(perf, outfile) diff --git a/substrabac/fake_data_sample/Dockerfile b/substrabac/fake_data_sample/Dockerfile index 4fe1639f2..a47637b76 100644 --- a/substrabac/fake_data_sample/Dockerfile +++ b/substrabac/fake_data_sample/Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cuda:9.0-base +FROM eu.gcr.io/substra-208412/substratools RUN apt-get update; apt-get install -y build-essential libssl-dev python3 python3-dev python3-pip RUN pip3 install --upgrade pip diff --git a/substrabac/fake_data_sample/open_data_sample.py b/substrabac/fake_data_sample/open_data_sample.py index 3fce5230d..3b15172b9 100644 --- a/substrabac/fake_data_sample/open_data_sample.py +++ b/substrabac/fake_data_sample/open_data_sample.py @@ -1,11 +1,7 @@ -import opener - - -def open_data_samples(data_folder='./data'): - """Open data sample with the opener""" - opener.get_X(data_folder) - opener.get_y(data_folder) +import substratools as tools if __name__ == "__main__": - open_data_samples() + opener = tools.opener.load_from_module() + opener.get_X() + opener.get_y() diff --git a/substrabac/fake_metrics/Dockerfile b/substrabac/fake_metrics/Dockerfile index 774ea7542..61a90bf85 100644 --- a/substrabac/fake_metrics/Dockerfile +++ b/substrabac/fake_metrics/Dockerfile @@ -1,13 +1,9 @@ -FROM nvidia/cuda:9.0-base - -RUN apt-get update; apt-get install -y build-essential libssl-dev python3 python3-dev python3-pip -RUN pip3 install --upgrade pip -RUN pip3 install pillow numpy sklearn pandas +FROM eu.gcr.io/substra-208412/substratools +RUN mkdir -p /sandbox +RUN mkdir -p /sandbox/opener RUN mkdir -p /sandbox/metrics - WORKDIR /sandbox -ADD ./calc_fake_metrics.py . 
- -ENTRYPOINT ["python3", "calc_fake_metrics.py"] +ENTRYPOINT ["python3"] +CMD ["-c", "import substratools as tools; tools.metrics.execute(dry_run=True)"] diff --git a/substrabac/fake_metrics/calc_fake_metrics.py b/substrabac/fake_metrics/calc_fake_metrics.py deleted file mode 100644 index ebba3361a..000000000 --- a/substrabac/fake_metrics/calc_fake_metrics.py +++ /dev/null @@ -1,17 +0,0 @@ -import json -import metrics -import opener - - -def calc_perf(folder_pred="./pred"): - """compute performances using the imported metrics.score function""" - # get true and pred values - y_true = opener.fake_y() - y_pred = opener.fake_y() - return {'all': metrics.score(y_true, y_pred)} - - -if __name__ == "__main__": - perf = calc_perf() - with open('./pred/perf.json', 'w') as outfile: - json.dump(perf, outfile) diff --git a/substrabac/fixtures/chunantes/algos/algo3/algo.tar.gz b/substrabac/fixtures/chunantes/algos/algo3/algo.tar.gz index 604091d64..8805ccf0b 100644 Binary files a/substrabac/fixtures/chunantes/algos/algo3/algo.tar.gz and b/substrabac/fixtures/chunantes/algos/algo3/algo.tar.gz differ diff --git a/substrabac/fixtures/chunantes/algos/algo4/algo.tar.gz b/substrabac/fixtures/chunantes/algos/algo4/algo.tar.gz index bf03eda74..ce1b4e652 100644 Binary files a/substrabac/fixtures/chunantes/algos/algo4/algo.tar.gz and b/substrabac/fixtures/chunantes/algos/algo4/algo.tar.gz differ diff --git a/substrabac/fixtures/chunantes/datamanagers/datamanager0/opener.py b/substrabac/fixtures/chunantes/datamanagers/datamanager0/opener.py index 4c48743c3..f7dbd14d2 100644 --- a/substrabac/fixtures/chunantes/datamanagers/datamanager0/opener.py +++ b/substrabac/fixtures/chunantes/datamanagers/datamanager0/opener.py @@ -1,9 +1,11 @@ -"""Opener of the ISIC 2018 dataset""" +"""Opener of the simplified ISIC 2018 dataset""" import os import csv import numpy as np from PIL import Image +import substratools as tools + PREFIX_X = "IMG_" SUFFIX_X = ".jpg" @@ -17,77 +19,87 @@ SIZE_Z = 3 CLASSES = 7 +n_sample = 10 + -def check_existing_files(folder, files): +def _check_existing_files(files): """check if files from a list of files are located in folder""" - for fname in files: - if not os.path.isfile(os.path.join(folder, fname)): - raise FileNotFoundError("non existing file %s in folder %s" % - (fname, folder)) + for f in files: + if not os.path.isfile(f): + raise FileNotFoundError("non existing file %s" % (f)) -def get_files(folder): +def _get_paths(folders): """return list of features and label files given a folder location (with the same order)""" # get list of features files and create associated list of label files - X_files = [os.path.join(subfolder, f) for subfolder in os.listdir(folder) - for f in os.listdir(os.path.join(folder, subfolder)) + X_files = [os.path.join(folder, f) for folder in folders + for f in os.listdir(os.path.join(folder)) if '.jpg' in f] y_files = [f.replace(PREFIX_X, PREFIX_Y).replace(SUFFIX_X, SUFFIX_Y) for f in X_files] + # check label files exist try: - check_existing_files(folder, y_files) + _check_existing_files(y_files) except FileNotFoundError as e: print(str(e)) y_files = None return X_files, y_files -def get_X(folder): - """Format and return the ISIC features data as np arrays.""" - print('Finding features files...') - X_files, _ = get_files(folder) - print('Loading features...') - X = [] - for f in X_files: - image = Image.open(os.path.join(folder, f)) - X.append(np.array(image)) - return np.array(X) - - -def fake_X(n_sample=10): - """Make and return the ISIC like features data as np 
arrays.""" - return np.random.randint(low=0, high=256, size=(n_sample, SIZE_X, SIZE_Y, SIZE_Z)).astype('uint8') - - -def get_y(folder): - """Format and return the ISIC labels as np arrays.""" - print('Finding label files...') - _, y_files = get_files(folder) - print('Loading labels...') - y = [] - for f in y_files: - with open(os.path.join(folder, f)) as open_f: - str_y = open_f.readline().split(',') - y.append([float(yy) for yy in str_y]) - return np.array(y, dtype=np.float) - - -def fake_y(n_sample=10): - """Make and return the ISIC like labels as np arrays.""" - return np.eye(CLASSES)[np.arange(n_sample) % CLASSES].astype('uint8') - - -def save_pred(y_pred, folder): - """Save prediction in PRED_FILE in folder""" - with open(os.path.join(folder, PRED_FILE), "w") as f: - writer = csv.writer(f) - writer.writerows(y_pred) - - -def get_pred(folder): - """Get predictions which were saved using the save_pred function""" - with open(os.path.join(folder, PRED_FILE), "r") as f: - pred_iter = csv.reader(f) - pred = [y for y in pred_iter] - return np.array(pred, copy=False, dtype=np.float) +class MyOpener(tools.Opener): + + def get_X(self, folders): + print('Finding features files...') + X_paths, _ = _get_paths(folders) + print('Loading features...') + X = [] + for path in X_paths: + image = Image.open(path) + X.append(np.array(image)) + return np.array(X) + + def get_y(self, folders): + print('Finding label files...') + _, y_paths = _get_paths(folders) + print('Loading labels...') + y = [] + for path in y_paths: + with open(path) as f: + str_y = f.readline().split(',') + y.append([float(yy) for yy in str_y]) + return np.array(y, dtype=np.float) + + def fake_X(self): + """Make and return the ISIC like features data as np arrays.""" + return np.random.randint(low=0, high=256, size=(n_sample, SIZE_X, SIZE_Y, SIZE_Z)).astype('uint8') + + def fake_y(self): + """Make and return the ISIC like labels as np arrays.""" + return np.eye(CLASSES)[np.arange(n_sample) % CLASSES].astype('uint8') + + def save_pred(self, y_pred, path): + """Save prediction in path + + :param y_pred: predicted target variable vector + :type y_pred: numpy array + :param folder: path to the folder in which to save the predicted target variable vector + :type folder: string + :return: None + """ + with open(path, "w") as f: + writer = csv.writer(f) + writer.writerows(y_pred) + + def get_pred(self, path): + """Get predictions which were saved using the save_pred function + + :param folder: path to the folder where the previously predicted target variable vector has been saved + :type folder: string + :return: predicted target variable vector + :rtype: numpy array + """ + with open(path, "r") as f: + pred_iter = csv.reader(f) + pred = [y for y in pred_iter] + return np.array(pred, copy=False, dtype=np.float) diff --git a/substrabac/fixtures/chunantes/datasamples/train/0024306.zip b/substrabac/fixtures/chunantes/datasamples/train/0024306/IMG_0024306.jpg old mode 100644 new mode 100755 similarity index 91% rename from substrabac/fixtures/chunantes/datasamples/train/0024306.zip rename to substrabac/fixtures/chunantes/datasamples/train/0024306/IMG_0024306.jpg index da6e93892..6ca016f47 Binary files a/substrabac/fixtures/chunantes/datasamples/train/0024306.zip and b/substrabac/fixtures/chunantes/datasamples/train/0024306/IMG_0024306.jpg differ diff --git a/substrabac/fixtures/chunantes/datasamples/train/0024306/LABEL_0024306.csv b/substrabac/fixtures/chunantes/datasamples/train/0024306/LABEL_0024306.csv new file mode 100755 index 
000000000..d8044bb00 --- /dev/null +++ b/substrabac/fixtures/chunantes/datasamples/train/0024306/LABEL_0024306.csv @@ -0,0 +1 @@ +1.0,0.0,0.0,0.0,0.0,0.0,0.0 diff --git a/substrabac/fixtures/chunantes/datasamples/train/0024307.zip b/substrabac/fixtures/chunantes/datasamples/train/0024307/IMG_0024307.jpg old mode 100644 new mode 100755 similarity index 91% rename from substrabac/fixtures/chunantes/datasamples/train/0024307.zip rename to substrabac/fixtures/chunantes/datasamples/train/0024307/IMG_0024307.jpg index 5c25670b1..1bfd66449 Binary files a/substrabac/fixtures/chunantes/datasamples/train/0024307.zip and b/substrabac/fixtures/chunantes/datasamples/train/0024307/IMG_0024307.jpg differ diff --git a/substrabac/fixtures/chunantes/datasamples/train/0024307/LABEL_0024307.csv b/substrabac/fixtures/chunantes/datasamples/train/0024307/LABEL_0024307.csv new file mode 100755 index 000000000..ff746af51 --- /dev/null +++ b/substrabac/fixtures/chunantes/datasamples/train/0024307/LABEL_0024307.csv @@ -0,0 +1 @@ +0.0,1.0,0.0,0.0,0.0,0.0,0.0 diff --git a/substrabac/fixtures/chunantes/objectives/objective0/metrics.py b/substrabac/fixtures/chunantes/objectives/objective0/metrics.py index 9596f2659..95652de06 100644 --- a/substrabac/fixtures/chunantes/objectives/objective0/metrics.py +++ b/substrabac/fixtures/chunantes/objectives/objective0/metrics.py @@ -1,6 +1,8 @@ from sklearn.metrics import recall_score +from substratools import Metrics as MetricsABC -def score(y_true, y_pred): - """returns the macro-average recall""" - return recall_score(y_true.argmax(axis=1), y_pred.argmax(axis=1), average='macro') \ No newline at end of file + +class Metrics(MetricsABC): + def score(self, y_true, y_pred): + return recall_score(y_true.argmax(axis=1), y_pred.argmax(axis=1), average='macro') diff --git a/substrabac/fixtures/owkin/datamanagers/datamanager0/opener.py b/substrabac/fixtures/owkin/datamanagers/datamanager0/opener.py index 75c8578f8..8d54fcd65 100644 --- a/substrabac/fixtures/owkin/datamanagers/datamanager0/opener.py +++ b/substrabac/fixtures/owkin/datamanagers/datamanager0/opener.py @@ -1,9 +1,11 @@ -"""Opener of the simplified ISIC 2018 dataset""" +"""Opener of the simplified ISIC 2018 dataset (other opener)""" import os import csv import numpy as np from PIL import Image +import substratools as tools + PREFIX_X = "IMG_" SUFFIX_X = ".jpg" @@ -17,77 +19,87 @@ SIZE_Z = 3 CLASSES = 7 +n_sample = 10 + -def check_existing_files(folder, files): +def _check_existing_files(files): """check if files from a list of files are located in folder""" - for fname in files: - if not os.path.isfile(os.path.join(folder, fname)): - raise FileNotFoundError("non existing file %s in folder %s" % - (fname, folder)) + for f in files: + if not os.path.isfile(f): + raise FileNotFoundError("non existing file %s" % (f)) -def get_files(folder): +def _get_paths(folders): """return list of features and label files given a folder location (with the same order)""" # get list of features files and create associated list of label files - X_files = [os.path.join(subfolder, f) for subfolder in os.listdir(folder) - for f in os.listdir(os.path.join(folder, subfolder)) + X_files = [os.path.join(folder, f) for folder in folders + for f in os.listdir(os.path.join(folder)) if '.jpg' in f] y_files = [f.replace(PREFIX_X, PREFIX_Y).replace(SUFFIX_X, SUFFIX_Y) for f in X_files] + # check label files exist try: - check_existing_files(folder, y_files) + _check_existing_files(y_files) except FileNotFoundError as e: print(str(e)) y_files = None return 
X_files, y_files -def get_X(folder): - """Format and return the ISIC features data as np arrays.""" - print('Finding features files...') - X_files, _ = get_files(folder) - print('Loading features...') - X = [] - for f in X_files: - image = Image.open(os.path.join(folder, f)) - X.append(np.array(image)) - return np.array(X) - - -def fake_X(n_sample=10): - """Make and return the ISIC like features data as np arrays.""" - return np.random.randint(low=0, high=256, size=(n_sample, SIZE_X, SIZE_Y, SIZE_Z)).astype('uint8') - - -def get_y(folder): - """Format and return the ISIC labels as np arrays.""" - print('Finding label files...') - _, y_files = get_files(folder) - print('Loading labels...') - y = [] - for f in y_files: - with open(os.path.join(folder, f)) as open_f: - str_y = open_f.readline().split(',') - y.append([float(yy) for yy in str_y]) - return np.array(y, dtype=np.float) - - -def fake_y(n_sample=10): - """Make and return the ISIC like labels as np arrays.""" - return np.eye(CLASSES)[np.arange(n_sample) % CLASSES].astype('uint8') - - -def save_pred(y_pred, folder): - """Save prediction in PRED_FILE in folder""" - with open(os.path.join(folder, PRED_FILE), "w") as f: - writer = csv.writer(f) - writer.writerows(y_pred) - - -def get_pred(folder): - """Get predictions which were saved using the save_pred function""" - with open(os.path.join(folder, PRED_FILE), "r") as f: - pred_iter = csv.reader(f) - pred = [y for y in pred_iter] - return np.array(pred, copy=False, dtype=np.float) +class MyOpener(tools.Opener): + + def get_X(self, folders): + print('Finding features files...') + X_paths, _ = _get_paths(folders) + print('Loading features...') + X = [] + for path in X_paths: + image = Image.open(path) + X.append(np.array(image)) + return np.array(X) + + def get_y(self, folders): + print('Finding label files...') + _, y_paths = _get_paths(folders) + print('Loading labels...') + y = [] + for path in y_paths: + with open(path) as f: + str_y = f.readline().split(',') + y.append([float(yy) for yy in str_y]) + return np.array(y, dtype=np.float) + + def fake_X(self): + """Make and return the ISIC like features data as np arrays.""" + return np.random.randint(low=0, high=256, size=(n_sample, SIZE_X, SIZE_Y, SIZE_Z)).astype('uint8') + + def fake_y(self): + """Make and return the ISIC like labels as np arrays.""" + return np.eye(CLASSES)[np.arange(n_sample) % CLASSES].astype('uint8') + + def save_pred(self, y_pred, path): + """Save prediction in path + + :param y_pred: predicted target variable vector + :type y_pred: numpy array + :param folder: path to the folder in which to save the predicted target variable vector + :type folder: string + :return: None + """ + with open(path, "w") as f: + writer = csv.writer(f) + writer.writerows(y_pred) + + def get_pred(self, path): + """Get predictions which were saved using the save_pred function + + :param folder: path to the folder where the previously predicted target variable vector has been saved + :type folder: string + :return: predicted target variable vector + :rtype: numpy array + """ + with open(path, "r") as f: + pred_iter = csv.reader(f) + pred = [y for y in pred_iter] + return np.array(pred, copy=False, dtype=np.float) diff --git a/substrabac/fixtures/owkin/objectives/objective0/metrics.py b/substrabac/fixtures/owkin/objectives/objective0/metrics.py index 6bae6c665..95652de06 100644 --- a/substrabac/fixtures/owkin/objectives/objective0/metrics.py +++ b/substrabac/fixtures/owkin/objectives/objective0/metrics.py @@ -1,6 +1,8 @@ from sklearn.metrics 
import recall_score +from substratools import Metrics as MetricsABC -def score(y_true, y_pred): - """returns the macro-average recall""" - return recall_score(y_true.argmax(axis=1), y_pred.argmax(axis=1), average='macro') + +class Metrics(MetricsABC): + def score(self, y_true, y_pred): + return recall_score(y_true.argmax(axis=1), y_pred.argmax(axis=1), average='macro') diff --git a/substrabac/populate.py b/substrabac/populate.py index 155266cd7..20cb01706 100644 --- a/substrabac/populate.py +++ b/substrabac/populate.py @@ -1,15 +1,15 @@ import argparse -import functools import os import json +import shutil import time import substra_sdk_py as substra from termcolor import colored -from rest_framework import status dir_path = os.path.dirname(os.path.realpath(__file__)) +server_path = '/substra/servermedias' client = substra.Client() @@ -18,104 +18,86 @@ def setup_config(): print('Init config in /tmp/.substrabac for owkin and chunantes') client.create_config('owkin', 'http://owkin.substrabac:8000', '0.0') client.create_config('chunantes', 'http://chunantes.substrabac:8001', '0.0') + client.create_config('clb', 'http://clb.substrabac:8002', '0.0') -def retry_until_success(f): - """Retry request to substrabac in case of Timeout.""" - @functools.wraps(f) - def wrapper(*args, **kwargs): - delay = 1 - backoff = 2 +def get_or_create(data, profile, asset, dryrun=False, register=False): - while True: - try: - return f(*args, **kwargs) - except substra.exceptions.HTTPError as e: - print(colored(e, 'red')) - print(colored(e.response.content, 'red')) - print(f'Request error: retrying in {delay}s') - time.sleep(delay) - delay *= backoff - - return wrapper - - -def create_asset(data, profile, asset, dryrun=False): client.set_config(profile) + method = client.add if not register else client.register + if dryrun: print('dryrun') try: - r = client.add(asset, data, dryrun=True) - except substra.exceptions.HTTPError as e: - print(colored(e, 'red')) + r = method(asset, data, dryrun=True) + except substra.exceptions.AlreadyExists as e: + r = e.response.json() + print(colored(json.dumps(r, indent=2), 'cyan')) else: print(colored(json.dumps(r, indent=2), 'magenta')) print('real') try: - r = client.add(asset, data) - except substra.exceptions.HTTPError as e: - if e.response.status_code == status.HTTP_400_BAD_REQUEST: - if 'pkhash' in e.response.json(): - # FIXME server is not correctly responding for some conflict - # cases, overwrite the status code for these cases - print('Bad request should be a conflict') - e.response.status_code = status.HTTP_409_CONFLICT - - if e.response.status_code == status.HTTP_408_REQUEST_TIMEOUT: - # retry until success in case of timeout - print(colored('got a 408, will test to get if from ledger', 'grey')) - r = e.response.json() - print(colored(json.dumps(r, indent=2), 'blue')) - results = r['pkhash'] if 'pkhash' in r else r['message'].get('pkhash') - - keys_to_check = results if isinstance(results, list) else [results] - for k in keys_to_check: - retry_until_success(client.get)(asset, k) + r = method(asset, data) - return results - - elif e.response.status_code == status.HTTP_409_CONFLICT: - r = e.response.json() - print(colored(json.dumps(r, indent=2), 'cyan')) - return [x['pkhash'] for x in r] if isinstance(r, list) else r['pkhash'] + except substra.exceptions.AlreadyExists as e: + r = e.response.json() + print(colored(json.dumps(r, indent=2), 'cyan')) + key_or_keys = e.pkhash - else: - print(colored(e, 'red')) else: print(colored(json.dumps(r, indent=2), 'green')) - return 
[x['pkhash'] for x in r] if isinstance(r, list) else r['pkhash'] + key_or_keys = [x['pkhash'] for x in r] if isinstance(r, list) else r['pkhash'] + + return key_or_keys def update_datamanager(data_manager_key, data, profile): client.set_config(profile) - try: r = client.update('data_manager', data_manager_key, data) - except substra.exceptions.HTTPError as e: - if e.response.status_code != status.HTTP_408_REQUEST_TIMEOUT: - print(colored(e, 'red')) - return None - # retry until success in case of timeout - r = retry_until_success(client.get)('data_manager', data_manager_key) + except substra.exceptions.AlreadyExists as e: + r = e.response.json() print(colored(json.dumps(r, indent=2), 'cyan')) - print(colored(json.dumps(r, indent=2), 'green')) - return r['pkhash'] + except substra.exceptions.InvalidRequest as e: + # FIXME if the data manager is already associated with the objective + # backend answer with a 400 and a raw error coming from the + # ledger. + # this case will be handled soon, with the fabric SDK. + print(colored(str(e), 'red')) + else: + print(colored(json.dumps(r, indent=2), 'green')) -if __name__ == '__main__': + +def do_populate(): setup_config() parser = argparse.ArgumentParser() - parser.add_argument('-o', '--one-org', action='store_true', default=False, - help='Launch populate with one org only') + group = parser.add_mutually_exclusive_group() + group.add_argument('-o', '--one-org', action='store_const', dest='nb_org', const=1, + help='Launch populate with one org') + group.add_argument('-tw', '--two-orgs', action='store_const', dest='nb_org', const=2, + help='Launch populate with two orgs') + group.add_argument('-th', '--three-orgs', action='store_const', dest='nb_org', const=3, + help='Launch populate with three orgs') + parser.set_defaults(nb_org=2) args = vars(parser.parse_args()) - org_0 = 'owkin' - org_1 = org_0 if args['one_org'] else 'chunantes' + if args['nb_org'] == 1: + org_0 = org_1 = org_2 = 'owkin' + elif args['nb_org'] == 2: + org_0 = org_2 = 'owkin' + org_1 = 'chunantes' + elif args['nb_org'] == 3: + org_0 = 'owkin' + org_1 = 'chunantes' + org_2 = 'clb' + else: + raise Exception(f"Number of orgs {args['nb_org']} not in [1, 2, 3]") print(f'will create datamanager with {org_1}') # create datamanager with org1 @@ -126,22 +108,26 @@ def update_datamanager(data_manager_key, data, profile): 'description': os.path.join(dir_path, './fixtures/chunantes/datamanagers/datamanager0/description.md'), 'permissions': 'all', } - data_manager_org1_key = create_asset(data, org_1, 'data_manager', dryrun=True) + data_manager_org1_key = get_or_create(data, org_1, 'data_manager', dryrun=True) #################################################### train_data_sample_keys = [] - if data_manager_org1_key: - print(f'register train data on datamanager {org_1} (will take datamanager creator as worker)') - data = { - 'files': [ - os.path.join(dir_path, './fixtures/chunantes/datasamples/train/0024306.zip'), - os.path.join(dir_path, './fixtures/chunantes/datasamples/train/0024307.zip') - ], - 'data_manager_keys': [data_manager_org1_key], - 'test_only': False, - } - train_data_sample_keys = create_asset(data, org_1, 'data_sample', True) + print(f'register train data (from server) on datamanager {org_1} (will take datamanager creator as worker)') + data_samples_path = ['./fixtures/chunantes/datasamples/train/0024306', + './fixtures/chunantes/datasamples/train/0024307'] + for d in data_samples_path: + try: + shutil.copytree(os.path.join(dir_path, d), + os.path.join(server_path, d)) + except 
FileExistsError: + pass + data = { + 'paths': [os.path.join(server_path, d) for d in data_samples_path], + 'data_manager_keys': [data_manager_org1_key], + 'test_only': False, + } + train_data_sample_keys = get_or_create(data, org_1, 'data_sample', dryrun=True, register=True) #################################################### @@ -153,182 +139,199 @@ def update_datamanager(data_manager_key, data, profile): 'description': os.path.join(dir_path, './fixtures/owkin/datamanagers/datamanager0/description.md'), 'permissions': 'all' } - data_manager_org0_key = create_asset(data, org_0, 'data_manager') + data_manager_org0_key = get_or_create(data, org_0, 'data_manager') + + #################################################### + + print('register test data') + data = { + 'paths': [ + os.path.join(dir_path, './fixtures/owkin/datasamples/test/0024900.zip'), + os.path.join(dir_path, './fixtures/owkin/datasamples/test/0024901.zip') + ], + 'data_manager_keys': [data_manager_org0_key], + 'test_only': True, + } + test_data_sample_keys = get_or_create(data, org_0, 'data_sample') + + #################################################### + + print('register test data 2') + data = { + 'paths': [ + os.path.join(dir_path, './fixtures/owkin/datasamples/test/0024902.zip'), + os.path.join(dir_path, './fixtures/owkin/datasamples/test/0024903.zip') + ], + 'data_manager_keys': [data_manager_org0_key], + 'test_only': True, + } + get_or_create(data, org_0, 'data_sample') + + #################################################### + + print('register test data 3') + data = { + 'paths': [ + os.path.join(dir_path, './fixtures/owkin/datasamples/test/0024904.zip'), + os.path.join(dir_path, './fixtures/owkin/datasamples/test/0024905.zip') + ], + 'data_manager_keys': [data_manager_org0_key], + 'test_only': True, + } + get_or_create(data, org_0, 'data_sample') + + #################################################### + + print('register objective') + data = { + 'name': 'Skin Lesion Classification Objective', + 'description': os.path.join(dir_path, './fixtures/chunantes/objectives/objective0/description.md'), + 'metrics_name': 'macro-average recall', + 'metrics': os.path.join(dir_path, './fixtures/chunantes/objectives/objective0/metrics.py'), + 'permissions': 'all', + 'test_data_sample_keys': test_data_sample_keys, + 'test_data_manager_key': data_manager_org0_key + } + + objective_key = get_or_create(data, org_0, 'objective', dryrun=True) + + #################################################### + + print('register objective without data manager and data sample') + data = { + 'name': 'Skin Lesion Classification Objective', + 'description': os.path.join(dir_path, './fixtures/owkin/objectives/objective0/description.md'), + 'metrics_name': 'macro-average recall', + 'metrics': os.path.join(dir_path, './fixtures/owkin/objectives/objective0/metrics.py'), + 'permissions': 'all' + } + + get_or_create(data, org_0, 'objective', dryrun=True) #################################################### - if data_manager_org0_key and data_manager_org1_key: - print('register test data') - data = { - 'files': [ - os.path.join(dir_path, './fixtures/owkin/datasamples/test/0024900.zip'), - os.path.join(dir_path, './fixtures/owkin/datasamples/test/0024901.zip') - ], - 'data_manager_keys': [data_manager_org0_key], - 'test_only': True, - } - test_data_sample_keys = create_asset(data, org_0, 'data_sample', False) - - #################################################### - - print('register test data 2') - data = { - 'files': [ - os.path.join(dir_path, 
'./fixtures/owkin/datasamples/test/0024902.zip'), - os.path.join(dir_path, './fixtures/owkin/datasamples/test/0024903.zip') - ], - 'data_manager_keys': [data_manager_org0_key], - 'test_only': True, - } - test_data_sample_keys_2 = create_asset(data, org_0, 'data_sample', False) - - #################################################### - - print('register test data 3') - data = { - 'files': [ - os.path.join(dir_path, './fixtures/owkin/datasamples/test/0024904.zip'), - os.path.join(dir_path, './fixtures/owkin/datasamples/test/0024905.zip') - ], - 'data_manager_keys': [data_manager_org0_key], - 'test_only': True, - } - test_data_sample_keys_3 = create_asset(data, org_0, 'data_sample', False) - - #################################################### - - print('register objective') - data = { - 'name': 'Skin Lesion Classification Objective', - 'description': os.path.join(dir_path, './fixtures/chunantes/objectives/objective0/description.md'), - 'metrics_name': 'macro-average recall', - 'metrics': os.path.join(dir_path, './fixtures/chunantes/objectives/objective0/metrics.py'), - 'permissions': 'all', - 'test_data_sample_keys': test_data_sample_keys, - 'test_data_manager_key': data_manager_org0_key - } - - objective_key = create_asset(data, org_0, 'objective', True) - - #################################################### - - # update datamanager - print('update datamanager') - data = { - 'objective_key': objective_key - } - update_datamanager(data_manager_org1_key, data, org_0) - - #################################################### - - if objective_key: - # register algo - print('register algo') - data = { - 'name': 'Logistic regression', - 'file': os.path.join(dir_path, './fixtures/chunantes/algos/algo3/algo.tar.gz'), - 'description': os.path.join(dir_path, './fixtures/chunantes/algos/algo3/description.md'), - 'objective_key': objective_key, - 'permissions': 'all', - } - algo_key = create_asset(data, org_1, 'algo', True) - - #################################################### - - print('register algo 2') - data = { - 'name': 'Neural Network', - 'file': os.path.join(dir_path, './fixtures/chunantes/algos/algo0/algo.tar.gz'), - 'description': os.path.join(dir_path, './fixtures/chunantes/algos/algo0/description.md'), - 'objective_key': objective_key, - 'permissions': 'all', - } - algo_key_2 = create_asset(data, org_1, 'algo', False) - - #################################################### - - data = { - 'name': 'Random Forest', - 'file': os.path.join(dir_path, './fixtures/chunantes/algos/algo4/algo.tar.gz'), - 'description': os.path.join(dir_path, './fixtures/chunantes/algos/algo4/description.md'), - 'objective_key': objective_key, - 'permissions': 'all', - } - algo_key_3 = create_asset(data, org_1, 'algo', False) - - #################################################### - - if algo_key and train_data_sample_keys: - # create traintuple - print('create traintuple') - data = { - 'algo_key': algo_key, - 'data_manager_key': data_manager_org1_key, - 'train_data_sample_keys': train_data_sample_keys, - } - traintuple_key = create_asset(data, org_1, 'traintuple') - - print('create second traintuple') - data = { - 'algo_key': algo_key_2, - 'data_manager_key': data_manager_org1_key, - 'train_data_sample_keys': train_data_sample_keys, - } - - traintuple_key_2 = create_asset(data, org_1, 'traintuple') - - print('create third traintuple') - data = { - 'algo_key': algo_key_3, - 'data_manager_key': data_manager_org1_key, - 'train_data_sample_keys': train_data_sample_keys, - } - - traintuple_key_3 = 
create_asset(data, org_1, 'traintuple') - - #################################################### - - if traintuple_key: - client.set_config(org_1) - res = client.get('traintuple', traintuple_key) - print(colored(json.dumps(res, indent=2), 'green')) - - # create testtuple - print('create testtuple') - data = { - 'traintuple_key': traintuple_key - } - - testtuple_key = create_asset(data, org_1, 'testtuple') - # testtuple_key = None - - if testtuple_key: - client.set_config(org_1) - res_t = client.get('testtuple', testtuple_key) - print(colored(json.dumps(res_t, indent=2), 'yellow')) - - while res['status'] not in ('done', 'failed') or res_t['status'] not in ('done', 'failed'): - print('-' * 100) - try: - client.set_config(org_1) - res = client.get('traintuple', traintuple_key) - print(colored(json.dumps(res, indent=2), 'green')) - - res_t = client.get('testtuple', testtuple_key) - print(colored(json.dumps(res_t, indent=2), 'yellow')) - except substra.exceptions.SDKException: - print(colored('Error when getting subtuples', 'red')) - time.sleep(3) - - else: - while res['status'] not in ('done', 'failed'): - print('-' * 100) - try: - client.set_config(org_1) - res = client.get('traintuple', traintuple_key) - print(colored(json.dumps(res, indent=2), 'green')) - except substra.exceptions.SDKException: - print(colored('Error when getting subtuple', 'red')) - time.sleep(3) - - print('Testtuple create failed') + # update datamanager + print('update datamanager') + data = { + 'objective_key': objective_key + } + update_datamanager(data_manager_org1_key, data, org_0) + + #################################################### + + # register algo + print('register algo') + data = { + 'name': 'Logistic regression', + 'file': os.path.join(dir_path, './fixtures/chunantes/algos/algo3/algo.tar.gz'), + 'description': os.path.join(dir_path, './fixtures/chunantes/algos/algo3/description.md'), + 'permissions': 'all', + } + algo_key = get_or_create(data, org_2, 'algo') + + #################################################### + + print('register algo 2') + data = { + 'name': 'Neural Network', + 'file': os.path.join(dir_path, './fixtures/chunantes/algos/algo0/algo.tar.gz'), + 'description': os.path.join(dir_path, './fixtures/chunantes/algos/algo0/description.md'), + 'permissions': 'all', + } + algo_key_2 = get_or_create(data, org_1, 'algo') + + #################################################### + + data = { + 'name': 'Random Forest', + 'file': os.path.join(dir_path, './fixtures/chunantes/algos/algo4/algo.tar.gz'), + 'description': os.path.join(dir_path, './fixtures/chunantes/algos/algo4/description.md'), + 'permissions': 'all', + } + algo_key_3 = get_or_create(data, org_1, 'algo') + + #################################################### + + # create traintuple + print('create traintuple') + data = { + 'algo_key': algo_key, + 'objective_key': objective_key, + 'data_manager_key': data_manager_org1_key, + 'train_data_sample_keys': train_data_sample_keys, + 'tag': 'substra' + } + traintuple_key = get_or_create(data, org_1, 'traintuple') + + print('create second traintuple') + data = { + 'algo_key': algo_key_2, + 'data_manager_key': data_manager_org1_key, + 'objective_key': objective_key, + 'train_data_sample_keys': train_data_sample_keys, + 'tag': 'My super tag' + } + + get_or_create(data, org_1, 'traintuple') + + print('create third traintuple') + data = { + 'algo_key': algo_key_3, + 'data_manager_key': data_manager_org1_key, + 'objective_key': objective_key, + 'train_data_sample_keys': train_data_sample_keys, + } + + 
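The populate script now funnels every registration through a `get_or_create` helper (taking the payload, the target profile, the asset type and optional `dryrun`/`register` flags) instead of `create_asset`, so re-running the script against an already-populated backend reuses existing assets rather than aborting. The conflict behaviour it can rely on is exercised further down by `test_add_objective_conflict`: a duplicate POST is answered with HTTP 409 and still carries the existing `pkhash`. A minimal sketch of that idea, assuming a plain HTTP transport and that the 409 body exposes the key under `pkhash` (the real helper drives the substra SDK client):

```python
import requests  # illustrative transport only; the script itself goes through the substra client


def get_or_create(url, data):
    """Create an asset, or reuse it when the backend reports it already exists.

    Assumes a duplicate POST is answered with HTTP 409 Conflict and that the
    JSON body still exposes the existing asset's 'pkhash'.
    """
    response = requests.post(url, data=data)
    if response.status_code == 409:
        # Already registered: fall back to the key reported by the server.
        return response.json()['pkhash']
    response.raise_for_status()
    return response.json()['pkhash']
```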
get_or_create(data, org_1, 'traintuple') + + #################################################### + + client.set_config(org_1) + res = client.get('traintuple', traintuple_key) + print(colored(json.dumps(res, indent=2), 'green')) + + # create testtuple + print('create testtuple') + data = { + 'traintuple_key': traintuple_key + } + + testtuple_key = get_or_create(data, org_1, 'testtuple') + + client.set_config(org_1) + res_t = client.get('testtuple', testtuple_key) + print(colored(json.dumps(res_t, indent=2), 'yellow')) + + testtuple_status = None + traintuple_status = None + + client.set_config(org_1) + + while traintuple_status not in ('done', 'failed') or testtuple_status not in ('done', 'failed'): + res = client.get('traintuple', traintuple_key) + res_t = client.get('testtuple', testtuple_key) + if traintuple_status != res['status'] or testtuple_status != res_t['status']: + traintuple_status = res['status'] + testtuple_status = res_t['status'] + print('') + print('-' * 100) + print(colored(json.dumps(res, indent=2), 'green')) + print(colored(json.dumps(res_t, indent=2), 'yellow')) + else: + print('.', end='', flush=True) + + time.sleep(3) + + +if __name__ == '__main__': + try: + do_populate() + except substra.exceptions.HTTPError as e: + try: + error = e.response.json() + except Exception: + error_message = e.response.text + else: + error_message = json.dumps(error, indent=2) + print(colored(str(e), 'red')) + print(colored(error_message, 'red')) diff --git a/substrabac/requirements.txt b/substrabac/requirements.txt index 848a770e1..53fd65156 100644 --- a/substrabac/requirements.txt +++ b/substrabac/requirements.txt @@ -11,7 +11,7 @@ django-rest-swagger==2.1.2 djangorestframework==3.8.2 docker == 3.5.0 grpcio >= 1.0.1 -git+https://github.com/anderskm/gputil.git@7e32706a2674935ae45f2fcf389ae1ea463b0b6b +GPUtil == 1.4.0 hkdf >= 0.0.3 ipython==6.4.0 ipython-genutils==0.2.0 @@ -25,3 +25,4 @@ requests == 2.20.0 rx >= 1.5.3 sentry-sdk == 0.5.2 six >= 1.4.0 +uwsgi == 2.0.18 diff --git a/substrabac/scripts/generate_assets.py b/substrabac/scripts/generate_assets.py index 98b006654..ea0447a02 100644 --- a/substrabac/scripts/generate_assets.py +++ b/substrabac/scripts/generate_assets.py @@ -14,13 +14,13 @@ def main(): client.set_config('owkin') assets = {} - assets['objective'] = json.dumps(client.list('objective')['result'], indent=4) - assets['datamanager'] = json.dumps(client.list('data_manager')['result'], indent=4) - assets['algo'] = json.dumps(client.list('algo')['result'], indent=4) - assets['traintuple'] = json.dumps(client.list('traintuple')['result'], indent=4) - assets['testtuple'] = json.dumps(client.list('testtuple')['result'], indent=4) + assets['objective'] = json.dumps(client.list('objective'), indent=4) + assets['datamanager'] = json.dumps(client.list('data_manager'), indent=4) + assets['algo'] = json.dumps(client.list('algo'), indent=4) + assets['traintuple'] = json.dumps(client.list('traintuple'), indent=4) + assets['testtuple'] = json.dumps(client.list('testtuple'), indent=4) - assets['model'] = json.dumps([res for res in client.list('model')['result'] + assets['model'] = json.dumps([res for res in client.list('model') if ('traintuple' in res and 'testtuple' in res)], indent=4) with open(os.path.join(dir_path, '../substrapp/tests/assets.py'), 'w') as f: diff --git a/substrabac/substrabac/celery.py b/substrabac/substrabac/celery.py index dbd7e85b9..faf074883 100644 --- a/substrabac/substrabac/celery.py +++ b/substrabac/substrabac/celery.py @@ -6,7 +6,7 @@ # set the default Django 
settings module for the 'celery' program. os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'substrabac.settings.prod') -app = Celery('substrabac', broker='amqp://localhost:5672//') +app = Celery('substrabac') # Using a string here means the worker doesn't have to serialize # the configuration object to child processes. diff --git a/substrabac/substrabac/settings/common.py b/substrabac/substrabac/settings/common.py index 1f9123c2e..6598755b5 100644 --- a/substrabac/substrabac/settings/common.py +++ b/substrabac/substrabac/settings/common.py @@ -158,3 +158,5 @@ CELERY_TASK_SERIALIZER = 'json' CELERY_TASK_TRACK_STARTED = True # since 4.0 CELERY_WORKER_CONCURRENCY = 1 + +CELERY_BROKER_URL = os.environ.get('CELERY_BROKER_URL', 'amqp://localhost:5672//'), diff --git a/substrabac/substrabac/settings/dev.py b/substrabac/substrabac/settings/dev.py index 83f45e043..f9c580123 100644 --- a/substrabac/substrabac/settings/dev.py +++ b/substrabac/substrabac/settings/dev.py @@ -43,6 +43,8 @@ SITE_HOST = f'{ORG_NAME}.substrabac' SITE_PORT = DEFAULT_PORT +DEFAULT_DOMAIN = os.environ.get('DEFAULT_DOMAIN', f'http://{SITE_HOST}:{SITE_PORT}') + LOGGING = { 'version': 1, 'disable_existing_loggers': False, diff --git a/substrabac/substrabac/settings/prod.py b/substrabac/substrabac/settings/prod.py index ad6acb1c4..40cb1fee6 100644 --- a/substrabac/substrabac/settings/prod.py +++ b/substrabac/substrabac/settings/prod.py @@ -43,6 +43,8 @@ SITE_HOST = os.environ.get('SITE_HOST', f'{ORG_NAME}.substrabac') SITE_PORT = os.environ.get('SITE_PORT', DEFAULT_PORT) +DEFAULT_DOMAIN = os.environ.get('DEFAULT_DOMAIN', f'http://{SITE_HOST}:{SITE_PORT}') + STATIC_URL = '/static/' STATIC_ROOT = os.path.join(BASE_DIR, 'statics') diff --git a/substrabac/substrapp/fixtures/model.py b/substrabac/substrapp/fixtures/model.py index 1d895892a..293df3c97 100644 --- a/substrabac/substrapp/fixtures/model.py +++ b/substrabac/substrapp/fixtures/model.py @@ -34,7 +34,7 @@ 'trainDataSample': { 'keys': ['62fb3263208d62c7235a046ee1d80e25512fe782254b730a9e566276b8c0ef3a', '42303efa663015e729159833a12ffb510ff92a6e386b8152f90f6fb14ddc94c9'], - 'openerHash': '59300f1fec4f5cdd3a236c7260ed72bdd24691efdec63b7910ea84136123cecd', + 'openerHash': '615ce631b93c185b492dfc97ed5dea27430d871fa4e50678bab3c79ce2ec6cb7', 'perf': 0.50, 'worker': 'a3119c79a173581425cbe6e06c3034ec396ee805b60d9a34feaa3048beb0e4a9', }, @@ -74,7 +74,7 @@ 'trainDataSample': { 'keys': ['62fb3263208d62c7235a046ee1d80e25512fe782254b730a9e566276b8c0ef3a', '42303efa663015e729159833a12ffb510ff92a6e386b8152f90f6fb14ddc94c9'], - 'openerHash': '59300f1fec4f5cdd3a236c7260ed72bdd24691efdec63b7910ea84136123cecd', + 'openerHash': '615ce631b93c185b492dfc97ed5dea27430d871fa4e50678bab3c79ce2ec6cb7', 'perf': 0.70, 'worker': 'a3119c79a173581425cbe6e06c3034ec396ee805b60d9a34feaa3048beb0e4a9', }, @@ -114,7 +114,7 @@ 'trainDataSample': { 'keys': ['62fb3263208d62c7235a046ee1d80e25512fe782254b730a9e566276b8c0ef3a', '42303efa663015e729159833a12ffb510ff92a6e386b8152f90f6fb14ddc94c9'], - 'openerHash': '59300f1fec4f5cdd3a236c7260ed72bdd24691efdec63b7910ea84136123cecd', + 'openerHash': '615ce631b93c185b492dfc97ed5dea27430d871fa4e50678bab3c79ce2ec6cb7', 'perf': 0.79, 'worker': 'a3119c79a173581425cbe6e06c3034ec396ee805b60d9a34feaa3048beb0e4a9', }, @@ -154,7 +154,7 @@ 'trainDataSample': { 'keys': ['62fb3263208d62c7235a046ee1d80e25512fe782254b730a9e566276b8c0ef3a', '42303efa663015e729159833a12ffb510ff92a6e386b8152f90f6fb14ddc94c9'], - 'openerHash': '59300f1fec4f5cdd3a236c7260ed72bdd24691efdec63b7910ea84136123cecd', + 
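With the hard-coded `amqp://localhost:5672//` broker removed from `celery.py`, the broker address is now driven by the `CELERY_BROKER_URL` environment variable. Note that the line added to `common.py` ends with a trailing comma, which turns the setting into a one-element tuple; the intended form is presumably a plain string:

```python
# substrabac/substrabac/settings/common.py, intended form (no trailing comma)
import os

CELERY_BROKER_URL = os.environ.get('CELERY_BROKER_URL', 'amqp://localhost:5672//')
```

In a containerised deployment the variable would then point at the RabbitMQ service, e.g. `CELERY_BROKER_URL=amqp://rabbit:5672//` (hostname assumed). The `dev.py` and `prod.py` settings similarly gain a `DEFAULT_DOMAIN` value, overridable through the `DEFAULT_DOMAIN` environment variable, which `tasks.py` later uses to build model download URLs instead of recombining the scheme, `SITE_HOST` and `SITE_PORT` by hand.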
'openerHash': '615ce631b93c185b492dfc97ed5dea27430d871fa4e50678bab3c79ce2ec6cb7', 'perf': 0.79, 'worker': 'a3119c79a173581425cbe6e06c3034ec396ee805b60d9a34feaa3048beb0e4a9', }, @@ -194,7 +194,7 @@ 'trainDataSample': { 'keys': ['62fb3263208d62c7235a046ee1d80e25512fe782254b730a9e566276b8c0ef3a', '42303efa663015e729159833a12ffb510ff92a6e386b8152f90f6fb14ddc94c9'], - 'openerHash': '59300f1fec4f5cdd3a236c7260ed72bdd24691efdec63b7910ea84136123cecd', + 'openerHash': '615ce631b93c185b492dfc97ed5dea27430d871fa4e50678bab3c79ce2ec6cb7', 'perf': 0.79, 'worker': 'a3119c79a173581425cbe6e06c3034ec396ee805b60d9a34feaa3048beb0e4a9', }, diff --git a/substrabac/substrapp/management/commands/bulkcreatedatasample.py b/substrabac/substrapp/management/commands/bulkcreatedatasample.py index 4a38b1144..d1dd6d2fb 100644 --- a/substrabac/substrapp/management/commands/bulkcreatedatasample.py +++ b/substrabac/substrapp/management/commands/bulkcreatedatasample.py @@ -97,7 +97,7 @@ def bulk_create_data_sample(data): # create on ledger + db ledger_data = {'test_only': test_only, 'data_manager_keys': data_manager_keys} - return DataSampleViewSet.commit(serializer, ledger_data, True) + return DataSampleViewSet.commit(serializer, ledger_data) class Command(BaseCommand): diff --git a/substrabac/substrapp/management/commands/createobjective.py b/substrabac/substrapp/management/commands/createobjective.py index af240adcf..b50f62353 100644 --- a/substrabac/substrapp/management/commands/createobjective.py +++ b/substrabac/substrapp/management/commands/createobjective.py @@ -10,7 +10,6 @@ from substrapp.management.utils.localRequest import LocalRequest from substrapp.serializers import DataManagerSerializer, LedgerDataManagerSerializer, \ LedgerObjectiveSerializer, ObjectiveSerializer -from substrapp.serializers.ledger.datamanager.util import updateLedgerDataManager from substrapp.utils import get_hash from substrapp.views.datasample import LedgerException diff --git a/substrabac/substrapp/serializers/ledger/algo/serializer.py b/substrabac/substrapp/serializers/ledger/algo/serializer.py index d04cb9442..e6d53ed1c 100644 --- a/substrabac/substrapp/serializers/ledger/algo/serializer.py +++ b/substrabac/substrapp/serializers/ledger/algo/serializer.py @@ -10,14 +10,12 @@ class LedgerAlgoSerializer(serializers.Serializer): name = serializers.CharField(min_length=1, max_length=100) - objective_key = serializers.CharField(min_length=64, max_length=64) permissions = serializers.CharField(min_length=1, max_length=60) def create(self, validated_data): instance = self.initial_data.get('instance') name = validated_data.get('name') permissions = validated_data.get('permissions') - objective_key = validated_data.get('objective_key') # TODO, create a datamigration with new Site domain name when we will know the name of the final website # current_site = Site.objects.get_current() @@ -25,13 +23,12 @@ def create(self, validated_data): protocol = 'https://' if request.is_secure() else 'http://' host = '' if request is None else request.get_host() - args = '"%(name)s", "%(algoHash)s", "%(storageAddress)s", "%(descriptionHash)s", "%(descriptionStorageAddress)s", "%(associatedObjective)s", "%(permissions)s"' % { + args = '"%(name)s", "%(algoHash)s", "%(storageAddress)s", "%(descriptionHash)s", "%(descriptionStorageAddress)s", "%(permissions)s"' % { 'name': name, 'algoHash': get_hash(instance.file), 'storageAddress': protocol + host + reverse('substrapp:algo-file', args=[instance.pk]), 'descriptionHash': get_hash(instance.description), 
'descriptionStorageAddress': protocol + host + reverse('substrapp:algo-description', args=[instance.pk]), - 'associatedObjective': objective_key, 'permissions': permissions } diff --git a/substrabac/substrapp/serializers/ledger/objective/serializer.py b/substrabac/substrapp/serializers/ledger/objective/serializer.py index 7376ef03d..372c336c1 100644 --- a/substrabac/substrapp/serializers/ledger/objective/serializer.py +++ b/substrabac/substrapp/serializers/ledger/objective/serializer.py @@ -10,9 +10,10 @@ class LedgerObjectiveSerializer(serializers.Serializer): test_data_sample_keys = serializers.ListField(child=serializers.CharField(min_length=64, max_length=64), - min_length=1) + min_length=0, + required=False) name = serializers.CharField(min_length=1, max_length=100) - test_data_manager_key = serializers.CharField(max_length=256) + test_data_manager_key = serializers.CharField(max_length=256, allow_blank=True, required=False) permissions = serializers.CharField(min_length=1, max_length=60) metrics_name = serializers.CharField(min_length=1, max_length=100) @@ -21,7 +22,7 @@ def create(self, validated_data): name = validated_data.get('name') metrics_name = validated_data.get('metrics_name') permissions = validated_data.get('permissions') - test_data_manager_key = validated_data.get('test_data_manager_key') + test_data_manager_key = validated_data.get('test_data_manager_key', '') test_data_sample_keys = validated_data.get('test_data_sample_keys', []) # TODO, create a datamigration with new Site domain name when we will know the name of the final website diff --git a/substrabac/substrapp/serializers/ledger/testtuple/serializer.py b/substrabac/substrapp/serializers/ledger/testtuple/serializer.py index bb37825c3..c9f5ccbad 100644 --- a/substrabac/substrapp/serializers/ledger/testtuple/serializer.py +++ b/substrabac/substrapp/serializers/ledger/testtuple/serializer.py @@ -12,16 +12,19 @@ class LedgerTestTupleSerializer(serializers.Serializer): test_data_sample_keys = serializers.ListField(child=serializers.CharField(min_length=64, max_length=64), min_length=0, required=False) + tag = serializers.CharField(min_length=0, max_length=64, allow_blank=True, required=False) def get_args(self, validated_data): traintuple_key = validated_data.get('traintuple_key') data_manager_key = validated_data.get('data_manager_key', '') test_data_sample_keys = validated_data.get('test_data_sample_keys', []) + tag = validated_data.get('tag', '') - args = '"%(traintupleKey)s", "%(dataManagerKey)s", "%(dataSampleKeys)s"' % { + args = '"%(traintupleKey)s", "%(dataManagerKey)s", "%(dataSampleKeys)s", "%(tag)s"' % { 'traintupleKey': traintuple_key, 'dataManagerKey': data_manager_key, 'dataSampleKeys': ','.join(test_data_sample_keys), + 'tag': tag } return args diff --git a/substrabac/substrapp/serializers/ledger/traintuple/serializer.py b/substrabac/substrapp/serializers/ledger/traintuple/serializer.py index 2ca52ff82..970a67ba8 100644 --- a/substrabac/substrapp/serializers/ledger/traintuple/serializer.py +++ b/substrabac/substrapp/serializers/ledger/traintuple/serializer.py @@ -9,6 +9,7 @@ class LedgerTrainTupleSerializer(serializers.Serializer): algo_key = serializers.CharField(min_length=64, max_length=64) data_manager_key = serializers.CharField(min_length=64, max_length=64) + objective_key = serializers.CharField(min_length=64, max_length=64) rank = serializers.IntegerField(allow_null=True, required=False) FLtask_key = serializers.CharField(min_length=64, max_length=64, allow_blank=True, required=False) 
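Traintuples now reference their objective directly through `objective_key` (it is no longer attached to the algo), and both traintuples and testtuples accept an optional `tag` of at most 64 characters, forwarded to the chaincode as the `get_args` changes just below show. Under the new serializers the request payloads therefore look roughly like this; the keys are the placeholder hashes used by the tests, not real assets:

```python
# Illustrative payloads matching the updated traintuple and testtuple serializers.
traintuple_data = {
    'algo_key': '5c1d9cd1c2c1082dde0921b56d11030c81f62fbb51932758b58ac2569dd0a088',
    'objective_key': '5c1d9cd1c2c1082dde0921b56d11030c81f62fbb51932758b58ac2569dd0a088',
    'data_manager_key': '5c1d9cd1c2c1082dde0921b56d11030c81f62fbb51932758b58ac2569dd0a088',
    'train_data_sample_keys': [
        '5c1d9cd1c2c1082dde0921b56d11030c81f62fbb51932758b58ac2569dd0b422',
    ],
    'tag': 'substra',   # optional, may be blank, 64 characters at most
}

testtuple_data = {
    'traintuple_key': '5c1d9cd1c2c1082dde0921b56d11030c81f62fbb51932758b58ac2569dd0b422',
    'tag': '',          # optional here as well; data manager and sample keys stay optional
}
```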
in_models_keys = serializers.ListField(child=serializers.CharField(min_length=64, max_length=64), @@ -16,23 +17,28 @@ class LedgerTrainTupleSerializer(serializers.Serializer): required=False) train_data_sample_keys = serializers.ListField(child=serializers.CharField(min_length=64, max_length=64), min_length=1) + tag = serializers.CharField(min_length=0, max_length=64, allow_blank=True, required=False) def get_args(self, validated_data): algo_key = validated_data.get('algo_key') data_manager_key = validated_data.get('data_manager_key') + objective_key = validated_data.get('objective_key') rank = validated_data.get('rank', '') rank = '' if rank is None else rank # rank should be an integer or empty string, not None FLtask_key = validated_data.get('FLtask_key', '') train_data_sample_keys = validated_data.get('train_data_sample_keys', []) in_models_keys = validated_data.get('in_models_keys') + tag = validated_data.get('tag', '') - args = '"%(algoKey)s", "%(inModels)s", "%(dataManagerKey)s", "%(dataSampleKeys)s", "%(FLtask)s", "%(rank)s"' % { + args = '"%(algoKey)s", "%(associatedObjective)s", "%(inModels)s", "%(dataManagerKey)s", "%(dataSampleKeys)s", "%(FLtask)s", "%(rank)s", "%(tag)s"' % { 'algoKey': algo_key, - 'rank': rank, - 'FLtask': FLtask_key, + 'associatedObjective': objective_key, 'inModels': ','.join(in_models_keys), 'dataManagerKey': data_manager_key, 'dataSampleKeys': ','.join(train_data_sample_keys), + 'FLtask': FLtask_key, + 'rank': rank, + 'tag': tag } return args diff --git a/substrabac/substrapp/signals/algo/post_delete.py b/substrabac/substrapp/signals/algo/post_delete.py index d8518f8cb..274af3e72 100644 --- a/substrabac/substrapp/signals/algo/post_delete.py +++ b/substrabac/substrapp/signals/algo/post_delete.py @@ -1,4 +1,5 @@ -from os import path, rmdir +import shutil +from os import path from django.conf import settings @@ -7,4 +8,4 @@ def algo_post_delete(sender, instance, **kwargs): instance.description.delete(False) directory = path.join(getattr(settings, 'MEDIA_ROOT'), 'algos/{0}'.format(instance.pk)) - rmdir(directory) + shutil.rmtree(directory) diff --git a/substrabac/substrapp/signals/datamanager/post_delete.py b/substrabac/substrapp/signals/datamanager/post_delete.py index fcde18748..e6642f3f1 100644 --- a/substrabac/substrapp/signals/datamanager/post_delete.py +++ b/substrabac/substrapp/signals/datamanager/post_delete.py @@ -1,4 +1,5 @@ -from os import path, rmdir +import shutil +from os import path from django.conf import settings @@ -7,4 +8,4 @@ def datamanager_post_delete(sender, instance, **kwargs): instance.description.delete(False) directory = path.join(getattr(settings, 'MEDIA_ROOT'), 'datamanagers/{0}'.format(instance.pk)) - rmdir(directory) + shutil.rmtree(directory) diff --git a/substrabac/substrapp/signals/model/post_delete.py b/substrabac/substrapp/signals/model/post_delete.py index 2d2dcc6bf..d26eeb1c1 100644 --- a/substrabac/substrapp/signals/model/post_delete.py +++ b/substrabac/substrapp/signals/model/post_delete.py @@ -1,4 +1,5 @@ -from os import path, rmdir +import shutil +from os import path from django.conf import settings @@ -6,4 +7,4 @@ def model_post_delete(sender, instance, **kwargs): instance.file.delete(False) directory = path.join(getattr(settings, 'MEDIA_ROOT'), 'models/{0}'.format(instance.pk)) - rmdir(directory) + shutil.rmtree(directory) diff --git a/substrabac/substrapp/signals/objective/post_delete.py b/substrabac/substrapp/signals/objective/post_delete.py index e5f64e1f2..5b1f71ced 100644 --- 
a/substrabac/substrapp/signals/objective/post_delete.py +++ b/substrabac/substrapp/signals/objective/post_delete.py @@ -1,4 +1,5 @@ -from os import path, rmdir +import shutil +from os import path from django.conf import settings @@ -7,4 +8,4 @@ def objective_post_delete(sender, instance, **kwargs): instance.metrics.delete(False) directory = path.join(getattr(settings, 'MEDIA_ROOT'), 'objectives/{0}'.format(instance.pk)) - rmdir(directory) + shutil.rmtree(directory) diff --git a/substrabac/substrapp/tasks.py b/substrabac/substrapp/tasks.py index 9bc381b74..da89344b3 100644 --- a/substrabac/substrapp/tasks.py +++ b/substrabac/substrapp/tasks.py @@ -354,6 +354,7 @@ def prepareMaterials(subtuple, model_type): def doTask(subtuple, tuple_type): subtuple_directory = path.join(getattr(settings, 'MEDIA_ROOT'), 'subtuple', subtuple['key']) + org_name = getattr(settings, 'ORG_NAME') # Federated learning variables fltask = None @@ -409,26 +410,26 @@ def doTask(subtuple, tuple_type): # create the command option for algo if tuple_type == 'traintuple': - algo_command = '--train' # main command + algo_command = 'train' # main command # add list of inmodels if subtuple['inModels'] is not None: inmodels = [subtuple_model["traintupleKey"] for subtuple_model in subtuple['inModels']] - algo_command += f' --inmodels {" ".join(inmodels)}' + algo_command = f"{algo_command} {' '.join(inmodels)}" # add fltask rank for training if flrank is not None: - algo_command += f' --rank {flrank}' + algo_command = f"{algo_command} --rank {flrank}" elif tuple_type == 'testtuple': - algo_command = '--predict' # main command + algo_command = 'predict' # main command inmodels = subtuple['model']["traintupleKey"] - algo_command += f' --inmodels {inmodels}' + algo_command = f'{algo_command} {inmodels}' # local volume for fltask if fltask is not None and tuple_type == 'traintuple': - flvolume = f'local-{fltask}' + flvolume = f'local-{fltask}-{org_name}' if flrank == 0: client.volumes.create(name=flvolume) else: @@ -458,9 +459,8 @@ def doTask(subtuple, tuple_type): with open(end_model_path, 'rb') as f: instance.file.save('model', f) - url_http = 'http' if settings.DEBUG else 'https' - current_site = f'{getattr(settings, "SITE_HOST")}:{getattr(settings, "SITE_PORT")}' - end_model_file = f'{url_http}://{current_site}{reverse("substrapp:model-file", args=[end_model_file_hash])}' + current_site = getattr(settings, "DEFAULT_DOMAIN") + end_model_file = f'{current_site}{reverse("substrapp:model-file", args=[end_model_file_hash])}' # compute metric task metrics_path = path.join(getattr(settings, 'PROJECT_ROOT'), 'base_metrics') # base metrics comes with substrabac @@ -500,7 +500,7 @@ def doTask(subtuple, tuple_type): # Rank == -1 -> Last fl subtuple or fl throws an exception if flrank == -1: - flvolume = f'local-{fltask}' + flvolume = f'local-{fltask}-{org_name}' local_volume = client.volumes.get(volume_id=flvolume) try: local_volume.remove(force=True) diff --git a/substrabac/substrapp/tests/assets.py b/substrabac/substrapp/tests/assets.py index d9a14315d..24c6fd30e 100644 --- a/substrabac/substrapp/tests/assets.py +++ b/substrabac/substrapp/tests/assets.py @@ -1,4 +1,20 @@ objective = [ + { + "key": "1cdafbb018dd195690111d74916b76c96892d897ec3587c814f287946db446c3", + "name": "Skin Lesion Classification Objective", + "description": { + "hash": "1cdafbb018dd195690111d74916b76c96892d897ec3587c814f287946db446c3", + "storageAddress": "http://testserver/objective/1cdafbb018dd195690111d74916b76c96892d897ec3587c814f287946db446c3/description/" + }, 
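The post-delete signal handlers for algos, data managers, models and objectives switch from `os.rmdir` to `shutil.rmtree`: `rmdir` only removes empty directories, so deleting an asset whose media folder still held files would raise an `OSError` and leave the directory behind. The resulting handler boils down to:

```python
import shutil
from os import path

from django.conf import settings


def algo_post_delete(sender, instance, **kwargs):
    # Drop the stored files first, then the asset's whole media directory,
    # even if it still contains leftovers (os.rmdir would fail on those).
    instance.file.delete(False)
    instance.description.delete(False)
    directory = path.join(getattr(settings, 'MEDIA_ROOT'), 'algos/{0}'.format(instance.pk))
    shutil.rmtree(directory)
```

In the same spirit, `tasks.py` now suffixes the federated-learning scratch volume with the org name (`local-{fltask}-{org_name}`), presumably so that two backends sharing one Docker daemon no longer pick up each other's volume.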
+ "metrics": { + "name": "macro-average recall", + "hash": "c42dca31fbc2ebb5705643e3bb6ee666bbfd956de13dd03727f825ad8445b4d7", + "storageAddress": "http://testserver/objective/1cdafbb018dd195690111d74916b76c96892d897ec3587c814f287946db446c3/metrics/" + }, + "owner": "fba9c2538319fe2b45ac7047e21b4bc7196537367814d5da7f0aae020d3be5f7", + "testDataset": None, + "permissions": "all" + }, { "key": "3d70ab46d710dacb0f48cb42db4874fac14e048a0d415e266aad38c09591ee71", "name": "Skin Lesion Classification Objective", @@ -8,12 +24,12 @@ }, "metrics": { "name": "macro-average recall", - "hash": "750f622262854341bd44f55c1018949e9c119606ef5068bd7d137040a482a756", + "hash": "c42dca31fbc2ebb5705643e3bb6ee666bbfd956de13dd03727f825ad8445b4d7", "storageAddress": "http://testserver/objective/3d70ab46d710dacb0f48cb42db4874fac14e048a0d415e266aad38c09591ee71/metrics/" }, - "owner": "703433008d3f62dab5ffaccb3c53d723660f5f6cdac3c5dfd26ac88312b5a94e", + "owner": "fba9c2538319fe2b45ac7047e21b4bc7196537367814d5da7f0aae020d3be5f7", "testDataset": { - "dataManagerKey": "9a832ed6cee6acf7e33c3acffbc89cebf10ef503b690711bdee048b873daf528", + "dataManagerKey": "615ce631b93c185b492dfc97ed5dea27430d871fa4e50678bab3c79ce2ec6cb7", "dataSampleKeys": [ "8bf3bf4f753a32f27d18c86405e7a406a83a55610d91abcca9acc525061b8ecf", "17d58b67ae2028018108c9bf555fa58b2ddcfe560e0117294196e79d26140b2a" @@ -28,15 +44,15 @@ "objectiveKey": "3d70ab46d710dacb0f48cb42db4874fac14e048a0d415e266aad38c09591ee71", "description": { "hash": "15863c2af1fcfee9ca6f61f04be8a0eaaf6a45e4d50c421788d450d198e580f1", - "storageAddress": "http://testserver/data_manager/59300f1fec4f5cdd3a236c7260ed72bdd24691efdec63b7910ea84136123cecd/description/" + "storageAddress": "http://testserver/data_manager/615ce631b93c185b492dfc97ed5dea27430d871fa4e50678bab3c79ce2ec6cb7/description/" }, - "key": "59300f1fec4f5cdd3a236c7260ed72bdd24691efdec63b7910ea84136123cecd", + "key": "615ce631b93c185b492dfc97ed5dea27430d871fa4e50678bab3c79ce2ec6cb7", "name": "ISIC 2018", "opener": { - "hash": "59300f1fec4f5cdd3a236c7260ed72bdd24691efdec63b7910ea84136123cecd", - "storageAddress": "http://testserver/data_manager/59300f1fec4f5cdd3a236c7260ed72bdd24691efdec63b7910ea84136123cecd/opener/" + "hash": "615ce631b93c185b492dfc97ed5dea27430d871fa4e50678bab3c79ce2ec6cb7", + "storageAddress": "http://testserver/data_manager/615ce631b93c185b492dfc97ed5dea27430d871fa4e50678bab3c79ce2ec6cb7/opener/" }, - "owner": "703433008d3f62dab5ffaccb3c53d723660f5f6cdac3c5dfd26ac88312b5a94e", + "owner": "2cb13d299b337fae2969da1ff4ddd9a2f3004be52d64f671d13d9513f5a79426", "permissions": "all", "type": "Images" }, @@ -44,15 +60,15 @@ "objectiveKey": "3d70ab46d710dacb0f48cb42db4874fac14e048a0d415e266aad38c09591ee71", "description": { "hash": "258bef187a166b3fef5cb86e68c8f7e154c283a148cd5bc344fec7e698821ad3", - "storageAddress": "http://testserver/data_manager/9a832ed6cee6acf7e33c3acffbc89cebf10ef503b690711bdee048b873daf528/description/" + "storageAddress": "http://testserver/data_manager/615ce631b93c185b492dfc97ed5dea27430d871fa4e50678bab3c79ce2ec6cb7/description/" }, - "key": "9a832ed6cee6acf7e33c3acffbc89cebf10ef503b690711bdee048b873daf528", + "key": "615ce631b93c185b492dfc97ed5dea27430d871fa4e50678bab3c79ce2ec6cb7", "name": "Simplified ISIC 2018", "opener": { - "hash": "9a832ed6cee6acf7e33c3acffbc89cebf10ef503b690711bdee048b873daf528", - "storageAddress": "http://testserver/data_manager/9a832ed6cee6acf7e33c3acffbc89cebf10ef503b690711bdee048b873daf528/opener/" + "hash": 
"615ce631b93c185b492dfc97ed5dea27430d871fa4e50678bab3c79ce2ec6cb7", + "storageAddress": "http://testserver/data_manager/615ce631b93c185b492dfc97ed5dea27430d871fa4e50678bab3c79ce2ec6cb7/opener/" }, - "owner": "703433008d3f62dab5ffaccb3c53d723660f5f6cdac3c5dfd26ac88312b5a94e", + "owner": "fba9c2538319fe2b45ac7047e21b4bc7196537367814d5da7f0aae020d3be5f7", "permissions": "all", "type": "Images" } @@ -70,38 +86,35 @@ "hash": "b9463411a01ea00869bdffce6e59a5c100a4e635c0a9386266cad3c77eb28e9e", "storageAddress": "http://testserver/algo/0acc5180e09b6a6ac250f4e3c172e2893f617aa1c22ef1f379019d20fe44142f/description/" }, - "owner": "703433008d3f62dab5ffaccb3c53d723660f5f6cdac3c5dfd26ac88312b5a94e", - "objectiveKey": "3d70ab46d710dacb0f48cb42db4874fac14e048a0d415e266aad38c09591ee71", + "owner": "2cb13d299b337fae2969da1ff4ddd9a2f3004be52d64f671d13d9513f5a79426", "permissions": "all" }, { - "key": "da58a7a29b549f2fe5f009fb51cce6b28ca184ec641a0c1db075729bb266549b", + "key": "4cc53726e01f7e3864a6cf9da24d9cef04a7cbd7fd2892765ff76931dd4628e7", "name": "Logistic regression", "content": { - "hash": "da58a7a29b549f2fe5f009fb51cce6b28ca184ec641a0c1db075729bb266549b", - "storageAddress": "http://testserver/algo/da58a7a29b549f2fe5f009fb51cce6b28ca184ec641a0c1db075729bb266549b/file/" + "hash": "4cc53726e01f7e3864a6cf9da24d9cef04a7cbd7fd2892765ff76931dd4628e7", + "storageAddress": "http://testserver/algo/4cc53726e01f7e3864a6cf9da24d9cef04a7cbd7fd2892765ff76931dd4628e7/file/" }, "description": { "hash": "124a0425b746d7072282d167b53cb6aab3a31bf1946dae89135c15b0126ebec3", - "storageAddress": "http://testserver/algo/da58a7a29b549f2fe5f009fb51cce6b28ca184ec641a0c1db075729bb266549b/description/" + "storageAddress": "http://testserver/algo/4cc53726e01f7e3864a6cf9da24d9cef04a7cbd7fd2892765ff76931dd4628e7/description/" }, - "owner": "703433008d3f62dab5ffaccb3c53d723660f5f6cdac3c5dfd26ac88312b5a94e", - "objectiveKey": "3d70ab46d710dacb0f48cb42db4874fac14e048a0d415e266aad38c09591ee71", + "owner": "2cb13d299b337fae2969da1ff4ddd9a2f3004be52d64f671d13d9513f5a79426", "permissions": "all" }, { - "key": "f2d9fd38e25cd975c49f3ce7e6739846585e89635a86689b5db42ab2c0c57284", + "key": "9c3d8777e11fd72cbc0fd672bec3a0848f8518b4d56706008cc05f8a1cee44f9", "name": "Random Forest", "content": { - "hash": "f2d9fd38e25cd975c49f3ce7e6739846585e89635a86689b5db42ab2c0c57284", - "storageAddress": "http://testserver/algo/f2d9fd38e25cd975c49f3ce7e6739846585e89635a86689b5db42ab2c0c57284/file/" + "hash": "9c3d8777e11fd72cbc0fd672bec3a0848f8518b4d56706008cc05f8a1cee44f9", + "storageAddress": "http://testserver/algo/9c3d8777e11fd72cbc0fd672bec3a0848f8518b4d56706008cc05f8a1cee44f9/file/" }, "description": { "hash": "4acea40c4b51996c88ef279c5c9aa41ab77b97d38c5ca167e978a98b2e402675", - "storageAddress": "http://testserver/algo/f2d9fd38e25cd975c49f3ce7e6739846585e89635a86689b5db42ab2c0c57284/description/" + "storageAddress": "http://testserver/algo/9c3d8777e11fd72cbc0fd672bec3a0848f8518b4d56706008cc05f8a1cee44f9/description/" }, - "owner": "703433008d3f62dab5ffaccb3c53d723660f5f6cdac3c5dfd26ac88312b5a94e", - "objectiveKey": "3d70ab46d710dacb0f48cb42db4874fac14e048a0d415e266aad38c09591ee71", + "owner": "2cb13d299b337fae2969da1ff4ddd9a2f3004be52d64f671d13d9513f5a79426", "permissions": "all" } ] @@ -113,134 +126,139 @@ "name": "Neural Network", "storageAddress": "http://testserver/algo/0acc5180e09b6a6ac250f4e3c172e2893f617aa1c22ef1f379019d20fe44142f/file/" }, - "creator": "703433008d3f62dab5ffaccb3c53d723660f5f6cdac3c5dfd26ac88312b5a94e", + "creator": 
"2cb13d299b337fae2969da1ff4ddd9a2f3004be52d64f671d13d9513f5a79426", "dataset": { "keys": [ - "bcdda7da240f1de016e5c185d63027ff6536c233f7ed96d086766e99027d4e24", + "31510dc1d8be788f7c5d28d05714f7efb9edb667762966b9adc02eadeaacebe9", "03a1f878768ea8624942d46a3b438c37992e626c2cf655023bcc3bed69d485d1" ], - "openerHash": "59300f1fec4f5cdd3a236c7260ed72bdd24691efdec63b7910ea84136123cecd", + "openerHash": "615ce631b93c185b492dfc97ed5dea27430d871fa4e50678bab3c79ce2ec6cb7", "perf": 0, - "worker": "703433008d3f62dab5ffaccb3c53d723660f5f6cdac3c5dfd26ac88312b5a94e" + "worker": "2cb13d299b337fae2969da1ff4ddd9a2f3004be52d64f671d13d9513f5a79426" }, "fltask": "", "inModels": None, - "key": "dfa89a184b6ba5c50daa5a7176818fe1b1c5c3b781b30b99e4d79eef036006f2", - "log": "[00-01-0032-456da5d]", + "key": "c4e3116dd3f895986b77e4d445178330630bd3f52407f10462dd4778e40090e0", + "log": "[00-01-0032-7cc5b61]", "objective": { "hash": "3d70ab46d710dacb0f48cb42db4874fac14e048a0d415e266aad38c09591ee71", "metrics": { - "hash": "750f622262854341bd44f55c1018949e9c119606ef5068bd7d137040a482a756", + "hash": "c42dca31fbc2ebb5705643e3bb6ee666bbfd956de13dd03727f825ad8445b4d7", "storageAddress": "http://testserver/objective/3d70ab46d710dacb0f48cb42db4874fac14e048a0d415e266aad38c09591ee71/metrics/" } }, "outModel": None, "permissions": "all", "rank": 0, - "status": "failed" + "status": "failed", + "tag": "My super tag" }, { "algo": { - "hash": "da58a7a29b549f2fe5f009fb51cce6b28ca184ec641a0c1db075729bb266549b", + "hash": "4cc53726e01f7e3864a6cf9da24d9cef04a7cbd7fd2892765ff76931dd4628e7", "name": "Logistic regression", - "storageAddress": "http://testserver/algo/da58a7a29b549f2fe5f009fb51cce6b28ca184ec641a0c1db075729bb266549b/file/" + "storageAddress": "http://testserver/algo/4cc53726e01f7e3864a6cf9da24d9cef04a7cbd7fd2892765ff76931dd4628e7/file/" }, - "creator": "703433008d3f62dab5ffaccb3c53d723660f5f6cdac3c5dfd26ac88312b5a94e", + "creator": "2cb13d299b337fae2969da1ff4ddd9a2f3004be52d64f671d13d9513f5a79426", "dataset": { "keys": [ - "bcdda7da240f1de016e5c185d63027ff6536c233f7ed96d086766e99027d4e24", + "31510dc1d8be788f7c5d28d05714f7efb9edb667762966b9adc02eadeaacebe9", "03a1f878768ea8624942d46a3b438c37992e626c2cf655023bcc3bed69d485d1" ], - "openerHash": "59300f1fec4f5cdd3a236c7260ed72bdd24691efdec63b7910ea84136123cecd", + "openerHash": "615ce631b93c185b492dfc97ed5dea27430d871fa4e50678bab3c79ce2ec6cb7", "perf": 1, - "worker": "703433008d3f62dab5ffaccb3c53d723660f5f6cdac3c5dfd26ac88312b5a94e" + "worker": "2cb13d299b337fae2969da1ff4ddd9a2f3004be52d64f671d13d9513f5a79426" }, "fltask": "", "inModels": None, - "key": "66caabaf37455cc7af8e89cac37eb0ebfdf73ac7fe4765c644ea6340c2589c0a", - "log": "Train - CPU:78.04 % - Mem:0.11 GB - GPU:0.00 % - GPU Mem:0.00 GB; ", + "key": "3979576752e014adddadfc360d79c67cdccb0f4bae46936f35ce09c64e5832c8", + "log": "Train - CPU:173.81 % - Mem:0.11 GB - GPU:0.00 % - GPU Mem:0.00 GB; ", "objective": { "hash": "3d70ab46d710dacb0f48cb42db4874fac14e048a0d415e266aad38c09591ee71", "metrics": { - "hash": "750f622262854341bd44f55c1018949e9c119606ef5068bd7d137040a482a756", + "hash": "c42dca31fbc2ebb5705643e3bb6ee666bbfd956de13dd03727f825ad8445b4d7", "storageAddress": "http://testserver/objective/3d70ab46d710dacb0f48cb42db4874fac14e048a0d415e266aad38c09591ee71/metrics/" } }, "outModel": { - "hash": "2bd56e309a7e899027a1e8b3990fd7a69986291043079d836bc2f8bcdb9ec8de", - "storageAddress": "http://testserver/model/2bd56e309a7e899027a1e8b3990fd7a69986291043079d836bc2f8bcdb9ec8de/file/" + "hash": 
"592242f9b162178994897c5b8aa49450a17cc395bb9bc9864b830a6cdba6a075", + "storageAddress": "http://testserver/model/592242f9b162178994897c5b8aa49450a17cc395bb9bc9864b830a6cdba6a075/file/" }, "permissions": "all", "rank": 0, - "status": "done" + "status": "done", + "tag": "substra" }, { "algo": { - "hash": "f2d9fd38e25cd975c49f3ce7e6739846585e89635a86689b5db42ab2c0c57284", + "hash": "9c3d8777e11fd72cbc0fd672bec3a0848f8518b4d56706008cc05f8a1cee44f9", "name": "Random Forest", - "storageAddress": "http://testserver/algo/f2d9fd38e25cd975c49f3ce7e6739846585e89635a86689b5db42ab2c0c57284/file/" + "storageAddress": "http://testserver/algo/9c3d8777e11fd72cbc0fd672bec3a0848f8518b4d56706008cc05f8a1cee44f9/file/" }, - "creator": "703433008d3f62dab5ffaccb3c53d723660f5f6cdac3c5dfd26ac88312b5a94e", + "creator": "2cb13d299b337fae2969da1ff4ddd9a2f3004be52d64f671d13d9513f5a79426", "dataset": { "keys": [ - "bcdda7da240f1de016e5c185d63027ff6536c233f7ed96d086766e99027d4e24", + "31510dc1d8be788f7c5d28d05714f7efb9edb667762966b9adc02eadeaacebe9", "03a1f878768ea8624942d46a3b438c37992e626c2cf655023bcc3bed69d485d1" ], - "openerHash": "59300f1fec4f5cdd3a236c7260ed72bdd24691efdec63b7910ea84136123cecd", + "openerHash": "615ce631b93c185b492dfc97ed5dea27430d871fa4e50678bab3c79ce2ec6cb7", "perf": 0, - "worker": "703433008d3f62dab5ffaccb3c53d723660f5f6cdac3c5dfd26ac88312b5a94e" + "worker": "2cb13d299b337fae2969da1ff4ddd9a2f3004be52d64f671d13d9513f5a79426" }, "fltask": "", "inModels": None, - "key": "7f4bea1afafefda207daf7c24034aab4f1db0df0575ba6b303d3d7a6df1794e7", - "log": "[00-01-0032-ea27bd6]", + "key": "c6beed3a4ee5ead0c4246faac7931a944fc2286e193454bb1b851dee0c5a5f59", + "log": "[00-01-0032-139c39e]", "objective": { "hash": "3d70ab46d710dacb0f48cb42db4874fac14e048a0d415e266aad38c09591ee71", "metrics": { - "hash": "750f622262854341bd44f55c1018949e9c119606ef5068bd7d137040a482a756", + "hash": "c42dca31fbc2ebb5705643e3bb6ee666bbfd956de13dd03727f825ad8445b4d7", "storageAddress": "http://testserver/objective/3d70ab46d710dacb0f48cb42db4874fac14e048a0d415e266aad38c09591ee71/metrics/" } }, "outModel": None, "permissions": "all", "rank": 0, - "status": "failed" + "status": "failed", + "tag": "" } ] testtuple = [ { - "objective": { - "hash": "3d70ab46d710dacb0f48cb42db4874fac14e048a0d415e266aad38c09591ee71", - "metrics": { - "hash": "750f622262854341bd44f55c1018949e9c119606ef5068bd7d137040a482a756", - "storageAddress": "http://testserver/objective/3d70ab46d710dacb0f48cb42db4874fac14e048a0d415e266aad38c09591ee71/metrics/" - } - }, + "key": "b7b9291e5ff96ec7d16d38ab49915cbe15055347bb933a824887f2a76fb57c9a", "algo": { "name": "Logistic regression", - "hash": "da58a7a29b549f2fe5f009fb51cce6b28ca184ec641a0c1db075729bb266549b", - "storageAddress": "http://testserver/algo/da58a7a29b549f2fe5f009fb51cce6b28ca184ec641a0c1db075729bb266549b/file/" - }, - "model": { - "traintupleKey": "66caabaf37455cc7af8e89cac37eb0ebfdf73ac7fe4765c644ea6340c2589c0a", - "hash": "2bd56e309a7e899027a1e8b3990fd7a69986291043079d836bc2f8bcdb9ec8de", - "storageAddress": "http://testserver/model/2bd56e309a7e899027a1e8b3990fd7a69986291043079d836bc2f8bcdb9ec8de/file/" + "hash": "4cc53726e01f7e3864a6cf9da24d9cef04a7cbd7fd2892765ff76931dd4628e7", + "storageAddress": "http://testserver/algo/4cc53726e01f7e3864a6cf9da24d9cef04a7cbd7fd2892765ff76931dd4628e7/file/" }, + "certified": True, + "creator": "2cb13d299b337fae2969da1ff4ddd9a2f3004be52d64f671d13d9513f5a79426", "dataset": { - "worker": "703433008d3f62dab5ffaccb3c53d723660f5f6cdac3c5dfd26ac88312b5a94e", + "worker": 
"fba9c2538319fe2b45ac7047e21b4bc7196537367814d5da7f0aae020d3be5f7", "keys": [ "17d58b67ae2028018108c9bf555fa58b2ddcfe560e0117294196e79d26140b2a", "8bf3bf4f753a32f27d18c86405e7a406a83a55610d91abcca9acc525061b8ecf" ], - "openerHash": "9a832ed6cee6acf7e33c3acffbc89cebf10ef503b690711bdee048b873daf528", + "openerHash": "615ce631b93c185b492dfc97ed5dea27430d871fa4e50678bab3c79ce2ec6cb7", "perf": 0 }, - "certified": True, - "status": "done", - "log": "Test - CPU:0.00 % - Mem:0.00 GB - GPU:0.00 % - GPU Mem:0.00 GB; ", + "log": "Test - CPU:179.46 % - Mem:0.09 GB - GPU:0.00 % - GPU Mem:0.00 GB; ", + "model": { + "traintupleKey": "3979576752e014adddadfc360d79c67cdccb0f4bae46936f35ce09c64e5832c8", + "hash": "592242f9b162178994897c5b8aa49450a17cc395bb9bc9864b830a6cdba6a075", + "storageAddress": "http://testserver/model/592242f9b162178994897c5b8aa49450a17cc395bb9bc9864b830a6cdba6a075/file/" + }, + "objective": { + "hash": "3d70ab46d710dacb0f48cb42db4874fac14e048a0d415e266aad38c09591ee71", + "metrics": { + "hash": "c42dca31fbc2ebb5705643e3bb6ee666bbfd956de13dd03727f825ad8445b4d7", + "storageAddress": "http://testserver/objective/3d70ab46d710dacb0f48cb42db4874fac14e048a0d415e266aad38c09591ee71/metrics/" + } + }, "permissions": "all", - "creator": "703433008d3f62dab5ffaccb3c53d723660f5f6cdac3c5dfd26ac88312b5a94e" + "status": "done", + "tag": "" } ] @@ -248,72 +266,75 @@ { "testtuple": { "algo": { - "hash": "da58a7a29b549f2fe5f009fb51cce6b28ca184ec641a0c1db075729bb266549b", + "hash": "4cc53726e01f7e3864a6cf9da24d9cef04a7cbd7fd2892765ff76931dd4628e7", "name": "Logistic regression", - "storageAddress": "http://testserver/algo/da58a7a29b549f2fe5f009fb51cce6b28ca184ec641a0c1db075729bb266549b/file/" + "storageAddress": "http://testserver/algo/4cc53726e01f7e3864a6cf9da24d9cef04a7cbd7fd2892765ff76931dd4628e7/file/" }, "certified": True, - "creator": "703433008d3f62dab5ffaccb3c53d723660f5f6cdac3c5dfd26ac88312b5a94e", + "creator": "2cb13d299b337fae2969da1ff4ddd9a2f3004be52d64f671d13d9513f5a79426", "dataset": { "keys": [ "17d58b67ae2028018108c9bf555fa58b2ddcfe560e0117294196e79d26140b2a", "8bf3bf4f753a32f27d18c86405e7a406a83a55610d91abcca9acc525061b8ecf" ], - "openerHash": "9a832ed6cee6acf7e33c3acffbc89cebf10ef503b690711bdee048b873daf528", + "openerHash": "615ce631b93c185b492dfc97ed5dea27430d871fa4e50678bab3c79ce2ec6cb7", "perf": 0, - "worker": "703433008d3f62dab5ffaccb3c53d723660f5f6cdac3c5dfd26ac88312b5a94e" + "worker": "fba9c2538319fe2b45ac7047e21b4bc7196537367814d5da7f0aae020d3be5f7" }, - "key": "0cd626cf445b1e17f7fb854e696d87db65b460545aab1677920459ae8a774f4f", - "log": "Test - CPU:0.00 % - Mem:0.00 GB - GPU:0.00 % - GPU Mem:0.00 GB; ", + "key": "b7b9291e5ff96ec7d16d38ab49915cbe15055347bb933a824887f2a76fb57c9a", + "log": "Test - CPU:179.46 % - Mem:0.09 GB - GPU:0.00 % - GPU Mem:0.00 GB; ", "model": { - "hash": "2bd56e309a7e899027a1e8b3990fd7a69986291043079d836bc2f8bcdb9ec8de", - "storageAddress": "http://testserver/model/2bd56e309a7e899027a1e8b3990fd7a69986291043079d836bc2f8bcdb9ec8de/file/", - "traintupleKey": "66caabaf37455cc7af8e89cac37eb0ebfdf73ac7fe4765c644ea6340c2589c0a" + "hash": "592242f9b162178994897c5b8aa49450a17cc395bb9bc9864b830a6cdba6a075", + "storageAddress": "http://testserver/model/592242f9b162178994897c5b8aa49450a17cc395bb9bc9864b830a6cdba6a075/file/", + "traintupleKey": "3979576752e014adddadfc360d79c67cdccb0f4bae46936f35ce09c64e5832c8" }, "objective": { "hash": "3d70ab46d710dacb0f48cb42db4874fac14e048a0d415e266aad38c09591ee71", "metrics": { - "hash": 
"750f622262854341bd44f55c1018949e9c119606ef5068bd7d137040a482a756", + "hash": "c42dca31fbc2ebb5705643e3bb6ee666bbfd956de13dd03727f825ad8445b4d7", "storageAddress": "http://testserver/objective/3d70ab46d710dacb0f48cb42db4874fac14e048a0d415e266aad38c09591ee71/metrics/" } }, "permissions": "all", - "status": "done" + "status": "done", + "tag": "" }, "traintuple": { "algo": { - "hash": "da58a7a29b549f2fe5f009fb51cce6b28ca184ec641a0c1db075729bb266549b", + "hash": "4cc53726e01f7e3864a6cf9da24d9cef04a7cbd7fd2892765ff76931dd4628e7", "name": "Logistic regression", - "storageAddress": "http://testserver/algo/da58a7a29b549f2fe5f009fb51cce6b28ca184ec641a0c1db075729bb266549b/file/" + "storageAddress": "http://testserver/algo/4cc53726e01f7e3864a6cf9da24d9cef04a7cbd7fd2892765ff76931dd4628e7/file/" }, - "creator": "703433008d3f62dab5ffaccb3c53d723660f5f6cdac3c5dfd26ac88312b5a94e", + "creator": "2cb13d299b337fae2969da1ff4ddd9a2f3004be52d64f671d13d9513f5a79426", "dataset": { "keys": [ - "bcdda7da240f1de016e5c185d63027ff6536c233f7ed96d086766e99027d4e24", + "31510dc1d8be788f7c5d28d05714f7efb9edb667762966b9adc02eadeaacebe9", "03a1f878768ea8624942d46a3b438c37992e626c2cf655023bcc3bed69d485d1" ], - "openerHash": "59300f1fec4f5cdd3a236c7260ed72bdd24691efdec63b7910ea84136123cecd", + "openerHash": "615ce631b93c185b492dfc97ed5dea27430d871fa4e50678bab3c79ce2ec6cb7", "perf": 1, - "worker": "703433008d3f62dab5ffaccb3c53d723660f5f6cdac3c5dfd26ac88312b5a94e" + "worker": "2cb13d299b337fae2969da1ff4ddd9a2f3004be52d64f671d13d9513f5a79426" }, "fltask": "", "inModels": None, - "key": "66caabaf37455cc7af8e89cac37eb0ebfdf73ac7fe4765c644ea6340c2589c0a", - "log": "Train - CPU:78.04 % - Mem:0.11 GB - GPU:0.00 % - GPU Mem:0.00 GB; ", + "key": "3979576752e014adddadfc360d79c67cdccb0f4bae46936f35ce09c64e5832c8", + "log": "Train - CPU:173.81 % - Mem:0.11 GB - GPU:0.00 % - GPU Mem:0.00 GB; ", "objective": { "hash": "3d70ab46d710dacb0f48cb42db4874fac14e048a0d415e266aad38c09591ee71", "metrics": { - "hash": "750f622262854341bd44f55c1018949e9c119606ef5068bd7d137040a482a756", + "hash": "c42dca31fbc2ebb5705643e3bb6ee666bbfd956de13dd03727f825ad8445b4d7", "storageAddress": "http://testserver/objective/3d70ab46d710dacb0f48cb42db4874fac14e048a0d415e266aad38c09591ee71/metrics/" } }, "outModel": { - "hash": "2bd56e309a7e899027a1e8b3990fd7a69986291043079d836bc2f8bcdb9ec8de", - "storageAddress": "http://testserver/model/2bd56e309a7e899027a1e8b3990fd7a69986291043079d836bc2f8bcdb9ec8de/file/" + "hash": "592242f9b162178994897c5b8aa49450a17cc395bb9bc9864b830a6cdba6a075", + "storageAddress": "http://testserver/model/592242f9b162178994897c5b8aa49450a17cc395bb9bc9864b830a6cdba6a075/file/" }, "permissions": "all", "rank": 0, - "status": "done" + "status": "done", + "tag": "substra" } } ] + diff --git a/substrabac/substrapp/tests/tests_createobjective.py b/substrabac/substrapp/tests/tests_createobjective.py index a114c4885..1005011f5 100644 --- a/substrabac/substrapp/tests/tests_createobjective.py +++ b/substrabac/substrapp/tests/tests_createobjective.py @@ -69,13 +69,12 @@ def test_createobjective(self): } objective_pk = 'd5002e1cd50bd5de5341df8a7b7d11b6437154b3b08f531c9b8f93889855c66f' - datamanager_pk = '59300f1fec4f5cdd3a236c7260ed72bdd24691efdec63b7910ea84136123cecd' + datamanager_pk = '615ce631b93c185b492dfc97ed5dea27430d871fa4e50678bab3c79ce2ec6cb7' pkhash1 = '24fb12ff87485f6b0bc5349e5bf7f36ccca4eb1353395417fdae7d8d787f178c' pkhash2 = '30f6c797e277451b0a08da7119ed86fb2986fa7fab2258bf3edbd9f1752ed553' with patch.object(LedgerObjectiveSerializer, 
'create') as mobjectivecreate, \ patch.object(LedgerDataManagerSerializer, 'create') as mdatamanagercreate, \ - patch('substrapp.management.commands.createobjective.updateLedgerDataManager') as mdatamanagerupdate, \ patch.object(LedgerDataSampleSerializer, 'create') as mdatacreate, \ patch('substrapp.views.datasample.DataSampleViewSet.check_datamanagers') as mcheck_datamanagers: @@ -95,11 +94,6 @@ def test_createobjective(self): }, status.HTTP_201_CREATED) - mdatamanagerupdate.return_value = ({ - 'pkhash': datamanager_pk - }, - status.HTTP_201_CREATED) - mcheck_datamanagers.return_value = True saved_stdout = sys.stdout diff --git a/substrabac/substrapp/tests/tests_query.py b/substrabac/substrapp/tests/tests_query.py index 85e0638ae..0a604e695 100644 --- a/substrabac/substrapp/tests/tests_query.py +++ b/substrabac/substrapp/tests/tests_query.py @@ -39,10 +39,10 @@ def setUp(self): os.makedirs(MEDIA_ROOT) self.objective_description, self.objective_description_filename, \ - self.objective_metrics, self.objective_metrics_filename = get_sample_objective() + self.objective_metrics, self.objective_metrics_filename = get_sample_objective() self.data_description, self.data_description_filename, self.data_data_opener, \ - self.data_opener_filename = get_sample_datamanager() + self.data_opener_filename = get_sample_datamanager() def tearDown(self): try: @@ -50,47 +50,83 @@ def tearDown(self): except FileNotFoundError: pass - def test_add_objective_sync_ok(self): - # add associated data opener - datamanager_name = 'slide opener' - DataManager.objects.create(name=datamanager_name, + def add_default_data_manager(self): + DataManager.objects.create(name='slide opener', description=self.data_description, data_opener=self.data_data_opener) - url = reverse('substrapp:objective-list') + def get_default_objective_data(self): + # XXX reload fixtures as it is an opened buffer and a post will + # modify the objects + desc, _, metrics, _ = get_sample_objective() + expected_hash = get_hash(self.objective_description) data = { 'name': 'tough objective', 'test_data_manager_key': get_hash(self.data_data_opener), 'test_data_sample_keys': [ '5c1d9cd1c2c1082dde0921b56d11030c81f62fbb51932758b58ac2569dd0b379', '5c1d9cd1c2c1082dde0921b56d11030c81f62fbb51932758b58ac2569dd0b389'], - 'description': self.objective_description, - 'metrics': self.objective_metrics, + 'description': desc, + 'metrics': metrics, 'permissions': 'all', 'metrics_name': 'accuracy' } + return expected_hash, data + + def test_add_objective_sync_ok(self): + self.add_default_data_manager() + + pkhash, data = self.get_default_objective_data() + + url = reverse('substrapp:objective-list') extra = { 'HTTP_ACCEPT': 'application/json;version=0.0', } with mock.patch.object(LedgerObjectiveSerializer, 'create') as mcreate: - mcreate.return_value = { - 'pkhash': 'a554bb7adf2cad37ea8b140dc07359dd6e6cbffb067d568d3ba7b3a9de1ed2f3'}, status.HTTP_201_CREATED + mcreate.return_value = {'pkhash': pkhash}, status.HTTP_201_CREATED response = self.client.post(url, data, format='multipart', **extra) r = response.json() - self.assertEqual(r['pkhash'], get_hash(self.objective_description)) + self.assertEqual(r['pkhash'], pkhash) self.assertEqual(r['validated'], False) self.assertEqual(r['description'], f'http://testserver/media/objectives/{r["pkhash"]}/{self.objective_description_filename}') self.assertEqual(r['metrics'], f'http://testserver/media/objectives/{r["pkhash"]}/{self.objective_metrics_filename}') + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + + 
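`get_default_objective_data` rebuilds the payload on every call because, as its `XXX` comment notes, the file objects opened for the first POST are consumed by it; reusing them for the second request of `test_add_objective_conflict` would upload empty files. A minimal illustration of the underlying issue, using any non-empty file:

```python
# Why the payload is rebuilt: a file handle uploaded as multipart data is read
# to EOF, so a second request with the same handle would send an empty file.
with open('description.md', 'rb') as f:
    assert f.read() != b''   # first consumer gets the content
    assert f.read() == b''   # reusing the same handle yields nothing
    f.seek(0)                # rewinding (or reopening) restores it
    assert f.read() != b''
```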
def test_add_objective_conflict(self): + self.add_default_data_manager() + pkhash, data = self.get_default_objective_data() + + url = reverse('substrapp:objective-list') + + extra = { + 'HTTP_ACCEPT': 'application/json;version=0.0', + } + + with mock.patch.object(LedgerObjectiveSerializer, 'create') as mcreate: + mcreate.return_value = {'pkhash': pkhash}, status.HTTP_201_CREATED + + response = self.client.post(url, data, format='multipart', **extra) + r = response.json() + + self.assertEqual(r['pkhash'], pkhash) self.assertEqual(response.status_code, status.HTTP_201_CREATED) + # XXX reload data as the previous call to post change it + _, data = self.get_default_objective_data() + response = self.client.post(url, data, format='multipart', **extra) + r = response.json() + + self.assertEqual(response.status_code, status.HTTP_409_CONFLICT) + self.assertEqual(r['pkhash'], pkhash) + def test_add_objective_no_sync_ok(self): # add associated data opener datamanager_name = 'slide opener' @@ -393,7 +429,7 @@ def test_add_data_sample_sync_ok(self): response = self.client.post(url, data, format='multipart', **extra) r = response.json() self.data_file.file.seek(0) - self.assertEqual(r['pkhash'], get_dir_hash(self.data_file.file)) + self.assertEqual(r[0]['pkhash'], get_dir_hash(self.data_file.file)) self.assertEqual(response.status_code, status.HTTP_201_CREATED) @@ -524,7 +560,7 @@ def test_add_data_sample_ko_already_exists(self): response = self.client.post(url, data, format='multipart', **extra) r = response.json() self.assertEqual(r['message'], - [{'pkhash': ['data sample with this pkhash already exists.']}]) + [[{'pkhash': ['data sample with this pkhash already exists.']}]]) self.assertEqual(response.status_code, status.HTTP_409_CONFLICT) def test_add_data_sample_ko_not_a_zip(self): @@ -581,7 +617,7 @@ def test_add_data_sample_ko_408(self): mis_zipfile.return_value = True response = self.client.post(url, data, format='multipart', **extra) r = response.json() - self.assertEqual(r['message'], {'pkhash': get_hash(file_mock), 'validated': False}) + self.assertEqual(r['message'], {'pkhash': [get_dir_hash(file_mock)], 'validated': False}) self.assertEqual(response.status_code, status.HTTP_408_REQUEST_TIMEOUT) def test_bulk_add_data_sample_ko_408(self): @@ -1068,6 +1104,7 @@ def test_add_traintuple_ok(self): '5c1d9cd1c2c1082dde0921b56d11030c81f62fbb51932758b58ac2569dd0b422'], 'algo_key': '5c1d9cd1c2c1082dde0921b56d11030c81f62fbb51932758b58ac2569dd0a088', 'data_manager_key': '5c1d9cd1c2c1082dde0921b56d11030c81f62fbb51932758b58ac2569dd0a088', + 'objective_key': '5c1d9cd1c2c1082dde0921b56d11030c81f62fbb51932758b58ac2569dd0a088', 'rank': -1, 'FLtask_key': '5c1d9cd1c2c1082dde0921b56d11030c81f62fbb51932758b58ac2569dd0a088', 'in_models_keys': [ @@ -1080,11 +1117,13 @@ def test_add_traintuple_ok(self): mock.patch('substrapp.views.traintuple.queryLedger') as mqueryLedger: raw_pkhash = 'traintuple_pkhash'.encode('utf-8').hex() - mqueryLedger.return_value = (raw_pkhash, status.HTTP_200_OK) + mqueryLedger.return_value = ({'key': raw_pkhash}, status.HTTP_200_OK) mcreate.return_value = {'message': 'Traintuple added in local db waiting for validation. 
\ The substra network has been notified for adding this Traintuple'}, status.HTTP_202_ACCEPTED response = self.client.post(url, data, format='multipart', **extra) + + print(response.json()) self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED) def test_add_traintuple_ko(self): diff --git a/substrabac/substrapp/tests/tests_tasks.py b/substrabac/substrapp/tests/tests_tasks.py index a2df3e6f3..c289be5ea 100644 --- a/substrabac/substrapp/tests/tests_tasks.py +++ b/substrabac/substrapp/tests/tests_tasks.py @@ -2,6 +2,7 @@ import shutil import mock import time +import uuid from django.test import override_settings from django.http import HttpResponse @@ -381,9 +382,10 @@ def test_compute_docker(self): with open(dockerfile_path, 'w') as f: f.write('FROM library/hello-world') + hash_docker = uuid.uuid4().hex result = compute_docker(client, self.ResourcesManager, - self.subtuple_path, 'test_compute_docker', - 'test_compute_docker_name', None, None) + self.subtuple_path, 'test_compute_docker_' + hash_docker, + 'test_compute_docker_name_' + hash_docker, None, None) self.assertIsNone(cpu_set) self.assertIsNone(gpu_set) diff --git a/substrabac/substrapp/tests/tests_views.py b/substrabac/substrapp/tests/tests_views.py index 289fc2db1..76d418adb 100644 --- a/substrabac/substrapp/tests/tests_views.py +++ b/substrabac/substrapp/tests/tests_views.py @@ -17,7 +17,6 @@ from substrapp.views.utils import JsonException, ComputeHashMixin, getObjectFromLedger from substrapp.views.datasample import path_leaf, compute_dryrun as data_sample_compute_dryrun from substrapp.views.objective import compute_dryrun as objective_compute_dryrun -from substrapp.views.algo import compute_dryrun as algo_compute_dryrun from substrapp.utils import compute_hash, get_hash from substrapp.models import DataManager @@ -73,6 +72,7 @@ def test_utils_getObjectFromLedger(self): @override_settings(DRYRUN_ROOT=MEDIA_ROOT) @override_settings(SITE_HOST='localhost') @override_settings(LEDGER={'name': 'test-org', 'peer': 'test-peer'}) +@override_settings(DEFAULT_DOMAIN='https://localhost') class ObjectiveViewTests(APITestCase): def setUp(self): @@ -132,7 +132,7 @@ def test_objective_list_filter_name(self): response = self.client.get(url + search_params, **self.extra) r = response.json() - self.assertEqual(len(r[0]), 1) + self.assertEqual(len(r[0]), 2) def test_objective_list_filter_metrics(self): url = reverse('substrapp:objective-list') @@ -157,19 +157,6 @@ def test_objective_list_filter_datamanager(self): self.assertEqual(len(r[0]), 1) - def test_objective_list_filter_algo(self): - url = reverse('substrapp:objective-list') - with mock.patch('substrapp.views.objective.queryLedger') as mqueryLedger: - mqueryLedger.side_effect = [(objective, status.HTTP_200_OK), - (algo, status.HTTP_200_OK)] - - url = reverse('substrapp:objective-list') - search_params = '?search=algo%253Aname%253ALogistic%2520regression' - response = self.client.get(url + search_params, **self.extra) - r = response.json() - - self.assertEqual(len(r[0]), 1) - def test_objective_list_filter_model(self): url = reverse('substrapp:objective-list') with mock.patch('substrapp.views.objective.queryLedger') as mqueryLedger: @@ -499,64 +486,6 @@ def test_algo_create(self): data['description'].close() data['file'].close() - def test_algo_create_dryrun(self): - - url = reverse('substrapp:algo-list') - - dir_path = os.path.dirname(os.path.realpath(__file__)) - - algo_path = os.path.join(dir_path, '../../fixtures/chunantes/algos/algo3/algo.tar.gz') - description_path = 
os.path.join(dir_path, '../../fixtures/chunantes/algos/algo3/description.md') - - data = {'name': 'Logistic regression', - 'file': open(algo_path, 'rb'), - 'description': open(description_path, 'rb'), - 'objective_key': get_hash(os.path.join(dir_path, '../../fixtures/chunantes/objectives/objective0/description.md')), - 'permissions': 'all', - 'dryrun': True} - - with mock.patch('substrapp.views.algo.compute_dryrun.apply_async') as mdryrun_task: - - mdryrun_task.return_value = FakeTask('42') - response = self.client.post(url, data=data, format='multipart', **self.extra) - - self.assertEqual(response.data['id'], '42') - self.assertEqual(response.data['message'], 'Your dry-run has been taken in account. You can follow the task execution on https://localhost/task/42/') - self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED) - - data['description'].close() - data['file'].close() - - def test_algo_compute_dryrun(self): - - dir_path = os.path.dirname(os.path.realpath(__file__)) - - algo_path = os.path.join(dir_path, '../../fixtures/chunantes/algos/algo3/algo.tar.gz') - shutil.copy(algo_path, os.path.join(MEDIA_ROOT, 'algo.tar.gz')) - - metrics_path = os.path.join(dir_path, '../../fixtures/chunantes/objectives/objective0/metrics.py') - with open(metrics_path, 'rb') as f: - metrics_content = f.read() - metrics_pkhash = compute_hash(metrics_content) - - opener_path = os.path.join(dir_path, '../../fixtures/owkin/datamanagers/datamanager0/opener.py') - with open(opener_path, 'rb') as f: - opener_content = f.read() - opener_pkhash = compute_hash(opener_content) - - with mock.patch('substrapp.views.algo.getObjectFromLedger') as mgetObjectFromLedger,\ - mock.patch('substrapp.views.algo.get_computed_hash') as mget_computed_hash: - mgetObjectFromLedger.side_effect = [{'metrics': {'storageAddress': 'test'}, - 'testDataset': {'dataManagerKey': 'test'}}, - {'opener': {'storageAddress': 'test'}}] - mget_computed_hash.side_effect = [(metrics_content, metrics_pkhash), (opener_content, opener_pkhash)] - - objective_key = get_hash(os.path.join(dir_path, '../../fixtures/chunantes/objectives/objective0/description.md')) - pkhash = get_hash(algo_path) - - # Slow operation, about 45 s, will fail if no internet connection - algo_compute_dryrun(os.path.join(MEDIA_ROOT, 'algo.tar.gz'), objective_key, pkhash) - # APITestCase @override_settings(MEDIA_ROOT=MEDIA_ROOT) @@ -663,7 +592,7 @@ def test_model_retrieve(self): with mock.patch('substrapp.views.model.getObjectFromLedger') as mgetObjectFromLedger, \ mock.patch('substrapp.views.model.requests.get') as mrequestsget, \ mock.patch('substrapp.views.model.ModelViewSet.compute_hash') as mcomputed_hash: - mgetObjectFromLedger.return_value = model[0]['traintuple'] + mgetObjectFromLedger.return_value = model[0] mrequestsget.return_value = FakeRequest(status=status.HTTP_200_OK, content=self.model.read().encode()) @@ -674,7 +603,7 @@ def test_model_retrieve(self): search_params = model[0]['traintuple']['outModel']['hash'] + '/' response = self.client.get(url + search_params, **self.extra) r = response.json() - self.assertEqual(r, model[0]['traintuple']) + self.assertEqual(r, model[0]) def test_model_retrieve_fail(self): @@ -765,18 +694,6 @@ def test_datamanager_list_filter_name(self): self.assertEqual(len(r[0]), 1) - def test_datamanager_list_filter_algo(self): - url = reverse('substrapp:data_manager-list') - with mock.patch('substrapp.views.datamanager.queryLedger') as mqueryLedger: - mqueryLedger.side_effect = [(datamanager, status.HTTP_200_OK), - (algo, 
status.HTTP_200_OK)] - - search_params = '?search=algo%253Aname%253ALogistic%2520regression' - response = self.client.get(url + search_params, **self.extra) - r = response.json() - - self.assertEqual(len(r[0]), 2) - def test_datamanager_list_filter_objective(self): url = reverse('substrapp:data_manager-list') with mock.patch('substrapp.views.datamanager.queryLedger') as mqueryLedger: @@ -803,7 +720,7 @@ def test_datamanager_list_filter_model(self): def test_datamanager_retrieve(self): url = reverse('substrapp:data_manager-list') - datamanager_response = [d for d in datamanager if d['key'] == '59300f1fec4f5cdd3a236c7260ed72bdd24691efdec63b7910ea84136123cecd'][0] + datamanager_response = [d for d in datamanager if d['key'] == '615ce631b93c185b492dfc97ed5dea27430d871fa4e50678bab3c79ce2ec6cb7'][0] with mock.patch.object(DataManagerViewSet, 'getObjectFromLedger') as mgetObjectFromLedger, \ mock.patch('substrapp.views.datamanager.requests.get') as mrequestsget: mgetObjectFromLedger.return_value = datamanager_response @@ -821,7 +738,7 @@ def test_datamanager_retrieve(self): FakeRequest(status=status.HTTP_200_OK, content=description_content)] - search_params = '59300f1fec4f5cdd3a236c7260ed72bdd24691efdec63b7910ea84136123cecd/' + search_params = '615ce631b93c185b492dfc97ed5dea27430d871fa4e50678bab3c79ce2ec6cb7/' response = self.client.get(url + search_params, **self.extra) r = response.json() @@ -871,18 +788,6 @@ def test_datamanager_create_dryrun(self): self.assertEqual(response.data, {'message': f'Your data opener is valid. You can remove the dryrun option.'}) self.assertEqual(response.status_code, status.HTTP_200_OK) - # Will fail because metrics.py instead of opener - files = {'data_opener': open(os.path.join(dir_path, - '../../fixtures/owkin/objectives/objective0/metrics.py'), - 'rb'), - 'description': open(os.path.join(dir_path, - '../../fixtures/chunantes/datamanagers/datamanager0/description.md'), - 'rb')} - - response = self.client.post(url, {**data, **files}, format='multipart', **self.extra) - self.assertIn('please review your opener and the documentation.', response.data['message']) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - for x in files: files[x].close() @@ -1087,6 +992,7 @@ def test_task_retrieve_pending(self): @override_settings(DRYRUN_ROOT=MEDIA_ROOT) @override_settings(SITE_HOST='localhost') @override_settings(LEDGER={'name': 'test-org', 'peer': 'test-peer'}) +@override_settings(DEFAULT_DOMAIN='https://localhost') class DataViewTests(APITestCase): def setUp(self): @@ -1204,7 +1110,7 @@ def test_data_create(self): status.HTTP_201_CREATED) response = self.client.post(url, data=data, format='multipart', **self.extra) - self.assertEqual(response.data['pkhash'], pkhash) + self.assertEqual(response.data[0]['pkhash'], pkhash) self.assertEqual(response.status_code, status.HTTP_201_CREATED) data['file'].close() diff --git a/substrabac/substrapp/utils.py b/substrabac/substrapp/utils.py index cc1300a9c..5f48345bd 100644 --- a/substrabac/substrapp/utils.py +++ b/substrabac/substrapp/utils.py @@ -40,15 +40,16 @@ def queryLedger(options): core_peer_mspconfigpath = LEDGER['core_peer_mspconfigpath'] peer = LEDGER['peer'] + peer_port = peer["port"][os.environ.get('SUBSTRABAC_PEER_PORT', 'external')] + # update config path for using right core.yaml and override msp config path os.environ['FABRIC_CFG_PATH'] = os.environ.get('FABRIC_CFG_PATH_ENV', peer['docker_core_dir']) os.environ['CORE_PEER_MSPCONFIGPATH'] = os.environ.get('CORE_PEER_MSPCONFIGPATH_ENV', 
core_peer_mspconfigpath) - os.environ['CORE_PEER_ADDRESS'] = os.environ.get('CORE_PEER_ADDRESS_ENV', f'{peer["host"]}:{peer["port"]}') + os.environ['CORE_PEER_ADDRESS'] = os.environ.get('CORE_PEER_ADDRESS_ENV', f'{peer["host"]}:{peer_port}') print(f'Querying chaincode in the channel \'{channel_name}\' on the peer \'{peer["host"]}\' ...', flush=True) output = subprocess.run([os.path.join(PROJECT_ROOT, '../bin/peer'), - '--logging-level', 'DEBUG', 'chaincode', 'query', '-x', '-C', channel_name, @@ -70,7 +71,7 @@ def queryLedger(options): print(msg, flush=True) else: try: - msg = output.stderr.decode('utf-8').split('Error')[2].split('\n')[0] + msg = output.stderr.decode('utf-8').split('Error')[-1].split('\n')[0] data = {'message': msg} except: msg = output.stderr.decode('utf-8') @@ -79,6 +80,8 @@ def queryLedger(options): st = status.HTTP_400_BAD_REQUEST if 'access denied' in msg: st = status.HTTP_403_FORBIDDEN + elif 'no element with key' in msg: + st = status.HTTP_404_NOT_FOUND clean_env_variables() @@ -92,6 +95,8 @@ def invokeLedger(options, sync=False): chaincode_name = LEDGER['chaincode_name'] core_peer_mspconfigpath = LEDGER['core_peer_mspconfigpath'] peer = LEDGER['peer'] + peer_port = peer["port"][os.environ.get('SUBSTRABAC_PEER_PORT', 'external')] + orderer = LEDGER['orderer'] orderer_ca_file = orderer['ca'] peer_key_file = peer['clientKey'] @@ -100,12 +105,11 @@ def invokeLedger(options, sync=False): # update config path for using right core.yaml and override msp config path os.environ['FABRIC_CFG_PATH'] = os.environ.get('FABRIC_CFG_PATH_ENV', peer['docker_core_dir']) os.environ['CORE_PEER_MSPCONFIGPATH'] = os.environ.get('CORE_PEER_MSPCONFIGPATH_ENV', core_peer_mspconfigpath) - os.environ['CORE_PEER_ADDRESS'] = os.environ.get('CORE_PEER_ADDRESS_ENV', f'{peer["host"]}:{peer["port"]}') + os.environ['CORE_PEER_ADDRESS'] = os.environ.get('CORE_PEER_ADDRESS_ENV', f'{peer["host"]}:{peer_port}') print(f'Sending invoke transaction to {peer["host"]} ...', flush=True) cmd = [os.path.join(PROJECT_ROOT, '../bin/peer'), - '--logging-level', 'DEBUG', 'chaincode', 'invoke', '-C', channel_name, '-n', chaincode_name, @@ -145,6 +149,9 @@ def invokeLedger(options, sync=False): msg = msg.split('result: status:')[1].split('\n')[0].split('payload:')[1].strip().strip('"') except: pass + else: + msg = json.loads(msg.encode('utf-8').decode('unicode_escape')) + msg = msg.get('key', msg.get('keys')) # get pkhash finally: data = {'pkhash': msg} diff --git a/substrabac/substrapp/views/algo.py b/substrabac/substrapp/views/algo.py index 1c233add5..bad64fffe 100644 --- a/substrabac/substrapp/views/algo.py +++ b/substrabac/substrapp/views/algo.py @@ -1,109 +1,18 @@ -import docker -import os import tempfile import requests -from django.conf import settings from django.http import Http404 -from docker.errors import ContainerError from rest_framework import status, mixins from rest_framework.decorators import action from rest_framework.exceptions import ValidationError from rest_framework.response import Response from rest_framework.viewsets import GenericViewSet -from rest_framework.reverse import reverse - - -from substrabac.celery import app from substrapp.models import Algo from substrapp.serializers import LedgerAlgoSerializer, AlgoSerializer -from substrapp.utils import queryLedger, get_hash, get_computed_hash, \ - uncompress_path -from substrapp.views.utils import get_filters, getObjectFromLedger, ComputeHashMixin, ManageFileMixin, JsonException -from substrapp.tasks import build_subtuple_folders, 
remove_subtuple_materials - - -@app.task(bind=True, ignore_result=False) -def compute_dryrun(self, algo_path, objective_key, pkhash): - - try: - subtuple_directory = build_subtuple_folders({'key': pkhash}) - - uncompress_path(algo_path, subtuple_directory) - os.remove(algo_path) - - try: - objective = getObjectFromLedger(objective_key, 'queryObjective') - except JsonException as e: - raise e - else: - metrics_content, metrics_computed_hash = get_computed_hash(objective['metrics']['storageAddress']) - with open(os.path.join(subtuple_directory, 'metrics/metrics.py'), 'wb') as metrics_file: - metrics_file.write(metrics_content) - datamanager_key = objective['testDataset']['dataManagerKey'] - - try: - datamanager = getObjectFromLedger(datamanager_key, 'queryDataManager') - except JsonException as e: - raise e - else: - opener_content, opener_computed_hash = get_computed_hash(datamanager['opener']['storageAddress']) - with open(os.path.join(subtuple_directory, 'opener/opener.py'), 'wb') as opener_file: - opener_file.write(opener_content) - - # Launch verification - client = docker.from_env() - opener_file = os.path.join(subtuple_directory, 'opener/opener.py') - metrics_file = os.path.join(subtuple_directory, 'metrics/metrics.py') - pred_path = os.path.join(subtuple_directory, 'pred') - model_path = os.path.join(subtuple_directory, 'model') - - algo_docker = 'algo_dry_run' # tag must be lowercase for docker - algo_docker_name = f'{algo_docker}_{pkhash}' - algo_path = subtuple_directory - volumes = {pred_path: {'bind': '/sandbox/pred', 'mode': 'rw'}, - metrics_file: {'bind': '/sandbox/metrics/__init__.py', 'mode': 'ro'}, - opener_file: {'bind': '/sandbox/opener/__init__.py', 'mode': 'ro'}, - model_path: {'bind': '/sandbox/model', 'mode': 'rw'}} - - dockerfile_path = os.path.join(algo_path, 'Dockerfile') - if not os.path.exists(dockerfile_path): - raise Exception('Missing dockerfile in the algo archive.') - - client.images.build(path=algo_path, - tag=algo_docker, - rm=True) - - job_args = {'image': algo_docker, - 'name': algo_docker_name, - 'cpuset_cpus': '0-1', - 'mem_limit': '1G', - 'command': '--dry-run', - 'volumes': volumes, - 'shm_size': '8G', - 'labels': ['dryrun'], - 'detach': False, - 'auto_remove': False, - 'remove': False} - - client.containers.run(**job_args) - - except ContainerError as e: - raise Exception(e.stderr) - except Exception as e: - raise str(e) - finally: - try: - container = client.containers.get(algo_docker_name) - container.remove() - client.images.remove(algo_docker, force=True) - except: - pass - remove_subtuple_materials(subtuple_directory) - if os.path.exists(algo_path): - os.remove(algo_path) +from substrapp.utils import queryLedger, get_hash +from substrapp.views.utils import get_filters, getObjectFromLedger, ComputeHashMixin, ManageFileMixin, JsonException, find_primary_key_error class AlgoViewSet(mixins.CreateModelMixin, @@ -122,10 +31,7 @@ def perform_create(self, serializer): def create(self, request, *args, **kwargs): data = request.data - dryrun = data.get('dryrun', False) - file = data.get('file') - objective_key = data.get('objective_key') pkhash = get_hash(file) serializer = self.get_serializer(data={ 'pkhash': pkhash, @@ -136,33 +42,12 @@ def create(self, request, *args, **kwargs): try: serializer.is_valid(raise_exception=True) except Exception as e: - return Response({ - 'message': e.args, - 'pkhash': pkhash - }, - status=status.HTTP_400_BAD_REQUEST) + st = status.HTTP_400_BAD_REQUEST + if find_primary_key_error(e): + st = status.HTTP_409_CONFLICT + 
return Response({'message': e.args, 'pkhash': pkhash}, status=st) else: - if dryrun: - try: - algo_path = os.path.join(getattr(settings, 'DRYRUN_ROOT'), f'algo_{pkhash}.tar.gz') - with open(algo_path, 'wb') as algo_file: - algo_file.write(file.open().read()) - - task = compute_dryrun.apply_async((algo_path, objective_key, pkhash), queue=f"{settings.LEDGER['name']}.dryrunner") - url_http = 'http' if settings.DEBUG else 'https' - site_port = getattr(settings, "SITE_PORT", None) - current_site = f'{getattr(settings, "SITE_HOST")}' - if site_port: - current_site = f'{current_site}:{site_port}' - task_route = f'{url_http}://{current_site}{reverse("substrapp:task-detail", args=[task.id])}' - msg = f'Your dry-run has been taken in account. You can follow the task execution on {task_route}' - except Exception as e: - return Response({'message': f'Could not launch algo creation with dry-run on this instance: {str(e)}'}, - status=status.HTTP_400_BAD_REQUEST) - else: - return Response({'id': task.id, 'message': msg}, status=status.HTTP_202_ACCEPTED) - # create on db try: instance = self.perform_create(serializer) @@ -173,7 +58,6 @@ def create(self, request, *args, **kwargs): # init ledger serializer ledger_serializer = LedgerAlgoSerializer(data={'name': data.get('name'), 'permissions': data.get('permissions', 'all'), - 'objective_key': objective_key, 'instance': instance}, context={'request': request}) if not ledger_serializer.is_valid(): @@ -194,7 +78,7 @@ def create(self, request, *args, **kwargs): def create_or_update_algo(self, algo, pk): try: - # get objective description from remote node + # get algo description from remote node url = algo['description']['storageAddress'] try: r = requests.get(url, headers={'Accept': 'application/json;version=0.0'}) # TODO pass cert @@ -243,6 +127,8 @@ def retrieve(self, request, *args, **kwargs): data = getObjectFromLedger(pk, self.ledger_query_call) except JsonException as e: return Response(e.msg, status=status.HTTP_400_BAD_REQUEST) + except Http404: + return Response(f'No element with key {pk}', status=status.HTTP_404_NOT_FOUND) else: try: # try to get it from local db to check if description exists @@ -278,8 +164,7 @@ def list(self, request, *args, **kwargs): data, st = queryLedger({ 'args': '{"Args":["queryAlgos"]}' }) - objectiveData = None - datamanagerData = None + modelData = None # init list to return @@ -309,41 +194,6 @@ def list(self, request, *args, **kwargs): if k == 'algo': # filter by own key for key, val in subfilters.items(): l[idx] = [x for x in l[idx] if x[key] in val] - elif k == 'objective': # select objective used by these datamanagers - st = None - if not objectiveData: - # TODO find a way to put this call in cache - objectiveData, st = queryLedger({ - 'args': '{"Args":["queryObjectives"]}' - }) - - if st != status.HTTP_200_OK: - return Response(objectiveData, status=st) - if objectiveData is None: - objectiveData = [] - - for key, val in subfilters.items(): - if key == 'metrics': # specific to nested metrics - filteredData = [x for x in objectiveData if x[key]['name'] in val] - else: - filteredData = [x for x in objectiveData if x[key] in val] - objectiveKeys = [x['key'] for x in filteredData] - l[idx] = [x for x in l[idx] if x['objectiveKey'] in objectiveKeys] - elif k == 'dataset': # select objective used by these algo - if not datamanagerData: - # TODO find a way to put this call in cache - datamanagerData, st = queryLedger({ - 'args': '{"Args":["queryDataManagers"]}' - }) - if st != status.HTTP_200_OK: - return 
Response(datamanagerData, status=st) - if datamanagerData is None: - datamanagerData = [] - - for key, val in subfilters.items(): - filteredData = [x for x in datamanagerData if x[key] in val] - objectiveKeys = [x['objectiveKey'] for x in filteredData] - l[idx] = [x for x in l[idx] if x['objectiveKey'] in objectiveKeys] elif k == 'model': # select objectives used by outModel hash if not modelData: # TODO find a way to put this call in cache diff --git a/substrabac/substrapp/views/datamanager.py b/substrabac/substrapp/views/datamanager.py index 17a360a79..783697216 100644 --- a/substrabac/substrapp/views/datamanager.py +++ b/substrabac/substrapp/views/datamanager.py @@ -17,7 +17,7 @@ from substrapp.serializers.ledger.datamanager.util import updateLedgerDataManager from substrapp.serializers.ledger.datamanager.tasks import updateLedgerDataManagerAsync from substrapp.utils import queryLedger, get_hash -from substrapp.views.utils import get_filters, ManageFileMixin, ComputeHashMixin, JsonException +from substrapp.views.utils import get_filters, ManageFileMixin, ComputeHashMixin, JsonException, find_primary_key_error class DataManagerViewSet(mixins.CreateModelMixin, @@ -35,14 +35,6 @@ def perform_create(self, serializer): def dryrun(self, data_opener): - mandatory_functions = {'get_X': {'folder'}, - 'get_y': {'folder'}, - 'save_pred': {'y_pred', 'folder'}, - 'get_pred': {'folder'}, - 'fake_X': {'n_sample'}, - 'fake_y': {'n_sample'} - } - file = data_opener.open().read() try: @@ -51,18 +43,10 @@ def dryrun(self, data_opener): return Response({'message': f'Opener must be a valid python file, please review your opener file and the documentation.'}, status=status.HTTP_400_BAD_REQUEST) - funcs_args = {n.name: {arg.arg for arg in n.args.args} for n in node.body if isinstance(n, ast.FunctionDef)} - - for mfunc, margs in mandatory_functions.items(): - try: - args = funcs_args[mfunc] - except: - return Response({'message': f'Opener must have a "{mfunc}" function, please review your opener and the documentation.'}, - status=status.HTTP_400_BAD_REQUEST) - else: - if not margs.issubset(args): - return Response({'message': f'Opener function "{mfunc}" must have at least {margs} arguments, please review your opener and the documentation.'}, - status=status.HTTP_400_BAD_REQUEST) + imported_module_names = [m.name for e in node.body if isinstance(e, ast.Import) for m in e.names] + if 'substratools' not in imported_module_names: + return Response({'message': 'Opener must import substratools, please review your opener and the documentation.'}, + status=status.HTTP_400_BAD_REQUEST) return Response({'message': f'Your data opener is valid. 
You can remove the dryrun option.'}, status=status.HTTP_200_OK) @@ -85,9 +69,10 @@ def create(self, request, *args, **kwargs): try: serializer.is_valid(raise_exception=True) except Exception as e: - return Response({'message': e.args, - 'pkhash': pkhash}, - status=status.HTTP_400_BAD_REQUEST) + st = status.HTTP_400_BAD_REQUEST + if find_primary_key_error(e): + st = status.HTTP_409_CONFLICT + return Response({'message': e.args, 'pkhash': pkhash}, status=st) else: if dryrun: return self.dryrun(data_opener) @@ -192,6 +177,9 @@ def getObjectFromLedger(self, pk): 'args': f'{{"Args":["queryDataset", "{pk}"]}}' }) + if st == status.HTTP_404_NOT_FOUND: + raise Http404('Not found') + if st != status.HTTP_200_OK: raise JsonException(data) @@ -217,6 +205,8 @@ def retrieve(self, request, *args, **kwargs): data = self.getObjectFromLedger(pk) # datamanager use particular query to ledger except JsonException as e: return Response(e.msg, status=status.HTTP_400_BAD_REQUEST) + except Http404: + return Response(f'No element with key {pk}', status=status.HTTP_404_NOT_FOUND) else: error = None instance = None @@ -303,21 +293,6 @@ def list(self, request, *args, **kwargs): filteredData = [x for x in objectiveData if x[key] in val] objectiveKeys = [x['key'] for x in filteredData] l[idx] = [x for x in l[idx] if x['objectiveKey'] in objectiveKeys] - elif k == 'algo': # select objective used by these algo - if not algoData: - # TODO find a way to put this call in cache - algoData, st = queryLedger({ - 'args': '{"Args":["queryAlgos"]}' - }) - if st != status.HTTP_200_OK: - return Response(algoData, status=st) - if algoData is None: - algoData = [] - - for key, val in subfilters.items(): - filteredData = [x for x in algoData if x[key] in val] - objectiveKeys = [x['objectiveKey'] for x in filteredData] - l[idx] = [x for x in l[idx] if x['objectiveKey'] in objectiveKeys] elif k == 'model': # select objectives used by outModel hash if not modelData: # TODO find a way to put this call in cache diff --git a/substrabac/substrapp/views/datasample.py b/substrabac/substrapp/views/datasample.py index 287585056..4b8cab446 100644 --- a/substrabac/substrapp/views/datasample.py +++ b/substrabac/substrapp/views/datasample.py @@ -1,7 +1,12 @@ +import logging +from os.path import normpath + import docker import os import ntpath +import uuid +from checksumdir import dirhash from django.conf import settings from docker.errors import ContainerError from rest_framework import status, mixins @@ -17,8 +22,11 @@ from substrapp.serializers import DataSampleSerializer, LedgerDataSampleSerializer from substrapp.serializers.ledger.datasample.util import updateLedgerDataSample from substrapp.serializers.ledger.datasample.tasks import updateLedgerDataSampleAsync -from substrapp.utils import get_hash, uncompress_path, get_dir_hash +from substrapp.utils import uncompress_path, get_dir_hash from substrapp.tasks import build_subtuple_folders, remove_subtuple_materials +from substrapp.views.utils import find_primary_key_error + +logger = logging.getLogger('django.request') def path_leaf(path): @@ -33,38 +41,56 @@ def __init__(self, data, st): super(LedgerException).__init__() +class ValidationException(Exception): + def __init__(self, data, pkhash, st): + self.data = data + self.pkhash = pkhash + self.st = st + super(ValidationException).__init__() + + @app.task(bind=True, ignore_result=False) -def compute_dryrun(self, data_sample_files, data_manager_keys): +def compute_dryrun(self, data, data_manager_keys): from shutil import copy from 
substrapp.models import DataManager - try: - # Name of the dry-run subtuple (not important) - pkhash = data_sample_files[0]['pkhash'] + client = docker.from_env() - subtuple_directory = build_subtuple_folders({'key': pkhash}) + # Name of the dry-run subtuple (not important) + pkhash = data[0]['pkhash'] + dryrun_uuid = f'{pkhash}_{uuid.uuid4().hex}' + subtuple_directory = build_subtuple_folders({'key': dryrun_uuid}) + data_path = os.path.join(subtuple_directory, 'data') + volumes = {} - for data_sample in data_sample_files: - try: - uncompress_path(data_sample['filepath'], - os.path.join(subtuple_directory, 'data', data_sample['pkhash'])) - except Exception as e: - raise e + try: + + for data_sample in data: + # uncompress only for file + if 'file' in data_sample: + try: + uncompress_path(data_sample['file'], os.path.join(data_path, data_sample['pkhash'])) + except Exception as e: + raise e + # for all data paths, we need to create symbolic links inside data_path + # and add real path to volume bind docker + elif 'path' in data_sample: + os.symlink(data_sample['path'], os.path.join(data_path, data_sample['pkhash'])) + volumes.update({data_sample['path']: {'bind': data_sample['path'], 'mode': 'ro'}}) for datamanager_key in data_manager_keys: datamanager = DataManager.objects.get(pk=datamanager_key) copy(datamanager.data_opener.path, os.path.join(subtuple_directory, 'opener/opener.py')) # Launch verification - client = docker.from_env() opener_file = os.path.join(subtuple_directory, 'opener/opener.py') data_sample_docker_path = os.path.join(getattr(settings, 'PROJECT_ROOT'), 'fake_data_sample') # fake_data comes with substrabac data_docker = 'data_dry_run' # tag must be lowercase for docker - data_docker_name = f'{data_docker}_{pkhash}' - data_path = os.path.join(subtuple_directory, 'data') - volumes = {data_path: {'bind': '/sandbox/data', 'mode': 'rw'}, - opener_file: {'bind': '/sandbox/opener/__init__.py', 'mode': 'ro'}} + data_docker_name = f'{data_docker}_{dryrun_uuid}' + + volumes.update({data_path: {'bind': '/sandbox/data', 'mode': 'rw'}, + opener_file: {'bind': '/sandbox/opener/__init__.py', 'mode': 'ro'}}) client.images.build(path=data_sample_docker_path, tag=data_docker, @@ -86,18 +112,16 @@ def compute_dryrun(self, data_sample_files, data_manager_keys): except ContainerError as e: raise Exception(e.stderr) - except Exception as e: - raise str(e) finally: try: container = client.containers.get(data_docker_name) container.remove() except: - pass + logger.error('Could not remove containers') remove_subtuple_materials(subtuple_directory) - for data_sample in data_sample_files: - if os.path.exists(data_sample['filepath']): - os.remove(data_sample['filepath']) + for data_sample in data: + if 'file' in data_sample and os.path.exists(data_sample['file']): + os.remove(data_sample['file']) class DataSampleViewSet(mixins.CreateModelMixin, @@ -109,15 +133,11 @@ class DataSampleViewSet(mixins.CreateModelMixin, queryset = DataSample.objects.all() serializer_class = DataSampleSerializer - def dryrun_task(self, data_sample_files, data_manager_keys): - task = compute_dryrun.apply_async((data_sample_files, data_manager_keys), + def dryrun_task(self, data, data_manager_keys): + task = compute_dryrun.apply_async((data, data_manager_keys), queue=f"{settings.LEDGER['name']}.dryrunner") - url_http = 'http' if settings.DEBUG else 'https' - site_port = getattr(settings, "SITE_PORT", None) - current_site = f'{getattr(settings, "SITE_HOST")}' - if site_port: - current_site = f'{current_site}:{site_port}' - 
task_route = f'{url_http}://{current_site}{reverse("substrapp:task-detail", args=[task.id])}' + current_site = getattr(settings, "DEFAULT_DOMAIN") + task_route = f'{current_site}{reverse("substrapp:task-detail", args=[task.id])}' return task, f'Your dry-run has been taken in account. You can follow the task execution on {task_route}' @staticmethod @@ -128,130 +148,158 @@ def check_datamanagers(data_manager_keys): raise Exception(f'One or more datamanager keys provided do not exist in local substrabac database. Please create them before. DataManager keys: {data_manager_keys}') @staticmethod - def commit(serializer, ledger_data, many): - try: - instances = serializer.save() - except Exception as exc: - raise exc - else: - # init ledger serializer - if not many: - instances = [instances] - ledger_data.update({'instances': instances}) - ledger_serializer = LedgerDataSampleSerializer(data=ledger_data) - - if not ledger_serializer.is_valid(): - # delete instance - for instance in instances: - instance.delete() - raise ValidationError(ledger_serializer.errors) - - # create on ledger - data, st = ledger_serializer.create(ledger_serializer.validated_data) - - if st == status.HTTP_408_REQUEST_TIMEOUT: - if many: - data.update({'pkhash': [x['pkhash'] for x in serializer.data]}) - raise LedgerException(data, st) - - if st not in (status.HTTP_201_CREATED, status.HTTP_202_ACCEPTED): - raise LedgerException(data, st) - - # update validated to True in response - if 'pkhash' in data and data['validated']: - if many: - for d in serializer.data: - if d['pkhash'] in data['pkhash']: - d.update({'validated': data['validated']}) - else: - d = dict(serializer.data) + def commit(serializer, ledger_data): + instances = serializer.save() # can raise + # init ledger serializer + ledger_data.update({'instances': instances}) + ledger_serializer = LedgerDataSampleSerializer(data=ledger_data) + + if not ledger_serializer.is_valid(): + # delete instance + for instance in instances: + instance.delete() + raise ValidationError(ledger_serializer.errors) + + # create on ledger + data, st = ledger_serializer.create(ledger_serializer.validated_data) + + if st == status.HTTP_408_REQUEST_TIMEOUT: + data.update({'pkhash': [x['pkhash'] for x in serializer.data]}) + raise LedgerException(data, st) + + if st not in (status.HTTP_201_CREATED, status.HTTP_202_ACCEPTED): + raise LedgerException(data, st) + + # update validated to True in response + if 'pkhash' in data and data['validated']: + for d in serializer.data: + if d['pkhash'] in data['pkhash']: d.update({'validated': data['validated']}) - return serializer.data, st + return serializer.data, st - def create(self, request, *args, **kwargs): - data = request.data + def compute_data(self, request): + data = {} + # files, should be archive + for k, file in request.FILES.items(): + pkhash = get_dir_hash(file) # can raise + # check pkhash does not belong to the list + try: + existing = data[pkhash] + except KeyError: + pass + else: + raise Exception(f'Your data sample archives contain same files leading to same pkhash, please review the content of your achives. 
Archives {file} and {existing["file"]} are the same') + data[pkhash] = { + 'pkhash': pkhash, + 'file': file + } - dryrun = data.get('dryrun', False) - test_only = data.get('test_only', False) + # path/paths case + path = request.POST.get('path', None) + paths = request.POST.getlist('paths', []) + + if path and paths: + raise Exception('Cannot use path and paths together.') + + if path is not None: + paths = [path] + + # paths, should be directories + for path in paths: + if not os.path.isdir(path): + raise Exception(f'One of your paths does not exist, is not a directory or is not an absolute path: {path}') + pkhash = dirhash(path, 'sha256') + try: + existing = data[pkhash] + except KeyError: + pass + else: + # existing can be a dict with a field path or file + raise Exception(f'Your data sample directory contain same files leading to same pkhash. Invalid path: {path}.') + + data[pkhash] = { + 'pkhash': pkhash, + 'path': normpath(path) + } + + if not data: # data empty + raise Exception(f'No data sample provided.') + + return list(data.values()) + + def handle_dryrun(self, data, data_manager_keys): + data_dry_run = [] + + # write uploaded file to disk + for d in data: + pkhash = d['pkhash'] + if 'file' in d: + file_path = os.path.join(getattr(settings, 'DRYRUN_ROOT'), + f'data_{pkhash}.zip') + with open(file_path, 'wb') as f: + f.write(d['file'].open().read()) + + data_dry_run.append({ + 'pkhash': pkhash, + 'file': file_path + }) + + if 'path' in d: + data_dry_run.append(d) + + try: + task, msg = self.dryrun_task(data_dry_run, data_manager_keys) + except Exception as e: + return Exception(f'Could not launch data creation with dry-run on this instance: {str(e)}') + else: + return {'id': task.id, 'message': msg}, status.HTTP_202_ACCEPTED + + def _create(self, request, data_manager_keys, test_only, dryrun): - # check if bulk create - data_manager_keys = data.getlist('data_manager_keys') if not data_manager_keys: - message = "missing or empty field 'data_manager_keys'" - return Response({'message': message}, - status=status.HTTP_400_BAD_REQUEST) + raise Exception("missing or empty field 'data_manager_keys'") + + self.check_datamanagers(data_manager_keys) # can raise + + computed_data = self.compute_data(request) + + serializer = self.get_serializer(data=computed_data, many=True) try: - self.check_datamanagers(data_manager_keys) + serializer.is_valid(raise_exception=True) + except Exception as e: + pkhashes = [x['pkhash'] for x in computed_data] + st = status.HTTP_400_BAD_REQUEST + if find_primary_key_error(e): + st = status.HTTP_409_CONFLICT + raise ValidationException(e.args, pkhashes, st) + else: + if dryrun: + return self.handle_dryrun(computed_data, data_manager_keys) + + # create on ledger + db + ledger_data = {'test_only': test_only, + 'data_manager_keys': data_manager_keys} + data, st = self.commit(serializer, ledger_data) + return data, st + + def create(self, request, *args, **kwargs): + dryrun = request.data.get('dryrun', False) + test_only = request.data.get('test_only', False) + data_manager_keys = request.data.getlist('data_manager_keys', []) + + try: + data, st = self._create(request, data_manager_keys, test_only, dryrun) + except ValidationException as e: + return Response({'message': e.data, 'pkhash': e.pkhash}, status=e.st) + except LedgerException as e: + return Response({'message': e.data}, status=e.st) except Exception as e: return Response({'message': str(e)}, status=status.HTTP_400_BAD_REQUEST) else: - l = [] - for k, file in request.FILES.items(): - try: - pkhash = 
get_dir_hash(file) - except Exception as e: - return Response({'message': str(e)}, - status=status.HTTP_400_BAD_REQUEST) - else: - # check pkhash does not belong to the list - for x in l: - if pkhash == x['pkhash']: - return Response({'message': f'Your data sample archives contain same files leading to same pkhash, please review the content of your achives. Archives {file} and {x["file"]} are the same'}, status=status.HTTP_400_BAD_REQUEST) - l.append({ - 'pkhash': pkhash, - 'file': file - }) - - many = len(request.FILES) > 1 - data = l - if not many: - data = data[0] - serializer = self.get_serializer(data=data, many=many) - try: - serializer.is_valid(raise_exception=True) - except Exception as e: - return Response({ - 'message': e.args, - 'pkhash': [x['pkhash'] for x in l]}, - status=status.HTTP_409_CONFLICT) - else: - if dryrun: - try: - data_sample_files = [] - for k, file in request.FILES.items(): - pkhash = get_hash(file) - - data_path = os.path.join(getattr(settings, 'DRYRUN_ROOT'), f'data_{pkhash}.zip') - with open(data_path, 'wb') as data_file: - data_file.write(file.open().read()) - - data_sample_files.append({ - 'pkhash': pkhash, - 'filepath': data_path, - }) - - task, msg = self.dryrun_task(data_sample_files, data_manager_keys) - except Exception as e: - return Response({'message': f'Could not launch data creation with dry-run on this instance: {str(e)}'}, - status=status.HTTP_400_BAD_REQUEST) - else: - return Response({'id': task.id, 'message': msg}, - status=status.HTTP_202_ACCEPTED) - - # create on ledger + db - ledger_data = {'test_only': test_only, - 'data_manager_keys': data_manager_keys} - try: - data, st = self.commit(serializer, ledger_data, many) - except LedgerException as e: - return Response({'message': e.data}, status=e.st) - except Exception as e: - return Response({'message': str(e)}, status=status.HTTP_400_BAD_REQUEST) - else: - headers = self.get_success_headers(data) - return Response(data, status=st, headers=headers) + headers = self.get_success_headers(data) + return Response(data, status=st, headers=headers) @action(methods=['post'], detail=False) def bulk_update(self, request): diff --git a/substrabac/substrapp/views/model.py b/substrabac/substrapp/views/model.py index fb367d2b0..34a541eb3 100644 --- a/substrabac/substrapp/views/model.py +++ b/substrabac/substrapp/views/model.py @@ -78,6 +78,8 @@ def retrieve(self, request, *args, **kwargs): data = getObjectFromLedger(pk, 'queryModelDetails') except JsonException as e: return Response(e.msg, status=status.HTTP_400_BAD_REQUEST) + except Http404: + return Response(f'No element with key {pk}', status=status.HTTP_404_NOT_FOUND) else: error = None instance = None @@ -86,14 +88,16 @@ def retrieve(self, request, *args, **kwargs): instance = self.get_object() except Http404: try: - instance = self.create_or_update_model(data, pk) + instance = self.create_or_update_model(data['traintuple'], + data['traintuple']['outModel']['hash']) except Exception as e: error = e else: # check if instance has file if not instance.file: try: - instance = self.create_or_update_model(data, pk) + instance = self.create_or_update_model(data['traintuple'], + data['traintuple']['outModel']['hash']) except Exception as e: error = e finally: diff --git a/substrabac/substrapp/views/objective.py b/substrabac/substrapp/views/objective.py index c849a7ba4..345267fc9 100644 --- a/substrabac/substrapp/views/objective.py +++ b/substrabac/substrapp/views/objective.py @@ -1,8 +1,12 @@ import docker +import logging import os import re import shutil 
import tempfile +import uuid + +from urllib.parse import unquote import requests from django.conf import settings @@ -24,72 +28,72 @@ from substrapp.utils import queryLedger, get_hash, get_computed_hash from substrapp.tasks import build_subtuple_folders, remove_subtuple_materials -from substrapp.views.utils import get_filters, getObjectFromLedger, ComputeHashMixin, ManageFileMixin, JsonException +from substrapp.views.utils import get_filters, getObjectFromLedger, ComputeHashMixin, ManageFileMixin, JsonException, find_primary_key_error @app.task(bind=True, ignore_result=False) def compute_dryrun(self, metrics_path, test_data_manager_key, pkhash): - try: - subtuple_directory = build_subtuple_folders({'key': pkhash}) - - metrics_path_dst = os.path.join(subtuple_directory, 'metrics/metrics.py') - if not os.path.exists(metrics_path_dst): - shutil.copy2(metrics_path, os.path.join(subtuple_directory, 'metrics/metrics.py')) - os.remove(metrics_path) - try: - datamanager = getObjectFromLedger(test_data_manager_key, 'queryDataManager') - except JsonException as e: - raise e - else: - opener_content, opener_computed_hash = get_computed_hash(datamanager['opener']['storageAddress']) - with open(os.path.join(subtuple_directory, 'opener/opener.py'), 'wb') as opener_file: - opener_file.write(opener_content) - - # Launch verification - client = docker.from_env() - pred_path = os.path.join(subtuple_directory, 'pred') - opener_file = os.path.join(subtuple_directory, 'opener/opener.py') - metrics_file = os.path.join(subtuple_directory, 'metrics/metrics.py') - metrics_path = os.path.join(getattr(settings, 'PROJECT_ROOT'), 'fake_metrics') # base metrics comes with substrabac - - metrics_docker = 'metrics_dry_run' # tag must be lowercase for docker - metrics_docker_name = f'{metrics_docker}_{pkhash}' - volumes = {pred_path: {'bind': '/sandbox/pred', 'mode': 'rw'}, - metrics_file: {'bind': '/sandbox/metrics/__init__.py', 'mode': 'ro'}, - opener_file: {'bind': '/sandbox/opener/__init__.py', 'mode': 'ro'}} - - client.images.build(path=metrics_path, - tag=metrics_docker, - rm=False) - - job_args = {'image': metrics_docker, - 'name': metrics_docker_name, - 'cpuset_cpus': '0-0', - 'mem_limit': '1G', - 'command': None, - 'volumes': volumes, - 'shm_size': '8G', - 'labels': ['dryrun'], - 'detach': False, - 'auto_remove': False, - 'remove': False} + dryrun_uuid = f'{pkhash}_{uuid.uuid4().hex}' + + subtuple_directory = build_subtuple_folders({'key': dryrun_uuid}) + + metrics_path_dst = os.path.join(subtuple_directory, 'metrics/metrics.py') + if not os.path.exists(metrics_path_dst): + shutil.copy2(metrics_path, os.path.join(subtuple_directory, 'metrics/metrics.py')) + os.remove(metrics_path) + + if not test_data_manager_key: + raise Exception('Cannot do a objective dryrun without a data manager key.') + + datamanager = getObjectFromLedger(test_data_manager_key, 'queryDataManager') + opener_content, opener_computed_hash = get_computed_hash(datamanager['opener']['storageAddress']) + with open(os.path.join(subtuple_directory, 'opener/opener.py'), 'wb') as opener_file: + opener_file.write(opener_content) + + # Launch verification + client = docker.from_env() + pred_path = os.path.join(subtuple_directory, 'pred') + opener_file = os.path.join(subtuple_directory, 'opener/opener.py') + metrics_file = os.path.join(subtuple_directory, 'metrics/metrics.py') + metrics_path = os.path.join(getattr(settings, 'PROJECT_ROOT'), 'fake_metrics') # base metrics comes with substrabac + + metrics_docker = 'metrics_dry_run' # tag must be lowercase 
for docker + metrics_docker_name = f'{metrics_docker}_{dryrun_uuid}' + volumes = {pred_path: {'bind': '/sandbox/pred', 'mode': 'rw'}, + metrics_file: {'bind': '/sandbox/metrics/__init__.py', 'mode': 'ro'}, + opener_file: {'bind': '/sandbox/opener/__init__.py', 'mode': 'ro'}} + + client.images.build(path=metrics_path, + tag=metrics_docker, + rm=False) + + job_args = {'image': metrics_docker, + 'name': metrics_docker_name, + 'cpuset_cpus': '0-0', + 'mem_limit': '1G', + 'command': None, + 'volumes': volumes, + 'shm_size': '8G', + 'labels': ['dryrun'], + 'detach': False, + 'auto_remove': False, + 'remove': False} + try: client.containers.run(**job_args) - - # Verify that the pred file exist - assert os.path.exists(os.path.join(pred_path, 'perf.json')) + if not os.path.exists(os.path.join(pred_path, 'perf.json')): + raise Exception('Perf file not found') except ContainerError as e: raise Exception(e.stderr) - except Exception as e: - raise str(e) + finally: try: container = client.containers.get(metrics_docker_name) - container.remove() - except: - pass + container.remove(force=True) + except BaseException as e: + logging.error(e, exc_info=True) remove_subtuple_materials(subtuple_directory) @@ -136,8 +140,13 @@ def create(self, request, *args, **kwargs): dryrun = data.get('dryrun', False) description = data.get('description') - test_data_manager_key = data.get('test_data_manager_key') - test_data_sample_keys = data.getlist('test_data_sample_keys') + test_data_manager_key = request.data.get('test_data_manager_key', request.POST.get('test_data_manager_key', '')) + + try: + test_data_sample_keys = request.data.getlist('test_data_sample_keys', []) + except: + test_data_sample_keys = request.data.get('test_data_sample_keys', request.POST.getlist('test_data_sample_keys', [])) + metrics = data.get('metrics') pkhash = get_hash(description) @@ -147,103 +156,96 @@ def create(self, request, *args, **kwargs): try: serializer.is_valid(raise_exception=True) - except Exception as e: - return Response({'message': e.args, - 'pkhash': pkhash}, - status=status.HTTP_400_BAD_REQUEST) - else: - - if dryrun: - try: - metrics_path = os.path.join(getattr(settings, 'DRYRUN_ROOT'), f'metrics_{pkhash}.py') - with open(metrics_path, 'wb') as metrics_file: - metrics_file.write(metrics.open().read()) - - task = compute_dryrun.apply_async((metrics_path, test_data_manager_key, pkhash), queue=f"{settings.LEDGER['name']}.dryrunner") - url_http = 'http' if settings.DEBUG else 'https' - site_port = getattr(settings, "SITE_PORT", None) - current_site = f'{getattr(settings, "SITE_HOST")}' - if site_port: - current_site = f'{current_site}:{site_port}' - task_route = f'{url_http}://{current_site}{reverse("substrapp:task-detail", args=[task.id])}' - msg = f'Your dry-run has been taken in account. 
You can follow the task execution on {task_route}' - except Exception as e: - return Response({'message': f'Could not launch objective creation with dry-run on this instance: {str(e)}'}, - status=status.HTTP_400_BAD_REQUEST) - else: - return Response({'id': task.id, 'message': msg}, status=status.HTTP_202_ACCEPTED) - - # create on db + except ValidationError as e: + st = status.HTTP_400_BAD_REQUEST + if find_primary_key_error(e): + st = status.HTTP_409_CONFLICT + return Response({'message': e.args, 'pkhash': pkhash}, status=st) + + if dryrun: + try: + metrics_path = os.path.join(getattr(settings, 'DRYRUN_ROOT'), f'metrics_{pkhash}.py') + with open(metrics_path, 'wb') as metrics_file: + metrics_file.write(metrics.open().read()) + + task = compute_dryrun.apply_async((metrics_path, test_data_manager_key, pkhash), queue=f"{settings.LEDGER['name']}.dryrunner") + except Exception as e: + return Response({'message': f'Could not launch objective creation with dry-run on this instance: {str(e)}'}, + status=status.HTTP_400_BAD_REQUEST) + + current_site = getattr(settings, "DEFAULT_DOMAIN") + task_route = f'{current_site}{reverse("substrapp:task-detail", args=[task.id])}' + msg = f'Your dry-run has been taken in account. You can follow the task execution on {task_route}' + + return Response({'id': task.id, 'message': msg}, status=status.HTTP_202_ACCEPTED) + + # create on db + try: + instance = self.perform_create(serializer) + except IntegrityError as exc: try: - instance = self.perform_create(serializer) - except IntegrityError as exc: - try: - pkhash = re.search('\(pkhash\)=\((\w+)\)', exc.args[0]).group(1) - except: - pkhash = '' + pkhash = re.search(r'\(pkhash\)=\((\w+)\)', exc.args[0]).group(1) + except BaseException: + pkhash = '' + finally: return Response({'message': 'A objective with this description file already exists.', 'pkhash': pkhash}, status=status.HTTP_409_CONFLICT) - except Exception as exc: - return Response({'message': exc.args}, - status=status.HTTP_400_BAD_REQUEST) - else: - # init ledger serializer - ledger_serializer = LedgerObjectiveSerializer(data={'test_data_sample_keys': test_data_sample_keys, - 'test_data_manager_key': test_data_manager_key, - 'name': data.get('name'), - 'permissions': data.get('permissions'), - 'metrics_name': data.get('metrics_name'), - 'instance': instance}, - context={'request': request}) - - if not ledger_serializer.is_valid(): - # delete instance - instance.delete() - raise ValidationError(ledger_serializer.errors) - - # create on ledger - data, st = ledger_serializer.create(ledger_serializer.validated_data) - - if st not in (status.HTTP_201_CREATED, status.HTTP_202_ACCEPTED, status.HTTP_408_REQUEST_TIMEOUT): - return Response(data, status=st) - - headers = self.get_success_headers(serializer.data) - d = dict(serializer.data) - d.update(data) - return Response(d, status=st, headers=headers) + except Exception as exc: + return Response({'message': exc.args}, + status=status.HTTP_400_BAD_REQUEST) + + # init ledger serializer + ledger_serializer = LedgerObjectiveSerializer(data={'test_data_sample_keys': test_data_sample_keys, + 'test_data_manager_key': test_data_manager_key, + 'name': data.get('name'), + 'permissions': data.get('permissions'), + 'metrics_name': data.get('metrics_name'), + 'instance': instance}, + context={'request': request}) + + if not ledger_serializer.is_valid(): + # delete instance + instance.delete() + raise ValidationError(ledger_serializer.errors) + + # create on ledger + data, st = 
ledger_serializer.create(ledger_serializer.validated_data) + + if st not in (status.HTTP_201_CREATED, status.HTTP_202_ACCEPTED, status.HTTP_408_REQUEST_TIMEOUT): + return Response(data, status=st) + + headers = self.get_success_headers(serializer.data) + d = dict(serializer.data) + d.update(data) + return Response(d, status=st, headers=headers) def create_or_update_objective(self, objective, pk): + # get objective description from remote node + url = objective['description']['storageAddress'] try: - # get objective description from remote node - url = objective['description']['storageAddress'] - try: - r = requests.get(url, headers={'Accept': 'application/json;version=0.0'}) # TODO pass cert - except: - raise Exception(f'Failed to fetch {url}') - else: - if r.status_code != 200: - raise Exception(f'end to end node report {r.text}') - - try: - computed_hash = self.compute_hash(r.content) - except Exception: - raise Exception('Failed to fetch description file') - else: - if computed_hash != pk: - msg = 'computed hash is not the same as the hosted file. Please investigate for default of synchronization, corruption, or hacked' - raise Exception(msg) - - f = tempfile.TemporaryFile() - f.write(r.content) - - # save/update objective in local db for later use - instance, created = Objective.objects.update_or_create(pkhash=pk, validated=True) - instance.description.save('description.md', f) - - except Exception as e: - raise e - else: - return instance + r = requests.get(url, headers={'Accept': 'application/json;version=0.0'}) # TODO pass cert + except (requests.exceptions.ConnectionError, requests.exceptions.Timeout): + raise Exception(f'Failed to fetch {url}') + if r.status_code != status.HTTP_200_OK: + raise Exception(f'end to end node report {r.text}') + + try: + computed_hash = self.compute_hash(r.content) + except Exception: + raise Exception('Failed to fetch description file') + + if computed_hash != pk: + msg = 'computed hash is not the same as the hosted file. 
Please investigate for default of synchronization, corruption, or hacked' + raise Exception(msg) + + f = tempfile.TemporaryFile() + f.write(r.content) + + # save/update objective in local db for later use + instance, created = Objective.objects.update_or_create(pkhash=pk, validated=True) + instance.description.save('description.md', f) + + return instance def retrieve(self, request, *args, **kwargs): lookup_url_kwarg = self.lookup_url_kwarg or self.lookup_field @@ -254,45 +256,33 @@ def retrieve(self, request, *args, **kwargs): try: int(pk, 16) # test if pk is correct (hexadecimal) - except: + except ValueError: return Response({'message': f'Wrong pk {pk}'}, status.HTTP_400_BAD_REQUEST) - else: - # get instance from remote node + + # get instance from remote node + try: + data = getObjectFromLedger(pk, self.ledger_query_call) + except JsonException as e: + return Response(e.msg, status=status.HTTP_400_BAD_REQUEST) + except Http404: + return Response(f'No element with key {pk}', status=status.HTTP_404_NOT_FOUND) + # try to get it from local db to check if description exists + try: + instance = self.get_object() + except Http404: + instance = None + + if not instance or not instance.description: try: - data = getObjectFromLedger(pk, self.ledger_query_call) - except JsonException as e: - return Response(e.msg, status=status.HTTP_400_BAD_REQUEST) - else: - error = None - instance = None - try: - # try to get it from local db to check if description exists - instance = self.get_object() - except Http404: - try: - instance = self.create_or_update_objective(data, pk) - except Exception as e: - error = e - else: - # check if instance has description - if not instance.description: - try: - instance = self.create_or_update_objective(data, pk) - except Exception as e: - error = e - finally: - if error is not None: - return Response({'message': str(error)}, status=status.HTTP_400_BAD_REQUEST) - - # do not give access to local files address - if instance is not None: - serializer = self.get_serializer(instance, - fields=('owner', 'pkhash', 'creation_date', 'last_modified')) - data.update(serializer.data) - else: - data = {'message': 'Fail to get instance'} - - return Response(data, status=status.HTTP_200_OK) + instance = self.create_or_update_objective(data, pk) + except Exception as e: + return Response({'message': str(e)}, status=status.HTTP_400_BAD_REQUEST) + + # do not give access to local files address + serializer = self.get_serializer( + instance, fields=('owner', 'pkhash', 'creation_date', 'last_modified')) + data.update(serializer.data) + return Response(data, status=status.HTTP_200_OK) def list(self, request, *args, **kwargs): # can modify result by interrogating `request.version` @@ -300,88 +290,78 @@ def list(self, request, *args, **kwargs): data, st = queryLedger({ 'args': '{"Args":["queryObjectives"]}' }) + + data = [] if data is None else data + objectives = [data] + + if st != status.HTTP_200_OK: + return Response(objectives, status=st) + dataManagerData = None algoData = None modelData = None - # init list to return - if data is None: - data = [] - l = [data] - - if st == 200: - - # parse filters - query_params = request.query_params.get('search', None) - - if query_params is not None: - try: - filters = get_filters(query_params) - except Exception as exc: - return Response( - {'message': f'Malformed search filters {query_params}'}, - status=status.HTTP_400_BAD_REQUEST) - else: - # filtering, reset l to an empty array - l = [] - for idx, filter in enumerate(filters): - # init each list 
iteration to data - l.append(data) - for k, subfilters in filter.items(): - if k == 'objective': # filter by own key - for key, val in subfilters.items(): - if key == 'metrics': # specific to nested metrics - l[idx] = [x for x in l[idx] if x[key]['name'] in val] - else: - l[idx] = [x for x in l[idx] if x[key] in val] - elif k == 'dataset': # select objective used by these datamanagers - if not dataManagerData: - # TODO find a way to put this call in cache - dataManagerData, st = queryLedger({ - 'args': '{"Args":["queryDataManagers"]}' - }) - if st != status.HTTP_200_OK: - return Response(dataManagerData, status=st) - if dataManagerData is None: - dataManagerData = [] - - for key, val in subfilters.items(): - filteredData = [x for x in dataManagerData if x[key] in val] - dataManagerKeys = [x['key'] for x in filteredData] - objectiveKeys = [x['objectiveKey'] for x in filteredData] - l[idx] = [x for x in l[idx] if x['key'] in objectiveKeys or x['testData']['dataManagerKey'] in dataManagerKeys] - elif k == 'algo': # select objective used by these algo - if not algoData: - # TODO find a way to put this call in cache - algoData, st = queryLedger({ - 'args': '{"Args":["queryAlgos"]}' - }) - if st != status.HTTP_200_OK: - return Response(algoData, status=st) - if algoData is None: - algoData = [] - - for key, val in subfilters.items(): - filteredData = [x for x in algoData if x[key] in val] - objectiveKeys = [x['objectiveKey'] for x in filteredData] - l[idx] = [x for x in l[idx] if x['key'] in objectiveKeys] - elif k == 'model': # select objectives used by outModel hash - if not modelData: - # TODO find a way to put this call in cache - modelData, st = queryLedger({ - 'args': '{"Args":["queryTraintuples"]}' - }) - if st != status.HTTP_200_OK: - return Response(modelData, status=st) - if modelData is None: - modelData = [] - - for key, val in subfilters.items(): - filteredData = [x for x in modelData if x['outModel'] is not None and x['outModel'][key] in val] - objectiveKeys = [x['objective']['hash'] for x in filteredData] - l[idx] = [x for x in l[idx] if x['key'] in objectiveKeys] - - return Response(l, status=st) + # parse filters + query_params = request.query_params.get('search', None) + if query_params is None: + return Response(objectives, status=st) + + try: + filters = get_filters(query_params) + except Exception: + return Response( + {'message': f'Malformed search filters {query_params}'}, + status=status.HTTP_400_BAD_REQUEST) + + # filtering + objectives = [] + for idx, filter in enumerate(filters): + # init each list iteration to data + objectives.append(data) + + for k, subfilters in filter.items(): + if k == 'objective': # filter by own key + for key, val in subfilters.items(): + if key == 'metrics': # specific to nested metrics + objectives[idx] = [x for x in objectives[idx] if x[key]['name'] in val] + else: + objectives[idx] = [x for x in objectives[idx] if x[key] in val] + + elif k == 'dataset': # select objective used by these datamanagers + if not dataManagerData: + # TODO find a way to put this call in cache + dataManagerData, st = queryLedger({ + 'args': '{"Args":["queryDataManagers"]}' + }) + if st != status.HTTP_200_OK: + return Response(dataManagerData, status=st) + if dataManagerData is None: + dataManagerData = [] + + for key, val in subfilters.items(): + filteredData = [x for x in dataManagerData if x[key] in val] + dataManagerKeys = [x['key'] for x in filteredData] + objectiveKeys = [x['objectiveKey'] for x in filteredData] + objectives[idx] = [x for x in objectives[idx] 
if x['key'] in objectiveKeys or
+                                           (x['testDataset'] and x['testDataset']['dataManagerKey'] in dataManagerKeys)]
+
+                elif k == 'model':  # select objectives used by outModel hash
+                    if not modelData:
+                        # TODO find a way to put this call in cache
+                        modelData, st = queryLedger({
+                            'args': '{"Args":["queryTraintuples"]}'
+                        })
+                        if st != status.HTTP_200_OK:
+                            return Response(modelData, status=st)
+                        if modelData is None:
+                            modelData = []
+
+                    for key, val in subfilters.items():
+                        filteredData = [x for x in modelData if x['outModel'] is not None and x['outModel'][key] in val]
+                        objectiveKeys = [x['objective']['hash'] for x in filteredData]
+                        objectives[idx] = [x for x in objectives[idx] if x['key'] in objectiveKeys]
+
+        return Response(objectives, status=st)
 
     @action(detail=True)
     def description(self, request, *args, **kwargs):
diff --git a/substrabac/substrapp/views/testtuple.py b/substrabac/substrapp/views/testtuple.py
index 565a0ec80..ae25f0f0f 100644
--- a/substrabac/substrapp/views/testtuple.py
+++ b/substrabac/substrapp/views/testtuple.py
@@ -1,5 +1,6 @@
-import hashlib
+import json
 
+from django.http import Http404
 from rest_framework import mixins, status
 from rest_framework.response import Response
 from rest_framework.viewsets import GenericViewSet
@@ -42,6 +43,7 @@ def create(self, request, *args, **kwargs):
 
         traintuple_key = request.data.get('traintuple_key', request.POST.get('traintuple_key', None))
         data_manager_key = request.data.get('data_manager_key', request.POST.get('data_manager_key', ''))
+        tag = request.data.get('tag', request.POST.get('tag', ''))
 
         try:
             test_data_sample_keys = request.data.getlist('test_data_sample_keys', [])
@@ -52,16 +54,34 @@ def create(self, request, *args, **kwargs):
             'traintuple_key': traintuple_key,
             'data_manager_key': data_manager_key,
             'test_data_sample_keys': test_data_sample_keys,  # list of test data keys
+            'tag': tag
         }
 
         # init ledger serializer
        serializer = self.get_serializer(data=data)
         serializer.is_valid(raise_exception=True)
 
-        # Get testtuple pkhash of the proposal
+        # Get testtuple pkhash of the proposal with a queryLedger in case of 408 timeout
         args = serializer.get_args(serializer.validated_data)
         data, st = queryLedger({'args': '{"Args":["createTesttuple", ' + args + ']}'})
-        pkhash = bytes.fromhex(data.rstrip()).decode('utf-8')  # fail in queryLedger because it's a string hash and not a json
+        if st == status.HTTP_200_OK:
+            pkhash = data.get('key', data.get('keys'))
+        else:
+            # If queryLedger fails, invoke will fail too so we handle the issue right now
+            try:
+                data['message'] = data['message'].split('Error')[-1]
+                msg = json.loads(data['message'].split('payload:')[-1].strip().strip('"').encode('utf-8').decode('unicode_escape'))
+                pkhash = msg['error'].replace('(', '').replace(')', '').split('tkey: ')[-1].strip()
+
+                if len(pkhash) != 64:
+                    raise Exception('bad pkhash')
+                else:
+                    st = status.HTTP_409_CONFLICT
+
+                return Response({'message': data['message'].split('payload')[0],
+                                 'pkhash': pkhash}, status=st)
+            except:
+                return Response(data, status=st)
 
         # create on ledger
         data, st = serializer.create(serializer.validated_data)
@@ -72,13 +92,16 @@ def create(self, request, *args, **kwargs):
 
         if st not in (status.HTTP_201_CREATED, status.HTTP_202_ACCEPTED):
             try:
-                pkhash = data['message'].replace('"', '').split('-')[
-                    -1].strip()
+                data['message'] = data['message'].split('Error')[-1]
+                msg = json.loads(data['message'].split('payload:')[-1].strip().strip('"').encode('utf-8').decode('unicode_escape'))
+                pkhash = msg['error'].replace('(', '').replace(')', '').split('tkey: ')[-1].strip()
 
-                if not len(pkhash) == 64:
+                if len(pkhash) != 64:
                     raise Exception('bad pkhash')
+                else:
+                    st = status.HTTP_409_CONFLICT
 
-                return Response({'message': data['message'],
+                return Response({'message': data['message'].split('payload')[0],
                                  'pkhash': pkhash}, status=st)
             except:
                 return Response(data, status=st)
@@ -93,6 +116,8 @@ def list(self, request, *args, **kwargs):
             'args': '{"Args":["queryTesttuples"]}'
         })
 
+        data = data if data else []
+
         return Response(data, status=st)
 
     def retrieve(self, request, *args, **kwargs):
@@ -112,5 +137,7 @@ def retrieve(self, request, *args, **kwargs):
                 data = getObjectFromLedger(pk, 'queryTesttuple')
             except JsonException as e:
                 return Response(e.msg, status=status.HTTP_400_BAD_REQUEST)
+            except Http404:
+                return Response(f'No element with key {pk}', status=status.HTTP_404_NOT_FOUND)
             else:
                 return Response(data, status=status.HTTP_200_OK)
diff --git a/substrabac/substrapp/views/traintuple.py b/substrabac/substrapp/views/traintuple.py
index 4750b26c9..2ad5d213d 100644
--- a/substrabac/substrapp/views/traintuple.py
+++ b/substrabac/substrapp/views/traintuple.py
@@ -1,11 +1,10 @@
-import hashlib
+import json
 
-from django.conf import settings
+from django.http import Http404
 from rest_framework import mixins, status
 from rest_framework.response import Response
 from rest_framework.viewsets import GenericViewSet
-
 from substrapp.serializers import LedgerTrainTupleSerializer
 from substrapp.utils import queryLedger
 from substrapp.views.utils import JsonException
@@ -44,8 +43,10 @@ def create(self, request, *args, **kwargs):
 
         algo_key = request.data.get('algo_key', request.POST.get('algo_key', None))
         data_manager_key = request.data.get('data_manager_key', request.POST.get('data_manager_key', None))
+        objective_key = request.data.get('objective_key', request.POST.get('objective_key', None))
         rank = request.data.get('rank', request.POST.get('rank', None))
         FLtask_key = request.data.get('FLtask_key', request.POST.get('FLtask_key', ''))
+        tag = request.data.get('tag', request.POST.get('tag', ''))
 
         try:
             in_models_keys = request.data.getlist('in_models_keys', [])
@@ -60,20 +61,39 @@ def create(self, request, *args, **kwargs):
         data = {
             'algo_key': algo_key,
             'data_manager_key': data_manager_key,
+            'objective_key': objective_key,
             'rank': rank,
             'FLtask_key': FLtask_key,
             'in_models_keys': in_models_keys,
             'train_data_sample_keys': train_data_sample_keys,  # list of train data keys (which are stored in the train worker node)
+            'tag': tag
         }
 
         # init ledger serializer
         serializer = self.get_serializer(data=data)
         serializer.is_valid(raise_exception=True)
 
-        # Get traintuple pkhash of the proposal
+        # Get traintuple pkhash of the proposal with a queryLedger in case of 408 timeout
         args = serializer.get_args(serializer.validated_data)
         data, st = queryLedger({'args': '{"Args":["createTraintuple", ' + args + ']}'})
-        pkhash = bytes.fromhex(data.rstrip()).decode('utf-8')  # fail in queryLedger because it's a string hash and not a json
+        if st == status.HTTP_200_OK:
+            pkhash = data.get('key', data.get('keys'))
+        else:
+            # If queryLedger fails, invoke will fail too so we handle the issue right now
+            try:
+                data['message'] = data['message'].split('Error')[-1]
+                msg = json.loads(data['message'].split('payload:')[-1].strip().strip('"').encode('utf-8').decode('unicode_escape'))
+                pkhash = msg['error'].replace('(', '').replace(')', '').split('tkey: ')[-1].strip()
+
+                if len(pkhash) != 64:
+                    raise Exception('bad pkhash')
+                else:
+                    st = status.HTTP_409_CONFLICT
+
+                return Response({'message': data['message'].split('payload')[0],
+                                 'pkhash': pkhash}, status=st)
+            except:
+                return Response(data, status=st)
 
         # create on ledger
         data, st = serializer.create(serializer.validated_data)
@@ -84,12 +104,16 @@ def create(self, request, *args, **kwargs):
 
         if st not in (status.HTTP_201_CREATED, status.HTTP_202_ACCEPTED):
             try:
-                pkhash = data['message'].replace('"', '').split('-')[-1].strip()
+                data['message'] = data['message'].split('Error')[-1]
+                msg = json.loads(data['message'].split('payload:')[-1].strip().strip('"').encode('utf-8').decode('unicode_escape'))
+                pkhash = msg['error'].replace('(', '').replace(')', '').split('tkey: ')[-1].strip()
 
-                if not len(pkhash) == 64:
+                if len(pkhash) != 64:
                     raise Exception('bad pkhash')
+                else:
+                    st = status.HTTP_409_CONFLICT
 
-                return Response({'message': data['message'],
+                return Response({'message': data['message'].split('payload')[0],
                                  'pkhash': pkhash}, status=st)
             except:
                 return Response(data, status=st)
@@ -102,6 +126,8 @@ def list(self, request, *args, **kwargs):
             'args': '{"Args":["queryTraintuples"]}'
         })
 
+        data = data if data else []
+
         return Response(data, status=st)
 
     def getObjectFromLedger(self, pk):
@@ -110,6 +136,9 @@ def getObjectFromLedger(self, pk):
             'args': f'{{"Args":["queryTraintuple","{pk}"]}}'
         })
 
+        if st == status.HTTP_404_NOT_FOUND:
+            raise Http404('Not found')
+
         if st != status.HTTP_200_OK:
             raise JsonException(data)
 
@@ -135,5 +164,7 @@ def retrieve(self, request, *args, **kwargs):
             data = self.getObjectFromLedger(pk)
         except JsonException as e:
             return Response(e.msg, status=status.HTTP_400_BAD_REQUEST)
+        except Http404:
+            return Response(f'No element with key {pk}', status=status.HTTP_404_NOT_FOUND)
         else:
             return Response(data, status=status.HTTP_200_OK)
diff --git a/substrabac/substrapp/views/utils.py b/substrabac/substrapp/views/utils.py
index 7d39a37b7..182058283 100644
--- a/substrabac/substrapp/views/utils.py
+++ b/substrabac/substrapp/views/utils.py
@@ -2,7 +2,7 @@
 import os
 from urllib.parse import unquote
 
-from django.http import FileResponse
+from django.http import FileResponse, Http404
 from rest_framework import status
 from rest_framework.response import Response
@@ -35,7 +35,7 @@ def get_filters(query_params):
                 value = el[2]
 
                 filter = {
-                    subparent: [value]
+                    subparent: [unquote(value)]
                 }
 
                 if not len(filters[idx]):  # create and add it
@@ -60,6 +60,9 @@ def getObjectFromLedger(pk, query):
         'args': f'{{"Args":["{query}","{pk}"]}}'
     })
 
+    if st == status.HTTP_404_NOT_FOUND:
+        raise Http404('Not found')
+
     if st != status.HTTP_200_OK:
         raise JsonException(data)
 
@@ -103,8 +106,39 @@ def manage_file(self, field):
             getObjectFromLedger(pk, self.ledger_query_call)
         except Exception as e:
             return Response(e, status=status.HTTP_400_BAD_REQUEST)
+        except Http404:
+            return Response(f'No element with key {pk}', status=status.HTTP_404_NOT_FOUND)
         else:
             object = self.get_object()
             data = getattr(object, field)
             return CustomFileResponse(open(data.path, 'rb'), as_attachment=True, filename=os.path.basename(data.path))
+
+
+def find_primary_key_error(validation_error, key_name='pkhash'):
+    detail = validation_error.detail
+
+    def find_unique_error(detail_dict):
+        for key, errors in detail_dict.items():
+            if key != key_name:
+                continue
+            for error in errors:
+                if error.code == 'unique':
+                    return error
+
+        return None
+
+    # according to the rest_framework documentation,
+    # validation_error.detail could be either a dict, a list or a nested
+    # data structure
+
+    if isinstance(detail, dict):
+        return find_unique_error(detail)
+    elif isinstance(detail, list):
+        for sub_detail in detail:
+            if isinstance(sub_detail, dict):
+                unique_error = find_unique_error(sub_detail)
+                if unique_error is not None:
+                    return unique_error
+
+    return None