diff --git a/docker/.env b/docker/.env new file mode 100644 index 00000000..f5e0a843 --- /dev/null +++ b/docker/.env @@ -0,0 +1,10 @@ +SMQTK_REGISTRY=kitware/smqtk + +# Dependency Image/Source versions to be used +CUDA_DEVEL_IMAGE_TAG=9.2-cudnn7-devel-ubuntu18.04 +CUDA_RUNTIME_IMAGE_TAG=9.2-cudnn7-runtime-ubuntu18.04 + +FAISS_VERSION=1.6.2 + +# OUR Images +SMQTK_TAG=latest diff --git a/docker/docker-compose.build.yml b/docker/docker-compose.build.yml new file mode 100644 index 00000000..c0146e90 --- /dev/null +++ b/docker/docker-compose.build.yml @@ -0,0 +1,104 @@ +version: "3.4" + +services: + + # + # TPL: Caffe 1.0 + # + tpl-caffe-base: + # Base stage for common use by CPU and GPU build variants. + image: ${SMQTK_REGISTRY}/tpl-caffe:1.0-base + build: + context: tpl/caffe1 + target: base + args: + CUDA_DEVEL_IMAGE_TAG: # using value from .env file. + + tpl-caffe-cpu: + # CPU-Only build variant + image: ${SMQTK_REGISTRY}/tpl-caffe:1.0-cpu + depends_on: + - tpl-caffe-base + build: + context: tpl/caffe1 + target: caffe_cpu + args: + CUDA_DEVEL_IMAGE_TAG: # using value from .env file. + + tpl-caffe-gpu: + # GPU build variant + image: ${SMQTK_REGISTRY}/tpl-caffe:1.0-cuda${CUDA_DEVEL_IMAGE_TAG} + depends_on: + - tpl-caffe-base + build: + context: tpl/caffe1 + target: caffe_gpu + args: + CUDA_DEVEL_IMAGE_TAG: # using value from .env file. + # One or more CUDA architecture binary versions to compile for. + CUDA_ARCH_BIN_LIST: 50 52 60 61 62 70 72 + + # + # TPL: FAISS + # + tpl-faiss-gpu: + image: ${SMQTK_REGISTRY}/tpl-faiss:${FAISS_VERSION}-cuda${CUDA_DEVEL_IMAGE_TAG} + build: + context: tpl/faiss + dockerfile: gpu/Dockerfile + args: + FAISS_VERSION: # using value from .env file. + CUDA_DEVEL_IMAGE_TAG: # using value from .env file. + # One or more CUDA architectures to compile for. + # Using ">-" for multi-line string concatenation with no new-lines. + # - arch upper bound defined by CUDA version (e.g.
9.2 cannot support + # compute_75) + # - https://docs.nvidia.com/cuda/turing-compatibility-guide/index.html#building-turing-compatible-apps-using-cuda-9- + # - In order to support a NEWER gpu architecture than considered here, + # the final line here should specify a PTX version, e.g. + # `-gencode=arch=compute_XY,code=compute_XY` (note the `code=...`), + # in order to provide compatibility with future architectures. + CUDA_ARCH: >- + -gencode=arch=compute_50,code=sm_50 + -gencode=arch=compute_52,code=sm_52 + -gencode=arch=compute_60,code=sm_60 + -gencode=arch=compute_61,code=sm_61 + -gencode=arch=compute_62,code=sm_62 + -gencode=arch=compute_70,code=sm_70 + -gencode=arch=compute_72,code=sm_72 + -gencode=arch=compute_72,code=compute_72 + + # + # IQR Playground Demo Image CPU/GPU + # + iqr-playground-cpu: + image: ${SMQTK_REGISTRY}/iqr_playground:${SMQTK_TAG}-cpu + depends_on: + - tpl-caffe-cpu + - tpl-faiss-gpu + build: + # Repo root + context: .. + dockerfile: docker/smqtk_iqr_playground/Dockerfile + target: smqtk-cpu + args: + CUDA_RUNTIME_IMAGE_TAG: # using value from .env file. + CAFFE_COPY_FROM_IMAGE: ${SMQTK_REGISTRY}/tpl-caffe:1.0-cpu + FAISS_COPY_FROM_IMAGE: ${SMQTK_REGISTRY}/tpl-faiss:${FAISS_VERSION}-cuda${CUDA_DEVEL_IMAGE_TAG} + FAISS_VERSION: # using value from .env file. + + iqr-playground-gpu: + image: ${SMQTK_REGISTRY}/iqr_playground:${SMQTK_TAG}-cuda${CUDA_RUNTIME_IMAGE_TAG} + depends_on: + - tpl-caffe-gpu + - tpl-faiss-gpu + build: + # Repo root + context: .. + dockerfile: docker/smqtk_iqr_playground/Dockerfile + target: smqtk-gpu + args: + CUDA_RUNTIME_IMAGE_TAG: # using value from .env file. + CAFFE_COPY_FROM_IMAGE: ${SMQTK_REGISTRY}/tpl-caffe:1.0-cuda${CUDA_DEVEL_IMAGE_TAG} + FAISS_COPY_FROM_IMAGE: ${SMQTK_REGISTRY}/tpl-faiss:${FAISS_VERSION}-cuda${CUDA_DEVEL_IMAGE_TAG} + FAISS_VERSION: # using value from .env file. 
diff --git a/docker/smqtk_iqr_playground/Dockerfile b/docker/smqtk_iqr_playground/Dockerfile index 2d66972c..43a46b79 100644 --- a/docker/smqtk_iqr_playground/Dockerfile +++ b/docker/smqtk_iqr_playground/Dockerfile @@ -15,27 +15,29 @@ SHELL ["/bin/bash", "-c"] ENV DEBIAN_FRONTEND=noninteractive RUN apt-get -y update \ && apt-get -y install \ - cmake=3.10.2-1ubuntu2.18.04.1 \ + cmake \ curl \ - less=487-0.1 \ - parallel=20161222-1 \ - python3-dev=3.6.7-1~18.04 \ + less \ + parallel \ + python3-dev \ python3-pip \ sudo \ vim \ + zlib1g-dev \ + libjpeg-dev \ # PostgreSQL 10 Installation - postgresql-10=10.15-0ubuntu0.18.04.1 \ + postgresql-10 \ # MongoDB Installation - mongodb=1:3.6.3-0ubuntu1.1 \ + mongodb \ # Caffe/FAISS TPL deps - libboost-python1.65.1=1.65.1+dfsg-0ubuntu5 \ - libboost-system1.65.1=1.65.1+dfsg-0ubuntu5 \ - libboost-thread1.65.1=1.65.1+dfsg-0ubuntu5 \ - libgoogle-glog0v5=0.3.5-1 \ - libgflags2.2=2.2.1-1 \ - libhdf5-100=1.10.0-patch1+docs-4 \ - libprotobuf10=3.0.0-9.1ubuntu1 \ - libopenblas-base=0.2.20+ds-4 \ + libboost-python1.65.1 \ + libboost-system1.65.1 \ + libboost-thread1.65.1 \ + libgoogle-glog0v5 \ + libgflags2.2 \ + libhdf5-100 \ + libprotobuf10 \ + libopenblas-base \ # Clean up apt resources. 
&& apt-get clean \ && rm -rf /var/lib/apt/lists/* @@ -50,7 +52,9 @@ RUN pip3 install \ psycopg2-binary==2.8.5 \ # Nice-to-haves file-magic==0.4.0 \ - ipython==7.14.0 + ipython==7.14.0 \ + jedi==0.17.2 \ + scikit-learn==0.24.2 ############################################################################### # COPY in FAISS wheel and install @@ -82,27 +86,10 @@ COPY --from=caffe_image /caffe-1.0-py3-none-any.whl / RUN pip3 install /caffe-1.0-py3-none-any.whl \ && rm /caffe-1.0-py3-none-any.whl -############################################################################### -# SMQTK Installation -# -COPY docs /smqtk/source/docs -COPY etc /smqtk/source/etc -COPY python /smqtk/source/python -COPY requirements /smqtk/source/requirements -COPY src /smqtk/source/src -COPY TPL /smqtk/source/TPL -COPY CMakeLists.txt LICENSE.txt pytest.* README.md setup.* setup_env.* \ - /smqtk/source/ -RUN mkdir /smqtk/build \ - && cd /smqtk/build \ - && cmake \ - -DCMAKE_BUILD_TYPE:STRING=Release \ - -DCMAKE_INSTALL_PREFIX:PATH=/usr/local \ - -DSMQTK_BUILD_FLANN:BOOL=OFF \ - /smqtk/source \ - && make install -j$(nproc) \ - && cd / \ - && rm -rf smqtk +# Export language options to use UTF-8, desired by Click +ENV LC_ALL=C.UTF-8 \ + LANG=C.UTF-8 + # Export language options to use UTF-8, desired by Click ENV LC_ALL=C.UTF-8 \ LANG=C.UTF-8 @@ -112,6 +99,26 @@ ENV LC_ALL=C.UTF-8 \ RUN ln -s /usr/share/postgresql-common/pg_wrapper /usr/local/bin/pg_ctl \ && ln -s /usr/share/postgresql-common/pg_wrapper /usr/local/bin/postgres +############################################################################### +# SMQTK installation +# +RUN mkdir /smqtk/ +COPY docs /smqtk/source/docs +COPY pyproject.toml /smqtk/source +COPY README.md /smqtk/source +COPY smqtk_iqr /smqtk/source/smqtk_iqr +RUN cd /smqtk/source && \ + pip3 install -U pip && \ + pip3 install . && \ + cd / && \ + rm -rf smqtk + +# !!! NOTE THIS KLUDGE !!! 
+# This symlink is lost during the smqtk-iqr installation, as pip currently +# does not support symlinks, so we re-create it here. +RUN ln -s /usr/local/lib/python3.6/dist-packages/smqtk_iqr/web/search_app/templates/base.html \ + /usr/local/lib/python3.6/dist-packages/smqtk_iqr/web/search_app/modules/iqr/templates/base.html + # Add ``smqtk`` user RUN useradd -mr -s /bin/bash smqtk \ # sudo permission for modifying permissions at runtime (see entrypoint.sh). @@ -131,19 +138,19 @@ RUN mkdir -p data/{models,configs,logs,db.psql,db.mongo} \ # Configuration for Matplotlib to just use the Agg backend, not Tkinter COPY --chown=smqtk:smqtk \ - devops/docker/smqtk_iqr_playground/matplotlibrc \ + docker/smqtk_iqr_playground/matplotlibrc \ .config/matplotlib/ # Copy in general entrypoint and CPU-variant test script COPY --chown=smqtk:smqtk \ - devops/docker/smqtk_iqr_playground/entrypoint.sh \ - devops/docker/smqtk_iqr_playground/descr_comp_test.cpu.py \ + docker/smqtk_iqr_playground/entrypoint.sh \ + docker/smqtk_iqr_playground/descr_comp_test.cpu.py \ /home/smqtk/ # Copy in general and CPU-variant configs COPY --chown=smqtk:smqtk \ - devops/docker/smqtk_iqr_playground/default_confs/* \ - devops/docker/smqtk_iqr_playground/default_confs/cpu/* \ + docker/smqtk_iqr_playground/default_confs/* \ + docker/smqtk_iqr_playground/default_confs/cpu/* \ /home/smqtk/data/configs/ ENTRYPOINT ["/home/smqtk/entrypoint.sh"] @@ -155,9 +162,9 @@ EXPOSE 5000 5001 FROM smqtk-cpu AS smqtk-gpu # Add GPU-variant test script COPY --chown=smqtk:smqtk \ - devops/docker/smqtk_iqr_playground/descr_comp_test.gpu.py \ + docker/smqtk_iqr_playground/descr_comp_test.gpu.py \ /home/smqtk/ # Overwrite CPU-variance configs with the GPU variants. 
COPY --chown=smqtk:smqtk \ - devops/docker/smqtk_iqr_playground/default_confs/gpu/* \ + docker/smqtk_iqr_playground/default_confs/gpu/* \ /home/smqtk/data/configs/ diff --git a/docker/smqtk_iqr_playground/default_confs/cpu/compute_many_descriptors.json b/docker/smqtk_iqr_playground/default_confs/cpu/compute_many_descriptors.json index 0b482b1c..9c7e2e55 100644 --- a/docker/smqtk_iqr_playground/default_confs/cpu/compute_many_descriptors.json +++ b/docker/smqtk_iqr_playground/default_confs/cpu/compute_many_descriptors.json @@ -1,6 +1,6 @@ { "descriptor_factory": { - "smqtk.representation.descriptor_element.postgres.PostgresDescriptorElement": { + "smqtk_descriptors.impls.descriptor_element.postgres.PostgresDescriptorElement": { "binary_col": "vector", "create_table": false, "db_host": "/dev/shm", @@ -9,51 +9,50 @@ "db_port": 5432, "db_user": "smqtk", "table_name": "descriptors_resnet50_pool5", - "type_col": "type_str", "uuid_col": "uid" }, - "type": "smqtk.representation.descriptor_element.postgres.PostgresDescriptorElement" + "type": "smqtk_descriptors.impls.descriptor_element.postgres.PostgresDescriptorElement" }, "descriptor_generator": { - "smqtk.algorithms.descriptor_generator.caffe_descriptor.CaffeDescriptorGenerator": { - "batch_size": 20, + "smqtk_descriptors.impls.descriptor_generator.caffe1.CaffeDescriptorGenerator": { + "batch_size": 10, "data_layer": "data", "gpu_device_id": 0, "image_mean": { - "smqtk.representation.data_element.file_element.DataFileElement": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement": { "explicit_mimetype": null, "filepath": "/home/smqtk/caffe/msra_resnet/ResNet_mean.binaryproto", "readonly": true }, - "type": "smqtk.representation.data_element.file_element.DataFileElement" + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" }, "input_scale": null, "load_truncated_images": true, "network_is_bgr": true, "network_model": { - "smqtk.representation.data_element.file_element.DataFileElement": { + 
"smqtk_dataprovider.impls.data_element.file.DataFileElement": { "explicit_mimetype": null, "filepath": "/home/smqtk/caffe/msra_resnet/ResNet-50-model.caffemodel", "readonly": true }, - "type": "smqtk.representation.data_element.file_element.DataFileElement" + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" }, "network_prototxt": { - "smqtk.representation.data_element.file_element.DataFileElement": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement": { "explicit_mimetype": null, "filepath": "/home/smqtk/caffe/msra_resnet/ResNet-50-deploy.prototxt", "readonly": true }, - "type": "smqtk.representation.data_element.file_element.DataFileElement" + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" }, "pixel_rescale": null, "return_layer": "pool5", "use_gpu": false }, - "type": "smqtk.algorithms.descriptor_generator.caffe_descriptor.CaffeDescriptorGenerator" + "type": "smqtk_descriptors.impls.descriptor_generator.caffe1.CaffeDescriptorGenerator" }, "descriptor_set": { - "smqtk.representation.descriptor_set.postgres.PostgresDescriptorSet": { + "smqtk_descriptors.impls.descriptor_set.postgres.PostgresDescriptorSet": { "create_table": false, "db_host": "/dev/shm", "db_name": "postgres", @@ -67,20 +66,20 @@ "table_name": "descriptor_set_resnet50_pool5", "uuid_col": "uid" }, - "type": "smqtk.representation.descriptor_set.postgres.PostgresDescriptorSet" + "type": "smqtk_descriptors.impls.descriptor_set.postgres.PostgresDescriptorSet" }, "optional_data_set": { - "smqtk.representation.data_set.memory_set.DataMemorySet": { + "smqtk_dataprovider.impls.data_set.memory.DataMemorySet": { "cache_element": { - "smqtk.representation.data_element.file_element.DataFileElement": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement": { "explicit_mimetype": null, "filepath": "models/image_elements.dms_cache", "readonly": false }, - "type": "smqtk.representation.data_element.file_element.DataFileElement" + "type": 
"smqtk_dataprovider.impls.data_element.file.DataFileElement" }, "pickle_protocol": -1 }, - "type": "smqtk.representation.data_set.memory_set.DataMemorySet" + "type": "smqtk_dataprovider.impls.data_set.memory.DataMemorySet" } } diff --git a/docker/smqtk_iqr_playground/default_confs/cpu/runApp.IqrService.json b/docker/smqtk_iqr_playground/default_confs/cpu/runApp.IqrService.json index 396ba14f..fb96443b 100644 --- a/docker/smqtk_iqr_playground/default_confs/cpu/runApp.IqrService.json +++ b/docker/smqtk_iqr_playground/default_confs/cpu/runApp.IqrService.json @@ -16,25 +16,16 @@ }, "plugins": { "classification_factory": { - "smqtk.representation.classification_element.memory.MemoryClassificationElement": {}, - "type": "smqtk.representation.classification_element.memory.MemoryClassificationElement" + "smqtk_classifier.impls.classification_element.memory.MemoryClassificationElement": {}, + "type": "smqtk_classifier.impls.classification_element.memory.MemoryClassificationElement" }, "classifier_config": { - "smqtk.algorithms.classifier.libsvm.LibSvmClassifier": { - "normalize": 2, - "svm_label_map_uri": null, - "svm_model_uri": null, - "train_params": { - "-b": 1, - "-c": 2, - "-s": 0, - "-t": 0 - } + "smqtk_classifier.impls.classify_descriptor_supervised.sklearn_logistic_regression.SkLearnLogisticRegression": { }, - "type": "smqtk.algorithms.classifier.libsvm.LibSvmClassifier" + "type": "smqtk_classifier.impls.classify_descriptor_supervised.sklearn_logistic_regression.SkLearnLogisticRegression" }, "descriptor_factory": { - "smqtk.representation.descriptor_element.postgres.PostgresDescriptorElement": { + "smqtk_descriptors.impls.descriptor_element.postgres.PostgresDescriptorElement": { "binary_col": "vector", "create_table": false, "db_host": "/dev/shm", @@ -43,51 +34,50 @@ "db_port": 5432, "db_user": "smqtk", "table_name": "descriptors_resnet50_pool5", - "type_col": "type_str", "uuid_col": "uid" }, - "type": 
"smqtk.representation.descriptor_element.postgres.PostgresDescriptorElement" + "type": "smqtk_descriptors.impls.descriptor_element.postgres.PostgresDescriptorElement" }, "descriptor_generator": { - "smqtk.algorithms.descriptor_generator.caffe_descriptor.CaffeDescriptorGenerator": { + "smqtk_descriptors.impls.descriptor_generator.caffe1.CaffeDescriptorGenerator": { "batch_size": 10, "data_layer": "data", "gpu_device_id": 0, "image_mean": { - "smqtk.representation.data_element.file_element.DataFileElement": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement": { "explicit_mimetype": null, "filepath": "/home/smqtk/caffe/msra_resnet/ResNet_mean.binaryproto", "readonly": true }, - "type": "smqtk.representation.data_element.file_element.DataFileElement" + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" }, "input_scale": null, "load_truncated_images": true, "network_is_bgr": true, "network_model": { - "smqtk.representation.data_element.file_element.DataFileElement": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement": { "explicit_mimetype": null, "filepath": "/home/smqtk/caffe/msra_resnet/ResNet-50-model.caffemodel", "readonly": true }, - "type": "smqtk.representation.data_element.file_element.DataFileElement" + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" }, "network_prototxt": { - "smqtk.representation.data_element.file_element.DataFileElement": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement": { "explicit_mimetype": null, "filepath": "/home/smqtk/caffe/msra_resnet/ResNet-50-deploy.prototxt", "readonly": true }, - "type": "smqtk.representation.data_element.file_element.DataFileElement" + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" }, "pixel_rescale": null, "return_layer": "pool5", "use_gpu": false }, - "type": "smqtk.algorithms.descriptor_generator.caffe_descriptor.CaffeDescriptorGenerator" + "type": 
"smqtk_descriptors.impls.descriptor_generator.caffe1.CaffeDescriptorGenerator" }, "descriptor_set": { - "smqtk.representation.descriptor_set.postgres.PostgresDescriptorSet": { + "smqtk_descriptors.impls.descriptor_set.postgres.PostgresDescriptorSet": { "create_table": false, "db_host": "/dev/shm", "db_name": "postgres", @@ -101,13 +91,13 @@ "table_name": "descriptor_set_resnet50_pool5", "uuid_col": "uid" }, - "type": "smqtk.representation.descriptor_set.postgres.PostgresDescriptorSet" + "type": "smqtk_descriptors.impls.descriptor_set.postgres.PostgresDescriptorSet" }, "neighbor_index": { - "smqtk.algorithms.nn_index.faiss.FaissNearestNeighborsIndex": { + "smqtk_indexing.impls.nn_index.faiss.FaissNearestNeighborsIndex": { "descriptor_set": { "__note__": "Using real descriptor index this time", - "smqtk.representation.descriptor_set.postgres.PostgresDescriptorSet": { + "smqtk_descriptors.impls.descriptor_set.postgres.PostgresDescriptorSet": { "create_table": false, "db_host": "/dev/shm", "db_name": "postgres", @@ -121,12 +111,12 @@ "table_name": "descriptor_set_resnet50_pool5", "uuid_col": "uid" }, - "type": "smqtk.representation.descriptor_set.postgres.PostgresDescriptorSet" + "type": "smqtk_descriptors.impls.descriptor_set.postgres.PostgresDescriptorSet" }, "factory_string": "IDMap,Flat", "gpu_id": 0, "idx2uid_kvs": { - "smqtk.representation.key_value.postgres.PostgresKeyValueStore": { + "smqtk_dataprovider.impls.key_value_store.postgres.PostgresKeyValueStore": { "batch_size": 1000, "create_table": true, "db_host": "/dev/shm", @@ -140,10 +130,10 @@ "table_name": "faiss_idx2uid_kvs", "value_col": "value" }, - "type": "smqtk.representation.key_value.postgres.PostgresKeyValueStore" + "type": "smqtk_dataprovider.impls.key_value_store.postgres.PostgresKeyValueStore" }, "uid2idx_kvs": { - "smqtk.representation.key_value.postgres.PostgresKeyValueStore": { + "smqtk_dataprovider.impls.key_value_store.postgres.PostgresKeyValueStore": { "batch_size": 1000, "create_table": 
true, "db_host": "/dev/shm", @@ -157,21 +147,21 @@ "table_name": "faiss_uid2idx_kvs", "value_col": "value" }, - "type": "smqtk.representation.key_value.postgres.PostgresKeyValueStore" + "type": "smqtk_dataprovider.impls.key_value_store.postgres.PostgresKeyValueStore" }, "index_element": { - "smqtk.representation.data_element.file_element.DataFileElement": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement": { "filepath": "models/faiss_index", "readonly": false }, - "type": "smqtk.representation.data_element.file_element.DataFileElement" + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" }, "index_param_element": { - "smqtk.representation.data_element.file_element.DataFileElement": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement": { "filepath": "models/faiss_index_params.json", "readonly": false }, - "type": "smqtk.representation.data_element.file_element.DataFileElement" + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" }, "ivf_nprobe": 64, "metric_type": "l2", @@ -179,34 +169,24 @@ "read_only": false, "use_gpu": false }, - "type": "smqtk.algorithms.nn_index.faiss.FaissNearestNeighborsIndex" + "type": "smqtk_indexing.impls.nn_index.faiss.FaissNearestNeighborsIndex" }, "rank_relevancy_with_feedback": { - "smqtk.algorithms.rank_relevancy.margin_sampling.RankRelevancyWithMarginSampledFeedback": { + "smqtk_relevancy.impls.rank_relevancy.margin_sampling.RankRelevancyWithMarginSampledFeedback": { "rank_relevancy": { - "smqtk.algorithms.rank_relevancy.wrap_classifier.RankRelevancyWithSupervisedClassifier": { + "smqtk_relevancy.impls.rank_relevancy.wrap_classifier.RankRelevancyWithSupervisedClassifier": { "classifier_inst": { - "smqtk.algorithms.classifier.libsvm.LibSvmClassifier": { - "n_jobs": 4, - "normalize": 2, - "svm_label_map_uri": null, - "svm_model_uri": null, - "train_params": { - "-b": 1, - "-c": 2, - "-s": 0, - "-t": 0 - } + 
"smqtk_classifier.impls.classify_descriptor_supervised.sklearn_logistic_regression.SkLearnLogisticRegression": { }, - "type": "smqtk.algorithms.classifier.libsvm.LibSvmClassifier" + "type": "smqtk_classifier.impls.classify_descriptor_supervised.sklearn_logistic_regression.SkLearnLogisticRegression" } }, - "type": "smqtk.algorithms.rank_relevancy.wrap_classifier.RankRelevancyWithSupervisedClassifier" + "type": "smqtk_relevancy.impls.rank_relevancy.wrap_classifier.RankRelevancyWithSupervisedClassifier" }, "n": 10, "center": 0.5 }, - "type": "smqtk.algorithms.rank_relevancy.margin_sampling.RankRelevancyWithMarginSampledFeedback" + "type": "smqtk_relevancy.impls.rank_relevancy.margin_sampling.RankRelevancyWithMarginSampledFeedback" } }, "session_control": { diff --git a/docker/smqtk_iqr_playground/default_confs/gpu/compute_many_descriptors.json b/docker/smqtk_iqr_playground/default_confs/gpu/compute_many_descriptors.json index d5594fb0..78a83ff5 100644 --- a/docker/smqtk_iqr_playground/default_confs/gpu/compute_many_descriptors.json +++ b/docker/smqtk_iqr_playground/default_confs/gpu/compute_many_descriptors.json @@ -1,6 +1,6 @@ { "descriptor_factory": { - "smqtk.representation.descriptor_element.postgres.PostgresDescriptorElement": { + "smqtk_descriptors.impls.descriptor_element.postgres.PostgresDescriptorElement": { "binary_col": "vector", "create_table": false, "db_host": "/dev/shm", @@ -9,51 +9,50 @@ "db_port": 5432, "db_user": "smqtk", "table_name": "descriptors_resnet50_pool5", - "type_col": "type_str", "uuid_col": "uid" }, - "type": "smqtk.representation.descriptor_element.postgres.PostgresDescriptorElement" + "type": "smqtk_descriptors.impls.descriptor_element.postgres.PostgresDescriptorElement" }, "descriptor_generator": { - "smqtk.algorithms.descriptor_generator.caffe_descriptor.CaffeDescriptorGenerator": { - "batch_size": 20, + "smqtk_descriptors.impls.descriptor_generator.caffe1.CaffeDescriptorGenerator": { + "batch_size": 10, "data_layer": "data", 
"gpu_device_id": 0, "image_mean": { - "smqtk.representation.data_element.file_element.DataFileElement": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement": { "explicit_mimetype": null, "filepath": "/home/smqtk/caffe/msra_resnet/ResNet_mean.binaryproto", "readonly": true }, - "type": "smqtk.representation.data_element.file_element.DataFileElement" + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" }, "input_scale": null, "load_truncated_images": true, "network_is_bgr": true, "network_model": { - "smqtk.representation.data_element.file_element.DataFileElement": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement": { "explicit_mimetype": null, "filepath": "/home/smqtk/caffe/msra_resnet/ResNet-50-model.caffemodel", "readonly": true }, - "type": "smqtk.representation.data_element.file_element.DataFileElement" + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" }, "network_prototxt": { - "smqtk.representation.data_element.file_element.DataFileElement": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement": { "explicit_mimetype": null, "filepath": "/home/smqtk/caffe/msra_resnet/ResNet-50-deploy.prototxt", "readonly": true }, - "type": "smqtk.representation.data_element.file_element.DataFileElement" + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" }, "pixel_rescale": null, "return_layer": "pool5", "use_gpu": true }, - "type": "smqtk.algorithms.descriptor_generator.caffe_descriptor.CaffeDescriptorGenerator" + "type": "smqtk_descriptors.impls.descriptor_generator.caffe1.CaffeDescriptorGenerator" }, "descriptor_set": { - "smqtk.representation.descriptor_set.postgres.PostgresDescriptorSet": { + "smqtk_descriptors.impls.descriptor_set.postgres.PostgresDescriptorSet": { "create_table": false, "db_host": "/dev/shm", "db_name": "postgres", @@ -67,20 +66,20 @@ "table_name": "descriptor_set_resnet50_pool5", "uuid_col": "uid" }, - "type": 
"smqtk.representation.descriptor_set.postgres.PostgresDescriptorSet" + "type": "smqtk_descriptors.impls.descriptor_set.postgres.PostgresDescriptorSet" }, "optional_data_set": { - "smqtk.representation.data_set.memory_set.DataMemorySet": { + "smqtk_dataprovider.impls.data_set.memory.DataMemorySet": { "cache_element": { - "smqtk.representation.data_element.file_element.DataFileElement": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement": { "explicit_mimetype": null, "filepath": "models/image_elements.dms_cache", "readonly": false }, - "type": "smqtk.representation.data_element.file_element.DataFileElement" + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" }, "pickle_protocol": -1 }, - "type": "smqtk.representation.data_set.memory_set.DataMemorySet" + "type": "smqtk_dataprovider.impls.data_set.memory.DataMemorySet" } } diff --git a/docker/smqtk_iqr_playground/default_confs/gpu/runApp.IqrService.json b/docker/smqtk_iqr_playground/default_confs/gpu/runApp.IqrService.json index b3226829..6c9fe522 100644 --- a/docker/smqtk_iqr_playground/default_confs/gpu/runApp.IqrService.json +++ b/docker/smqtk_iqr_playground/default_confs/gpu/runApp.IqrService.json @@ -16,25 +16,16 @@ }, "plugins": { "classification_factory": { - "smqtk.representation.classification_element.memory.MemoryClassificationElement": {}, - "type": "smqtk.representation.classification_element.memory.MemoryClassificationElement" + "smqtk_classifier.impls.classification_element.memory.MemoryClassificationElement": {}, + "type": "smqtk_classifier.impls.classification_element.memory.MemoryClassificationElement" }, "classifier_config": { - "smqtk.algorithms.classifier.libsvm.LibSvmClassifier": { - "normalize": 2, - "svm_label_map_uri": null, - "svm_model_uri": null, - "train_params": { - "-b": 1, - "-c": 2, - "-s": 0, - "-t": 0 - } + "smqtk_classifier.impls.classify_descriptor_supervised.sklearn_logistic_regression.SkLearnLogisticRegression": { }, - "type": 
"smqtk.algorithms.classifier.libsvm.LibSvmClassifier" + "type": "smqtk_classifier.impls.classify_descriptor_supervised.sklearn_logistic_regression.SkLearnLogisticRegression" }, "descriptor_factory": { - "smqtk.representation.descriptor_element.postgres.PostgresDescriptorElement": { + "smqtk_descriptors.impls.descriptor_element.postgres.PostgresDescriptorElement": { "binary_col": "vector", "create_table": false, "db_host": "/dev/shm", @@ -43,51 +34,50 @@ "db_port": 5432, "db_user": "smqtk", "table_name": "descriptors_resnet50_pool5", - "type_col": "type_str", "uuid_col": "uid" }, - "type": "smqtk.representation.descriptor_element.postgres.PostgresDescriptorElement" + "type": "smqtk_descriptors.impls.descriptor_element.postgres.PostgresDescriptorElement" }, "descriptor_generator": { - "smqtk.algorithms.descriptor_generator.caffe_descriptor.CaffeDescriptorGenerator": { + "smqtk_descriptors.impls.descriptor_generator.caffe1.CaffeDescriptorGenerator": { "batch_size": 10, "data_layer": "data", "gpu_device_id": 0, "image_mean": { - "smqtk.representation.data_element.file_element.DataFileElement": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement": { "explicit_mimetype": null, "filepath": "/home/smqtk/caffe/msra_resnet/ResNet_mean.binaryproto", "readonly": true }, - "type": "smqtk.representation.data_element.file_element.DataFileElement" + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" }, "input_scale": null, "load_truncated_images": true, "network_is_bgr": true, "network_model": { - "smqtk.representation.data_element.file_element.DataFileElement": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement": { "explicit_mimetype": null, "filepath": "/home/smqtk/caffe/msra_resnet/ResNet-50-model.caffemodel", "readonly": true }, - "type": "smqtk.representation.data_element.file_element.DataFileElement" + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" }, "network_prototxt": { - 
"smqtk.representation.data_element.file_element.DataFileElement": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement": { "explicit_mimetype": null, "filepath": "/home/smqtk/caffe/msra_resnet/ResNet-50-deploy.prototxt", "readonly": true }, - "type": "smqtk.representation.data_element.file_element.DataFileElement" + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" }, "pixel_rescale": null, "return_layer": "pool5", "use_gpu": true }, - "type": "smqtk.algorithms.descriptor_generator.caffe_descriptor.CaffeDescriptorGenerator" + "type": "smqtk_descriptors.impls.descriptor_generator.caffe1.CaffeDescriptorGenerator" }, "descriptor_set": { - "smqtk.representation.descriptor_set.postgres.PostgresDescriptorSet": { + "smqtk_descriptors.impls.descriptor_set.postgres.PostgresDescriptorSet": { "create_table": false, "db_host": "/dev/shm", "db_name": "postgres", @@ -101,13 +91,13 @@ "table_name": "descriptor_set_resnet50_pool5", "uuid_col": "uid" }, - "type": "smqtk.representation.descriptor_set.postgres.PostgresDescriptorSet" + "type": "smqtk_descriptors.impls.descriptor_set.postgres.PostgresDescriptorSet" }, "neighbor_index": { - "smqtk.algorithms.nn_index.faiss.FaissNearestNeighborsIndex": { + "smqtk_indexing.impls.nn_index.faiss.FaissNearestNeighborsIndex": { "descriptor_set": { "__note__": "Using real descriptor index this time", - "smqtk.representation.descriptor_set.postgres.PostgresDescriptorSet": { + "smqtk_descriptors.impls.descriptor_set.postgres.PostgresDescriptorSet": { "create_table": false, "db_host": "/dev/shm", "db_name": "postgres", @@ -121,12 +111,12 @@ "table_name": "descriptor_set_resnet50_pool5", "uuid_col": "uid" }, - "type": "smqtk.representation.descriptor_set.postgres.PostgresDescriptorSet" + "type": "smqtk_descriptors.impls.descriptor_set.postgres.PostgresDescriptorSet" }, "factory_string": "IDMap,Flat", "gpu_id": 0, "idx2uid_kvs": { - "smqtk.representation.key_value.postgres.PostgresKeyValueStore": { + 
"smqtk_dataprovider.impls.key_value_store.postgres.PostgresKeyValueStore": { "batch_size": 1000, "create_table": true, "db_host": "/dev/shm", @@ -140,10 +130,10 @@ "table_name": "faiss_idx2uid_kvs", "value_col": "value" }, - "type": "smqtk.representation.key_value.postgres.PostgresKeyValueStore" + "type": "smqtk_dataprovider.impls.key_value_store.postgres.PostgresKeyValueStore" }, "uid2idx_kvs": { - "smqtk.representation.key_value.postgres.PostgresKeyValueStore": { + "smqtk_dataprovider.impls.key_value_store.postgres.PostgresKeyValueStore": { "batch_size": 1000, "create_table": true, "db_host": "/dev/shm", @@ -157,21 +147,21 @@ "table_name": "faiss_uid2idx_kvs", "value_col": "value" }, - "type": "smqtk.representation.key_value.postgres.PostgresKeyValueStore" + "type": "smqtk_dataprovider.impls.key_value_store.postgres.PostgresKeyValueStore" }, "index_element": { - "smqtk.representation.data_element.file_element.DataFileElement": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement": { "filepath": "models/faiss_index", "readonly": false }, - "type": "smqtk.representation.data_element.file_element.DataFileElement" + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" }, "index_param_element": { - "smqtk.representation.data_element.file_element.DataFileElement": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement": { "filepath": "models/faiss_index_params.json", "readonly": false }, - "type": "smqtk.representation.data_element.file_element.DataFileElement" + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" }, "ivf_nprobe": 64, "metric_type": "l2", @@ -179,34 +169,24 @@ "read_only": false, "use_gpu": false }, - "type": "smqtk.algorithms.nn_index.faiss.FaissNearestNeighborsIndex" + "type": "smqtk_indexing.impls.nn_index.faiss.FaissNearestNeighborsIndex" }, "rank_relevancy_with_feedback": { - "smqtk.algorithms.rank_relevancy.margin_sampling.RankRelevancyWithMarginSampledFeedback": { + 
"smqtk_relevancy.impls.rank_relevancy.margin_sampling.RankRelevancyWithMarginSampledFeedback": { "rank_relevancy": { - "smqtk.algorithms.rank_relevancy.wrap_classifier.RankRelevancyWithSupervisedClassifier": { + "smqtk_relevancy.impls.rank_relevancy.wrap_classifier.RankRelevancyWithSupervisedClassifier": { "classifier_inst": { - "smqtk.algorithms.classifier.libsvm.LibSvmClassifier": { - "n_jobs": 4, - "normalize": 2, - "svm_label_map_uri": null, - "svm_model_uri": null, - "train_params": { - "-b": 1, - "-c": 2, - "-s": 0, - "-t": 0 - } + "smqtk_classifier.impls.classify_descriptor_supervised.sklearn_logistic_regression.SkLearnLogisticRegression": { }, - "type": "smqtk.algorithms.classifier.libsvm.LibSvmClassifier" + "type": "smqtk_classifier.impls.classify_descriptor_supervised.sklearn_logistic_regression.SkLearnLogisticRegression" } }, - "type": "smqtk.algorithms.rank_relevancy.wrap_classifier.RankRelevancyWithSupervisedClassifier" + "type": "smqtk_relevancy.impls.rank_relevancy.wrap_classifier.RankRelevancyWithSupervisedClassifier" }, "n": 10, "center": 0.5 }, - "type": "smqtk.algorithms.rank_relevancy.margin_sampling.RankRelevancyWithMarginSampledFeedback" + "type": "smqtk_relevancy.impls.rank_relevancy.margin_sampling.RankRelevancyWithMarginSampledFeedback" } }, "session_control": { diff --git a/docker/smqtk_iqr_playground/default_confs/nn_index_tool.json b/docker/smqtk_iqr_playground/default_confs/nn_index_tool.json index d0ae5a42..5d25f2f1 100644 --- a/docker/smqtk_iqr_playground/default_confs/nn_index_tool.json +++ b/docker/smqtk_iqr_playground/default_confs/nn_index_tool.json @@ -1,6 +1,6 @@ { "descriptor_set": { - "smqtk.representation.descriptor_set.postgres.PostgresDescriptorSet": { + "smqtk_descriptors.impls.descriptor_set.postgres.PostgresDescriptorSet": { "create_table": false, "db_host": "/dev/shm", "db_name": "postgres", @@ -14,22 +14,32 @@ "table_name": "descriptor_set_resnet50_pool5", "uuid_col": "uid" }, - "type": 
"smqtk.representation.descriptor_set.postgres.PostgresDescriptorSet" + "type": "smqtk_descriptors.impls.descriptor_set.postgres.PostgresDescriptorSet" }, "neighbor_index": { - "smqtk.algorithms.nn_index.faiss.FaissNearestNeighborsIndex": { + "smqtk_indexing.impls.nn_index.faiss.FaissNearestNeighborsIndex": { "descriptor_set": { - "__note__": "Using memory here in order to avoid re-writing database set", - "smqtk.representation.descriptor_set.memory.MemoryDescriptorSet": { - "cache_element": {"type": null}, - "pickle_protocol": -1 + "__note__": "Using real descriptor index this time", + "smqtk_descriptors.impls.descriptor_set.postgres.PostgresDescriptorSet": { + "create_table": false, + "db_host": "/dev/shm", + "db_name": "postgres", + "db_pass": null, + "db_port": 5432, + "db_user": "smqtk", + "element_col": "element", + "multiquery_batch_size": 1000, + "pickle_protocol": -1, + "read_only": false, + "table_name": "descriptor_set_resnet50_pool5", + "uuid_col": "uid" }, - "type": "smqtk.representation.descriptor_set.memory.MemoryDescriptorSet" + "type": "smqtk_descriptors.impls.descriptor_set.postgres.PostgresDescriptorSet" }, "factory_string": "IDMap,Flat", "gpu_id": 0, "idx2uid_kvs": { - "smqtk.representation.key_value.postgres.PostgresKeyValueStore": { + "smqtk_dataprovider.impls.key_value_store.postgres.PostgresKeyValueStore": { "batch_size": 1000, "create_table": true, "db_host": "/dev/shm", @@ -43,10 +53,10 @@ "table_name": "faiss_idx2uid_kvs", "value_col": "value" }, - "type": "smqtk.representation.key_value.postgres.PostgresKeyValueStore" + "type": "smqtk_dataprovider.impls.key_value_store.postgres.PostgresKeyValueStore" }, "uid2idx_kvs": { - "smqtk.representation.key_value.postgres.PostgresKeyValueStore": { + "smqtk_dataprovider.impls.key_value_store.postgres.PostgresKeyValueStore": { "batch_size": 1000, "create_table": true, "db_host": "/dev/shm", @@ -60,21 +70,21 @@ "table_name": "faiss_uid2idx_kvs", "value_col": "value" }, - "type": 
"smqtk.representation.key_value.postgres.PostgresKeyValueStore" + "type": "smqtk_dataprovider.impls.key_value_store.postgres.PostgresKeyValueStore" }, "index_element": { - "smqtk.representation.data_element.file_element.DataFileElement": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement": { "filepath": "models/faiss_index", "readonly": false }, - "type": "smqtk.representation.data_element.file_element.DataFileElement" + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" }, "index_param_element": { - "smqtk.representation.data_element.file_element.DataFileElement": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement": { "filepath": "models/faiss_index_params.json", "readonly": false }, - "type": "smqtk.representation.data_element.file_element.DataFileElement" + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" }, "ivf_nprobe": 64, "metric_type": "l2", @@ -82,6 +92,6 @@ "read_only": false, "use_gpu": false }, - "type": "smqtk.algorithms.nn_index.faiss.FaissNearestNeighborsIndex" + "type": "smqtk_indexing.impls.nn_index.faiss.FaissNearestNeighborsIndex" } } diff --git a/docker/smqtk_iqr_playground/default_confs/psql_table_init.sql b/docker/smqtk_iqr_playground/default_confs/psql_table_init.sql index eb1bc5eb..ac8b8e3e 100644 --- a/docker/smqtk_iqr_playground/default_confs/psql_table_init.sql +++ b/docker/smqtk_iqr_playground/default_confs/psql_table_init.sql @@ -1,9 +1,8 @@ CREATE TABLE IF NOT EXISTS descriptors_resnet50_pool5 ( - type_str TEXT NOT NULL, uid TEXT NOT NULL, vector BYTEA NOT NULL, - PRIMARY KEY (uid, type_str) + PRIMARY KEY (uid) ); CREATE TABLE IF NOT EXISTS descriptor_set_resnet50_pool5 ( uid TEXT NOT NULL, diff --git a/docker/smqtk_iqr_playground/default_confs/runApp.IqrClassifier.json b/docker/smqtk_iqr_playground/default_confs/runApp.IqrClassifier.json new file mode 100644 index 00000000..75cc4112 --- /dev/null +++ b/docker/smqtk_iqr_playground/default_confs/runApp.IqrClassifier.json @@ 
-0,0 +1,6 @@ +{ + "classifier": { + "smqtk_classifier.impls.classify_descriptor_supervised.sklearn_logistic_regression.SkLearnLogisticRegression": {}, + "type": "smqtk_classifier.impls.classify_descriptor_supervised.sklearn_logistic_regression.SkLearnLogisticRegression" + } +} diff --git a/docker/smqtk_iqr_playground/default_confs/runApp.IqrSearchDispatcher.json b/docker/smqtk_iqr_playground/default_confs/runApp.IqrSearchDispatcher.json index 5aaf0842..9760eb9f 100644 --- a/docker/smqtk_iqr_playground/default_confs/runApp.IqrSearchDispatcher.json +++ b/docker/smqtk_iqr_playground/default_confs/runApp.IqrSearchDispatcher.json @@ -5,20 +5,20 @@ "SECRET_KEY": "MySuperUltraSecret" }, "iqr_tabs": { - "Data-set Iqr": { + "LEEDS Butterflies": { "data_set": { - "smqtk.representation.data_set.memory_set.DataMemorySet": { + "smqtk_dataprovider.impls.data_set.memory.DataMemorySet": { "cache_element": { - "smqtk.representation.data_element.file_element.DataFileElement": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement": { "explicit_mimetype": null, "filepath": "models/image_elements.dms_cache", "readonly": true }, - "type": "smqtk.representation.data_element.file_element.DataFileElement" + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" }, "pickle_protocol": -1 }, - "type": "smqtk.representation.data_set.memory_set.DataMemorySet" + "type": "smqtk_dataprovider.impls.data_set.memory.DataMemorySet" }, "iqr_service_url": "localhost:5001", "working_directory": "data/iqr_app_work" diff --git a/docker/smqtk_iqr_playground/run_container.cpu.sh b/docker/smqtk_iqr_playground/run_container.cpu.sh index 020d40ab..df2c6a3a 100755 --- a/docker/smqtk_iqr_playground/run_container.cpu.sh +++ b/docker/smqtk_iqr_playground/run_container.cpu.sh @@ -14,7 +14,7 @@ set -e IQR_CONTAINER=kitware/smqtk/iqr_playground IQR_CONTAINER_VERSION="latest-cpu" # Name for run container instance -CONTAINER_NAME="smqtk-iqr-playground-gpu" 
+CONTAINER_NAME="smqtk-iqr-playground-cpu" IQR_GUI_PORT_PUBLISH=5000 IQR_REST_PORT_PUBLISH=5001 diff --git a/docker/tpl/caffe1/Dockerfile b/docker/tpl/caffe1/Dockerfile index 9720ce0a..b155a818 100644 --- a/docker/tpl/caffe1/Dockerfile +++ b/docker/tpl/caffe1/Dockerfile @@ -5,17 +5,18 @@ FROM nvidia/cuda:${CUDA_DEVEL_IMAGE_TAG} AS base RUN apt-get -y update \ && apt-get -y install \ # Basic dependencies - git=1:2.17.1-1ubuntu0.7 \ - cmake=3.10.2-1ubuntu2.18.04.1 \ - python3-dev=3.6.7-1~18.04 \ - python3-pip=9.0.1-2.3~ubuntu1.18.04.2 \ + git \ + cmake \ + python3-dev \ + python3-pip \ # Caffe Dependencies - libopenblas-dev=0.2.20+ds-4 \ - libboost-all-dev=1.65.1.0ubuntu1 \ - libprotobuf-dev=3.0.0-9.1ubuntu1 protobuf-compiler=3.0.0-9.1ubuntu1 \ - libgoogle-glog-dev=0.3.5-1 \ - libgflags-dev=2.2.1-1 \ - libhdf5-dev=1.10.0-patch1+docs-4 \ + libopenblas-dev \ + libboost-all-dev \ + libprotobuf-dev \ + protobuf-compiler \ + libgoogle-glog-dev \ + libgflags-dev \ + libhdf5-dev \ # Clean products of ``update`` && apt-get clean \ && rm -rf /var/lib/apt/lists/* diff --git a/docker/tpl/faiss/gpu/Dockerfile b/docker/tpl/faiss/gpu/Dockerfile index 62956d45..9a2e9ed8 100644 --- a/docker/tpl/faiss/gpu/Dockerfile +++ b/docker/tpl/faiss/gpu/Dockerfile @@ -37,11 +37,11 @@ FROM nvidia/cuda:$CUDA_DEVEL_IMAGE_TAG AS base RUN apt-get -y update \ && apt-get -y install \ - git=1:2.17.1-1ubuntu0.7 \ - libopenblas-dev=0.2.20+ds-4 \ - python3-dev=3.6.7-1~18.04 \ + git \ + libopenblas-dev \ + python3-dev \ python3-pip \ - swig=3.0.12-1 \ + swig \ && rm -rf /var/lib/apt/lists/* RUN pip3 install numpy==1.18.3 diff --git a/docs/release_notes/pending_release.rst b/docs/release_notes/pending_release.rst index dbc99585..3ef37dc2 100644 --- a/docs/release_notes/pending_release.rst +++ b/docs/release_notes/pending_release.rst @@ -3,6 +3,7 @@ Pending Release Notes Updates / New Features ---------------------- + * Transfer IQR web demo from mono-repo to this repo Fixes ----- diff --git 
a/docs/webservices/iqrdemonstration.rst b/docs/webservices/iqrdemonstration.rst index 7d03bc5d..5e9c8f8f 100644 --- a/docs/webservices/iqrdemonstration.rst +++ b/docs/webservices/iqrdemonstration.rst @@ -49,54 +49,52 @@ These provide the configuration blocks for each of the SMQTK algorithms (:class: For convenience, the same configuration files will be provided to the web applications when they are run later. The SMQTK source repository contains sample configuration files for both the :class:`.IqrSearchDispatcher` and :class:`.IqrService` services. -They can be found at :download:`smqtk_iqr/web/search_app/sample_configs/config.IqrSearchApp.json ` and :download:`smqtk_iqr/web/search_app/sample_configs/config.IqrRestService.json ` respectively. +They can be found at :download:`smqtk_iqr/web/search_app/sample_configs/runApp.IqrSearchDispatcher.json ` and :download:`smqtk_iqr/web/search_app/sample_configs/runApp.IqrService.json ` respectively. The :py:mod:`.iqr_app_model_generation` script is designed to run from an empty directory and will create the sub-directories specified in the above configurations requires when run. Since these configuration files drive both the generation of the models and the web applications themselves, a closer examination is in order. Present in both configuration files are the ``flask_app`` and ``server`` sections which control Flask web server application parameters. -The :file:`config.IqrSearchApp.json` contains the additional section ``mongo`` that configures the MongoDB_ server the UI service uses for storing user session information. +The :file:`runApp.IqrSearchDispatcher.json` contains the additional section ``mongo`` that configures the MongoDB_ server the UI service uses for storing user session information. .. _MongoDB: http://www.mongodb.org -.. literalinclude:: /../smqtk_iqr/web/search_app/sample_configs/config.IqrSearchApp.json +.. 
literalinclude:: /../smqtk_iqr/web/search_app/sample_configs/runApp.IqrSearchDispatcher.json :language: json :linenos: - :emphasize-lines: 15,18,32 + :emphasize-lines: 7,8,23 -The :file:`config.IqrSerchApp.json` configuration has an additional block "iqr_tabs" (line 15). +The :file:`runApp.IqrSearchDispatcher.json` configuration has an additional block "iqr_tabs" (line 7). This defines the different archives, and matching IQR REST service describing that archive, the UI is to provide an interface for. -In our case there will be only one entry, "LEEDS Butterflies" (line 16), representing the archive that we are currently building. -This section describes the data-set container that contains the archive imagery to show in the UI (line 18) as well as the URL to the RESTful service providing the IQR functions for the archive (line 32). +In our case there will be only one entry, "LEEDS Butterflies" (line 8), representing the archive that we are currently building. +This section describes the data-set container that contains the archive imagery to show in the UI (line 10) as well as the URL to the RESTful service providing the IQR functions for the archive (line 23). -In the :file:`config.IqrRestService.json` configuration file (shown below) we see the specification of the algorithm and representation plugins the RESTful IQR service app will use under ``iqr_service -> plugins``. +In the :file:`runApp.IqrService.json` configuration file (shown below) we see the specification of the algorithm and representation plugins the RESTful IQR service app will use under ``iqr_service -> plugins``. Each of these of these blocks is passed to the SMQTK plugin system to create the appropriate instances of the algorithm or data representation in question. -The blocks located at lines 35, 66, and 147 configure the three main algorithms used by the application: the descriptor generator, the nearest neighbors index, and the relevancy index. 
-For example the ``nn_index`` block that starts at line 66 specifies two different implementations: :py:class:`.FlannNearestNeighborsIndex`, which uses the Flann_ library, and :py:class:`.LSHNearestNeighborIndex`, configured to use the Iterative Quantization hash function (`paper`_). -The ``type`` element on line 135 selects the :py:class:`.LSHNearestNeighborIndex` to be used for this configuration. +The blocks located at lines 41, 96, and 174 configure the three main algorithms used by the application: the descriptor generator, the nearest neighbors index, and the relevancy index. +For example, the ``nn_index`` block that starts at line 96 specifies the :py:class:`.FlannNearestNeighborsIndex` implementation, which uses the Flann_ library. -.. _paper: http://www.cs.unc.edu/~lazebnik/publications/cvpr11_small_code.pdf .. _Flann: http://www.cs.ubc.ca/research/flann/ :ref:`(jump past configuration display) <post_iqr_rest_conf>` -.. literalinclude:: /../smqtk_iqr/web/search_app/sample_configs/config.IqrRestService.json +.. literalinclude:: /../smqtk_iqr/web/search_app/sample_configs/runApp.IqrService.json :language: json :linenos: - :emphasize-lines: 35,66,135,147 + :emphasize-lines: 41, 96, 174 .. _post_iqr_rest_conf: Once you have the configuration file set up the way that you like it, you can generate all of the models and indexes required by the application by running the following command:: iqr_app_model_generation \ - -c config.IqrSearchApp.json config.IqrRestService.json \ + -c runApp.IqrSearchDispatcher.json runApp.IqrService.json \ -t "LEEDS Butterflies" /path/to/butterfly/images/*.jpg This will generate descriptors for all of the images in the data set and use them to compute the models and indices we configured, outputting to the files under the ``workdir`` directory in your current directory. Once it completes, you can run the ``IqrSearchApp`` and ``IqrService`` web-apps. 
-You'll need an instance of MongoDB running on the port and host address specified by the ``mongo`` element on line 13 in your ``config.IqrSearchApp.json`` configuration file. +You'll need an instance of MongoDB running on the port and host address specified by the ``mongo`` element on line 27 in your ``runApp.IqrSearchDispatcher.json`` configuration file. You can start a Mongo instance (presuming you have it installed) with:: mongod --dbpath /path/to/mongo/data/dir @@ -104,10 +102,10 @@ You can start a Mongo instance (presuming you have it installed) with:: Once Mongo has been started you can start the ``IqrSearchApp`` and ``IqrService`` services with the following commands in separate terminals:: # Terminal 1 - runApplication -a IqrService -c config.IqrRestService.json + runApplication -a IqrService -c runApp.IqrService.json # Terminal 2 - runApplication -a IqrSearchDispatcher -c config.IqrSearchApp.json + runApplication -a IqrSearchDispatcher -c runApp.IqrSearchDispatcher.json After the services have been started, open a web browser and navigate to ``http://localhost:5000``. Click lick on the ``login`` button in the upper-right and then enter the credentials specified in the default login settings file :file:`source/python/smqtk/web/search_app/modules/login/users.json`. @@ -123,7 +121,7 @@ Click lick on the ``login`` button in the upper-right and then enter the credent *Enter demo credentials* Once you've logged in you will be able to select the ``LEEDS Butterfly`` link. -This link was named by line 16 in the :file:`config.IqrSearchApp.json` configuration file. +This link was named by line 8 in the :file:`runApp.IqrSearchDispatcher.json` configuration file. The ``iqr_tabs`` mapping allows you to configure interfacing with different IQR REST services providing different combinations of the required algorithms -- useful for example, if you want to compare the performance of different descriptors or nearest-neighbor index algorithms. .. 
figure:: figures/iqr-butterflies-link.png @@ -186,31 +184,3 @@ Let us assume the IQR session state was downloaded as ``monarch.IqrState``. The following command will train a classifier leveraging the descriptors labeled by the IQR session that was saved:: iqrTrainClassifier.py -c config.iqrTrainClassifier.json -i monarch.IqrState - -Once you have trained the classifier, you can use the ``classifyFiles`` command to actually classify a set of files. - -.. argparse:: - :ref: smqtk_iqr.utils.classifyFiles.get_cli_parser - :prog: smqtk-classify-files - :nodescription: - :noepilog: - -Again, we need to provide a JSON configuration file for the command. -As with ``iqrTrainClassifier``, there is a sample configuration file in the repository: - -.. literalinclude:: /../smqtk_iqr/web/search_app/sample_configs/config.classifyFiles.json - :language: json - :linenos: - :emphasize-lines: 7-18,25-39 - -Note that the ``classifier`` block on lines 7-18 is the same as the ``classifier`` block in the ``iqrTrainClassfier`` configuration file. -Further, the ``descriptor_generator`` block on lines 25-39 matches the descriptor generator used for the IQR application itself (thus matching the type of descriptor used to train the classifier). - -Once you've set up the configuration file to your liking, you can classify a set of labels with the following command:: - - smqtk-classify-files -c config.classifyFiles.json -l positive /path/to/butterfly/images/*.jpg - -If you leave the ``-l`` argument, the command will tell you the labels available with the classifier (in this case *positive* and *negative*). - -SMQTK's ``smqtk-classify-files`` tool can use this saved -IQR state to classify a set of files (not necessarily the files in your IQR Applicaiton ingest). 
The command has the following form: diff --git a/poetry.lock b/poetry.lock index 4c72446e..c8b2931b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -650,7 +650,7 @@ psql = ["psycopg2-binary (>=2.8.6,<3.0.0)"] [[package]] name = "smqtk-descriptors" -version = "0.18.0" +version = "0.18.1" description = "Algorithms, data structures and utilities around computingdescriptor vectors from data." category = "main" optional = false @@ -697,7 +697,7 @@ sklearn = ["scikit-learn (>=0.24.1,<0.25.0)"] [[package]] name = "smqtk-relevancy" -version = "0.16.0" +version = "0.16.1" description = "SMQTK Relevancy" category = "main" optional = false @@ -961,7 +961,7 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytes [metadata] lock-version = "1.1" python-versions = "^3.6" -content-hash = "0964a852efb368f35ab14a2855faf03c468cfe4c4480f9c5050c721c4acb73a0" +content-hash = "6914298accfeabe44cee55e859a164cf3dd926958aecae58ab6ac8d7e0640827" [metadata.files] alabaster = [ @@ -1138,6 +1138,9 @@ markupsafe = [ {file = "MarkupSafe-2.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d7d807855b419fc2ed3e631034685db6079889a1f01d5d9dac950f764da3dad"}, {file = "MarkupSafe-2.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:add36cb2dbb8b736611303cd3bfcee00afd96471b09cda130da3581cbdc56a6d"}, {file = "MarkupSafe-2.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:168cd0a3642de83558a5153c8bd34f175a9a6e7f6dc6384b9655d2697312a646"}, + {file = "MarkupSafe-2.0.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4dc8f9fb58f7364b63fd9f85013b780ef83c11857ae79f2feda41e270468dd9b"}, + {file = "MarkupSafe-2.0.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:20dca64a3ef2d6e4d5d615a3fd418ad3bde77a47ec8a23d984a12b5b4c74491a"}, + {file = "MarkupSafe-2.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:cdfba22ea2f0029c9261a4bd07e830a8da012291fbe44dc794e488b6c9bb353a"}, {file = "MarkupSafe-2.0.1-cp310-cp310-win32.whl", hash = "sha256:99df47edb6bda1249d3e80fdabb1dab8c08ef3975f69aed437cb69d0a5de1e28"}, {file = "MarkupSafe-2.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:e0f138900af21926a02425cf736db95be9f4af72ba1bb21453432a07f6082134"}, {file = "MarkupSafe-2.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:f9081981fe268bd86831e5c75f7de206ef275defcb82bc70740ae6dc507aee51"}, @@ -1149,6 +1152,9 @@ markupsafe = [ {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf5d821ffabf0ef3533c39c518f3357b171a1651c1ff6827325e4489b0e46c3c"}, {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0d4b31cc67ab36e3392bbf3862cfbadac3db12bdd8b02a2731f509ed5b829724"}, {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:baa1a4e8f868845af802979fcdbf0bb11f94f1cb7ced4c4b8a351bb60d108145"}, + {file = "MarkupSafe-2.0.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:deb993cacb280823246a026e3b2d81c493c53de6acfd5e6bfe31ab3402bb37dd"}, + {file = "MarkupSafe-2.0.1-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:63f3268ba69ace99cab4e3e3b5840b03340efed0948ab8f78d2fd87ee5442a4f"}, + {file = "MarkupSafe-2.0.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:8d206346619592c6200148b01a2142798c989edcb9c896f9ac9722a99d4e77e6"}, {file = "MarkupSafe-2.0.1-cp36-cp36m-win32.whl", hash = "sha256:6c4ca60fa24e85fe25b912b01e62cb969d69a23a5d5867682dd3e80b5b02581d"}, {file = "MarkupSafe-2.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:b2f4bf27480f5e5e8ce285a8c8fd176c0b03e93dcc6646477d4630e83440c6a9"}, {file = "MarkupSafe-2.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0717a7390a68be14b8c793ba258e075c6f4ca819f15edfc2a3a027c823718567"}, @@ -1160,6 +1166,9 @@ markupsafe 
= [ {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e9936f0b261d4df76ad22f8fee3ae83b60d7c3e871292cd42f40b81b70afae85"}, {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:2a7d351cbd8cfeb19ca00de495e224dea7e7d919659c2841bbb7f420ad03e2d6"}, {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:60bf42e36abfaf9aff1f50f52644b336d4f0a3fd6d8a60ca0d054ac9f713a864"}, + {file = "MarkupSafe-2.0.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d6c7ebd4e944c85e2c3421e612a7057a2f48d478d79e61800d81468a8d842207"}, + {file = "MarkupSafe-2.0.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f0567c4dc99f264f49fe27da5f735f414c4e7e7dd850cfd8e69f0862d7c74ea9"}, + {file = "MarkupSafe-2.0.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:89c687013cb1cd489a0f0ac24febe8c7a666e6e221b783e53ac50ebf68e45d86"}, {file = "MarkupSafe-2.0.1-cp37-cp37m-win32.whl", hash = "sha256:a30e67a65b53ea0a5e62fe23682cfe22712e01f453b95233b25502f7c61cb415"}, {file = "MarkupSafe-2.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:611d1ad9a4288cf3e3c16014564df047fe08410e628f89805e475368bd304914"}, {file = "MarkupSafe-2.0.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5bb28c636d87e840583ee3adeb78172efc47c8b26127267f54a9c0ec251d41a9"}, @@ -1172,6 +1181,9 @@ markupsafe = [ {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fcf051089389abe060c9cd7caa212c707e58153afa2c649f00346ce6d260f1b"}, {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5855f8438a7d1d458206a2466bf82b0f104a3724bf96a1c781ab731e4201731a"}, {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = 
"sha256:3dd007d54ee88b46be476e293f48c85048603f5f516008bee124ddd891398ed6"}, + {file = "MarkupSafe-2.0.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:aca6377c0cb8a8253e493c6b451565ac77e98c2951c45f913e0b52facdcff83f"}, + {file = "MarkupSafe-2.0.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:04635854b943835a6ea959e948d19dcd311762c5c0c6e1f0e16ee57022669194"}, + {file = "MarkupSafe-2.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6300b8454aa6930a24b9618fbb54b5a68135092bc666f7b06901f897fa5c2fee"}, {file = "MarkupSafe-2.0.1-cp38-cp38-win32.whl", hash = "sha256:023cb26ec21ece8dc3907c0e8320058b2e0cb3c55cf9564da612bc325bed5e64"}, {file = "MarkupSafe-2.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:984d76483eb32f1bcb536dc27e4ad56bba4baa70be32fa87152832cdd9db0833"}, {file = "MarkupSafe-2.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2ef54abee730b502252bcdf31b10dacb0a416229b72c18b19e24a4509f273d26"}, @@ -1184,6 +1196,9 @@ markupsafe = [ {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c47adbc92fc1bb2b3274c4b3a43ae0e4573d9fbff4f54cd484555edbf030baf1"}, {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:37205cac2a79194e3750b0af2a5720d95f786a55ce7df90c3af697bfa100eaac"}, {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:1f2ade76b9903f39aa442b4aadd2177decb66525062db244b35d71d0ee8599b6"}, + {file = "MarkupSafe-2.0.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4296f2b1ce8c86a6aea78613c34bb1a672ea0e3de9c6ba08a960efe0b0a09047"}, + {file = "MarkupSafe-2.0.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f02365d4e99430a12647f09b6cc8bab61a6564363f313126f775eb4f6ef798e"}, + {file = "MarkupSafe-2.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5b6d930f030f8ed98e3e6c98ffa0652bdb82601e7a016ec2ab5d7ff23baa78d1"}, {file = 
"MarkupSafe-2.0.1-cp39-cp39-win32.whl", hash = "sha256:10f82115e21dc0dfec9ab5c0223652f7197feb168c940f3ef61563fc2d6beb74"}, {file = "MarkupSafe-2.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:693ce3f9e70a6cf7d2fb9e6c9d8b204b6b39897a2c4a1aa65728d5ac97dcc1d8"}, {file = "MarkupSafe-2.0.1.tar.gz", hash = "sha256:594c67807fb16238b30c44bdf74f36c02cdf22d1c8cda91ef8a0ed8dabf5620a"}, @@ -1492,8 +1507,8 @@ smqtk-dataprovider = [ {file = "smqtk_dataprovider-0.16.0.tar.gz", hash = "sha256:27cea4dcfc9ec47ccefef8ec14dc20f0c3840c27ed471f49025f63777cfdfbc3"}, ] smqtk-descriptors = [ - {file = "smqtk_descriptors-0.18.0-py3-none-any.whl", hash = "sha256:febbe866de69911a211ee368b2597454213c36f3e1c815263649011ea490f965"}, - {file = "smqtk_descriptors-0.18.0.tar.gz", hash = "sha256:abd3e6ddefd6cad793f673e3dc98fe0802d8c8911a8b7015cc2cd6f2f5c4066f"}, + {file = "smqtk_descriptors-0.18.1-py3-none-any.whl", hash = "sha256:c1163109ed24907005f0681c0e9d54b6a6ed3a7b8d9c1f23b69aa85ca25a7b8e"}, + {file = "smqtk_descriptors-0.18.1.tar.gz", hash = "sha256:1392a9ecd415c445c629378e378a02fc4aec476da0ab67e80f5cb6f3e8bd94f1"}, ] smqtk-image-io = [ {file = "smqtk_image_io-0.16.2-py3-none-any.whl", hash = "sha256:14b12e81db7c22716657d3066e908199457b9bf56c5fb8248f2feac98b6f8e3d"}, @@ -1504,8 +1519,8 @@ smqtk-indexing = [ {file = "smqtk_indexing-0.17.0.tar.gz", hash = "sha256:e9e690c510fefcfa2dd079e275a7310ec6cdb70792980fdd2f0daba568c2791f"}, ] smqtk-relevancy = [ - {file = "smqtk_relevancy-0.16.0-py3-none-any.whl", hash = "sha256:8886532f4d02e3ed84db9c73c2b56f4a8d11f0e2009f609f54aa3b7bba8a6400"}, - {file = "smqtk_relevancy-0.16.0.tar.gz", hash = "sha256:7addc1607d8c9b13c6783915ac49fa6a781b8ff7b0acb3dfec1a4802cfbd3322"}, + {file = "smqtk_relevancy-0.16.1-py3-none-any.whl", hash = "sha256:99774fcdbffa0bc33c254746496c6ae9a91adbc665b31bb9af59a11503a7f952"}, + {file = "smqtk_relevancy-0.16.1.tar.gz", hash = "sha256:f2d73b09c38c57b828a39397bcbafcb74531514cc508a4d56b280c784f14c2b9"}, ] snowballstemmer = [ {file = 
"snowballstemmer-2.1.0-py2.py3-none-any.whl", hash = "sha256:b51b447bea85f9968c13b650126a888aabd4cb4463fca868ec596826325dedc2"}, diff --git a/pyproject.toml b/pyproject.toml index 6501c347..fb166b60 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,8 +19,8 @@ documentation = "https://smqtk-iqr.readthedocs.io/" [tool.poetry.dependencies] python = "^3.6" smqtk-indexing = ">=0.17.0" -smqtk-relevancy = ">=0.16.0" -smqtk-descriptors = ">=0.18" +smqtk-relevancy = ">=0.16.1" +smqtk-descriptors = ">=0.18.1" smqtk-core = ">=0.18" smqtk-dataprovider = ">=0.16.0" Flask = "^2.0.1" @@ -62,6 +62,9 @@ jedi = "^0.17" runApplication = "smqtk_iqr.utils.runApplication:main" iqrTrainClassifier = "smqtk_iqr.utils.iqrTrainClassifier:main" iqr_app_model_generation = "smqtk_iqr.utils.iqr_app_model_generation:main" +compute_many_descriptors = "smqtk_iqr.utils.compute_many_descriptors:main" +generate_image_transform = "smqtk_iqr.utils.generate_image_transform:main" +smqtk-nn-index-tool = "smqtk_iqr.utils.nn_index_tool:cli_group" ############################################################################### diff --git a/smqtk_iqr/iqr/iqr_session.py b/smqtk_iqr/iqr/iqr_session.py index 28d99ef3..15f158d8 100644 --- a/smqtk_iqr/iqr/iqr_session.py +++ b/smqtk_iqr/iqr/iqr_session.py @@ -522,7 +522,7 @@ def d_set_to_list( d_set: Set[DescriptorElement] ) -> List[Tuple[Hashable, str, List[float]]]: # Convert set of descriptors to list of tuples: - # [..., (uuid, type, vector), ...] + # [..., (uuid, vector), ...] return [(d.uuid(), d.vector().tolist()) for d in d_set] # type: ignore with self: diff --git a/smqtk_iqr/utils/compute_functions.py b/smqtk_iqr/utils/compute_functions.py new file mode 100644 index 00000000..8fb3b52e --- /dev/null +++ b/smqtk_iqr/utils/compute_functions.py @@ -0,0 +1,157 @@ +""" +Collection of higher level functions to perform operational tasks. 
+ +Some day, this module could have a companion module containing the CLI logic +for these functions instead of scripts in ``/bin/scripts``. + +""" +import collections +import logging +from typing import ( + Deque, Hashable, Set, Tuple, Generator, Iterable, Any, Optional +) + +from smqtk_dataprovider import ( + DataElement +) +from smqtk_descriptors import ( + DescriptorElement, DescriptorGenerator, DescriptorSet +) +from smqtk_descriptors.descriptor_element_factory import DescriptorElementFactory + + +def compute_many_descriptors(data_elements: Iterable[DataElement], + descr_generator: DescriptorGenerator, + descr_factory: DescriptorElementFactory, + descr_set: DescriptorSet, + batch_size: Optional[int] = None, + overwrite: bool = False, + procs: Optional[int] = None, + **kwds: Any) -> Iterable[Tuple[DataElement, + DescriptorElement]]: + """ + Compute descriptors for each data element, yielding + (DataElement, DescriptorElement) tuple pairs in the order that they were + input. + + *Note:* **This function currently only operated over images due to the + specific data validity check/filter performed.* + + :param data_elements: Iterable of DataElement instances of files to + work on. + :type data_elements: collections.abc.Iterable[DataElement] + + :param descr_generator: DescriptorGenerator implementation instance + to use to generate descriptor vectors. + :type descr_generator: DescriptorGenerator + + :param descr_factory: DescriptorElement factory to use when producing + descriptor vectors. + :type descr_factory: DescriptorElementFactory + + :param descr_set: DescriptorSet instance to add generated descriptors + to. When given a non-zero batch size, we add descriptors to the given + set in batches of that size. When a batch size is not given, we add + all generated descriptors to the set after they have been generated. + :type descr_set: DescriptorSet + + :param batch_size: Optional number of elements to asynchronously compute + at a time. 
This is useful when it is desired for this function to yield + results before all descriptors have been computed, yet still take + advantage of any batch asynchronous computation optimizations a + particular DescriptorGenerator implementation may have. If this is 0 or + None (false-evaluating), this function blocks until all descriptors + have been generated. + :type batch_size: None | int | long + + :param overwrite: If descriptors from a particular generator already exist + for particular data, re-compute the descriptor for that data and set + into the generated DescriptorElement. + :type overwrite: bool + + :param procs: Deprecated parameter. Parallelism in batch computation is now + controlled on a per implementation basis. + :type procs: None | int + + :param kwds: Deprecated parameter. Extra keyword arguments are no longer + passed down to the batch generation method on the descriptor generator. + + :return: Generator that yields (DataElement, DescriptorElement) for each + data element given, in the order they were provided. 
+ :rtype: collections.abc.Iterable[(DataElement, + DescriptorElement)] + + """ + log = logging.getLogger(__name__) + + # Capture of generated elements in order of generation + de_deque: Deque[DataElement] = collections.deque() + + # Counts for logging + total = [0] + unique: Set[Hashable] = set() + + def iter_capture_elements() -> Generator: + for d in data_elements: + de_deque.append(d) + yield d + + # TODO: Re-write this method to more simply tee the input data elem iter + # and yield with paired generated descriptors:: + # data_iter1, data_iter2 = itertools.tee(data_elements, 2) + # descr_iter = descr_generator.generate_elements( + # data_iter1, descr_factory, overwrite + # ) + # return zip(data_iter2, descr_iter) + + if batch_size: + log.debug("Computing in batches of size %d", batch_size) + + def iterate_batch_results() -> Generator: + descr_list_ = list(descr_generator.generate_elements( + de_deque, descr_factory, overwrite + )) + total[0] += len(de_deque) + unique.update(d.uuid() for d in descr_list_) + log.debug("-- Processed %d so far (%d total data elements " + "input)", len(unique), total[0]) + log.debug("-- adding to set") + descr_set.add_many_descriptors(descr_list_) + log.debug("-- yielding generated descriptor elements") + for data_, descr_ in zip(de_deque, descr_list_): + yield data_, descr_ + de_deque.clear() + + batch_i = 0 + + for _ in iter_capture_elements(): + # elements captured ``de_deque`` in iter_capture_elements + + if len(de_deque) == batch_size: + batch_i += 1 + log.debug("Computing batch {}".format(batch_i)) + for data_e, descr_e in iterate_batch_results(): + yield data_e, descr_e + + if len(de_deque): + log.debug("Computing final batch of size %d", + len(de_deque)) + for data_e, descr_e in iterate_batch_results(): + yield data_e, descr_e + + else: + log.debug("Using single generate call") + + # Just do everything in one call + log.debug("Computing descriptors") + descr_list = list(descr_generator.generate_elements( + 
iter_capture_elements(), descr_factory, + overwrite + )) + + log.debug("Adding to set") + descr_set.add_many_descriptors(descr_list) + + log.debug("yielding generated elements") + for data, descr in zip(de_deque, descr_list): + yield data, descr diff --git a/smqtk_iqr/utils/compute_many_descriptors.py b/smqtk_iqr/utils/compute_many_descriptors.py new file mode 100644 index 00000000..69c1bc2d --- /dev/null +++ b/smqtk_iqr/utils/compute_many_descriptors.py @@ -0,0 +1,246 @@ +""" +Descriptor computation helper utility. Checks data content type with respect +to the configured descriptor generator to skip content that does not match +the accepted types. Optionally, we can additionally filter out image content +whose image bytes we cannot load via ``PIL.Image.open``. +""" +import collections +import csv +import logging +import os +import argparse +from typing import cast, Deque, Optional, Dict, Union, Generator + +from smqtk_descriptors import DescriptorGenerator, DescriptorSet +from smqtk_descriptors.descriptor_element_factory import DescriptorElementFactory +from smqtk_descriptors.utils import parallel +from smqtk_iqr.utils.compute_functions import compute_many_descriptors + +from smqtk_dataprovider import DataSet +from smqtk_dataprovider.impls.data_element.file import DataFileElement + +from smqtk_iqr.utils.cli import ( + utility_main_helper, + ProgressReporter, + basic_cli_parser, +) +from smqtk_core.configuration import ( + from_config_dict, + make_default_config, +) + +from smqtk_image_io.utils.image import is_valid_element + + +def default_config() -> Dict: + return { + "descriptor_generator": + make_default_config(DescriptorGenerator.get_impls()), + "descriptor_factory": DescriptorElementFactory.get_default_config(), + "descriptor_set": + make_default_config(DescriptorSet.get_impls()), + "optional_data_set": + make_default_config(DataSet.get_impls()) + } + + +def run_file_list(c: dict, filelist_filepath: str, checkpoint_filepath: str, + batch_size: Optional[int] 
= None, check_image: bool = False) -> None: + """ + Top level function handling configuration and inputs/outputs. + + :param c: Configuration dictionary (JSON) + :type c: dict + + :param filelist_filepath: Path to a text file that lists paths to data + files, separated by new lines. + :type filelist_filepath: str + + :param checkpoint_filepath: Output file to which we write input filepath to + SHA1 (UUID) relationships. + :type checkpoint_filepath: + + :param batch_size: Optional batch size (None default) of data elements to + process / descriptors to compute at a time. This causes files and + stores to be written to incrementally during processing instead of + one single batch transaction at a time. + :type batch_size: + + :param check_image: Enable checking image loading from file before queueing + that file for processing. If the check fails, the file is skipped + instead of a halting exception being raised. + :type check_image: bool + + """ + log = logging.getLogger(__name__) + + file_paths = [line.strip() for line in open(filelist_filepath)] + + log.info("Making descriptor factory") + factory = DescriptorElementFactory.from_config(c['descriptor_factory']) + + log.info("Making descriptor index") + descriptor_set = cast( + DescriptorSet, + from_config_dict(c['descriptor_set'], + DescriptorSet.get_impls()) + ) + + # ``data_set`` added to within the ``iter_valid_elements`` function. 
+ data_set: Optional[DataSet] = None + if c['optional_data_set']['type'] is None: + log.info("Not saving loaded data elements to data set") + else: + log.info("Initializing data set to append to") + data_set = cast( + DataSet, + from_config_dict(c['optional_data_set'], DataSet.get_impls()) + ) + + log.info("Making descriptor generator '%s'", + c['descriptor_generator']['type']) + generator = cast( + DescriptorGenerator, + from_config_dict(c['descriptor_generator'], + DescriptorGenerator.get_impls()) + ) + + def iter_valid_elements() -> Generator: + def is_valid(file_path: str) -> Union[DataFileElement, bool]: + e = DataFileElement(file_path) + + if is_valid_element( + e, valid_content_types=generator.valid_content_types(), + check_image=check_image): + return e + else: + return False + + data_elements: Deque[DataFileElement] = collections.deque() + valid_files_filter = parallel.parallel_map(is_valid, + file_paths, + name="check-file-type", + use_multiprocessing=True) + for dfe in valid_files_filter: + if dfe: + assert isinstance(dfe, DataFileElement) + yield dfe + if data_set is not None: + data_elements.append(dfe) + if batch_size and len(data_elements) == batch_size: + log.debug("Adding data element batch to set (size: %d)", + len(data_elements)) + data_set.add_data(*data_elements) + data_elements.clear() + # elements only collected if we have a data-set configured, so add any + # still in the deque to the set + if data_set is not None and data_elements: + log.debug("Adding data elements to set (size: %d", + len(data_elements)) + data_set.add_data(*data_elements) + + log.info("Computing descriptors") + m = compute_many_descriptors(iter_valid_elements(), + generator, + factory, + descriptor_set, + batch_size=batch_size, + ) + + # Recording computed file paths and associated file UUIDs (SHA1) + cf = open(checkpoint_filepath, 'w') + cf_writer = csv.writer(cf) + try: + pr = ProgressReporter(log.debug, 1.0).start() + for de, descr in m: + # We know that we are using 
DataFileElements going into the + # compute_many_descriptors, so we can assume that's what comes out + # of it as well. + # noinspection PyProtectedMember + cf_writer.writerow([de._filepath, descr.uuid()]) # type: ignore + pr.increment_report() + pr.report() + finally: + del cf_writer + cf.close() + + log.info("Done") + + +def cli_parser() -> argparse.ArgumentParser: + parser = basic_cli_parser(__doc__) + + parser.add_argument('-b', '--batch-size', + type=int, default=0, metavar='INT', + help="Number of files to batch together into a single " + "compute async call. This defines the " + "granularity of the checkpoint file in regards " + "to computation completed. If given 0, we do not " + "batch and will perform a single " + "``compute_async`` call on the configured " + "generator. Default batch size is 0.") + parser.add_argument('--check-image', + default=False, action='store_true', + help="If se should check image pixel loading before " + "queueing an input image for processing. If we " + "cannot load the image pixels via " + "``PIL.Image.open``, the input image is not " + "queued for processing") + + # Non-config required arguments + g_required = parser.add_argument_group("Required Arguments") + g_required.add_argument('-f', '--file-list', + type=str, default=None, metavar='PATH', + help="Path to a file that lists data file paths. 
" + "Paths in this file may be relative, but " + "will at some point be coerced into absolute " + "paths based on the current working " + "directory.") + g_required.add_argument('-p', '--completed-files', + default=None, metavar='PATH', + help='Path to a file into which we add CSV ' + 'format lines detailing filepaths that have ' + 'been computed from the file-list provided, ' + 'as the UUID for that data (currently the ' + 'SHA1 checksum of the data).') + + return parser + + +def main() -> None: + args = cli_parser().parse_args() + config = utility_main_helper(default_config(), args) + log = logging.getLogger(__name__) + + completed_files_fp = args.completed_files + filelist_fp = args.file_list + batch_size = args.batch_size + check_image = args.check_image + + # Input checking + if not filelist_fp: + log.error("No file-list file specified") + exit(102) + elif not os.path.isfile(filelist_fp): + log.error("Invalid file list path: %s", filelist_fp) + exit(103) + + if not completed_files_fp: + log.error("No complete files output specified") + exit(104) + + if batch_size < 0: + log.error("Batch size must be >= 0.") + exit(105) + + run_file_list( + config, + filelist_fp, + completed_files_fp, + batch_size, + check_image + ) + + +if __name__ == '__main__': + main() diff --git a/smqtk_iqr/utils/generate_image_transform.py b/smqtk_iqr/utils/generate_image_transform.py new file mode 100644 index 00000000..6a347c6a --- /dev/null +++ b/smqtk_iqr/utils/generate_image_transform.py @@ -0,0 +1,208 @@ +""" +Utility for transforming an input image in various standardized ways, saving +out those transformed images with standard namings. Transformations used are +configurable via a configuration file (JSON). + +Configuration details: +{ + "crop": { + + "center_levels": null | int + # If greater than 0, crop out one or more increasing smaller images + # from a base image by cutting off increasingly larger portions of + # the outside perimeter. 
Cropped image dimensions determined by the + # dimensions of the base image and the number of crops to generate. + + "quadrant_pyramid_levels": null | int + # If greater than 0, generate a number of crops based on a number of + # quad-tree partitions made based on the given number of levels. + # Partitions for all levels less than the level provides are also + # made. + + "tile_shape": null | [width, height] + # If not null and is a list of two integers, crop out tile windows + # from the base image that have the width and height specified. + # If the image width or height is not evenly divisible by the tile + # width or height, respectively, then the crop out as many tiles as + # neatly fit starting from the axis origin. The remaining pixels are + # ignored. + + "tile_stride": null | [x, y] + # If not null and is a list of two integers, crop out sub-images of + # the above width and height (if given) with this stride. When not + # this is not provided, the default stride is the same as the tile + # width and height. + }, + + "brightness_levels": null | int + # Generate a number of images with different brightness levels using + # linear interpolation to choose levels between 0 (black) and 1 + # (original image) as well as between 1 and 2. + # Results will not include contrast level 0, 1 or 2 images. + + "contrast_levels": null | int + # Generate a number of images with different contrast levels using + # linear interpolation to choose levels between 0 (black) and 1 + # (original image) as well as between 1 and 2. + # Results will not include contrast level 0, 1 or 2 images. 
+ +} + +""" + +import logging +import os +import argparse +from typing import Dict, List, Optional, Tuple + +import PIL.Image + +import smqtk_iqr.utils.cli +import smqtk_dataprovider.utils.file +import smqtk_descriptors.utils.parallel + +from smqtk_image_io.utils.image import ( + image_crop_center_levels, image_crop_quadrant_pyramid, image_crop_tiles, + image_brightness_intervals, image_contrast_intervals +) + + +def generate_image_transformations(image_path: str, + crop_center_n: Optional[int], + crop_quadrant_levels: Optional[int], + crop_tile_shape: Optional[Tuple[int, int]], + crop_tile_stride: Optional[Tuple[int, int]], + brightness_intervals: Optional[int], + contrast_intervals: Optional[int], + output_dir: str = None, + output_ext: str = '.png') -> None: + """ + Transform an input image into different crops or other transforms, + outputting results to the given output directory without overwriting or + otherwise changing the input image. + + By default, if not told otherwise, we will write output images in the same + directory as the source image. Output images share a core filename as the + source image, but with extra suffix syntax to differentiate produced images + from the original. Output images will share the same image extension as the + source image. + """ + log = logging.getLogger(__name__) + + abs_path = os.path.abspath(image_path) + output_dir = output_dir or os.path.dirname(abs_path) + smqtk_dataprovider.utils.file.safe_create_dir(output_dir) + p_base = os.path.splitext(os.path.basename(abs_path))[0] + p_ext = output_ext + p_base = os.path.join(output_dir, p_base) + image = PIL.Image.open(image_path).convert('RGB') + + def save_image(img: PIL.Image.Image, suffixes: List[str]) -> None: + """ + Save an image based on source image basename and an iterable of suffix + parts that will be separated by periods. 
+ """ + fn = '.'.join([p_base] + list(suffixes)) + p_ext + log.debug("Saving: %s", fn) + img.save(fn) + + if crop_center_n: + log.info("Computing center crops") + tag = "crop_centers" + for level, c in image_crop_center_levels(image, crop_center_n): + save_image(c, [tag, str(level)]) + + if crop_quadrant_levels: + log.info("Computing quadrant crops") + tag = "crop_quadrants" + for level, (i, j), c in image_crop_quadrant_pyramid(image, crop_quadrant_levels): + save_image(c, [tag, str(level), "q_{:d}_{:d}".format(i, j)]) + + if crop_tile_shape and crop_tile_shape[0] > 0 and crop_tile_shape[1] > 0: + tag = "crop_tiles" + t_width = crop_tile_shape[0] + t_height = crop_tile_shape[1] + log.info("Cropping %dx%d pixel tiles from images with stride %s" + % (t_width, t_height, crop_tile_stride)) + # List needed to iterate generator. + list(smqtk_descriptors.utils.parallel.parallel_map( + lambda x, y, ii: + save_image(ii, [tag, + '%dx%d+%d+%d' % (t_width, t_height, x, y)]), + image_crop_tiles(image, t_width, t_height, crop_tile_stride) + )) + + if brightness_intervals: + log.info("Computing brightness variants") + for b, i in image_brightness_intervals(image, brightness_intervals): + save_image(i, ['brightness', str(b)]) + + if contrast_intervals: + log.info("Computing contrast variants") + for c, i in image_contrast_intervals(image, contrast_intervals): + save_image(i, ['contrast', str(c)]) + + +def default_config() -> Dict: + return { + "crop": { + # 0 means disabled + "center_levels": None, + # 0 means disabled, 2 meaning 2x2 and 4x4 + "quadrant_pyramid_levels": None, + # Tile shape or None for no tiling + "tile_shape": None, + # The stride of tiles top crop out. This defaults to the height and + # width of the tiles to create non-overlapping chips. 
+ "tile_stride": None, + }, + "brightness_levels": None, + "contrast_levels": None, + } + + +def cli_parser() -> argparse.ArgumentParser: + parser = smqtk_iqr.utils.cli.basic_cli_parser(__doc__) + + g_io = parser.add_argument_group("Input/Output") + g_io.add_argument("-i", "--image", + help="Image to produce transformations for.") + g_io.add_argument("-o", "--output", + help="Directory to output generated images to. By " + "default, if not told otherwise, we will write " + "output images in the same directory as the source " + "image. Output images share a core filename as the " + "source image, but with extra suffix syntax to " + "differentiate produced images from the original. " + "Output images will share the same image extension " + "as the source image.") + return parser + + +def main() -> None: + args = cli_parser().parse_args() + config = smqtk_iqr.utils.cli.utility_main_helper(default_config(), args) + input_image_path = args.image + output_dir = args.output + + if input_image_path is None: + raise ValueError("No input image path given") + + crop_center_levels = config['crop']['center_levels'] + crop_quad_levels = config['crop']['quadrant_pyramid_levels'] + crop_tile_shape = config['crop']['tile_shape'] + crop_tile_stride = config['crop']['tile_stride'] + b_levels = config['brightness_levels'] + c_levels = config['contrast_levels'] + + generate_image_transformations( + input_image_path, + crop_center_levels, crop_quad_levels, + crop_tile_shape, crop_tile_stride, + b_levels, c_levels, + output_dir + ) + + +if __name__ == '__main__': + main() diff --git a/smqtk_iqr/utils/nn_index_tool.py b/smqtk_iqr/utils/nn_index_tool.py new file mode 100644 index 00000000..4ff097ae --- /dev/null +++ b/smqtk_iqr/utils/nn_index_tool.py @@ -0,0 +1,100 @@ +""" +TODO: Adapt this into SMQTK if/when generalized, click usage in SMQTK. 
+""" +import click +import logging +from typing import Dict + +from smqtk_indexing import NearestNeighborsIndex +from smqtk_descriptors import DescriptorSet +from smqtk_iqr.utils.cli import initialize_logging, load_config, output_config +from smqtk_core.configuration import from_config_dict, make_default_config + +LOG = logging.getLogger(__name__) + + +def build_default_config() -> Dict: + return { + 'descriptor_set': make_default_config(DescriptorSet.get_impls()), + 'neighbor_index': make_default_config(NearestNeighborsIndex.get_impls()), + } + + +@click.group(context_settings={'help_option_names': ['-h', '--help']}) +@click.option('-v', '--verbose', + default=0, count=True, + help="This option must be provided before any command. " + "Provide once for additional informational logging. " + "Provide a second time for additional debug logging.") +def cli_group(verbose: int) -> None: + """ + Tool for building a nearest neighbors index from an input descriptor set. + + The index is built, not updated. If the index configured must not be + read-only and any persisted index, if already existing, may be overwritten. + """ + llevel = logging.WARN - (10 * verbose) + # Attempting just setting the root logger. If this becomes too verbose, + # initially relevant namespaces manually. + initialize_logging(logging.getLogger(), llevel) + LOG.info("Displaying informational logging.") + LOG.debug("Displaying debug logging.") + + +@click.command('config') +@click.argument('output_filepath') +@click.option('-c', '--input-config', + type=click.Path(exists=True, dir_okay=False), + default=None, + help='Optional existing configuration file to update with ' + 'defaults.') +@click.option('-o', '--overwrite', + default=False, is_flag=True, + help='If the given filepath should be overwritten if it ' + 'already exists.') +def cli_config(output_filepath: str, input_config: str, overwrite: bool) -> None: + """ + Generate a default or template JSON configuration file for this tool. 
+ """ + if input_config is not None: + c_dict, success = load_config(input_config, build_default_config()) + if not success: + raise RuntimeError( + "Did not load input configuration file '{}' " + "successfully.") + else: + c_dict = build_default_config() + output_config(output_filepath, c_dict, overwrite=overwrite) + + +@click.command('build') +@click.argument('config_filepath', + type=click.Path(exists=True, dir_okay=False)) +def cli_build(config_filepath: str) -> None: + """ + Build a new nearest-neighbors index from the configured descriptor set's + contents. + """ + config_dict, success = load_config(config_filepath, + defaults=build_default_config()) + # Defaults are insufficient so we assert that the configuration file was + # (successfully) loaded. + if not success: + raise RuntimeError("Failed to load configuration file.") + + descr_set = from_config_dict(config_dict['descriptor_set'], + DescriptorSet.get_impls()) + + nn_index = from_config_dict(config_dict['neighbor_index'], + NearestNeighborsIndex.get_impls()) + + # TODO: reduced amount used for building ("training") and remainder used + # for update. + nn_index.build_index(descr_set) + + +# Non-destructive update command? 
+ + +cli_group.add_command(cli_config) +cli_group.add_command(cli_build) diff --git a/smqtk_iqr/utils/runApplication.py b/smqtk_iqr/utils/runApplication.py index cba203a3..aab6186a 100644 --- a/smqtk_iqr/utils/runApplication.py +++ b/smqtk_iqr/utils/runApplication.py @@ -139,14 +139,14 @@ def main() -> None: # If the application class's logger does not already report as having INFO/ # DEBUG level logging (due to being a child of an above handled namespace) # then set the app namespace's logger level appropriately - app_class_logger_level = logging.getLogger(app_class.name).getEffectiveLevel() + app_class_logger_level = logging.getLogger(app_class.__name__).getEffectiveLevel() app_class_target_level = logging.INFO - (10 * debug_app) if app_class_logger_level > app_class_target_level: level_name = \ "DEBUG" if app_class_target_level == logging.DEBUG else "INFO" log.info("Enabling '{}' logging for '{}' logger namespace." - .format(level_name, logging.getLogger(app_class.name).name)) - logging.getLogger(app_class.name).setLevel(logging.INFO - (10 * debug_app)) + .format(level_name, logging.getLogger(app_class.__name__).name)) + logging.getLogger(app_class.__name__).setLevel(logging.INFO - (10 * debug_app)) config = cli.utility_main_helper(app_class.get_default_config(), args, skip_logging_init=True) diff --git a/smqtk_iqr/web/search_app/modules/iqr/iqr_search.py b/smqtk_iqr/web/search_app/modules/iqr/iqr_search.py index cbe3ad85..3e680ac9 100644 --- a/smqtk_iqr/web/search_app/modules/iqr/iqr_search.py +++ b/smqtk_iqr/web/search_app/modules/iqr/iqr_search.py @@ -132,7 +132,7 @@ def __init__( """ super(IqrSearch, self).__init__( - import_name=__name__, + import_name=__name__.replace('.', '-'), static_folder=os.path.join(SCRIPT_DIR, "static"), template_folder=os.path.join(SCRIPT_DIR, "templates"), ) @@ -157,7 +157,8 @@ def __init__( # Uploader Sub-Module self.upload_work_dir = os.path.join(self.work_dir, "uploads") - self.mod_upload = FileUploadMod('%s_uploader' % 
self.name, parent_app, + self.mod_upload = FileUploadMod('%s_uploader' % self.name, + parent_app, self.upload_work_dir, url_prefix='/uploader') self.register_blueprint(self.mod_upload) diff --git a/smqtk_iqr/web/search_app/sample_configs/config.iqrTrainClassifier.json b/smqtk_iqr/web/search_app/sample_configs/config.iqrTrainClassifier.json index 6a017cff..0b43a972 100644 --- a/smqtk_iqr/web/search_app/sample_configs/config.iqrTrainClassifier.json +++ b/smqtk_iqr/web/search_app/sample_configs/config.iqrTrainClassifier.json @@ -1,16 +1,7 @@ { "classifier": { - "smqtk_classifier.impls.classify_descriptor_supervised.libsvm.LibSvmClassifier": { - "normalize": 2, - "svm_label_map_uri": "workdir/iqr_classifier/label_map", - "svm_model_uri": "workdir/iqr_classifier/model", - "train_params": { - "-b": 1, - "-c": 2, - "-s": 0, - "-t": 0 - } + "smqtk_classifier.impls.classify_descriptor_supervised.sklearn_logistic_regression.SkLearnLogisticRegression": { }, - "type": "smqtk_classifier.impls.classify_descriptor_supervised.libsvm.LibSvmClassifier" + "type": "smqtk_classifier.impls.classify_descriptor_supervised.sklearn_logistic_regression.SkLearnLogisticRegression" } } diff --git a/smqtk_iqr/web/search_app/sample_configs/runApp.IqrSearchDispatcher.json b/smqtk_iqr/web/search_app/sample_configs/runApp.IqrSearchDispatcher.json new file mode 100644 index 00000000..9760eb9f --- /dev/null +++ b/smqtk_iqr/web/search_app/sample_configs/runApp.IqrSearchDispatcher.json @@ -0,0 +1,35 @@ +{ + "flask_app": { + "BASIC_AUTH_PASSWORD": "demo", + "BASIC_AUTH_USERNAME": "demo", + "SECRET_KEY": "MySuperUltraSecret" + }, + "iqr_tabs": { + "LEEDS Butterflies": { + "data_set": { + "smqtk_dataprovider.impls.data_set.memory.DataMemorySet": { + "cache_element": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement": { + "explicit_mimetype": null, + "filepath": "models/image_elements.dms_cache", + "readonly": true + }, + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" + 
}, + "pickle_protocol": -1 + }, + "type": "smqtk_dataprovider.impls.data_set.memory.DataMemorySet" + }, + "iqr_service_url": "localhost:5001", + "working_directory": "data/iqr_app_work" + } + }, + "mongo": { + "database": "smqtk", + "server": "127.0.0.1:27017" + }, + "server": { + "host": "0.0.0.0", + "port": 5000 + } +} diff --git a/smqtk_iqr/web/search_app/sample_configs/runApp.IqrService.json b/smqtk_iqr/web/search_app/sample_configs/runApp.IqrService.json new file mode 100644 index 00000000..c394df2a --- /dev/null +++ b/smqtk_iqr/web/search_app/sample_configs/runApp.IqrService.json @@ -0,0 +1,205 @@ +{ + "flask_app": { + "BASIC_AUTH_PASSWORD": "demo", + "BASIC_AUTH_USERNAME": "demo", + "SECRET_KEY": "MySuperUltraSecret" + }, + "iqr_service": { + "plugin_notes": { + "classification_factory": "Selection of the backend in which classifications are stored. The in-memory version is recommended because normal caching mechanisms will not account for the variety of classifiers that can potentially be created via this utility.", + "classifier_config": "The configuration to use for training and using classifiers for the /classifier endpoint. When configuring a classifier for use, don't fill out model persistence values as many classifiers may be created and thrown away during this service's operation.", + "descriptor_factory": "What descriptor element factory to use when asked to compute a descriptor on data.", + "descriptor_generator": "Descriptor generation algorithm to use when requested to describe data.", + "descriptor_set": "This is the index from which given positive and negative example descriptors are retrieved from. Not used for nearest neighbor querying. 
This index must contain all descriptors that could possibly be used as positive/negative examples and updated accordingly.", + "neighbor_index": "This is the neighbor index to pull initial near-positive descriptors from.", + "relevancy_index_config": "The relevancy index config provided should not have persistent storage configured as it will be used in such a way that instances are created, built and destroyed often." + }, + "plugins": { + "classification_factory": { + "smqtk_classifier.impls.classification_element.memory.MemoryClassificationElement": {}, + "type": "smqtk_classifier.impls.classification_element.memory.MemoryClassificationElement" + }, + "classifier_config": { + "smqtk_classifier.impls.classify_descriptor_supervised.sklearn_logistic_regression.SkLearnLogisticRegression": { + }, + "type": "smqtk_classifier.impls.classify_descriptor_supervised.sklearn_logistic_regression.SkLearnLogisticRegression" + }, + "descriptor_factory": { + "smqtk_descriptors.impls.descriptor_element.postgres.PostgresDescriptorElement": { + "binary_col": "vector", + "create_table": false, + "db_host": "/dev/shm", + "db_name": "postgres", + "db_pass": null, + "db_port": 5432, + "db_user": "smqtk", + "table_name": "descriptors_resnet50_pool5", + "uuid_col": "uid" + }, + "type": "smqtk_descriptors.impls.descriptor_element.postgres.PostgresDescriptorElement" + }, + "descriptor_generator": { + "smqtk_descriptors.impls.descriptor_generator.caffe1.CaffeDescriptorGenerator": { + "batch_size": 10, + "data_layer": "data", + "gpu_device_id": 0, + "image_mean": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement" : { + "explicit_mimetype": null, + "filepath": "/home/smqtk/caffe/msra_resnet/ResNet_mean.binaryproto", + "readonly": true + }, + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" + }, + "input_scale": null, + "load_truncated_images": true, + "network_is_bgr": true, + "network_model": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement" : 
{ + "explicit_mimetype": null, + "filepath": "/home/smqtk/caffe/msra_resnet/ResNet-50-model.caffemodel", + "readonly": true + }, + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" + }, + "network_prototxt": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement" : { + "explicit_mimetype": null, + "filepath": "/home/smqtk/caffe/msra_resnet/ResNet-50-deploy.prototxt", + "readonly": true + }, + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" + }, + "pixel_rescale": null, + "return_layer": "pool5", + "use_gpu": false + }, + "type": "smqtk_descriptors.impls.descriptor_generator.caffe1.CaffeDescriptorGenerator" + }, + "descriptor_set": { + "smqtk_descriptors.impls.descriptor_set.postgres.PostgresDescriptorSet": { + "create_table": false, + "db_host": "/dev/shm", + "db_name": "postgres", + "db_pass": null, + "db_port": 5432, + "db_user": "smqtk", + "element_col": "element", + "multiquery_batch_size": 1000, + "pickle_protocol": -1, + "read_only": false, + "table_name": "descriptor_set_resnet50_pool5", + "uuid_col": "uid" + }, + "type": "smqtk_descriptors.impls.descriptor_set.postgres.PostgresDescriptorSet" + }, + "neighbor_index": { + "smqtk_indexing.impls.nn_index.faiss.FaissNearestNeighborsIndex": { + "descriptor_set": { + "__note__": "Using real descriptor index this time", + "smqtk_descriptors.impls.descriptor_set.postgres.PostgresDescriptorSet": { + "create_table": false, + "db_host": "/dev/shm", + "db_name": "postgres", + "db_pass": null, + "db_port": 5432, + "db_user": "smqtk", + "element_col": "element", + "multiquery_batch_size": 1000, + "pickle_protocol": -1, + "read_only": false, + "table_name": "descriptor_set_resnet50_pool5", + "uuid_col": "uid" + }, + "type": "smqtk_descriptors.impls.descriptor_set.postgres.PostgresDescriptorSet" + }, + "factory_string": "IDMap,Flat", + "gpu_id": 0, + "idx2uid_kvs": { + "smqtk_dataprovider.impls.key_value_store.postgres.PostgresKeyValueStore": { + "batch_size": 1000, + 
"create_table": true, + "db_host": "/dev/shm", + "db_name": "postgres", + "db_pass": null, + "db_port": 5432, + "db_user": "smqtk", + "key_col": "key", + "pickle_protocol": -1, + "read_only": false, + "table_name": "faiss_idx2uid_kvs", + "value_col": "value" + }, + "type": "smqtk_dataprovider.impls.key_value_store.postgres.PostgresKeyValueStore" + }, + "uid2idx_kvs": { + "smqtk_dataprovider.impls.key_value_store.postgres.PostgresKeyValueStore": { + "batch_size": 1000, + "create_table": true, + "db_host": "/dev/shm", + "db_name": "postgres", + "db_pass": null, + "db_port": 5432, + "db_user": "smqtk", + "key_col": "key", + "pickle_protocol": -1, + "read_only": false, + "table_name": "faiss_uid2idx_kvs", + "value_col": "value" + }, + "type": "smqtk_dataprovider.impls.key_value_store.postgres.PostgresKeyValueStore" + }, + "index_element": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement" : { + "filepath": "models/faiss_index", + "readonly": false + }, + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" + }, + "index_param_element": { + "smqtk_dataprovider.impls.data_element.file.DataFileElement" : { + "filepath": "models/faiss_index_params.json", + "readonly": false + }, + "type": "smqtk_dataprovider.impls.data_element.file.DataFileElement" + }, + "ivf_nprobe": 64, + "metric_type": "l2", + "random_seed": 0, + "read_only": false, + "use_gpu": false + }, + "type": "smqtk_indexing.impls.nn_index.faiss.FaissNearestNeighborsIndex" + }, + "rank_relevancy_with_feedback": { + "smqtk_relevancy.impls.rank_relevancy.margin_sampling.RankRelevancyWithMarginSampledFeedback": { + "rank_relevancy": { + "smqtk_relevancy.impls.rank_relevancy.wrap_classifier.RankRelevancyWithSupervisedClassifier": { + "classifier_inst": { + "smqtk_classifier.impls.classify_descriptor_supervised.sklearn_logistic_regression.SkLearnLogisticRegression": { + }, + "type": 
# File: tests/utils/test_compute_many_descriptors.py
import sys
import unittest.mock as mock

from smqtk_iqr.utils.compute_many_descriptors import cli_parser, default_config
from smqtk_iqr.utils.cli import utility_main_helper


class TestComputeDescriptors (object):
    """
    Unit tests pertaining to the compute_many_descriptors utility script
    """

    def test_parse_args(self) -> None:
        """
        Test parsing command line args for the compute_many_descriptors
        application.

        Checks that the parsed namespace carries the file list, batch size
        and check-image flag given on the command line, and that the merged
        configuration has non-None ``descriptor_generator``,
        ``descriptor_set`` and ``descriptor_factory`` entries.
        """
        # NOTE(review): the ``-c`` path is relative; presumably the test must
        # be run from the repository root for it to resolve -- confirm.
        argv = ['compute_many_descriptors', '-v', '-b', '20', '--check-image', '-c',
                'docker/smqtk_iqr_playground/default_confs/cpu/compute_many_descriptors.json',
                '-f', 'list.txt', '-p', 'test.csv']

        # Swap ``sys.argv`` only for the duration of the parse/config step.
        # Assigning to ``sys.argv`` directly would leave the fake command
        # line in place for every test that runs afterwards in this process.
        with mock.patch.object(sys, 'argv', argv):
            args = cli_parser().parse_args()
            config = utility_main_helper(default_config(), args)

        assert args.file_list == 'list.txt'
        assert args.batch_size == 20
        assert args.check_image

        assert config['descriptor_generator'] is not None
        assert config['descriptor_set'] is not None
        assert config['descriptor_factory'] is not None
# File: tests/utils/test_generate_image_transform.py
import sys
import unittest.mock as mock

from smqtk_iqr.utils.generate_image_transform import cli_parser, default_config
from smqtk_iqr.utils.cli import utility_main_helper


class TestGenerateImageTransformTool (object):
    """
    Unit tests pertaining to the generate_image_transform utility script
    """

    def test_parse_args(self) -> None:
        """
        Test parsing command line args for the generate_image_transform
        application.

        Checks that the configuration loaded from the tiles config file has
        non-None ``tile_shape`` and ``tile_stride`` entries under ``crop``.
        """
        # NOTE(review): the ``-c`` path is relative; presumably the test must
        # be run from the repository root for it to resolve -- confirm.
        argv = ['generate_image_transform', '-c',
                'docker/smqtk_iqr_playground/default_confs/generate_image_transform.tiles.json']

        # Swap ``sys.argv`` only for the duration of the parse/config step.
        # Assigning to ``sys.argv`` directly would leave the fake command
        # line in place for every test that runs afterwards in this process.
        with mock.patch.object(sys, 'argv', argv):
            args = cli_parser().parse_args()
            config = utility_main_helper(default_config(), args, default_config_valid=False)

        assert config['crop']['tile_shape'] is not None
        assert config['crop']['tile_stride'] is not None


# File: tests/web/search_app/__init__.py (empty)


# File: tests/web/search_app/test_iqr_search.py
import os
import unittest
import unittest.mock as mock

from smqtk_iqr.web.search_app.modules.iqr.iqr_search import IqrSearch
from smqtk_iqr.web.search_app import IqrSearchDispatcher
from smqtk_dataprovider.impls.data_set.memory import DataMemorySet

from smqtk_core import Pluggable


class TestIqrSearch (unittest.TestCase):
    """
    Unit tests pertaining to the IqrSearch class.
    """

    # Patch in this module for stub implementation access.
    # The environment variable named by ``Pluggable.PLUGIN_ENV_VAR`` is set to
    # this module's name only while ``setUp`` runs; ``patch.dict`` restores
    # ``os.environ`` afterwards.
    # noinspection PyUnresolvedReferences
    @mock.patch.dict(os.environ, {
        Pluggable.PLUGIN_ENV_VAR: __name__
    })
    def setUp(self) -> None:
        """
        Make an instance of the IqrSearchDispatcher application
        """
        # Setup configuration for test application
        config = IqrSearchDispatcher.get_default_config()
        self.dispatcher_app = IqrSearchDispatcher(config)

        # Setup dataset
        self.dataset = DataMemorySet()

    def test_iqr_search(self) -> None:
        """
        Test creation of an IqrSearch instance
        """
        app = IqrSearch(self.dispatcher_app, "test", self.dataset, ".")

        # Use the TestCase assertion helpers so failures are reported with a
        # descriptive message under any unittest-compatible runner.
        self.assertIsNotNone(app.mod_upload)
        self.assertIsNotNone(app.mod_static_dir)