Merge pull request #52 from achaiah/WIP2
Docker functionality
achaiah authored Oct 22, 2021
2 parents 07dfa70 + 97700d2 commit 9d663fa
Showing 9 changed files with 130 additions and 28 deletions.
49 changes: 49 additions & 0 deletions Dockerfile
@@ -0,0 +1,49 @@
FROM nvidia/cuda:11.3.1-cudnn8-devel-centos8

ENV HOME /home/pywick

RUN yum install -y epel-release && yum install -y dnf-plugins-core && yum config-manager --set-enabled powertools
RUN yum update -y && yum -y install atop bzip2-devel ca-certificates cmake curl git grep htop less libffi-devel hdf5-devel libjpeg-devel xz-devel libuuid-devel libXext libSM libXrender make nano openssl-devel sed screen tini vim wget unzip

RUN yum groupinstall -y "Development Tools"

RUN wget https://www.python.org/ftp/python/3.9.5/Python-3.9.5.tgz
RUN tar xvf Python-3.9.5.tgz && cd Python-3.9*/ && ./configure --enable-optimizations && make altinstall && cd .. && rm -rf Python*

RUN cd /usr/bin && rm python3 pip3 && ln -s /usr/local/bin/python3.9 python && ln -s /usr/local/bin/python3.9 python3 && ln -s /usr/local/bin/pip3.9 pip3 && ln -s /usr/local/bin/pip3.9 pip
RUN pip install --upgrade pip setuptools wheel

### Pytorch V1.8.2 + CUDA (py3.9_cuda11.1_cudnn7.6.3_0)
RUN pip install torch==1.8.2+cu111 torchvision==0.9.2+cu111 torchaudio==0.8.2 -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html

## MacOS currently not supported for CUDA or LTS
#RUN pip install torch torchvision torchaudio

RUN mkdir -p /home && rm -rf $HOME
RUN cd /home && git clone https://github.com/achaiah/pywick
# To build from a different branch or tag, specify it per the example below
#RUN cd $HOME && git checkout WIP2

# install requirements
RUN pip install versioned-hdf5
RUN pip install --upgrade -r $HOME/requirements.txt

ENV PYTHONPATH=/home:$HOME:$HOME/configs
WORKDIR $HOME

RUN chmod -R +x $HOME/*.sh

CMD ["/bin/bash", "/home/pywick/entrypoint.sh"]

###########
# Build with:
# git clone https://github.com/achaiah/pywick
# cd pywick
# docker build -t "achaiah/pywick:latest" .
#
# Run 17flowers demo with:
# docker run --rm -it --ipc=host --init -e demo=true achaiah/pywick:latest
# Optionally specify local dir where you want to save output: docker run --rm -it --ipc=host -v your_local_out_dir:/jobs/17flowers --init -e demo=true achaiah/pywick:latest
# Run container that just stays up (for your own processes):
# docker run --rm -it --ipc=host -v <your_local_data_dir>:<container_data_dir> -v <your_local_out_dir>:<container_out_dir> --init achaiah/pywick:latest
###########
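The image pins `torch==1.8.2+cu111`, `torchvision==0.9.2+cu111` and `torchaudio==0.8.2` from the PyTorch LTS wheel index. A quick way to confirm the install once inside the container is a short check like the one below (a sketch, not part of this commit; GPU visibility additionally requires starting the container with GPU access, e.g. the NVIDIA container runtime):

```python
# Sanity check for the container's PyTorch/CUDA stack (illustrative, not part of the commit).
import torch

print("torch:", torch.__version__)                 # expected: 1.8.2+cu111 per the Dockerfile pin
print("built against CUDA:", torch.version.cuda)   # CUDA version the wheel was built with
print("cuDNN:", torch.backends.cudnn.version())
print("GPU visible:", torch.cuda.is_available())   # True only when the container sees a GPU
```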
33 changes: 23 additions & 10 deletions README.md
@@ -36,13 +36,26 @@ Hey, [check this out](https://pywick.readthedocs.io/en/latest/), we now have [do

## What's New (highlights)

### v0.6.0 - We thought ya might like YAML!
So you're saying you like **configuration files**? You're saying you like **examples** too? Well, we've got you covered! Huge release today with a configuration-based training example! All you have to do is:
- Get your favorite dataset (or download [17 flowers](https://www.robots.ox.ac.uk/~vgg/data/flowers/17/) to get started and `pywick/examples/17flowers_split.py` to convert)
- Adjust the `configs/train_classifier.yaml` file to fit your workspace
- Then simply run: `python3 train_classifier.py configs/train_classifier.yaml` and watch it train!
### v0.6.5 - Docker all the things!
Another great improvement to the framework - docker! You can now run the 17flowers demo right out of the box!
- Grab our docker image at [docker hub](https://hub.docker.com/repository/docker/achaiah/pywick): `docker pull achaiah/pywick:latest`. Pytorch 1.8 and cuda dependencies are pre-installed.
- Run 17flowers demo with: `docker run --rm -it --ipc=host -v your_local_out_dir:/jobs/17flowers --init -e demo=true achaiah/pywick:latest`
- Or run the container in standalone mode so you can use your own data (don't forget to map your local dir to container):
```bash
docker run --rm -it \
--ipc=host \
-v <your_local_data_dir>:<container_data_dir> \
-v <your_local_out_dir>:<container_out_dir> \
--init \
achaiah/pywick:latest
```

### Older Notes
- **Oct. 11, 2021 - We thought ya might like YAML!**
- So you're saying you like **configuration files**? You're saying you like **examples** too? Well, we've got you covered! Huge release today with a configuration-based training example! All you have to do is:
- Get your favorite dataset (or download [17 flowers](https://www.robots.ox.ac.uk/~vgg/data/flowers/17/) to get started and `pywick/examples/17flowers_split.py` to convert)
- Adjust the `configs/train_classifier.yaml` file to fit your workspace
- Then simply run: `python3 train_classifier.py configs/train_classifier.yaml` and watch it train!
- **May 6, 2021**
- Many SoTA classification and segmentation models added: Swin-Transformer variants, NFNet variants (L0, L1), Halo nets, Lambda nets, ECA variants, Rexnet + others
- Many new loss functions added: RecallLoss, SoftInvDiceLoss, OhemBCEDicePenalizeBorderLoss, RMIBCEDicePenalizeBorderLoss + others
@@ -61,10 +74,10 @@ So you're saying you like **configuration files**? You're saying you like **exam
- spnasnet
- Additional loss functions
- **Aug. 1, 2019**
- New segmentation NNs: BiSeNet, DANet, DenseASPP, DUNet, OCNet, PSANet
- New Loss Functions: Focal Tversky Loss, OHEM CrossEntropy Loss, various combination losses
- Major restructuring and standardization of NN models and loading functionality
- General bug fixes and code improvements
- New segmentation NNs: BiSeNet, DANet, DenseASPP, DUNet, OCNet, PSANet
- New Loss Functions: Focal Tversky Loss, OHEM CrossEntropy Loss, various combination losses
- Major restructuring and standardization of NN models and loading functionality
- General bug fixes and code improvements
## Install
Pywick requires **pytorch >= 1.4**
@@ -73,7 +86,7 @@
or specific version from git:
`pip install git+https://github.com/achaiah/pywick.git@v0.6.0`
`pip install git+https://github.com/achaiah/pywick.git@v0.6.5`
## ModuleTrainer
The `ModuleTrainer` class provides a high-level training interface which abstracts away the training loop while providing callbacks, constraints, initializers, regularizers,
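The hunk above ends at the `ModuleTrainer` description, which wraps a plain `nn.Module` and drives the training loop. A minimal usage sketch, built around the `fit_loader` call visible in the docs hunk further down (the `compile()` arguments and the toy data here are assumptions, not taken from this commit):

```python
# Minimal ModuleTrainer sketch; everything except fit_loader's call shape is assumed.
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from pywick.modules import ModuleTrainer  # import path as used in the pywick README

# Toy classification data, just to make the sketch runnable.
x = torch.randn(64, 10)
y = torch.randint(0, 3, (64,))
loader = DataLoader(TensorDataset(x, y), batch_size=16, shuffle=True)
val_loader = DataLoader(TensorDataset(x, y), batch_size=16)

model = nn.Sequential(nn.Linear(10, 32), nn.ReLU(), nn.Linear(32, 3), nn.LogSoftmax(dim=1))
trainer = ModuleTrainer(model)
trainer.compile(loss='nll_loss', optimizer='adadelta')          # assumed compile() arguments
trainer.fit_loader(loader, val_loader=val_loader, num_epoch=2)  # call shape as shown in the docs hunk
```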
37 changes: 25 additions & 12 deletions docs/source/README.md
@@ -7,7 +7,7 @@
[![pypi](https://img.shields.io/pypi/v/pywick.svg)](https://pypi.org/project/pywick/)
[![python compatibility](https://img.shields.io/pypi/pyversions/pywick.svg)](https://pywick.readthedocs.io/en/latest/)
[![license](https://img.shields.io/pypi/l/pywick.svg)](https://github.com/achaiah/pywick/blob/master/LICENSE.txt)
[![Documentation Status](https://readthedocs.org/projects/pywick/badge/?version=latest)](https://pywick.readthedocs.io/en/latest/?badge=latest)

</div>

#### High-Level Training framework for Pytorch
@@ -36,13 +36,26 @@ Hey, [check this out](https://pywick.readthedocs.io/en/latest/), we now have [do

## What's New (highlights)

### v0.6.0 - We thought ya might like YAML!
So you're saying you like **configuration files**? You're saying you like **examples** too? Well, we've got you covered! Huge release today with a configuration-based training example! All you have to do is:
- Get your favorite dataset (or download [17 flowers](https://www.robots.ox.ac.uk/~vgg/data/flowers/17/) to get started and `pywick/examples/17flowers_split.py` to convert)
- Adjust the `configs/train_classifier.yaml` file to fit your workspace
- Then simply run: `python3 train_classifier.py configs/train_classifier.yaml` and watch it train!
### v0.6.5 - Docker all the things!
Another great improvement to the framework - docker! You can now run the 17flowers demo right out of the box!
- Grab our docker image at [docker hub](https://hub.docker.com/repository/docker/achaiah/pywick): `docker pull achaiah/pywick:latest`. Pytorch 1.8 and cuda dependencies are pre-installed.
- Run 17flowers demo with: `docker run --rm -it --ipc=host -v your_local_out_dir:/jobs/17flowers --init -e demo=true achaiah/pywick:latest`
- Or run the container in standalone mode so you can use your own data (don't forget to map your local dir to container):
```bash
docker run --rm -it \
--ipc=host \
-v <your_local_data_dir>:<container_data_dir> \
-v <your_local_out_dir>:<container_out_dir> \
--init \
achaiah/pywick:latest
```

### Older Notes
- **Oct. 11, 2021 - We thought ya might like YAML!**
- So you're saying you like **configuration files**? You're saying you like **examples** too? Well, we've got you covered! Huge release today with a configuration-based training example! All you have to do is:
- Get your favorite dataset (or download [17 flowers](https://www.robots.ox.ac.uk/~vgg/data/flowers/17/) to get started and `pywick/examples/17flowers_split.py` to convert)
- Adjust the `configs/train_classifier.yaml` file to fit your workspace
- Then simply run: `python3 train_classifier.py configs/train_classifier.yaml` and watch it train!
- **May 6, 2021**
- Many SoTA classification and segmentation models added: Swin-Transformer variants, NFNet variants (L0, L1), Halo nets, Lambda nets, ECA variants, Rexnet + others
- Many new loss functions added: RecallLoss, SoftInvDiceLoss, OhemBCEDicePenalizeBorderLoss, RMIBCEDicePenalizeBorderLoss + others
@@ -61,10 +74,10 @@ So you're saying you like **configuration files**? You're saying you like **exam
- spnasnet
- Additional loss functions
- **Aug. 1, 2019**
- New segmentation NNs: BiSeNet, DANet, DenseASPP, DUNet, OCNet, PSANet
- New Loss Functions: Focal Tversky Loss, OHEM CrossEntropy Loss, various combination losses
- Major restructuring and standardization of NN models and loading functionality
- General bug fixes and code improvements
- New segmentation NNs: BiSeNet, DANet, DenseASPP, DUNet, OCNet, PSANet
- New Loss Functions: Focal Tversky Loss, OHEM CrossEntropy Loss, various combination losses
- Major restructuring and standardization of NN models and loading functionality
- General bug fixes and code improvements
## Install
Pywick requires **pytorch >= 1.4**
@@ -73,7 +86,7 @@
or specific version from git:
`pip install git+https://github.com/achaiah/pywick.git@v0.6.0`
`pip install git+https://github.com/achaiah/pywick.git@v0.6.5`
## ModuleTrainer
The `ModuleTrainer` class provides a high-level training interface which abstracts away the training loop while providing callbacks, constraints, initializers, regularizers,
@@ -190,7 +203,7 @@ trainer.fit_loader(loader, val_loader=val_loader, num_epoch=100)
- [**TResNet: High Performance GPU-Dedicated Architecture**](https://arxiv.org/abs/2003.13630)
- [**Wide Resnet**](https://arxiv.org/abs/1605.07146)
- [**XCeption**](https://arxiv.org/pdf/1610.02357.pdf)
- All the newest classification models (700+) from [rwightman's repo](https://github.com/rwightman/pytorch-image-models) (ECA-NFNet, GERNet, RegNet, SKResnext, SWIN-Transformer, VIT etc.)
- All the newest classification models (200+) from [rwightman's repo](https://github.com/rwightman/pytorch-image-models) (ECA-NFNet, GERNet, RegNet, SKResnext, SWIN-Transformer, VIT etc.)

## Image Segmentation Models
- **BiSeNet** ([Bilateral Segmentation Network for Real-time Semantic Segmentation](https://arxiv.org/abs/1808.00897))
2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -41,7 +41,7 @@
version = ''
# The full version, including alpha/beta/rc tags

release = '0.6.0'
release = '0.6.5'

# -- General configuration ---------------------------------------------------

22 changes: 22 additions & 0 deletions entrypoint.sh
@@ -0,0 +1,22 @@
#!/bin/bash

# run demo if "demo" env variable is set
if [ -n "$demo" ]; then
# prepare directories
mkdir -p /data /jobs && cd /data && \
# get the dataset
wget https://www.robots.ox.ac.uk/~vgg/data/flowers/17/17flowers.tgz && \
tar xzf 17flowers.tgz && rm 17flowers.tgz && \
# refactor images into correct structure
python /home/pywick/examples/17flowers_split.py && \
rm -rf jpg && \
# train on the dataset
cd /home/pywick/pywick && python train_classifier.py configs/train_classifier.yaml
echo "keeping container alive ..."
tail -f /dev/null

# otherwise keep the container alive
else
echo "running a blank container..."
tail -f /dev/null
fi
2 changes: 1 addition & 1 deletion pywick/__init__.py
@@ -1,4 +1,4 @@
__version__ = '0.6.0'
__version__ = '0.6.5'
__author__ = 'Achaiah'
__description__ = 'High-level batteries-included neural network training library for Pytorch'

3 changes: 2 additions & 1 deletion pywick/callbacks/CSVLogger.py
@@ -1,6 +1,6 @@
import csv
import os
from collections import Iterable
from collections.abc import Iterable
from collections import OrderedDict

import torch
@@ -9,6 +9,7 @@

__all__ = ['CSVLogger']


class CSVLogger(Callback):
"""
Logs epoch-level metrics to a CSV file
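The one-line import change in `CSVLogger.py` above is a forward-compatibility fix: the container ABCs such as `Iterable` live in `collections.abc`, and the legacy aliases in the top-level `collections` module (deprecated since Python 3.3) were removed in Python 3.10. A small illustration of the corrected usage; the helper itself is illustrative, not from the commit:

```python
# The ABCs must come from collections.abc; "from collections import Iterable"
# stops working on Python 3.10+.
from collections.abc import Iterable

def as_row(value):
    """Normalize a scalar or an iterable of scalars into a list,
    the kind of check a CSV logger needs before writing a row."""
    if isinstance(value, Iterable) and not isinstance(value, (str, bytes)):
        return list(value)
    return [value]

print(as_row(0.93))         # [0.93]
print(as_row([0.1, 0.2]))   # [0.1, 0.2]
print(as_row("acc"))        # ['acc'] - strings are iterable but treated as scalars here
```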
5 changes: 4 additions & 1 deletion pywick/callbacks/ModelCheckpoint.py
@@ -106,7 +106,8 @@ def on_epoch_end(self, epoch, logs=None):

if (current_loss < self.best_loss and self.save_best_only) or not self.save_best_only or (not self.do_minimize and current_loss > self.best_loss):
if current_loss is None:
pass
if self.verbose:
print(f'ModelCheckpoint could not find monitored_log_key (loss variable) in logs: {self.monitored_log_key}')
else:
# Call custom function (if set) to process things like best-N results etc
if self.custom_func is not None:
@@ -149,6 +150,8 @@ def on_epoch_end(self, epoch, logs=None):
if len(self.old_files) >= self.max_saves:
try:
os.remove(self.old_files[0])
if self.verbose:
print(f'ModelCheckpoint removing old model snapshot: {self.old_files[0]}')
except:
pass
self.old_files = self.old_files[1:]
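The second `ModelCheckpoint.py` hunk adds a verbose message to the snapshot-rotation path, which keeps at most `max_saves` checkpoint files and removes the oldest one first. A standalone sketch of that pattern (the attribute and message names follow the hunk; the function wrapper is illustrative):

```python
import os

def rotate_snapshots(old_files, new_file, max_saves, verbose=False):
    """Keep at most max_saves snapshots: drop the oldest file, then record the new one."""
    if len(old_files) >= max_saves:
        try:
            os.remove(old_files[0])
            if verbose:
                print(f'ModelCheckpoint removing old model snapshot: {old_files[0]}')
        except OSError:
            pass  # the oldest file may already be gone; rotation should not interrupt training
        old_files = old_files[1:]
    return old_files + [new_file]
```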
5 changes: 3 additions & 2 deletions pywick/configs/train_classifier.yaml
@@ -33,13 +33,14 @@ train:
save_interval: 1 # save every N epochs
save_dir: *outroot # where to save output
custom_func: # name of custom function to execute on key/val dictionary (if any)
verbose: False
verbose: True
scheduler: # scheduler configuration
name: OnceCycleLRScheduler # should match to a name of an imported scheduler (either from callbacks or torch.optim.lr_scheduler)
params:
epochs: *nepochs
steps_per_epoch: 2
max_lr: 0.05
pct_start: 0.2
pct_start: 0.4
train_val_ratio: 0.9 # split ratio between training and validation data (if using a single dataset)
use_apex: False # whether to use APEX optimization (not yet implemented)
use_gpu: True # whether to use the GPU for training
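In `train_classifier.yaml`, `pct_start` moves from 0.2 to 0.4, i.e. the fraction of the one-cycle schedule spent ramping the learning rate up to `max_lr`. Assuming the configured `OnceCycleLRScheduler` maps onto PyTorch's `torch.optim.lr_scheduler.OneCycleLR` (an assumption; the name in the YAML is pywick's own), the effect of the change looks like this:

```python
import torch
from torch.optim.lr_scheduler import OneCycleLR

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Mirrors the YAML: max_lr=0.05, steps_per_epoch=2, pct_start=0.4.
# epochs=15 is a placeholder for the *nepochs anchor defined elsewhere in the config.
scheduler = OneCycleLR(optimizer, max_lr=0.05, epochs=15, steps_per_epoch=2, pct_start=0.4)

lrs = []
for _ in range(15 * 2):          # total steps = epochs * steps_per_epoch
    optimizer.step()
    scheduler.step()
    lrs.append(scheduler.get_last_lr()[0])

# With pct_start=0.4 the peak (~0.05) is hit about 40% of the way through the run,
# versus about 20% of the way through before this change.
print(round(max(lrs), 4), lrs.index(max(lrs)))
```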
