From d8557c1970939ee9bb90bd41edcd86c6fcf84d19 Mon Sep 17 00:00:00 2001 From: torzdf <36920800+torzdf@users.noreply.github.com> Date: Wed, 12 Aug 2020 10:36:41 +0100 Subject: [PATCH] Faceswap 2.0 (#1045) * Core Updates - Remove lib.utils.keras_backend_quiet and replace with get_backend() where relevant - Document lib.gpu_stats and lib.sys_info - Remove call to GPUStats.is_plaidml from convert and replace with get_backend() - lib.gui.menu - typofix * Update Dependencies Bump Tensorflow Version Check * Port extraction to tf2 * Add custom import finder for loading Keras or tf.keras depending on backend * Add `tensorflow` to KerasFinder search path * Basic TF2 training running * model.initializers - docstring fix * Fix and pass tests for tf2 * Replace Keras backend tests with faceswap backend tests * Initial optimizers update * Monkey patch tf.keras optimizer * Remove custom Adam Optimizers and Memory Saving Gradients * Remove multi-gpu option. Add Distribution to cli * plugins.train.model._base: Add Mirror, Central and Default distribution strategies * Update tensorboard kwargs for tf2 * Penalized Loss - Fix for TF2 and AMD * Fix syntax for tf2.1 * requirements typo fix * Explicit None for clipnorm if using a distribution strategy * Fix penalized loss for distribution strategies * Update Dlight * typo fix * Pin to TF2.2 * setup.py - Install tensorflow from pip if not available in Conda * Add reduction options and set default for mirrored distribution strategy * Explicitly use default strategy rather than nullcontext * lib.model.backup_restore documentation * Remove mirrored strategy reduction method and default based on OS * Initial restructure - training * Remove PingPong Start model.base refactor * Model saving and resuming enabled * More tidying up of model.base * Enable backup and snapshotting * Re-enable state file Remove loss names from state file Fix print loss function Set snapshot iterations correctly * Revert original model to Keras Model structure rather 
than custom layer Output full model and sub model summary Change NNBlocks to callables rather than custom keras layers * Apply custom Conv2D layer * Finalize NNBlock restructure Update Dfaker blocks * Fix reloading model under a different distribution strategy * Pass command line arguments through to trainer * Remove training_opts from model and reference params directly * Tidy up model __init__ * Re-enable tensorboard logging Suppress "Model Not Compiled" warning * Fix timelapse * lib.model.nnblocks - Bugfix residual block Port dfaker bugfix original * dfl-h128 ported * DFL SAE ported * IAE Ported * dlight ported * port lightweight * realface ported * unbalanced ported * villain ported * lib.cli.args - Update Batchsize + move allow_growth to config * Remove output shape definition Get image sizes per side rather than globally * Strip mask input from encoder * Fix learn mask and output learned mask to preview * Trigger Allow Growth prior to setting strategy * Fix GUI Graphing * GUI - Display batchsize correctly + fix training graphs * Fix penalized loss * Enable mixed precision training * Update analysis displayed batch to match input * Penalized Loss - Multi-GPU Fix * Fix all losses for TF2 * Fix Reflect Padding * Allow different input size for each side of the model * Fix conv-aware initialization on reload * Switch allow_growth order * Move mixed_precision to cli * Remove distribution strategies * Compile penalized loss sub-function into LossContainer * Bump default save interval to 250 Generate preview on first iteration but don't save Fix iterations to start at 1 instead of 0 Remove training deprecation warnings Bump some scripts.train loglevels * Add ability to refresh preview on demand on pop-up window * Enable refresh of training preview from GUI * Fix Convert Debug logging in Initializers * Fix Preview Tool * Update Legacy TF1 weights to TF2 Catch stats error on loading stats with missing logs * lib.gui.popup_configure - Make more responsive + document * 
Multiple Outputs supported in trainer Original Model - Mask output bugfix * Make universal inference model for convert Remove scaling from penalized mask loss (now handled at input to y_true) * Fix inference model to work properly with all models * Fix multi-scale output for convert * Fix clipnorm issue with distribution strategies Edit error message on OOM * Update plaidml losses * Add missing file * Disable gmsd loss for plaidml * PlaidML - Basic training working * clipnorm rewriting for mixed-precision * Inference model creation bugfixes * Remove debug code * Bugfix: Default clipnorm to 1.0 * Remove all mask inputs from training code * Remove mask inputs from convert * GUI - Analysis Tab - Docstrings * Fix rate in totals row * lib.gui - Only update display pages if they have focus * Save the model on first iteration * plaidml - Fix SSIM loss with penalized loss * tools.alignments - Remove manual and fix jobs * GUI - Remove case formatting on help text * gui MultiSelect custom widget - Set default values on init * vgg_face2 - Move to plugins.extract.recognition and use plugins._base base class cli - Add global GPU Exclude Option tools.sort - Use global GPU Exclude option for backend lib.model.session - Exclude all GPUs when running in CPU mode lib.cli.launcher - Set backend to CPU mode when all GPUs excluded * Cascade excluded devices to GPU Stats * Explicit GPU selection for Train and Convert * Reduce Tensorflow Min GPU Multiprocessor Count to 4 * remove compat.v1 code from extract * Force TF to skip mixed precision compatibility check if GPUs have been filtered * Add notes to config for non-working AMD losses * Raise error if forcing extract to CPU mode * Fix loading of legacy dfl-sae weights + dfl-sae typo fix * Remove unused requirements Update sphinx requirements Fix broken rst file locations * docs: lib.gui.display * clipnorm amd condition check * documentation - gui.display_analysis * Documentation - gui.popup_configure * Documentation - lib.logger * 
Documentation - lib.model.initializers * Documentation - lib.model.layers * Documentation - lib.model.losses * Documentation - lib.model.nn_blocks * Documentation - lib.model.normalization * Documentation - lib.model.session * Documentation - lib.plaidml_stats * Documentation: lib.training_data * Documentation: lib.utils * Documentation: plugins.train.model._base * GUI Stats: prevent stats from using GPU * Documentation - Original Model * Documentation: plugins.model.trainer._base * linting * unit tests: initializers + losses * unit tests: nn_blocks * bugfix - Exclude gpu devices in train, not include * Enable Exclude-Gpus in Extract * Enable exclude gpus in tools * Disallow multiple plugin types in a single model folder * Automatically add exclude_gpus argument in for cpu backends * Cpu backend fixes * Relax optimizer test threshold * Default Train settings - Set mask to Extended * Update Extractor cli help text Update to Python 3.8 * Fix FAN to run on CPU * lib.plaidml_tools - typofix * Linux installer - check for curl * linux installer - typo fix --- .install/linux/faceswap_setup_x64.sh | 14 +- .install/windows/install.nsi | 2 +- .travis.yml | 2 +- Dockerfile.cpu | 2 +- Dockerfile.gpu | 2 +- INSTALL.md | 6 +- _requirements_base.txt | 13 - docs/full/lib/gui.rst | 22 + docs/full/lib/logger.rst | 8 + docs/full/lib/model.rst | 53 +- docs/full/lib/plaidml_stats.rst | 7 + docs/full/lib/utils.rst | 8 + docs/full/lib/vgg_face2_keras.rst | 7 - docs/full/plugins/extract.rst | 8 + docs/full/plugins/train.rst | 35 +- docs/sphinx_requirements.txt | 39 +- lib/cli/args.py | 137 +- lib/cli/launcher.py | 83 +- lib/gpu_stats.py | 50 +- lib/gui/__init__.py | 4 +- lib/gui/control_helper.py | 7 +- lib/gui/custom_widgets.py | 2 +- lib/gui/display.py | 148 +- lib/gui/display_analysis.py | 364 +-- lib/gui/display_command.py | 36 +- lib/gui/display_graph.py | 4 +- lib/gui/display_page.py | 52 +- lib/gui/menu.py | 7 +- lib/gui/popup_configure.py | 191 +- lib/gui/stats.py | 190 +- 
lib/gui/utils.py | 42 + lib/gui/wrapper.py | 31 +- lib/logger.py | 238 +- lib/model/__init__.py | 9 + lib/model/backup_restore.py | 141 +- lib/model/initializers.py | 142 +- lib/model/layers.py | 114 +- lib/model/{losses.py => losses_plaid.py} | 520 ++--- lib/model/losses_tf.py | 556 +++++ lib/model/memory_saving_gradients.py | 439 ---- lib/model/nn_blocks.py | 901 +++---- lib/model/normalization.py | 23 +- lib/model/optimizers.py | 146 -- lib/model/session.py | 175 +- lib/plaidml_tools.py | 236 +- lib/training_data.py | 179 +- lib/utils.py | 476 ++-- plugins/extract/_base.py | 16 +- plugins/extract/align/_base.py | 5 +- plugins/extract/align/fan.py | 95 +- plugins/extract/detect/_base.py | 5 +- plugins/extract/detect/mtcnn.py | 71 +- plugins/extract/detect/s3fd.py | 403 +++- plugins/extract/mask/_base.py | 5 +- plugins/extract/mask/unet_dfl.py | 7 +- plugins/extract/mask/vgg_clear.py | 7 +- plugins/extract/mask/vgg_obstructed.py | 7 +- plugins/extract/pipeline.py | 27 +- plugins/extract/recognition/__init__.py | 0 .../extract/recognition}/vgg_face2_keras.py | 100 +- plugins/train/_config.py | 19 +- plugins/train/model/_base.py | 2060 ++++++++++------- plugins/train/model/dfaker.py | 53 +- plugins/train/model/dfl_h128.py | 58 +- plugins/train/model/dfl_sae.py | 216 +- plugins/train/model/dlight.py | 204 +- plugins/train/model/iae.py | 106 +- plugins/train/model/lightweight.py | 56 +- plugins/train/model/original.py | 202 +- plugins/train/model/realface.py | 128 +- plugins/train/model/unbalanced.py | 160 +- plugins/train/model/villain.py | 73 +- plugins/train/model/villain_defaults.py | 2 +- plugins/train/trainer/_base.py | 961 ++++---- requirements_amd.txt | 5 +- requirements_cpu.txt | 2 +- requirements_nvidia.txt | 2 +- scripts/convert.py | 107 +- scripts/extract.py | 1 + scripts/gui.py | 4 +- scripts/train.py | 140 +- setup.py | 40 +- tests/__init__.py | 7 + tests/lib/model/initializers_test.py | 9 +- tests/lib/model/layers_test.py | 9 +- 
tests/lib/model/losses_test.py | 56 +- tests/lib/model/nn_blocks_test.py | 34 +- tests/lib/model/normalization_test.py | 2 +- tests/lib/model/optimizers_test.py | 51 +- tests/startup_test.py | 15 +- tests/utils.py | 125 + tools/alignments/alignments.py | 6 +- tools/alignments/cli.py | 255 +- tools/alignments/jobs.py | 49 - tools/alignments/jobs_manual.py | 939 -------- tools/manual/manual.py | 20 +- tools/mask/mask.py | 11 +- tools/preview/cli.py | 68 +- tools/sort/cli.py | 9 - tools/sort/sort.py | 19 +- 100 files changed, 6720 insertions(+), 6182 deletions(-) create mode 100755 docs/full/lib/logger.rst create mode 100755 docs/full/lib/plaidml_stats.rst create mode 100755 docs/full/lib/utils.rst delete mode 100755 docs/full/lib/vgg_face2_keras.rst rename lib/model/{losses.py => losses_plaid.py} (53%) create mode 100644 lib/model/losses_tf.py delete mode 100644 lib/model/memory_saving_gradients.py delete mode 100644 lib/model/optimizers.py create mode 100644 plugins/extract/recognition/__init__.py rename {lib => plugins/extract/recognition}/vgg_face2_keras.py (70%) create mode 100644 tests/utils.py delete mode 100644 tools/alignments/jobs_manual.py diff --git a/.install/linux/faceswap_setup_x64.sh b/.install/linux/faceswap_setup_x64.sh index cf9894a872..3d861c64d7 100644 --- a/.install/linux/faceswap_setup_x64.sh +++ b/.install/linux/faceswap_setup_x64.sh @@ -70,6 +70,15 @@ check_for_sudo() { fi } +check_for_curl() { + # Ensure that curl is available on the system + if ! command -V curl &> /dev/null ; then + error "'curl' is required for running the Faceswap installer, but could not be found. \ + Please install 'curl' using the package manager for your distribution before proceeding." + exit 1 + fi +} + create_tmp_dir() { TMP_DIR="$(mktemp -d)" if [ -z "$TMP_DIR" -o ! 
-d "$TMP_DIR" ]; then @@ -336,10 +345,10 @@ delete_env() { } create_env() { - # Create Python 3.7 env for faceswap + # Create Python 3.8 env for faceswap delete_env info "Creating Conda Virtual Environment..." - yellow ; "$CONDA_EXECUTABLE" create -n "$ENV_NAME" -q python=3.7 -y + yellow ; "$CONDA_EXECUTABLE" create -n "$ENV_NAME" -q python=3.8 -y } @@ -406,6 +415,7 @@ create_desktop_shortcut () { } check_for_sudo +check_for_curl banner user_input review diff --git a/.install/windows/install.nsi b/.install/windows/install.nsi index 8b146b2479..e2d2bddbcf 100644 --- a/.install/windows/install.nsi +++ b/.install/windows/install.nsi @@ -22,7 +22,7 @@ InstallDir $PROFILE\faceswap # Install cli flags !define flagsConda "/S /RegisterPython=0 /AddToPath=0 /D=$PROFILE\MiniConda3" !define flagsRepo "--depth 1 --no-single-branch ${wwwRepo}" -!define flagsEnv "-y python=3.7" +!define flagsEnv "-y python=3.8" # Folders Var ProgramData diff --git a/.travis.yml b/.travis.yml index 84e56cc1f7..9075e4af8e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,7 @@ language: shell env: global: - - CONDA_PYTHON=3.7 + - CONDA_PYTHON=3.8 - CONDA_BLD_PATH=${HOME}/conda-bld os: diff --git a/Dockerfile.cpu b/Dockerfile.cpu index 1b2e4c16cf..bb30b48883 100755 --- a/Dockerfile.cpu +++ b/Dockerfile.cpu @@ -1,4 +1,4 @@ -FROM tensorflow/tensorflow:1.12.0-py3 +FROM tensorflow/tensorflow:2.2.0-py3 RUN add-apt-repository -y ppa:jonathonf/ffmpeg-4 \ && apt-get update -qq -y \ diff --git a/Dockerfile.gpu b/Dockerfile.gpu index 62a6e52c95..9087b79ee9 100755 --- a/Dockerfile.gpu +++ b/Dockerfile.gpu @@ -1,4 +1,4 @@ -FROM tensorflow/tensorflow:1.15.0-gpu-py3 +FROM tensorflow/tensorflow:2.2.0-gpu-py3 ENV DEBIAN_FRONTEND noninteractive diff --git a/INSTALL.md b/INSTALL.md index e8657512e3..9c0cc63286 100755 --- a/INSTALL.md +++ b/INSTALL.md @@ -93,8 +93,8 @@ Reboot your PC, so that everything you have just installed gets registered. 
- Select "Create" at the bottom - In the pop up: - Give it the name: faceswap - - **IMPORTANT**: Select python version 3.7 - - Hit "Create" (NB: This may take a while as it will need to download Python 3.7) + - **IMPORTANT**: Select python version 3.8 + - Hit "Create" (NB: This may take a while as it will need to download Python) ![Anaconda virtual env setup](https://i.imgur.com/59RHnLs.png) #### Entering your virtual environment @@ -155,7 +155,7 @@ Obtain git for your distribution from the [git website](https://git-scm.com/down The recommended install method is to use a Conda3 Environment as this will handle the installation of Nvidia's CUDA and cuDNN straight into your Conda Environment. This is by far the easiest and most reliable way to setup the project. - MiniConda3 is recommended: [MiniConda3](https://docs.conda.io/en/latest/miniconda.html) -Alternatively you can install Python (>= 3.6-3.7 64-bit) for your distribution (links below.) If you go down this route and are using an Nvidia GPU you should install CUDA (https://developer.nvidia.com/cuda-zone) and cuDNN (https://developer.nvidia.com/cudnn). for your system. If you do not plan to build Tensorflow yourself, make sure you install no higher than version 10.0 of CUDA and 7.5.x of CUDNN. +Alternatively you can install Python (>= 3.6-3.8 64-bit) for your distribution (links below.) If you go down this route and are using an Nvidia GPU you should install CUDA (https://developer.nvidia.com/cuda-zone) and cuDNN (https://developer.nvidia.com/cudnn). for your system. If you do not plan to build Tensorflow yourself, make sure you install no higher than version 10.0 of CUDA and 7.5.x of CUDNN. 
- Python distributions: - apt/yum install python3 (Linux) - [Installer](https://www.python.org/downloads/release/python-368/) (Windows) diff --git a/_requirements_base.txt b/_requirements_base.txt index abe6086630..af6547d5b7 100644 --- a/_requirements_base.txt +++ b/_requirements_base.txt @@ -3,10 +3,8 @@ psutil>=5.7.0 pathlib==1.0.1 numpy>=1.18.0 opencv-python>=4.1.2.0 -scikit-image>=0.16.2 pillow>=7.0.0 scikit-learn>=0.22.0 -toposort==1.5 fastcluster==1.1.26 matplotlib>=3.0.3 imageio>=2.8.0 @@ -15,16 +13,5 @@ ffmpy==0.2.3 # Revert back to nvidia-ml-py3 when windows/system32 patch is implemented git+https://github.com/deepfakes/nvidia-ml-py3.git #nvidia-ml-py3 -h5py>=2.10.0 -Keras==2.2.4 pywin32>=227 ; sys_platform == "win32" pynvx==1.0.0 ; sys_platform == "darwin" - -# tensorflow is included within the docker image. -# If you are looking for dependencies for a manual install, - -# NB: Tensorflow version 1.12 is the minimum supported version of Tensorflow. -# If your graphics card support is below Cuda 9.0 you will need to either -# compile tensorflow yourself or download a custom version. -# Install 1.12.0<=tensorflow-gpu<=1.13.0 for CUDA 9.0 -# or 1.13.1<=tensorflow-gpu<1.15 for CUDA 10.0 diff --git a/docs/full/lib/gui.rst b/docs/full/lib/gui.rst index 8655ca3cd9..c0fca80358 100755 --- a/docs/full/lib/gui.rst +++ b/docs/full/lib/gui.rst @@ -30,6 +30,28 @@ custom\_widgets module :undoc-members: :show-inheritance: +display module +============== +.. automodule:: lib.gui.display + :members: + :undoc-members: + :show-inheritance: + + +display\_analysis module +======================== +.. autoclass:: lib.gui.display_analysis.Analysis + :members: + :undoc-members: + :show-inheritance: + +popup_configure module +====================== +.. 
automodule:: lib.gui.popup_configure + :members: + :undoc-members: + :show-inheritance: + project module ============== diff --git a/docs/full/lib/logger.rst b/docs/full/lib/logger.rst new file mode 100755 index 0000000000..82c375a95d --- /dev/null +++ b/docs/full/lib/logger.rst @@ -0,0 +1,8 @@ +************* +logger module +************* + +.. automodule:: lib.logger + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/full/lib/model.rst b/docs/full/lib/model.rst index d8402afeea..5396f89a4d 100755 --- a/docs/full/lib/model.rst +++ b/docs/full/lib/model.rst @@ -6,6 +6,13 @@ The Model Package handles interfacing with the neural network backend and holds .. contents:: Contents :local: +model.backup_restore module +--------------------------- + +.. automodule:: lib.model.backup_restore + :members: + :undoc-members: + :show-inheritance: model.initializers module ------------------------- @@ -17,6 +24,7 @@ model.initializers module ~lib.model.initializers.ConvolutionAware ~lib.model.initializers.ICNR + ~lib.model.initializers.compute_fans .. automodule:: lib.model.initializers :members: @@ -46,22 +54,23 @@ model.layers module model.losses module ------------------- +The losses listed here are generated from the docstrings in :mod:`lib.model.losses_tf`, however +the functions are exactly the same for :mod:`lib.model.losses_plaid`. The correct loss module will +be imported as :mod:`lib.model.losses` depending on the backend in use. + .. rubric:: Module Summary .. autosummary:: :nosignatures: - ~lib.model.losses.DSSIMObjective - ~lib.model.losses.PenalizedLoss - ~lib.model.losses.gaussian_blur - ~lib.model.losses.generalized_loss - ~lib.model.losses.gmsd_loss - ~lib.model.losses.gradient_loss - ~lib.model.losses.l_inf_norm - ~lib.model.losses.mask_loss_wrapper - ~lib.model.losses.scharr_edges - -.. 
automodule:: lib.model.losses + ~lib.model.losses_tf.DSSIMObjective + ~lib.model.losses_tf.PenalizedLoss + ~lib.model.losses_tf.GeneralizedLoss + ~lib.model.losses_tf.GMSDLoss + ~lib.model.losses_tf.GradientLoss + ~lib.model.losses_tf.LInfNorm + +.. automodule:: lib.model.losses_tf :members: :undoc-members: :show-inheritance: @@ -69,6 +78,20 @@ model.losses module model.nn_blocks module ---------------------- +.. rubric:: Module Summary + +.. autosummary:: + :nosignatures: + + ~lib.model.nn_blocks.Conv2D + ~lib.model.nn_blocks.Conv2DBlock + ~lib.model.nn_blocks.Conv2DOutput + ~lib.model.nn_blocks.ResidualBlock + ~lib.model.nn_blocks.SeparableConv2DBlock + ~lib.model.nn_blocks.Upscale2xBlock + ~lib.model.nn_blocks.UpscaleBlock + ~lib.model.nn_blocks.set_config + .. automodule:: lib.model.nn_blocks :members: :undoc-members: @@ -89,14 +112,6 @@ model.normalization module :undoc-members: :show-inheritance: -model.optimizers module ------------------------ - -.. automodule:: lib.model.optimizers - :members: - :undoc-members: - :show-inheritance: - model.session module --------------------- diff --git a/docs/full/lib/plaidml_stats.rst b/docs/full/lib/plaidml_stats.rst new file mode 100755 index 0000000000..72c19bd8c5 --- /dev/null +++ b/docs/full/lib/plaidml_stats.rst @@ -0,0 +1,7 @@ +plaidml\_tools module +===================== + +.. automodule:: lib.plaidml_tools + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/full/lib/utils.rst b/docs/full/lib/utils.rst new file mode 100755 index 0000000000..53fefa7c0b --- /dev/null +++ b/docs/full/lib/utils.rst @@ -0,0 +1,8 @@ +************ +utils module +************ + +.. automodule:: lib.utils + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/full/lib/vgg_face2_keras.rst b/docs/full/lib/vgg_face2_keras.rst deleted file mode 100755 index adb6acedcb..0000000000 --- a/docs/full/lib/vgg_face2_keras.rst +++ /dev/null @@ -1,7 +0,0 @@ -vgg\_face2\_keras module -======================== - -.. 
automodule:: lib.vgg_face2_keras - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/full/plugins/extract.rst b/docs/full/plugins/extract.rst index 103b9e7f26..59da672e2d 100755 --- a/docs/full/plugins/extract.rst +++ b/docs/full/plugins/extract.rst @@ -59,6 +59,14 @@ mask._base module ----------------- .. automodule:: plugins.extract.mask._base + :members: + :undoc-members: + :show-inheritance: + +vgg\_face2\_keras module +------------------------ + +.. automodule:: plugins.extract.recognition.vgg_face2_keras :members: :undoc-members: :show-inheritance: \ No newline at end of file diff --git a/docs/full/plugins/train.rst b/docs/full/plugins/train.rst index fc4c87138d..4f0675d23b 100755 --- a/docs/full/plugins/train.rst +++ b/docs/full/plugins/train.rst @@ -4,23 +4,40 @@ train package The Train Package handles the Model and Trainer plugins for training models in Faceswap. -trainer._base module -==================== + +.. contents:: Contents + :local: + +model._base module +================== .. rubric:: Module Summary .. autosummary:: :nosignatures: - - ~plugins.train.trainer._base.Batcher - ~plugins.train.trainer._base.PingPong - ~plugins.train.trainer._base.Samples - ~plugins.train.trainer._base.Timelapse - ~plugins.train.trainer._base.TrainerBase - ~plugins.train.trainer._base.TrainingAlignments + + ~plugins.train.model._base.KerasModel + ~plugins.train.model._base.ModelBase + ~plugins.train.model._base.State .. rubric:: Module +.. automodule:: plugins.train.model._base + :members: + :undoc-members: + :show-inheritance: + +model.original module +===================== + +.. automodule:: plugins.train.model.original + :members: + :undoc-members: + :show-inheritance: + +trainer._base module +==================== + .. 
automodule:: plugins.train.trainer._base :members: :undoc-members: diff --git a/docs/sphinx_requirements.txt b/docs/sphinx_requirements.txt index 7f8a503509..9d38b6f16f 100755 --- a/docs/sphinx_requirements.txt +++ b/docs/sphinx_requirements.txt @@ -1,25 +1,20 @@ # NB Do not install from this requirements file # It is for documentation purposes only -tqdm -psutil -pathlib -numpy==1.16.2 -opencv-python==4.1.1.26 -scikit-image -Pillow==6.1.0 -scikit-learn -toposort -fastcluster -matplotlib==2.2.2 -imageio==2.5.0 -imageio-ffmpeg -ffmpy==0.2.2 -# Revert back to nvidia-ml-py3 when windows/system32 patch is implemented -git+https://github.com/deepfakes/nvidia-ml-py3.git -#nvidia-ml-py3 -h5py==2.9.0 -Keras==2.2.4 -pywin32 ; sys_platform == "win32" -pynvx==0.0.4 ; sys_platform == "darwin" -tensorflow==1.13.1 +tqdm==4.42 +psutil==5.7.0 +pathlib==1.0.1 +numpy==1.18.0 +opencv-python==4.1.2.30 +pillow==7.0.0 +scikit-learn==0.22.0 +fastcluster==1.1.26 +matplotlib==3.0.3 +imageio==2.8.0 +imageio-ffmpeg==0.4.2 +ffmpy==0.2.3 +nvidia-ml-py3 +pywin32==227 ; sys_platform == "win32" +pynvx==1.0.0 ; sys_platform == "darwin" +plaidml-keras==0.7.0 +tensorflow==2.2.0 diff --git a/lib/cli/args.py b/lib/cli/args.py index 60024efba8..f676a250e4 100644 --- a/lib/cli/args.py +++ b/lib/cli/args.py @@ -1,5 +1,7 @@ #!/usr/bin/env python3 """ The Command Line Argument options for faceswap.py """ + +# pylint:disable=too-many-lines import argparse import logging import re @@ -7,6 +9,8 @@ import textwrap from lib.utils import get_backend +from lib.gpu_stats import GPUStats + from plugins.plugin_loader import PluginLoader from .actions import (DirFullPaths, DirOrFileFullPaths, FileFullPaths, FilesFullPaths, MultiOption, @@ -14,6 +18,7 @@ from .launcher import ScriptExecutor logger = logging.getLogger(__name__) # pylint: disable=invalid-name +_GPUS = GPUStats().cli_devices class FullHelpArgumentParser(argparse.ArgumentParser): @@ -156,6 +161,19 @@ def _get_global_arguments(): The list of global command 
line options for all Faceswap commands. """ global_args = list() + if _GPUS: + global_args.append(dict( + opts=("-X", "--exclude-gpus"), + dest="exclude_gpus", + action=MultiOption, + type=str.lower, + nargs="+", + choices=[str(idx) for idx in range(len(_GPUS))], + group="Global Options", + help="R|Exclude GPUs from use by Faceswap. Select the number(s) which correspond " + "to any GPU(s) that you do not wish to be made available to Faceswap. " + "Selecting all GPUs here will force Faceswap into CPU mode." + "\nL|{}".format(" \nL|".join(_GPUS)))) global_args.append(dict( opts=("-C", "--configfile"), action=FileFullPaths, @@ -344,11 +362,10 @@ def get_optional_arguments(): "'/config/extract.ini' or 'Settings > Configure Extract 'Plugins':" "\nL|cv2-dnn: A CPU only extractor which is the least reliable and least " "resource intensive. Use this if not using a GPU and time is important." - "\nL|mtcnn: Good detector. Fast on CPU, faster on GPU. Uses fewer resources " - "than other GPU detectors but can often return more false positives." - "\nL|s3fd: Best detector. Fast on GPU, slow on CPU. Can detect more faces and " - "fewer false positives than other GPU detectors, but is a lot more resource " - "intensive.")) + "\nL|mtcnn: Good detector. GPU only. Uses fewer resources than other GPU " + "detectors but can often return more false positives." + "\nL|s3fd: Best detector. GPU only. Can detect more faces and fewer false " + "positives than other GPU detectors, but is a lot more resource intensive.")) argument_list.append(dict( opts=("-A", "--aligner"), action=Radio, @@ -768,15 +785,12 @@ def get_optional_arguments(): "this to, it will never attempt to use more processes than are available on " "your system. 
If singleprocess is enabled this setting will be ignored.")) argument_list.append(dict( - opts=("-g", "--gpus"), - action=Slider, - min_max=(1, 10), - rounding=1, - type=int, - default=1, + opts=("-d", "--distributed"), + action="store_true", + default=False, backend="nvidia", group="settings", - help="Number of GPUs to use for conversion")) + help="Use the Tensorflow Mirrored Distrubution Strategy to train on multiple GPUs.")) argument_list.append(dict( opts=("-t", "--trainer"), type=str.lower, @@ -784,15 +798,6 @@ def get_optional_arguments(): group="settings", help="[LEGACY] This only needs to be selected if a legacy model is being loaded or " "if there are multiple models in the model folder")) - argument_list.append(dict( - opts=("-ag", "--allow-growth"), - action="store_true", - dest="allow_growth", - default=False, - backend="nvidia", - group="settings", - help="Sets allow_growth option of Tensorflow to spare memory on some " - "configurations.")) argument_list.append(dict( opts=("-otf", "--on-the-fly"), action="store_true", @@ -917,9 +922,9 @@ def get_argument_list(): "\nL|original: The original model created by /u/deepfakes." "\nL|dfaker: 64px in/128px out model from dfaker. Enable 'warp-to-landmarks' " "for full dfaker method." - "\nL|dfl-h128. 128px in/out model from deepfacelab" - "\nL|dfl-sae. Adaptable model from deepfacelab" - "\nL|dlight. A lightweight, high resolution DFaker variant." + "\nL|dfl-h128: 128px in/out model from deepfacelab" + "\nL|dfl-sae: Adaptable model from deepfacelab" + "\nL|dlight: A lightweight, high resolution DFaker variant." "\nL|iae: A model that uses intermediate layers to try to get better details" "\nL|lightweight: A lightweight model for low-end cards. Don't expect great " "results. Can train as low as 1.6GB with batch size 8." @@ -928,20 +933,22 @@ def get_argument_list(): "won't work so well. By andenixa et al. Very configurable." "\nL|unbalanced: 128px in/out model from andenixa. 
The autoencoders are " "unbalanced so B>A swaps won't work so well. Very configurable." - "\nL|villain: 128px in/out model from villainguy. Very resource hungry (11GB " - "for batchsize 16). Good for details, but more susceptible to color " - "differences.")) + "\nL|villain: 128px in/out model from villainguy. Very resource hungry (You " + "will require a GPU with a fair amount of VRAM). Good for details, but more " + "susceptible to color differences.")) argument_list.append(dict( opts=("-bs", "--batch-size"), action=Slider, - min_max=(2, 256), - rounding=2, + min_max=(1, 256), + rounding=1, type=int, dest="batch_size", - default=64, + default=16, group="training", - help="Batch size. This is the number of images processed through the model for " - "each iteration. Larger batches require more GPU RAM.")) + help="Batch size. This is the number of images processed through the model for each " + "side per iteration. NB: As the model is fed 2 sides at a time, the actual " + "number of images within the model at any one time is double the number that you " + "set here. Larger batches require more GPU RAM.")) argument_list.append(dict( opts=("-it", "--iterations"), action=Slider, @@ -956,47 +963,28 @@ def get_argument_list(): "you want the model to stop automatically at a set number of iterations, you " "can set that value here.")) argument_list.append(dict( - opts=("-g", "--gpus"), - action=Slider, - min_max=(1, 10), - rounding=1, - type=int, - default=1, - backend="nvidia", - group="training", - help="Number of GPUs to use for training")) - argument_list.append(dict( - opts=("-msg", "--memory-saving-gradients"), + opts=("-d", "--distributed"), action="store_true", - dest="memory_saving_gradients", default=False, backend="nvidia", - group="VRAM Savings", - help="Trades off VRAM usage against computation time. Can fit larger models into " - "memory at a cost of slower training speed. 50%%-150%% batch size increase " - "for 20%%-50%% longer training time. 
NB: Launch time will be significantly " - "delayed. Switching sides using ping-pong training will take longer.")) - argument_list.append(dict( - opts=("-o", "--optimizer-savings"), - action="store_true", - dest="optimizer_savings", - default=False, - backend="nvidia", - group="VRAM Savings", - help="To save VRAM some optimizer gradient calculations can be performed on the " - "CPU rather than the GPU. This allows you to increase batchsize at a training " - "speed/system RAM cost.")) + group="training", + help="Use the Tensorflow Mirrored Distrubution Strategy to train on multiple GPUs.")) argument_list.append(dict( - opts=("-pp", "--ping-pong"), + opts=("-mp", "--mixed-precision"), action="store_true", - dest="pingpong", + dest="mixed_precision", default=False, backend="nvidia", - group="VRAM Savings", - help="Enable ping pong training. Trains one side at a time, switching sides at " - "each save iteration. Training will take 2 to 4 times longer, with about a " - "30%%-50%% reduction in VRAM useage. NB: Preview won't show until both sides " - "have been trained once.")) + group="training", + help="R|NVIDIA GPUs can run operations in float16 faster than in float32. Mixed " + "precision allows you to use a mix of float16 with float32, to get the " + "performance benefits from float16 and the numeric stability benefits from " + "float32.\nWhile mixed precision will run on most Nvidia models, it will only " + "speed up training on more recent GPUs. Those with compute capability 7.0 or " + "higher will see the greatest performance benefit from mixed precision because " + "they have Tensor Cores. Older GPUs offer no math performance benefit for using " + "mixed precision, however memory and bandwidth savings can enable some speedups. 
" + "Generally RTX GPUs and later will offer the most benefit.")) argument_list.append(dict( opts=("-s", "--save-interval"), action=Slider, @@ -1004,7 +992,7 @@ def get_argument_list(): rounding=10, type=int, dest="save_interval", - default=100, + default=250, group="Saving", help="Sets the number of iterations between each model save.")) argument_list.append(dict( @@ -1075,15 +1063,6 @@ def get_argument_list(): group="preview", help="Writes the training result to a file. The image will be stored in the root " "of your FaceSwap folder.")) - argument_list.append(dict( - opts=("-ag", "--allow-growth"), - action="store_true", - dest="allow_growth", - default=False, - backend="nvidia", - group="model", - help="Sets allow_growth option of Tensorflow to spare memory on some " - "configurations.")) argument_list.append(dict( opts=("-nl", "--no-logs"), action="store_true", @@ -1097,7 +1076,7 @@ def get_argument_list(): action="store_true", dest="warp_to_landmarks", default=False, - group="training", + group="augmentation", help="Warps training faces to closely matched Landmarks from the opposite face-set " "rather than randomly warping the face. This is the 'dfaker' way of doing " "warping. Alignments files for both sets of faces must be provided if using " @@ -1107,7 +1086,7 @@ def get_argument_list(): action="store_true", dest="no_flip", default=False, - group="training", + group="augmentation", help="To effectively learn, a random set of images are flipped horizontally. " "Sometimes it is desirable for this not to occur. Generally this should be " "left off except for during 'fit training'.")) @@ -1116,7 +1095,7 @@ def get_argument_list(): action="store_true", dest="no_augment_color", default=False, - group="training", + group="augmentation", help="Color augmentation helps make the model less susceptible to color " "differences between the A and B sets, at an increased training time cost. 
" "Enable this option to disable color augmentation.")) diff --git a/lib/cli/launcher.py b/lib/cli/launcher.py index aaa9b97513..6a4f743a48 100644 --- a/lib/cli/launcher.py +++ b/lib/cli/launcher.py @@ -6,8 +6,11 @@ import sys from importlib import import_module + +from lib.gpu_stats import set_exclude_devices, GPUStats from lib.logger import crash_log, log_setup -from lib.utils import FaceswapError, get_backend, safe_shutdown, set_system_verbosity +from lib.utils import (FaceswapError, get_backend, KerasFinder, safe_shutdown, set_backend, + set_system_verbosity) logger = logging.getLogger(__name__) # pylint: disable=invalid-name @@ -51,12 +54,13 @@ def _test_for_tf_version(): Raises ------ FaceswapError - If Tensorflow is not found, or is not between versions 1.12 and 1.15 + If Tensorflow is not found, or is not between versions 2.2 and 2.2 """ - min_ver = 1.12 - max_ver = 1.15 + min_ver = 2.2 + max_ver = 2.2 try: # Ensure tensorflow doesn't pin all threads to one core when using Math Kernel Library + os.environ["TF_MIN_GPU_MULTIPROCESSOR_COUNT"] = "4" os.environ["KMP_AFFINITY"] = "disabled" import tensorflow as tf # pylint:disable=import-outside-toplevel except ImportError as err: @@ -142,13 +146,10 @@ def execute_script(self, arguments): set_system_verbosity(arguments.loglevel) is_gui = hasattr(arguments, "redirect_gui") and arguments.redirect_gui log_setup(arguments.loglevel, arguments.logfile, self._command, is_gui) - logger.debug("Executing: %s. 
PID: %s", self._command, os.getpid()) success = False - if get_backend() == "amd": - plaidml_found = self._setup_amd(arguments.loglevel) - if not plaidml_found: - safe_shutdown(got_error=True) - return + + if self._command != "gui": + self._configure_backend(arguments) try: script = self._import_script() process = script(arguments) @@ -172,14 +173,66 @@ def execute_script(self, arguments): finally: safe_shutdown(got_error=not success) - @staticmethod - def _setup_amd(log_level): + def _configure_backend(self, arguments): + """ Configure the backend. + + Exclude any GPUs for use by Faceswap when requested. + + Set Faceswap backend to CPU if all GPUs have been deselected. + + Add the Keras import interception code. + + Parameters + ---------- + arguments: :class:`argparse.Namespace` + The command line arguments passed to Faceswap. + """ + if not hasattr(arguments, "exclude_gpus"): + # Cpu backends will not have this attribute + logger.debug("Adding missing exclude gpus argument to namespace") + setattr(arguments, "exclude_gpus", None) + + if arguments.exclude_gpus: + if not all(idx.isdigit() for idx in arguments.exclude_gpus): + logger.error("GPUs passed to the ['-X', '--exclude-gpus'] argument must all be " + "integers.") + sys.exit(1) + arguments.exclude_gpus = [int(idx) for idx in arguments.exclude_gpus] + set_exclude_devices(arguments.exclude_gpus) + + if ((get_backend() == "cpu" or GPUStats().exclude_all_devices) and + (self._command == "extract" and arguments.detector in ("mtcnn", "s3fd"))): + logger.error("Extracting on CPU is not currently for detector: '%s'", + arguments.detector.upper()) + sys.exit(0) + + if GPUStats().exclude_all_devices and get_backend() != "cpu": + msg = "Switching backend to CPU" + if get_backend() == "amd": + msg += (". 
Using Tensorflow for CPU operations.") + os.environ["KERAS_BACKEND"] = "tensorflow" + set_backend("cpu") + logger.info(msg) + + # Add Keras finder to the meta_path list as the first item + sys.meta_path.insert(0, KerasFinder()) + + logger.debug("Executing: %s. PID: %s", self._command, os.getpid()) + + if get_backend() == "amd": + plaidml_found = self._setup_amd(arguments) + if not plaidml_found: + safe_shutdown(got_error=True) + sys.exit(1) + + @classmethod + def _setup_amd(cls, arguments): """ Test for plaidml and perform setup for AMD. Parameters ---------- - log_level: str - The requested log level to run at + arguments: :class:`argparse.Namespace` + The command line arguments passed to Faceswap. """ logger.debug("Setting up for AMD") try: @@ -188,6 +241,6 @@ def _setup_amd(log_level): logger.error("PlaidML not found. Run `pip install plaidml-keras` for AMD support") return False from lib.plaidml_tools import setup_plaidml # pylint:disable=import-outside-toplevel - setup_plaidml(log_level) + setup_plaidml(arguments.loglevel, arguments.exclude_gpus) logger.debug("setup up for PlaidML") return True diff --git a/lib/gpu_stats.py b/lib/gpu_stats.py index 72e64d8843..3509893a5a 100644 --- a/lib/gpu_stats.py +++ b/lib/gpu_stats.py @@ -27,6 +27,25 @@ plaidlib = None +_EXCLUDE_DEVICES = [] + + +def set_exclude_devices(devices): + """ Add any explicitly selected GPU devices to the global list of devices to be excluded + from use by Faceswap. + + Parameters + ---------- + devices: list + list of indices corresponding to the GPU devices connected to the computer + """ + logger = logging.getLogger(__name__) + logger.debug("Excluding GPU indicies: %s", devices) + if not devices: + return + _EXCLUDE_DEVICES.extend(devices) + + class GPUStats(): """ Holds information and statistics about the GPU(s) available on the currently running system. @@ -71,6 +90,16 @@ def device_count(self): """int: The number of GPU devices discovered on the system. 
""" return self._device_count + @property + def cli_devices(self): + """ list: List of available devices for use in faceswap's command line arguments """ + return ["{}: {}".format(idx, device) for idx, device in enumerate(self._devices)] + + @property + def exclude_all_devices(self): + """ bool: ``True`` if all GPU devices have been explicitly disabled otherwise ``False`` """ + return all(idx in _EXCLUDE_DEVICES for idx in range(len(self._devices))) + @property def _is_plaidml(self): """ bool: ``True`` if the backend is plaidML otherwise ``False``. """ @@ -132,7 +161,7 @@ def _initialize(self, log=False): if get_backend() == "amd": self._log("debug", "AMD Detected. Using plaidMLStats") loglevel = "INFO" if self._logger is None else self._logger.getEffectiveLevel() - self._plaid = plaidlib(loglevel=loglevel, log=log) + self._plaid = plaidlib(log_level=loglevel, log=log) elif IS_MACOS: self._log("debug", "macOS Detected. Using pynvx") try: @@ -199,18 +228,21 @@ def _get_device_count(self): def _get_active_devices(self): """ Obtain the indices of active GPUs (those that have not been explicitly excluded by - CUDA_VISIBLE_DEVICES or plaidML) and allocate to :attr:`_active_devices`. """ + CUDA_VISIBLE_DEVICES, plaidML or command line arguments) and allocate to + :attr:`_active_devices`. 
""" if self._is_plaidml: self._active_devices = self._plaid.active_devices else: - devices = os.environ.get("CUDA_VISIBLE_DEVICES", None) if self._device_count == 0: - self._active_devices = list() - elif devices is not None: - self._active_devices = [int(i) for i in devices.split(",") if devices] + self._active_devices = [] else: - self._active_devices = list(range(self._device_count)) - self._log("debug", "Active GPU Devices: {}".format(self._active_devices)) + devices = [idx for idx in range(self._device_count) if idx not in _EXCLUDE_DEVICES] + env_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "") + if env_devices: + env_devices = [int(i) for i in env_devices.split(",")] + devices = [idx for idx in devices if idx in env_devices] + self._active_devices = devices + self._log("debug", "Active GPU Devices: {}".format(self._active_devices)) def _get_handles(self): """ Obtain the internal handle identifiers for the system GPUs and allocate to @@ -340,7 +372,7 @@ def get_card_most_free(self): If a GPU is not detected then the **card_id** is returned as ``-1`` and the amount of free and total RAM available is fixed to 2048 Megabytes. 
""" - if self._device_count == 0: + if len(self._active_devices) == 0: return {"card_id": -1, "device": "No GPU devices found", "free": 2048, diff --git a/lib/gui/__init__.py b/lib/gui/__init__.py index b611c3c341..8bf41c9b3d 100644 --- a/lib/gui/__init__.py +++ b/lib/gui/__init__.py @@ -3,8 +3,8 @@ from lib.gui.display import DisplayNotebook from lib.gui.options import CliOptions from lib.gui.menu import MainMenuBar, TaskBar -from lib.gui.popup_configure import popup_config from lib.gui.project import LastSession from lib.gui.stats import Session -from lib.gui.utils import get_config, get_images, initialize_config, initialize_images +from lib.gui.utils import (get_config, get_images, initialize_config, initialize_images, + preview_trigger) from lib.gui.wrapper import ProcessWrapper diff --git a/lib/gui/control_helper.py b/lib/gui/control_helper.py index aad12b39d6..91f43d25ee 100644 --- a/lib/gui/control_helper.py +++ b/lib/gui/control_helper.py @@ -217,7 +217,6 @@ def helptext(self): helptext = helptext[2:].replace("\nL|", "\n - ").replace("\n", "\n\n") else: helptext = helptext.replace("\n\t", "\n - ").replace("%%", "%") - helptext = ". ".join(i.capitalize() for i in helptext.split(". ")) helptext = self.title + " - " + helptext logger.debug("Formatted control help: (name: '%s', help: '%s'", self.name, helptext) return helptext @@ -859,10 +858,8 @@ def _multi_option_control(self, option_type): variable=self.option.tk_var) if choice.lower() in help_items: self.helpset = True - helptext = help_items[choice.lower()].capitalize() - helptext = "{}\n\n - {}".format( - '. '.join(item.capitalize() for item in helptext.split('. 
')), - help_intro) + helptext = help_items[choice.lower()] + helptext = "{}\n\n - {}".format(helptext, help_intro) _get_tooltip(ctl, text=helptext, wraplength=600) ctl.pack(anchor=tk.W) logger.debug("Added %s option %s", option_type, choice) diff --git a/lib/gui/custom_widgets.py b/lib/gui/custom_widgets.py index cf8b276700..873e22139e 100644 --- a/lib/gui/custom_widgets.py +++ b/lib/gui/custom_widgets.py @@ -701,7 +701,7 @@ class MultiOption(ttk.Checkbutton): # pylint: disable=too-many-ancestors """ def __init__(self, parent, value, variable, **kwargs): self._tk_var = tk.BooleanVar() - self._tk_var.set(False) + self._tk_var.set(value == variable.get()) super().__init__(parent, variable=self._tk_var, **kwargs) self._value = value self._master_variable = variable diff --git a/lib/gui/display.py b/lib/gui/display.py index 6e0f1d770f..de83e2f951 100644 --- a/lib/gui/display.py +++ b/lib/gui/display.py @@ -1,8 +1,9 @@ #!/usr/bin python3 """ Display Frame of the Faceswap GUI - What is displayed in the Display Frame varies - depending on what tasked is being run """ +This is the large right hand area of the GUI. At default, the Analysis tab is always displayed +here. Further optional tabs will also be displayed depending on the currently executing Faceswap +task. """ import logging import tkinter as tk @@ -16,29 +17,46 @@ class DisplayNotebook(ttk.Notebook): # pylint: disable=too-many-ancestors - """ The display tabs """ + """ The tkinter Notebook that holds the display items. 
+ + Parameters + ---------- + parent: :class:`tk.PanedWindow` + The paned window that holds the Display Notebook + """ def __init__(self, parent): logger.debug("Initializing %s", self.__class__.__name__) super().__init__(parent) parent.add(self) tk_vars = get_config().tk_vars - self.wrapper_var = tk_vars["display"] - self.runningtask = tk_vars["runningtask"] - - self.set_wrapper_var_trace() - self.add_static_tabs() - self.static_tabs = [child for child in self.tabs()] + self._wrapper_var = tk_vars["display"] + self._runningtask = tk_vars["runningtask"] + + self._set_wrapper_var_trace() + self._add_static_tabs() + # pylint:disable=unnecessary-comprehension + self._static_tabs = [child for child in self.tabs()] + self.bind("<>", self._on_tab_change) logger.debug("Initialized %s", self.__class__.__name__) - def set_wrapper_var_trace(self): - """ Set the trigger actions for the display vars - when they have been triggered in the Process Wrapper """ + @property + def runningtask(self): + """ :class:`tkinter.BooleanVar`: The global tkinter variable that indicates whether a + Faceswap task is currently running or not. """ + return self._runningtask + + def _set_wrapper_var_trace(self): + """ Sets the trigger to update the displayed notebook's pages when the global tkinter + variable `display` is updated in the :class:`~lib.gui.wrapper.ProcessWrapper`. """ logger.debug("Setting wrapper var trace") - self.wrapper_var.trace("w", self.update_displaybook) + self._wrapper_var.trace("w", self._update_displaybook) + + def _add_static_tabs(self): + """ Add the tabs to the Display Notebook that are permanently displayed. - def add_static_tabs(self): - """ Add tabs that are permanently available """ + Currently this is just the `Analysis` tab. 
+ """ logger.debug("Adding static tabs") for tab in ("job queue", "analysis"): if tab == "job queue": @@ -48,32 +66,52 @@ def add_static_tabs(self): "Summary statistics for each training session"} frame = Analysis(self, tab, helptext) else: - frame = self.add_frame() + frame = self._add_frame() self.add(frame, text=tab.title()) - def add_frame(self): - """ Add a single frame for holding tab's contents """ + def _add_frame(self): + """ Add a single frame for holding a static tab's contents. + + Returns + ------- + ttk.Frame + The frame, packed into position + """ logger.debug("Adding frame") frame = ttk.Frame(self) frame.pack(side=tk.LEFT, fill=tk.BOTH, expand=True, padx=5, pady=5) return frame - def command_display(self, command): - """ Select what to display based on incoming - command """ - build_tabs = getattr(self, "{}_tabs".format(command)) + def _command_display(self, command): + """ Build the relevant command specific tabs based on the incoming Faceswap command. + + Parameters + ---------- + command: str + The Faceswap command that is being executed + """ + build_tabs = getattr(self, "_{}_tabs".format(command)) build_tabs() - def extract_tabs(self, command="extract"): - """ Build the extract tabs """ + def _extract_tabs(self, command="extract"): + """ Build the display tabs that are used for Faceswap extract and convert tasks. + + Notes + ----- + The same display tabs are used for both convert and extract tasks. + + command: [`"extract"`, `"convert"`], optional + The command that the display tabs are being built for. 
Default: `"extract"` + + """ logger.debug("Build extract tabs") helptext = ("Updates preview from output every 5 " "seconds to limit disk contention") PreviewExtract(self, "preview", helptext, 5000, command) logger.debug("Built extract tabs") - def train_tabs(self): - """ Build the train tabs """ + def _train_tabs(self): + """ Build the display tabs that are used for the Faceswap train task.""" logger.debug("Build train tabs") for tab in ("graph", "preview"): if tab == "graph": @@ -84,17 +122,21 @@ def train_tabs(self): PreviewTrain(self, "preview", helptext, 1000) logger.debug("Built train tabs") - def convert_tabs(self): - """ Build the convert tabs - Currently identical to Extract, so just call that """ + def _convert_tabs(self): + """ Build the display tabs that are used for the Faceswap convert task. + + Notes + ----- + The tabs displayed are the same as used for extract, so :func:`_extract_tabs` is called. + """ logger.debug("Build convert tabs") - self.extract_tabs(command="convert") + self._extract_tabs(command="convert") logger.debug("Built convert tabs") - def remove_tabs(self): - """ Remove all command specific tabs """ + def _remove_tabs(self): + """ Remove all optional displayed command specific tabs from the notebook. """ for child in self.tabs(): - if child in self.static_tabs: + if child in self._static_tabs: continue logger.debug("removing child: %s", child) child_name = child.split(".")[-1] @@ -102,10 +144,40 @@ def remove_tabs(self): child_object.close() # Call the OptionalDisplayPage close() method self.forget(child) - def update_displaybook(self, *args): # pylint: disable=unused-argument - """ Set the display tabs based on executing task """ - command = self.wrapper_var.get() - self.remove_tabs() + def _update_displaybook(self, *args): # pylint: disable=unused-argument + """ Callback to be executed when the global tkinter variable `display` + (:attr:`wrapper_var`) is updated when a Faceswap task is executed. 
+ + Currently only updates when a core faceswap task (extract, train or convert) is executed. + + Parameters + ---------- + args: tuple + Required for tkinter callback events, but unused. + + """ + command = self._wrapper_var.get() + self._remove_tabs() if not command or command not in ("extract", "train", "convert"): return - self.command_display(command) + self._command_display(command) + + def _on_tab_change(self, event): # pylint:disable=unused-argument + """ Event trigger for tab change events. + + Calls the selected tabs :func:`on_tab_select` method, if it exists, otherwise returns. + + Parameters + ---------- + event: tkinter callback event + Required, but unused + """ + selected = self.select().split(".")[-1] + logger.debug("Selected tab: %s", selected) + selected_object = self.children[selected] + if hasattr(selected_object, "on_tab_select"): + logger.debug("Calling on_tab_select for '%s'", selected_object) + selected_object.on_tab_select() + else: + logger.debug("Object does not have on_tab_select method. Returning: '%s'", + selected_object) diff --git a/lib/gui/display_analysis.py b/lib/gui/display_analysis.py index aa1e797dfa..c4d79297e3 100644 --- a/lib/gui/display_analysis.py +++ b/lib/gui/display_analysis.py @@ -18,77 +18,122 @@ class Analysis(DisplayPage): # pylint: disable=too-many-ancestors - """ Session analysis tab """ - def __init__(self, parent, tabname, helptext): - logger.debug("Initializing: %s: (parent, %s, tabname: '%s', helptext: '%s')", - self.__class__.__name__, parent, tabname, helptext) - super().__init__(parent, tabname, helptext) - - self.summary = None - self.session = None - self.add_options() - self.add_main_frame() - self.thread = None # Thread for compiling stats data in background - self.set_callbacks() + """ Session Analysis Tab. + + The area of the GUI that holds the session summary stats for model training sessions. 
+ + Parameters + ---------- + parent: :class:`lib.gui.display.DisplayNotebook` + The :class:`ttk.Notebook` that holds this session summary statistics page + tab_name: str + The name of the tab to be displayed in the notebook + helptext: str + The help text to display for the summary statistics page + """ + def __init__(self, parent, tab_name, helptext): + logger.debug("Initializing: %s: (parent, %s, tab_name: '%s', helptext: '%s')", + self.__class__.__name__, parent, tab_name, helptext) + super().__init__(parent, tab_name, helptext) + self._summary = None + self._session = None + + self._reset_session_info() + _Options(self) + self._stats = self._get_main_frame() + + self._thread = None # Thread for compiling stats data in background + self._set_callbacks() logger.debug("Initialized: %s", self.__class__.__name__) - def set_callbacks(self): - """ Add a callback to update analysis when the training graph is updated """ - tkv = get_config().tk_vars - tkv["refreshgraph"].trace("w", self.update_current_session) - tkv["istraining"].trace("w", self.remove_current_session) - tkv["analysis_folder"].trace("w", self.populate_from_folder) + def set_vars(self): + """ Set the analysis specific tkinter variables to :attr:`vars`. + + The tracked variables are the global variables that: + * Trigger when a graph refresh has been requested. + * Trigger when training is commenced or halted + * The variable holding the location of the current Tensorboard log folder. + + Returns + ------- + dict + The dictionary of variable names to tkinter variables + """ + return dict(selected_id=tk.StringVar(), + refresh_graph=get_config().tk_vars["refreshgraph"], + is_training=get_config().tk_vars["istraining"], + analysis_folder=get_config().tk_vars["analysis_folder"]) + + def on_tab_select(self): + """ Callback for when the analysis tab is selected. + + If Faceswap is currently training a model, then update the statistics with the latest + values. 
+ """ + if not self.vars["is_training"].get(): + return + logger.debug("Analysis update callback received") + self._reset_session() + + def _get_main_frame(self): + """ Get the main frame to the sub-notebook to hold stats and session data. + + Returns + ------- + :class:`StatsData` + The frame that holds the analysis statistics for the Analysis notebook page + """ + logger.debug("Getting main stats frame") + mainframe = self.subnotebook_add_page("stats") + retval = StatsData(mainframe, self.vars["selected_id"], self.helptext["stats"]) + logger.debug("got main frame: %s", retval) + return retval + + def _set_callbacks(self): + """ Adds callbacks to update the analysis summary statistics and add them to :attr:`vars` + + Training graph refresh - Updates the stats for the current training session when the graph + has been updated. + + When training is commenced - Removes the currently displayed session. + + When the analysis folder has been populated - Updates the stats from that folder. + """ + self.vars["refresh_graph"].trace("w", self._update_current_session) + self.vars["is_training"].trace("w", self._remove_current_session) + self.vars["analysis_folder"].trace("w", self._populate_from_folder) - def update_current_session(self, *args): # pylint:disable=unused-argument - """ Update the current session data on a graph update callback """ - if not get_config().tk_vars["refreshgraph"].get(): + def _update_current_session(self, *args): # pylint:disable=unused-argument + """ Update the currently training session data on a graph update callback. """ + if not self.vars["refresh_graph"].get(): + return + if not self._tab_is_active: + logger.debug("Analyis tab not selected. 
Not updating stats") return logger.debug("Analysis update callback received") - self.reset_session() + self._reset_session() - def remove_current_session(self, *args): # pylint:disable=unused-argument - """ Remove the current session data on a istraining=False callback """ - if get_config().tk_vars["istraining"].get(): + def _remove_current_session(self, *args): # pylint:disable=unused-argument + """ Remove the current session data on a is_training=False callback """ + if self.vars["is_training"].get(): return logger.debug("Remove current training Analysis callback received") - self.clear_session() + self._clear_session() - def set_vars(self): - """ Analysis specific vars """ - selected_id = tk.StringVar() - return {"selected_id": selected_id} - - def add_main_frame(self): - """ Add the main frame to the sub-notebook - to hold stats and session data """ - logger.debug("Adding main frame") - mainframe = self.subnotebook_add_page("stats") - self.stats = StatsData(mainframe, - self.vars["selected_id"], - self.helptext["stats"]) - logger.debug("Added main frame") - - def add_options(self): - """ Add the options bar """ - logger.debug("Adding options") - self.reset_session_info() - options = Options(self) - options.add_options() - logger.debug("Added options") - - def reset_session_info(self): + def _reset_session_info(self): """ Reset the session info status to default """ logger.debug("Resetting session info") self.set_info("No session data loaded") - def populate_from_folder(self, *args): # pylint:disable=unused-argument - """ Populate the Analysis tab from just a model folder. Triggered - when tkinter variable ``analysis_folder`` is set. + def _populate_from_folder(self, *args): # pylint:disable=unused-argument + """ Populate the Analysis tab from a model folder. + + Triggered when :attr:`vars` ``analysis_folder`` variable is set. 
""" - folder = get_config().tk_vars["analysis_folder"].get() + folder = self.vars["analysis_folder"].get() if not folder or not os.path.isdir(folder): logger.debug("Not a valid folder") - self.clear_session() + self._clear_session() return state_files = [fname @@ -96,40 +141,33 @@ def populate_from_folder(self, *args): # pylint:disable=unused-argument if fname.endswith("_state.json")] if not state_files: logger.debug("No state files found in folder: '%s'", folder) - self.clear_session() + self._clear_session() return state_file = state_files[0] if len(state_files) > 1: logger.debug("Multiple models found. Selecting: '%s'", state_file) - if self.thread is None: - self.load_session(fullpath=os.path.join(folder, state_file)) - - def load_session(self, fullpath=None): - """ Load previously saved sessions """ - logger.debug("Loading session") - if fullpath is None: - fullpath = FileHandler("filename", "state").retfile - if not fullpath: - return - self.clear_session() - logger.debug("state_file: '%s'", fullpath) - model_dir, state_file = os.path.split(fullpath) - logger.debug("model_dir: '%s'", model_dir) - model_name = self.get_model_name(model_dir, state_file) - if not model_name: - return - self.session = Session(model_dir=model_dir, model_name=model_name) - self.session.initialize_session(is_training=False) - msg = fullpath - if len(msg) > 70: - msg = "...{}".format(msg[-70:]) - self.set_session_summary(msg) - - @staticmethod - def get_model_name(model_dir, state_file): - """ Get the state file from the model directory """ + if self._thread is None: + self._load_session(full_path=os.path.join(folder, state_file)) + + @classmethod + def _get_model_name(cls, model_dir, state_file): + """ Obtain the model name from a state file's file name. 
+ + Parameters + ---------- + model_dir: str + The folder that the model's state file resides in + state_file: str + The filename of the model's state file + + Returns + ------- + str or ``None`` + The name of the model extracted from the state file's file name or ``None`` if no + log folders were found in the model folder + """ logger.debug("Getting model name") model_name = state_file.replace("_state.json", "") logger.debug("model_name: %s", model_name) @@ -139,69 +177,101 @@ def get_model_name(model_dir, state_file): return None return model_name - def reset_session(self): - """ Reset currently training sessions """ - logger.debug("Reset current training session") - self.clear_session() - session = get_config().session - if not session.initialized: - logger.debug("Training not running") - return - if session.logging_disabled: - logger.trace("Logging disabled. Not triggering analysis update") - return - msg = "Currently running training session" - self.session = session - # Reload the state file to get approx currently training iterations - self.session.load_state_file() - self.set_session_summary(msg) - - def set_session_summary(self, message): + def _set_session_summary(self, message): """ Set the summary data and info message """ - if self.thread is None: + if self._thread is None: logger.debug("Setting session summary. 
(message: '%s')", message) - self.thread = LongRunningTask(target=self.summarise_data, - args=(self.session, ), - widget=self) - self.thread.start() - self.after(1000, lambda msg=message: self.set_session_summary(msg)) - elif not self.thread.complete.is_set(): + self._thread = LongRunningTask(target=self._summarise_data, + args=(self._session, ), + widget=self) + self._thread.start() + self.after(1000, lambda msg=message: self._set_session_summary(msg)) + elif not self._thread.complete.is_set(): logger.debug("Data not yet available") - self.after(1000, lambda msg=message: self.set_session_summary(msg)) + self.after(1000, lambda msg=message: self._set_session_summary(msg)) else: logger.debug("Retrieving data from thread") - result = self.thread.get_result() + result = self._thread.get_result() if result is None: logger.debug("No result from session summary. Clearing analysis view") - self.clear_session() + self._clear_session() return - self.summary = result - self.thread = None + self._summary = result + self._thread = None self.set_info("Session: {}".format(message)) - self.stats.session = self.session - self.stats.tree_insert_data(self.summary) + self._stats.session = self._session + self._stats.tree_insert_data(self._summary) - @staticmethod - def summarise_data(session): + @classmethod + def _summarise_data(cls, session): """ Summarize data in a LongRunningThread as it can take a while """ return session.full_summary - def clear_session(self): - """ Clear sessions stats """ + def _clear_session(self): + """ Clear the currently displayed analysis data from the Tree-View. """ logger.debug("Clearing session") - if self.session is None: + if self._session is None: logger.trace("No session loaded. 
Returning") return - self.summary = None - self.stats.session = None - self.stats.tree_clear() - self.reset_session_info() - self.session = None - - def save_session(self): - """ Save sessions stats to csv """ + self._summary = None + self._stats.session = None + self._stats.tree_clear() + self._reset_session_info() + self._session = None + + def _load_session(self, full_path=None): + """ Load the session statistics from a model's state file into the Analysis tab of the GUI + display window. + + If a model's log files cannot be found within the model folder then the session is cleared. + + Parameters + ---------- + full_path: str, optional + The path to the state file to load session information from. If this is ``None`` then + a file dialog is popped to enable the user to choose a state file. Default: ``None`` + """ + logger.debug("Loading session") + if full_path is None: + full_path = FileHandler("filename", "state").retfile + if not full_path: + return + self._clear_session() + logger.debug("state_file: '%s'", full_path) + model_dir, state_file = os.path.split(full_path) + logger.debug("model_dir: '%s'", model_dir) + model_name = self._get_model_name(model_dir, state_file) + if not model_name: + return + self._session = Session(model_dir=model_dir, model_name=model_name) + self._session.initialize_session(is_training=False) + msg = full_path + if len(msg) > 70: + msg = "...{}".format(msg[-70:]) + self._set_session_summary(msg) + + def _reset_session(self): + """ Reset currently training sessions. Clears the current session and loads in the latest + data. """ + logger.debug("Reset current training session") + self._clear_session() + session = get_config().session + if not session.initialized: + logger.debug("Training not running") + return + if session.logging_disabled: + logger.trace("Logging disabled. 
Not triggering analysis update") + return + msg = "Currently running training session" + self._session = session + # Reload the state file to get approx currently training iterations + self._session.load_state_file() + self._set_session_summary(msg) + + def _save_session(self): + """ Launch a file dialog pop-up to save the current analysis data to a CSV file. """ logger.debug("Saving session") - if not self.summary: + if not self._summary: logger.debug("No summary data loaded. Nothing to save") print("No summary data loaded. Nothing to save") return @@ -211,40 +281,42 @@ def save_session(self): return logger.debug("Saving to: '%s'", savefile) - fieldnames = sorted(key for key in self.summary[0].keys()) + fieldnames = sorted(key for key in self._summary[0].keys()) with savefile as outfile: csvout = csv.DictWriter(outfile, fieldnames) csvout.writeheader() - for row in self.summary: + for row in self._summary: csvout.writerow(row) -class Options(): - """ Options bar of Analysis tab """ +class _Options(): # pylint:disable=too-few-public-methods + """ Options buttons for the Analysis tab. 
+ + Parameters + ---------- + parent: :class:`Analysis` + The Analysis Display Tab that holds the options buttons + """ def __init__(self, parent): - logger.debug("Initializing: %s", self.__class__.__name__) - self.optsframe = parent.optsframe - self.parent = parent + logger.debug("Initializing: %s (parent: %s)", self.__class__.__name__, parent) + self._parent = parent + self._add_buttons() logger.debug("Initialized: %s", self.__class__.__name__) - def add_options(self): - """ Add the display tab options """ - self.add_buttons() - - def add_buttons(self): + def _add_buttons(self): """ Add the option buttons """ for btntype in ("clear", "save", "load"): logger.debug("Adding button: '%s'", btntype) - cmd = getattr(self.parent, "{}_session".format(btntype)) - btn = ttk.Button(self.optsframe, + cmd = getattr(self._parent, "_{}_session".format(btntype)) + btn = ttk.Button(self._parent.optsframe, image=get_images().icons[btntype], command=cmd) btn.pack(padx=2, side=tk.RIGHT) - hlp = self.set_help(btntype) + hlp = self._set_help(btntype) Tooltip(btn, text=hlp, wraplength=200) - @staticmethod - def set_help(btntype): + @classmethod + def _set_help(cls, btntype): """ Set the help text for option buttons """ logger.debug("Setting help") hlp = "" diff --git a/lib/gui/display_command.py b/lib/gui/display_command.py index b4a7477a08..08c347f336 100644 --- a/lib/gui/display_command.py +++ b/lib/gui/display_command.py @@ -13,7 +13,7 @@ from .custom_widgets import Tooltip from .stats import Calculations from .control_helper import set_slider_rounding -from .utils import FileHandler, get_config, get_images +from .utils import FileHandler, get_config, get_images, preview_trigger logger = logging.getLogger(__name__) # pylint: disable=invalid-name @@ -73,6 +73,23 @@ def __init__(self, *args, **kwargs): self.update_preview = get_config().tk_vars["updatepreview"] super().__init__(*args, **kwargs) + def add_options(self): + """ Add the additional options """ + self.add_option_refresh() + 
super().add_options() + + def add_option_refresh(self): + """ Add refresh button to refresh preview immediately """ + logger.debug("Adding refresh option") + btnrefresh = ttk.Button(self.optsframe, + image=get_images().icons["reload"], + command=preview_trigger().set) + btnrefresh.pack(padx=2, side=tk.RIGHT) + Tooltip(btnrefresh, + text="Preview updates at every model save. Click to refresh now.", + wraplength=200) + logger.debug("Added refresh option") + def display_item_set(self): """ Load the latest preview if available """ logger.trace("Loading latest preview") @@ -173,9 +190,9 @@ def save_preview(self, location): class GraphDisplay(DisplayOptionalPage): # pylint: disable=too-many-ancestors """ The Graph Tab of the Display section """ - def __init__(self, parent, tabname, helptext, waittime, command=None): + def __init__(self, parent, tab_name, helptext, waittime, command=None): self.trace_var = None - super().__init__(parent, tabname, helptext, waittime, command) + super().__init__(parent, tab_name, helptext, waittime, command) def add_options(self): """ Add the additional options """ @@ -232,7 +249,7 @@ def display_item_set(self): smooth_amount_var = get_config().tk_vars["smoothgraph"] if session.initialized and session.logging_disabled: logger.trace("Logs disabled. 
Hiding graph") - self.set_info("Graph is disabled as 'no-logs' or 'pingpong' has been selected") + self.set_info("Graph is disabled as 'no-logs' has been selected") self.display_item = None if self.trace_var is not None: smooth_amount_var.trace_vdelete("w", self.trace_var) @@ -250,20 +267,19 @@ def display_item_set(self): def display_item_process(self): """ Add a single graph to the graph window """ - logger.trace("Adding graph") + logger.debug("Adding graph") existing = list(self.subnotebook_get_titles_ids().keys()) - display_tabs = sorted(self.display_item.loss_keys) - if any(key.startswith("total") for key in display_tabs): - total_idx = [idx for idx, key in enumerate(display_tabs) if key.startswith("total")][0] - display_tabs.insert(0, display_tabs.pop(total_idx)) + loss_keys = [key for key in self.display_item.loss_keys if key != "total"] + display_tabs = sorted(set(key[:-1].rstrip("_") for key in loss_keys)) for loss_key in display_tabs: tabname = loss_key.replace("_", " ").title() if tabname in existing: continue + display_keys = [key for key in loss_keys if key.startswith(loss_key)] data = Calculations(session=get_config().session, display="loss", - loss_keys=[loss_key], + loss_keys=display_keys, selections=["raw", "smoothed"], smooth_amount=get_config().tk_vars["smoothgraph"].get()) self.add_child(tabname, data) diff --git a/lib/gui/display_graph.py b/lib/gui/display_graph.py index 6a2652c190..895b2b6792 100755 --- a/lib/gui/display_graph.py +++ b/lib/gui/display_graph.py @@ -14,8 +14,8 @@ from matplotlib import style # noqa from matplotlib.figure import Figure # noqa -from matplotlib.backends.backend_tkagg import (FigureCanvasTkAgg, - NavigationToolbar2Tk) # noqa +from matplotlib.backends.backend_tkagg import (FigureCanvasTkAgg, # noqa + NavigationToolbar2Tk) from .custom_widgets import Tooltip # noqa from .utils import get_config, get_images, LongRunningTask # noqa diff --git a/lib/gui/display_page.py b/lib/gui/display_page.py index 
d872e86a21..f8405c3cac 100644 --- a/lib/gui/display_page.py +++ b/lib/gui/display_page.py @@ -14,15 +14,15 @@ class DisplayPage(ttk.Frame): # pylint: disable=too-many-ancestors """ Parent frame holder for each tab. Defines uniform structure for each tab to inherit from """ - def __init__(self, parent, tabname, helptext): - logger.debug("Initializing %s: (tabname: '%s', helptext: %s)", - self.__class__.__name__, tabname, helptext) + def __init__(self, parent, tab_name, helptext): + logger.debug("Initializing %s: (tab_name: '%s', helptext: %s)", + self.__class__.__name__, tab_name, helptext) ttk.Frame.__init__(self, parent) - self.pack(fill=tk.BOTH, side=tk.TOP, anchor=tk.NW) + self._parent = parent self.runningtask = parent.runningtask self.helptext = helptext - self.tabname = tabname + self.tabname = tab_name self.vars = {"info": tk.StringVar()} self.add_optional_vars(self.set_vars()) @@ -33,9 +33,17 @@ def __init__(self, parent, tabname, helptext): self.add_frame_separator() self.set_mainframe_single_tab_style() + + self.pack(fill=tk.BOTH, side=tk.TOP, anchor=tk.NW) parent.add(self, text=self.tabname.title()) + logger.debug("Initialized %s", self.__class__.__name__,) + @property + def _tab_is_active(self): + """ bool: ``True`` if the tab currently has focus otherwise ``False`` """ + return self._parent.tab(self._parent.select(), "text").lower() == self.tabname.lower() + def add_optional_vars(self, varsdict): """ Add page specific variables """ if isinstance(varsdict, dict): @@ -48,6 +56,11 @@ def set_vars(): """ Override to return a dict of page specific variables """ return dict() + def on_tab_select(self): # pylint:disable=no-self-use + """ Override for specific actions when the current tab is selected """ + logger.debug("Returning as 'on_tab_select' not implemented for %s", + self.__class__.__name__) + def add_subnotebook(self): """ Add the main frame notebook """ logger.debug("Adding subnotebook") @@ -150,11 +163,12 @@ def subnotebook_page_from_id(self, 
tab_id): class DisplayOptionalPage(DisplayPage): # pylint: disable=too-many-ancestors """ Parent Context Sensitive Display Tab """ - def __init__(self, parent, tabname, helptext, waittime, command=None): + def __init__(self, parent, tab_name, helptext, waittime, command=None): logger.debug("%s: OptionalPage args: (waittime: %s, command: %s)", self.__class__.__name__, waittime, command) - DisplayPage.__init__(self, parent, tabname, helptext) + DisplayPage.__init__(self, parent, tab_name, helptext) + self._waittime = waittime self.command = command self.display_item = None @@ -163,7 +177,7 @@ def __init__(self, parent, tabname, helptext, waittime, command=None): parent.select(self) self.update_idletasks() - self.update_page(waittime) + self._update_page() @staticmethod def set_vars(): @@ -183,6 +197,14 @@ def set_vars(): logger.debug(tk_vars) return tk_vars + def on_tab_select(self): + """ Callback for when the optional tab is selected. + + Run the tab's update code when the tab is selected. 
+ """ + logger.debug("Callback received for '%s' tab", self.tabname) + self._update_page() + # INFO LABEL def set_info_text(self): """ Set waiting for display text """ @@ -213,7 +235,7 @@ def add_option_save(self): wraplength=200) def add_option_enable(self): - """ Add checkbutton to enable/disable page """ + """ Add check-button to enable/disable page """ logger.debug("Adding enable option") chkenable = ttk.Checkbutton(self.optsframe, variable=self.vars["enabled"], @@ -229,7 +251,7 @@ def save_items(self): raise NotImplementedError() def on_chkenable_change(self): - """ Update the display immediately on a checkbutton change """ + """ Update the display immediately on a check-button change """ logger.debug("Enabled checkbox changed") if self.vars["enabled"].get(): self.subnotebook_show() @@ -237,15 +259,15 @@ def on_chkenable_change(self): self.subnotebook_hide() self.set_info_text() - def update_page(self, waittime): + def _update_page(self): """ Update the latest preview item """ - if not self.runningtask.get(): + if not self.runningtask.get() or not self._tab_is_active: return if self.vars["enabled"].get(): logger.trace("Updating page") self.display_item_set() self.load_display() - self.after(waittime, lambda t=waittime: self.update_page(t)) + self.after(self._waittime, self._update_page) def display_item_set(self): """ Override for display specific loading """ @@ -253,9 +275,9 @@ def display_item_set(self): def load_display(self): """ Load the display """ - if not self.display_item: + if not self.display_item or not self._tab_is_active: return - logger.debug("Loading display") + logger.debug("Loading display for tab: %s", self.tabname) self.display_item_process() self.vars["ready"].set(True) self.set_info_text() diff --git a/lib/gui/menu.py b/lib/gui/menu.py index 61e336e9d5..218bb41d08 100644 --- a/lib/gui/menu.py +++ b/lib/gui/menu.py @@ -101,13 +101,13 @@ def build(self): self.add_command( label=label, underline=10, - command=lambda conf=(name, config), 
root=self.root: popup_config(conf, root)) + command=lambda n=name, c=config: popup_config(n, c)) self.add_separator() conf = get_config().user_config self.add_command( label="GUI Settings...", underline=10, - command=lambda conf=("GUI", conf), root=self.root: popup_config(conf, root)) + command=lambda n="GUI", c=conf: popup_config(n, c)) logger.debug("Built settings menu") @@ -546,7 +546,6 @@ def _settings_btns(self): # pylint: disable=cell-var-from-loop frame = ttk.Frame(self._btn_frame) frame.pack(side=tk.LEFT, anchor=tk.W, expand=False, padx=2) - root = get_config().root for name in _CONFIG_FILES: config = _CONFIGS[name] btntype = "settings_{}".format(name) @@ -555,7 +554,7 @@ def _settings_btns(self): btn = ttk.Button( frame, image=get_images().icons[btntype], - command=lambda conf=(name, config), root=root: popup_config(conf, root)) + command=lambda n=name, c=config: popup_config(n, c)) btn.pack(side=tk.LEFT, anchor=tk.W) hlp = "Configure {} settings...".format(name.title()) Tooltip(btn, text=hlp, wraplength=200) diff --git a/lib/gui/popup_configure.py b/lib/gui/popup_configure.py index 9a7dfd7aba..9cb2b01b72 100644 --- a/lib/gui/popup_configure.py +++ b/lib/gui/popup_configure.py @@ -1,5 +1,5 @@ #!/usr/bin python3 -""" Configure Plugins popup of the Faceswap GUI """ +""" The pop-up window of the Faceswap GUI for the setting of configuration options. """ from collections import OrderedDict from configparser import ConfigParser @@ -16,40 +16,62 @@ POPUP = dict() -def popup_config(config, root): - """ Close any open popup and open requested popup """ +def popup_config(name, configuration): + """ Open the settings for the requested configuration file and close any already active + pop-ups. + + Parameters + ---------- + name: str + The name of the configuration file. Used for the pop-up title bar. 
+ configuration: :class:`~lib.config.FaceswapConfig` + The configuration options for the requested pop-up window + """ + logger.debug("name: %s, configuration: %s", name, configuration) if POPUP: p_key = list(POPUP.keys())[0] logger.debug("Closing open popup: '%s'", p_key) POPUP[p_key].destroy() del POPUP[p_key] - window = ConfigurePlugins(config, root) - POPUP[config[0]] = window - - -class ConfigurePlugins(tk.Toplevel): - """ Pop up for detailed graph/stats for selected session """ - def __init__(self, config, root): - logger.debug("Initializing %s", self.__class__.__name__) + window = _ConfigurePlugins(name, configuration) + POPUP[name] = window + logger.debug("Current pop-up: %s", POPUP) + + +class _ConfigurePlugins(tk.Toplevel): + """ Pop-up window for the setting of Faceswap Configuration Options. + + Parameters + ---------- + name: str + The name of the configuration file. Used for the pop-up title bar. + configuration: :class:`~lib.config.FaceswapConfig` + The configuration options for the requested pop-up window + """ + def __init__(self, name, configuration): + logger.debug("Initializing %s: (name: %s, configuration: %s)", + self.__class__.__name__, name, configuration) super().__init__() - self._name, self.config = config - self.title("{} Plugins".format(self._name.title())) - self.tk.call('wm', 'iconphoto', self._w, get_images().icons["favicon"]) + self._name = name + self._config = configuration + self._root = get_config().root - self._root = root - self.set_geometry() + self._set_geometry() - self.page_frame = ttk.Frame(self) - self.page_frame.pack(fill=tk.BOTH, expand=True) + self._page_frame = ttk.Frame(self) + self._plugin_info = dict() - self.plugin_info = dict() - self.config_cpanel_dict = self.get_config() - self.build() + self._config_cpanel_dict = self._get_config() + self._build() self.update() + + self._page_frame.pack(fill=tk.BOTH, expand=True) + self.title("{} Plugins".format(self._name.title())) + self.tk.call('wm', 'iconphoto', self._w, 
get_images().icons["favicon"]) logger.debug("Initialized %s", self.__class__.__name__) - def set_geometry(self): - """ Set pop-up geometry """ + def _set_geometry(self): + """ Set the geometry of the pop-up window """ scaling_factor = get_config().scaling_factor pos_x = self._root.winfo_x() + 80 pos_y = self._root.winfo_y() + 80 @@ -58,26 +80,34 @@ def set_geometry(self): logger.debug("Pop up Geometry: %sx%s, %s+%s", width, height, pos_x, pos_y) self.geometry("{}x{}+{}+{}".format(width, height, pos_x, pos_y)) - def get_config(self): - """ Format config into a dict of ControlPanelOptions """ + def _get_config(self): + """ Format the configuration options stored in :attr:`_config` into a dict of + :class:`~lib.gui.control_helper.ControlPanelOption's for placement into option frames. + + Returns + ------- + dict + A dictionary of section names to :class:`~lib.gui.control_helper.ControlPanelOption` + objects + """ logger.debug("Formatting Config for GUI") conf = dict() - for section in self.config.config.sections(): - self.config.section = section + for section in self._config.config.sections(): + self._config.section = section category = section.split(".")[0] - options = self.config.defaults[section] + options = self._config.defaults[section] section = section.split(".")[-1] conf.setdefault(category, dict())[section] = OrderedDict() for key, val in options.items(): if key == "helptext": - self.plugin_info[section] = val + self._plugin_info[section] = val continue conf[category][section][key] = ControlPanelOption( title=key, dtype=val["type"], group=val["group"], default=val["default"], - initial_value=self.config.config_dict.get(key, val["default"]), + initial_value=self._config.config_dict.get(key, val["default"]), choices=val["choices"], is_radio=val["gui_radio"], rounding=val["rounding"], @@ -86,77 +116,96 @@ def get_config(self): logger.debug("Formatted Config for GUI: %s", conf) return conf - def build(self): - """ Build the config popup """ + def _build(self): + 
""" Build the configuration pop-up window""" logger.debug("Building plugin config popup") - container = ttk.Notebook(self.page_frame) - container.pack(fill=tk.BOTH, expand=True) - categories = sorted(list(self.config_cpanel_dict.keys())) + container = ttk.Notebook(self._page_frame) + categories = sorted(list(self._config_cpanel_dict.keys())) if "global" in categories: # Move global to first item categories.insert(0, categories.pop(categories.index("global"))) for category in categories: - page = self.build_page(container, category) + page = self._build_page(container, category) container.add(page, text=category.title()) - self.add_frame_separator() - self.add_actions() + self._add_frame_separator() + self._add_actions() + + container.pack(fill=tk.BOTH, expand=True) logger.debug("Built plugin config popup") - def build_page(self, container, category): - """ Build a plugin config page """ + def _build_page(self, container, category): + """ Build a single tab within the plugin's configuration pop-up. 
+ + Parameters + ---------- + container: :class:`ttk.Notebook` + The notebook to place the category options into + category: str + The name of the categories to build options for + + Returns + ------- + :class:'~lib.gui.control_helper.ControlPanel` or :class:`ttk.Notebook` + The control panel options in a Control Panel frame (for single plugin configurations) + or a Notebook containing tabs with Control Panel frames (for multi-plugin + configurations) + """ logger.debug("Building plugin config page: '%s'", category) - plugins = sorted(list(key for key in self.config_cpanel_dict[category].keys())) + plugins = sorted(list(key for key in self._config_cpanel_dict[category].keys())) panel_kwargs = dict(columns=2, max_columns=2, option_columns=2, blank_nones=False) if any(plugin != category for plugin in plugins): page = ttk.Notebook(container) - page.pack(side=tk.TOP, fill=tk.BOTH, expand=True) for plugin in plugins: - cp_options = list(self.config_cpanel_dict[category][plugin].values()) + cp_options = list(self._config_cpanel_dict[category][plugin].values()) frame = ControlPanel(page, cp_options, - header_text=self.plugin_info[plugin], + header_text=self._plugin_info[plugin], **panel_kwargs) title = plugin[plugin.rfind(".") + 1:] title = title.replace("_", " ").title() page.add(frame, text=title) + page.pack(side=tk.TOP, fill=tk.BOTH, expand=True) else: - cp_options = list(self.config_cpanel_dict[category][plugins[0]].values()) + cp_options = list(self._config_cpanel_dict[category][plugins[0]].values()) page = ControlPanel(container, cp_options, - header_text=self.plugin_info[plugins[0]], + header_text=self._plugin_info[plugins[0]], **panel_kwargs) logger.debug("Built plugin config page: '%s'", category) return page - def add_frame_separator(self): - """ Add a separator between top and bottom frames """ + def _add_frame_separator(self): + """ Add a separator between the configuration options and the action buttons. 
""" logger.debug("Add frame seperator") - sep = ttk.Frame(self.page_frame, height=2, relief=tk.RIDGE) + sep = ttk.Frame(self._page_frame, height=2, relief=tk.RIDGE) sep.pack(fill=tk.X, pady=(5, 0), side=tk.BOTTOM) logger.debug("Added frame seperator") - def add_actions(self): - """ Add Action buttons """ + def _add_actions(self): + """ Add Action buttons to the bottom of the pop-up window. """ logger.debug("Add action buttons") - frame = ttk.Frame(self.page_frame) - frame.pack(fill=tk.BOTH, padx=5, pady=5, side=tk.BOTTOM) + frame = ttk.Frame(self._page_frame) btn_cls = ttk.Button(frame, text="Cancel", width=10, command=self.destroy) - btn_cls.pack(padx=2, side=tk.RIGHT) + btn_ok = ttk.Button(frame, text="OK", width=10, command=self._save) + btn_rst = ttk.Button(frame, text="Reset", width=10, command=self._reset) + Tooltip(btn_cls, text="Close without saving", wraplength=720) - btn_ok = ttk.Button(frame, text="OK", width=10, command=self.save_config) - btn_ok.pack(padx=2, side=tk.RIGHT) Tooltip(btn_ok, text="Close and save config", wraplength=720) - btn_rst = ttk.Button(frame, text="Reset", width=10, command=self.reset) - btn_rst.pack(padx=2, side=tk.RIGHT) Tooltip(btn_rst, text="Reset all plugins to default values", wraplength=720) + + frame.pack(fill=tk.BOTH, padx=5, pady=5, side=tk.BOTTOM) + btn_cls.pack(padx=2, side=tk.RIGHT) + btn_ok.pack(padx=2, side=tk.RIGHT) + btn_rst.pack(padx=2, side=tk.RIGHT) + logger.debug("Added action buttons") - def reset(self): - """ Reset all config options to default """ + def _reset(self): + """ Reset all configuration options to their default values. 
""" logger.debug("Resetting config") - for section, items in self.config.defaults.items(): + for section, items in self._config.defaults.items(): logger.debug("Resetting section: '%s'", section) lookup = [section.split(".")[0], section.split(".")[-1]] for item, def_opt in items.items(): @@ -164,18 +213,18 @@ def reset(self): continue default = def_opt["default"] logger.debug("Resetting: '%s' to '%s'", item, default) - self.config_cpanel_dict[lookup[0]][lookup[1]][item].set(default) + self._config_cpanel_dict[lookup[0]][lookup[1]][item].set(default) - def save_config(self): - """ Save the config file """ + def _save(self): + """ Save the configuration file to disk. """ logger.debug("Saving config") options = {".".join((key, sect)) if sect != key else key: opts - for key, value in self.config_cpanel_dict.items() + for key, value in self._config_cpanel_dict.items() for sect, opts in value.items()} new_config = ConfigParser(allow_no_value=True) - for section, items in self.config.defaults.items(): + for section, items in self._config.defaults.items(): logger.debug("Adding section: '%s')", section) - self.config.insert_config_section(section, items["helptext"], config=new_config) + self._config.insert_config_section(section, items["helptext"], config=new_config) for item, def_opt in items.items(): if item == "helptext": continue @@ -183,12 +232,12 @@ def save_config(self): logger.debug("Adding option: (item: '%s', default: %s new: '%s'", item, def_opt, new_opt) helptext = def_opt["helptext"] - helptext = self.config.format_help(helptext, is_section=False) + helptext = self._config.format_help(helptext, is_section=False) new_config.set(section, helptext) new_config.set(section, item, str(new_opt)) - self.config.config = new_config - self.config.save_config() - logger.info("Saved config: '%s'", self.config.configfile) + self._config.config = new_config + self._config.save_config() + logger.info("Saved config: '%s'", self._config.configfile) self.destroy() running_task = 
get_config().tk_vars["runningtask"].get() diff --git a/lib/gui/stats.py b/lib/gui/stats.py index 307ba485bb..960368da46 100644 --- a/lib/gui/stats.py +++ b/lib/gui/stats.py @@ -11,6 +11,7 @@ import numpy as np import tensorflow as tf from tensorflow.python import errors_impl as tf_errors # pylint:disable=no-name-in-module +from tensorflow.core.util import event_pb2 from lib.serializer import get_serializer logger = logging.getLogger(__name__) # pylint: disable=invalid-name @@ -29,11 +30,18 @@ def convert_time(timestamp): class TensorBoardLogs(): """ Parse and return data from TensorBoard logs """ def __init__(self, logs_folder): + tf.config.set_visible_devices([], "GPU") # Don't use the GPU for stats self.folder_base = logs_folder - self.log_filenames = self.set_log_filenames() + self.log_filenames = self._get_log_filenames() - def set_log_filenames(self): - """ Set the TensorBoard log filenames for all existing sessions """ + def _get_log_filenames(self): + """ Get the TensorBoard log filenames for all existing sessions. + + Returns + ------- + dict + The full path of each log file for each training session that has been run + """ logger.debug("Loading log filenames. 
base_dir: '%s'", self.folder_base) log_filenames = dict() for dirpath, _, filenames in os.walk(self.folder_base): @@ -41,64 +49,89 @@ def set_log_filenames(self): continue logfiles = [filename for filename in filenames if filename.startswith("events.out.tfevents")] - # Take the last logfile, in case of previous crash + # Take the last log file, in case of previous crash logfile = os.path.join(dirpath, sorted(logfiles)[-1]) - side, session = os.path.split(dirpath) - side = os.path.split(side)[1] - session = int(session[session.rfind("_") + 1:]) - log_filenames.setdefault(session, dict())[side] = logfile + session = os.path.split(os.path.split(dirpath)[0])[1] + session = session[session.rfind("_") + 1:] + if not session.isdigit(): + logger.warning("Unable to load session data for model") + return log_filenames + session = int(session) + log_filenames[session] = logfile logger.debug("logfiles: %s", log_filenames) return log_filenames - def get_loss(self, side=None, session=None): - """ Read the loss from the TensorBoard logs - Specify a side or a session or leave at None for all + def get_loss(self, session=None): + """ Read the loss from the TensorBoard event logs + + Parameters + ---------- + session: int, optional + The Session ID to return the loss for. Set to ``None`` to return all session + losses. 
Default ``None`` + + Returns + ------- + dict + A list of loss values for each step for the requested session """ - logger.debug("Getting loss: (side: %s, session: %s)", side, session) + logger.debug("Getting loss: (session: %s)", session) all_loss = dict() - for sess, sides in self.log_filenames.items(): + for sess, logfile in self.log_filenames.items(): if session is not None and sess != session: logger.debug("Skipping session: %s", sess) continue loss = dict() - for sde, logfile in sides.items(): - if side is not None and sde != side: - logger.debug("Skipping side: %s", sde) + events = [event_pb2.Event.FromString(record.numpy()) + for record in tf.data.TFRecordDataset(logfile)] + for event in events: + if not event.summary.value or not event.summary.value[0].tag.startswith("batch_"): continue - for event in tf.train.summary_iterator(logfile): - for summary in event.summary.value: - if "loss" not in summary.tag: - continue - tag = summary.tag.replace("batch_", "") - loss.setdefault(tag, - dict()).setdefault(sde, - list()).append(summary.simple_value) + summary = event.summary.value[0] + tag = summary.tag.replace("batch_", "") + loss.setdefault(tag, []).append(summary.simple_value) all_loss[sess] = loss + logger.debug(all_loss) return all_loss def get_timestamps(self, session=None): - """ Read the timestamps from the TensorBoard logs - Specify a session or leave at None for all - NB: For all intents and purposes timestamps are the same for - both sides, so just read from one side """ + """ Read the timestamps from the TensorBoard logs. + + As loss timestamps are slightly different for each loss, we collect the timestamp from the + `batch_total` key. + + Parameters + ---------- + session: int, optional + The Session ID to return the timestamps for. Set to ``None`` to return all session + timestamps. 
Default ``None`` + + Returns + ------- + dict + The timestamps for each event for the requested session + """ + logger.debug("Getting timestamps") all_timestamps = dict() - for sess, sides in self.log_filenames.items(): + for sess, logfile in self.log_filenames.items(): if session is not None and sess != session: logger.debug("Skipping sessions: %s", sess) continue try: - for logfile in sides.values(): - timestamps = [event.wall_time - for event in tf.train.summary_iterator(logfile) - if event.summary.value] - logger.debug("Total timestamps for session %s: %s", sess, len(timestamps)) - all_timestamps[sess] = timestamps - break # break after first file read + events = [event_pb2.Event.FromString(record.numpy()) + for record in tf.data.TFRecordDataset(logfile)] + timestamps = [event.wall_time + for event in events + if event.summary.value + and event.summary.value[0].tag == "batch_total"] + logger.debug("Total timestamps for session %s: %s", sess, len(timestamps)) + all_timestamps[sess] = timestamps except tf_errors.DataLossError as err: logger.warning("The logs for Session %s are corrupted and cannot be displayed. " "The totals do not include this session. 
Original error message: " "'%s'", sess, str(err)) + logger.debug(all_timestamps) return all_timestamps @@ -133,7 +166,7 @@ def config(self): @property def full_summary(self): - """ Retun all sessions summary data""" + """ Return all sessions summary data""" return self.summary.compile_stats() @property @@ -144,23 +177,22 @@ def iterations(self): @property def logging_disabled(self): """ Return whether logging is disabled for this session """ - return self.session["no_logs"] or self.session["pingpong"] + return self.session["no_logs"] @property def loss(self): - """ Return loss from logs for current session """ + """ dict: The loss for the current session id for each loss key """ loss_dict = self.tb_logs.get_loss(session=self.session_id)[self.session_id] return loss_dict @property def loss_keys(self): - """ Return list of unique session loss keys """ + """ list: The loss keys for the current session, or loss keys for all sessions. """ if self.session_id is None: - loss_keys = self.total_loss_keys + retval = self._total_loss_keys else: - loss_keys = set(loss_key for side_keys in self.session["loss_names"].values() - for loss_key in side_keys) - return list(loss_keys) + retval = self.session["loss_names"] + return retval @property def lowest_loss(self): @@ -196,27 +228,25 @@ def total_iterations(self): @property def total_loss(self): - """ Return collated loss for all session """ + """ dict: The collated loss for all sessions for each loss key """ loss_dict = dict() all_loss = self.tb_logs.get_loss() - for key in sorted(int(idx) for idx in all_loss): - for loss_key, side_loss in all_loss[key].items(): - for side, loss in side_loss.items(): - loss_dict.setdefault(loss_key, dict()).setdefault(side, list()).extend(loss) + for key in sorted(all_loss): + for loss_key, loss in all_loss[key].items(): + loss_dict.setdefault(loss_key, []).extend(loss) return loss_dict @property - def total_loss_keys(self): - """ Return list of unique session loss keys across all sessions """ + 
def _total_loss_keys(self): + """ list: The loss keys for all sessions. """ loss_keys = set(loss_key for session in self.state["sessions"].values() - for loss_keys in session["loss_names"].values() - for loss_key in loss_keys) + for loss_key in session["loss_names"]) return list(loss_keys) @property def total_timestamps(self): - """ Return timestamps from logs seperated per session for all sessions """ + """ Return timestamps from logs separated per session for all sessions """ return self.tb_logs.get_timestamps() def initialize_session(self, is_training=False, session_id=None): @@ -280,13 +310,14 @@ def sessions_stats(self): iterations = self.session.get_iterations_for_session(sess_idx) elapsed = ts_data["end_time"] - ts_data["start_time"] batchsize = self.session.total_batchsize.get(sess_idx, 0) - compiled.append({"session": sess_idx, - "start": ts_data["start_time"], - "end": ts_data["end_time"], - "elapsed": elapsed, - "rate": (batchsize * iterations) / elapsed if elapsed != 0 else 0, - "batch": batchsize, - "iterations": iterations}) + compiled.append( + {"session": sess_idx, + "start": ts_data["start_time"], + "end": ts_data["end_time"], + "elapsed": elapsed, + "rate": ((batchsize * 2) * iterations) / elapsed if elapsed != 0 else 0, + "batch": batchsize, + "iterations": iterations}) compiled = sorted(compiled, key=lambda k: k["session"]) return compiled @@ -294,7 +325,7 @@ def compile_stats(self): """ Compile sessions stats with totals, format and return """ logger.debug("Compiling sessions summary data") compiled_stats = self.sessions_stats - if compiled_stats is None: + if not compiled_stats: return compiled_stats logger.debug("sessions_stats: %s", compiled_stats) total_stats = self.total_stats(compiled_stats) @@ -318,7 +349,7 @@ def total_stats(sessions_stats): if idx == total_summaries - 1: endtime = summary["end"] elapsed += summary["elapsed"] - examples += (summary["batch"] * summary["iterations"]) + examples += ((summary["batch"] * 2) * 
summary["iterations"]) batchset.add(summary["batch"]) iterations += summary["iterations"] batch = ",".join(str(bs) for bs in batchset) @@ -376,28 +407,32 @@ def refresh(self): logger.warning("Session data is not initialized. Not refreshing") return None self.iterations = 0 - self.stats = self.get_raw() + self.stats = self._get_raw() self.get_calculations() self.remove_raw() logger.debug("Refreshed") return self - def get_raw(self): - """ Add raw data to stats dict """ - logger.debug("Getting Raw Data") + def _get_raw(self): + """ Obtain the raw loss values. + Returns + ------- + dict + The loss name as key with list of loss values as value + """ + logger.debug("Getting Raw Data") raw = dict() iterations = set() if self.display.lower() == "loss": loss_dict = self.session.total_loss if self.is_totals else self.session.loss - for loss_name, side_loss in loss_dict.items(): + for loss_name, loss in loss_dict.items(): if loss_name not in self.loss_keys: continue - for side, loss in side_loss.items(): - if self.args["flatten_outliers"]: - loss = self.flatten_outliers(loss) - iterations.add(len(loss)) - raw["raw_{}_{}".format(loss_name, side)] = loss + if self.args["flatten_outliers"]: + loss = self.flatten_outliers(loss) + iterations.add(len(loss)) + raw["raw_{}".format(loss_name)] = loss self.iterations = 0 if not iterations else min(iterations) if len(iterations) > 1: @@ -407,7 +442,7 @@ def get_raw(self): else: raw = {lossname: loss[:self.iterations] for lossname, loss in raw.items()} - else: # Rate calulation + else: # Rate calculation data = self.calc_rate_total() if self.is_totals else self.calc_rate() if self.args["flatten_outliers"]: data = self.flatten_outliers(data) @@ -433,7 +468,7 @@ def calc_rate(self): batchsize = self.session.batchsize timestamps = self.session.timestamps iterations = range(len(timestamps) - 1) - rate = [batchsize / (timestamps[i + 1] - timestamps[i]) for i in iterations] + rate = [(batchsize * 2) / (timestamps[i + 1] - timestamps[i]) for i 
in iterations] logger.debug("Calculated rate: Item_count: %s", len(rate)) return rate @@ -450,7 +485,8 @@ def calc_rate_total(self): batchsize = batchsizes[sess_id] timestamps = total_timestamps[sess_id] iterations = range(len(timestamps) - 1) - rate.extend([batchsize / (timestamps[i + 1] - timestamps[i]) for i in iterations]) + rate.extend([(batchsize * 2) / (timestamps[i + 1] - timestamps[i]) + for i in iterations]) logger.debug("Calculated totals rate: Item_count: %s", len(rate)) return rate @@ -508,7 +544,7 @@ def calc_avg(self, data): def calc_smoothed(self, data): """ Smooth the data """ - last = data[0] # First value in the plot (first timestep) + last = data[0] # First value in the plot (first time step) weight = self.args["smooth_amount"] smoothed = list() for point in data: diff --git a/lib/gui/utils.py b/lib/gui/utils.py index 31adfadeae..a735880d18 100644 --- a/lib/gui/utils.py +++ b/lib/gui/utils.py @@ -17,6 +17,7 @@ logger = logging.getLogger(__name__) # pylint: disable=invalid-name _CONFIG = None _IMAGES = None +_PREVIEW_TRIGGER = None PATHCACHE = os.path.join(os.path.realpath(os.path.dirname(sys.argv[0])), "lib", "gui", ".cache") @@ -1155,3 +1156,44 @@ def get_result(self): logger.debug("Got result from thread") self._config.set_cursor_default(widget=self._widget) return retval + + +class PreviewTrigger(): + """ Trigger to indicate to underlying Faceswap process that the preview image should + be updated. + + Writes a file to the cache folder that is picked up by the main process. 
+ """ + def __init__(self): + logger.debug("Initializing: %s", self.__class__.__name__) + self._trigger_file = os.path.join(PATHCACHE, ".preview_trigger") + logger.debug("Initialized: %s (trigger_file: %s)", + self.__class__.__name__, self._trigger_file) + + def set(self): + """ Place the trigger file into the cache folder """ + if not os.path.isfile(self._trigger_file): + with open(self._trigger_file, "w"): + pass + logger.debug("Set preview update trigger: %s", self._trigger_file) + + def clear(self): + """ Remove the trigger file from the cache folder """ + if os.path.isfile(self._trigger_file): + os.remove(self._trigger_file) + logger.debug("Removed preview update trigger: %s", self._trigger_file) + + +def preview_trigger(): + """ Set the global preview trigger if it has not always been set and return. + + Returns + ------- + :class:`PreviewTrigger` + The trigger to indicate to the main faceswap process that it should perform a training + preview update + """ + global _PREVIEW_TRIGGER # pylint:disable=global-statement + if _PREVIEW_TRIGGER is None: + _PREVIEW_TRIGGER = PreviewTrigger() + return _PREVIEW_TRIGGER diff --git a/lib/gui/wrapper.py b/lib/gui/wrapper.py index c31b84344f..38b3ee9ef1 100644 --- a/lib/gui/wrapper.py +++ b/lib/gui/wrapper.py @@ -11,7 +11,7 @@ import psutil -from .utils import get_config, get_images, LongRunningTask +from .utils import get_config, get_images, LongRunningTask, preview_trigger if os.name == "nt": import win32console # pylint: disable=import-error @@ -131,6 +131,7 @@ def terminate(self, message): self.tk_vars["display"].set(None) get_images().delete_preview() get_config().session.__init__() + preview_trigger().clear() self.command = None logger.debug("Terminated Faceswap processes") print("Process exited.") @@ -188,19 +189,21 @@ def read_stdout(self): (self.command == "effmpeg" and self.capture_ffmpeg(output)) or (self.command not in ("train", "effmpeg") and self.capture_tqdm(output))): continue - if (self.command == "train" 
and - self.wrapper.tk_vars["istraining"].get() and - "[saved models]" in output.strip().lower()): - logger.debug("Trigger GUI Training update") - logger.trace("tk_vars: %s", {itm: var.get() - for itm, var in self.wrapper.tk_vars.items()}) - if not self.config.session.initialized: - # Don't initialize session until after the first save as state - # file must exist first - logger.debug("Initializing curret training session") - self.config.session.initialize_session(is_training=True) - self.wrapper.tk_vars["updatepreview"].set(True) - self.wrapper.tk_vars["refreshgraph"].set(True) + if self.command == "train" and self.wrapper.tk_vars["istraining"].get(): + if "[saved models]" in output.strip().lower(): + logger.debug("Trigger GUI Training update") + logger.trace("tk_vars: %s", {itm: var.get() + for itm, var in self.wrapper.tk_vars.items()}) + if not self.config.session.initialized: + # Don't initialize session until after the first save as state + # file must exist first + logger.debug("Initializing curret training session") + self.config.session.initialize_session(is_training=True) + self.wrapper.tk_vars["updatepreview"].set(True) + self.wrapper.tk_vars["refreshgraph"].set(True) + if "[preview updated]" in output.strip().lower(): + self.wrapper.tk_vars["updatepreview"].set(True) + continue print(output.strip()) returncode = self.process.poll() message = self.set_final_status(returncode) diff --git a/lib/logger.py b/lib/logger.py index 66e77550f5..35edb9c259 100644 --- a/lib/logger.py +++ b/lib/logger.py @@ -1,5 +1,5 @@ #!/usr/bin/python -""" Logging Setup """ +""" Logging Functions for Faceswap. """ import collections import logging from logging.handlers import RotatingFileHandler @@ -12,7 +12,7 @@ class FaceswapLogger(logging.Logger): - """ Create custom logger with custom levels """ + """ A standard :class:`logging.logger` with additional "verbose" and "trace" levels added. 
""" def __init__(self, name): for new_level in (("VERBOSE", 15), ("TRACE", 5)): level_name, level_num = new_level @@ -23,26 +23,61 @@ def __init__(self, name): super().__init__(name) def verbose(self, msg, *args, **kwargs): - """ - Log 'msg % args' with severity 'VERBOSE'. + # pylint:disable=wrong-spelling-in-docstring + """ Create a log message at severity level 15. + + Parameters + ---------- + msg: str + The log message to be recorded at Verbose level + args: tuple + Standard logging arguments + kwargs: dict + Standard logging key word arguments """ if self.isEnabledFor(15): self._log(15, msg, args, **kwargs) def trace(self, msg, *args, **kwargs): - """ - Log 'msg % args' with severity 'VERBOSE'. + # pylint:disable=wrong-spelling-in-docstring + """ Create a log message at severity level 5. + + Parameters + ---------- + msg: str + The log message to be recorded at Trace level + args: tuple + Standard logging arguments + kwargs: dict + Standard logging key word arguments """ if self.isEnabledFor(5): self._log(5, msg, args, **kwargs) class FaceswapFormatter(logging.Formatter): - """ Override formatter to strip newlines the final message """ + """ Overrides the standard :class:`logging.Formatter`. + + Strip newlines from incoming log messages. + + Rewrites some upstream warning messages to debug level to avoid spamming the console. + """ def format(self, record): + """ Strip new lines from log records and rewrite certain warning messages to debug level. + + Parameters + ---------- + record : :class:`logging.LogRecord` + The incoming log record to be formatted for entry into the logger. 
+ + Returns + ------- + str + The formatted log message + """ record.message = record.getMessage() - record = self.rewrite_tf_deprecation(record) + record = self._rewrite_warnings(record) # strip newlines if "\n" in record.message or "\r" in record.message: record.message = record.message.replace("\n", "\\n").replace("\r", "\\r") @@ -65,62 +100,132 @@ def format(self, record): msg = msg + self.formatStack(record.stack_info) return msg - @staticmethod - def rewrite_tf_deprecation(record): - """ Change TF deprecation messages from WARNING to DEBUG """ + @classmethod + def _rewrite_warnings(cls, record): + """ Change certain warning messages from WARNING to DEBUG to avoid passing non-important + information to output. + + Parameters + ---------- + record: :class:`logging.LogRecord` + The log record to check for rewriting + """ if record.levelno == 30 and (record.funcName == "_tfmw_add_deprecation_warning" or - record.module in("deprecation", "deprecation_wrapper")): + record.module in ("deprecation", "deprecation_wrapper")): record.levelno = 10 record.levelname = "DEBUG" return record class RollingBuffer(collections.deque): - """File-like that keeps a certain number of lines of text in memory.""" + """File-like that keeps a certain number of lines of text in memory for writing out to the + crash log. """ + def write(self, buffer): - """ Write line to buffer """ + """ Splits lines from the incoming buffer and writes them out to the rolling buffer. + + Parameters + ---------- + buffer: str + The log messages to write to the rolling buffer + """ for line in buffer.rstrip().splitlines(): self.append(line + "\n") class TqdmHandler(logging.StreamHandler): - """ Use TQDM Write for outputting to console """ + """ Overrides :class:`logging.StreamHandler` to use :func:`tqdm.tqdm.write` rather than writing + to :func:`sys.stderr` so that log messages do not mess up tqdm progress bars. 
""" + def emit(self, record): + """ Format the incoming message and pass to :func:`tqdm.tqdm.write`. + + Parameters + ---------- + record : :class:`logging.LogRecord` + The incoming log record to be formatted for entry into the logger. + """ msg = self.format(record) tqdm.write(msg) -def set_root_logger(loglevel=logging.INFO): - """ Setup the root logger. """ +def _set_root_logger(loglevel=logging.INFO): + """ Setup the root logger. + + Parameters + ---------- + loglevel: int, optional + The log level to set the root logger to. Default :attr:`logging.INFO` + + Returns + ------- + :class:`logging.Logger` + The root logger for Faceswap + """ rootlogger = logging.getLogger() rootlogger.setLevel(loglevel) return rootlogger -def log_setup(loglevel, logfile, command, is_gui=False): - """ initial log set up. """ +def log_setup(loglevel, log_file, command, is_gui=False): + """ Set up logging for Faceswap. + + Sets up the root logger, the formatting for the crash logger and the file logger, and sets up + the crash, file and stream log handlers. + + Parameters + ---------- + loglevel: str + The requested log level that Faceswap should be run at. + log_file: str + The location of the log file to write Faceswap's log to + command: str + The Faceswap command that is being run. Used to dictate whether the log file should + have "_gui" appended to the filename or not. + is_gui: bool, optional + Whether Faceswap is running in the GUI or not. Dictates where the stream handler should + output messages to. 
Default: ``False`` + """ numeric_loglevel = get_loglevel(loglevel) root_loglevel = min(logging.DEBUG, numeric_loglevel) - rootlogger = set_root_logger(loglevel=root_loglevel) + rootlogger = _set_root_logger(loglevel=root_loglevel) log_format = FaceswapFormatter("%(asctime)s %(processName)-15s %(threadName)-15s " "%(module)-15s %(funcName)-25s %(levelname)-8s %(message)s", datefmt="%m/%d/%Y %H:%M:%S") - f_handler = file_handler(numeric_loglevel, logfile, log_format, command) - s_handler = stream_handler(numeric_loglevel, is_gui) - c_handler = crash_handler(log_format) + f_handler = _file_handler(numeric_loglevel, log_file, log_format, command) + s_handler = _stream_handler(numeric_loglevel, is_gui) + c_handler = _crash_handler(log_format) rootlogger.addHandler(f_handler) rootlogger.addHandler(s_handler) rootlogger.addHandler(c_handler) logging.info("Log level set to: %s", loglevel.upper()) -def file_handler(loglevel, logfile, log_format, command): - """ Add a logging rotating file handler """ - if logfile is not None: - filename = logfile +def _file_handler(loglevel, log_file, log_format, command): + """ Add a rotating file handler for the current Faceswap session. 1 backup is always kept. + + Parameters + ---------- + loglevel: str + The requested log level that messages should be logged at. + log_file: str + The location of the log file to write Faceswap's log to + log_format: :class:`FaceswapFormatter: + The formatting to store log messages as + command: str + The Faceswap command that is being run. Used to dictate whether the log file should + have "_gui" appended to the filename or not. 
+ + Returns + ------- + :class:`logging.RotatingFileHandler` + The logging file handler + """ + if log_file is not None: + filename = log_file else: filename = os.path.join(os.path.dirname(os.path.realpath(sys.argv[0])), "faceswap") - # Windows has issues sharing the log file with subprocesses, so log GUI separately + # Windows has issues sharing the log file with sub-processes, so log GUI separately filename += "_gui.log" if command == "gui" else ".log" should_rotate = os.path.isfile(filename) @@ -132,8 +237,23 @@ def file_handler(loglevel, logfile, log_format, command): return log_file -def stream_handler(loglevel, is_gui): - """ Add a logging cli handler """ +def _stream_handler(loglevel, is_gui): + """ Add a stream handler for the current Faceswap session. The stream handler will only ever + output at a maximum of VERBOSE level to avoid spamming the console. + + Parameters + ---------- + loglevel: str + The requested log level that messages should be logged at. + is_gui: bool, optional + Whether Faceswap is running in the GUI or not. Dictates where the stream handler should + output messages to. + + Returns + ------- + :class:`TqdmHandler` or :class:`logging.StreamHandler` + The stream handler to use + """ # Don't set stdout to lower than verbose loglevel = max(loglevel, 15) log_format = FaceswapFormatter("%(asctime)s %(levelname)-8s %(message)s", @@ -150,30 +270,59 @@ def stream_handler(loglevel, is_gui): return log_console -def crash_handler(log_format): - """ Add a handler that sores the last 100 debug lines to 'debug_buffer' - for use in crash reports """ - log_crash = logging.StreamHandler(debug_buffer) +def _crash_handler(log_format): + """ Add a handler that stores the last 100 debug lines to :attr:'_debug_buffer' for use in + crash reports. 
+ + Parameters + ---------- + log_format: :class:`FaceswapFormatter: + The formatting to store log messages as + + Returns + ------- + :class:`logging.StreamHandler` + The crash log handler + """ + log_crash = logging.StreamHandler(_debug_buffer) log_crash.setFormatter(log_format) log_crash.setLevel(logging.DEBUG) return log_crash def get_loglevel(loglevel): - """ Check valid log level supplied and return numeric log level """ + """ Check whether a valid log level has been supplied, and return the numeric log level that + corresponds to the given string level. + + Parameters + ---------- + loglevel: str + The loglevel that has been requested + + Returns + ------- + int + The numeric representation of the given loglevel + """ numeric_level = getattr(logging, loglevel.upper(), None) if not isinstance(numeric_level, int): raise ValueError("Invalid log level: %s" % loglevel) - return numeric_level def crash_log(): - """ Write debug_buffer to a crash log on crash """ + """ On a crash, write out the contents of :func:`_debug_buffer` containing the last 100 lines + of debug messages to a crash report in the root Faceswap folder. 
+ + Returns + ------- + str + The filename of the file that contains the crash report + """ original_traceback = traceback.format_exc() path = os.path.dirname(os.path.realpath(sys.argv[0])) filename = os.path.join(path, datetime.now().strftime("crash_report.%Y.%m.%d.%H%M%S%f.log")) - freeze_log = list(debug_buffer) + freeze_log = list(_debug_buffer) try: from lib.sysinfo import sysinfo # pylint:disable=import-outside-toplevel except Exception: # pylint:disable=broad-except @@ -186,20 +335,21 @@ def crash_log(): return filename -old_factory = logging.getLogRecordFactory() # pylint: disable=invalid-name +_old_factory = logging.getLogRecordFactory() -def faceswap_logrecord(*args, **kwargs): - """ Add a flag to logging.LogRecord to not strip formatting from particular records """ - record = old_factory(*args, **kwargs) +def _faceswap_logrecord(*args, **kwargs): + """ Add a flag to :class:`logging.LogRecord` to not strip formatting from particular + records. """ + record = _old_factory(*args, **kwargs) record.strip_spaces = True return record -logging.setLogRecordFactory(faceswap_logrecord) +logging.setLogRecordFactory(_faceswap_logrecord) # Set logger class to custom logger logging.setLoggerClass(FaceswapLogger) # Stores the last 100 debug messages -debug_buffer = RollingBuffer(maxlen=100) # pylint: disable=invalid-name +_debug_buffer = RollingBuffer(maxlen=100) diff --git a/lib/model/__init__.py b/lib/model/__init__.py index e69de29bb2..ef4d5036c8 100644 --- a/lib/model/__init__.py +++ b/lib/model/__init__.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python3 +""" Conditional imports depending on whether the AMD version is installed or not """ + +from lib.utils import get_backend + +if get_backend() == "amd": + from . import losses_plaid as losses +else: + from . 
import losses_tf as losses diff --git a/lib/model/backup_restore.py b/lib/model/backup_restore.py index 4a3261c279..6026228663 100644 --- a/lib/model/backup_restore.py +++ b/lib/model/backup_restore.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -""" Functions for backing up, restoring and snapshotting models """ +""" Functions for backing up, restoring and creating model snapshots. """ import logging import os @@ -14,8 +14,16 @@ class Backup(): - """ Holds information about model location and functions for backing up - Restoring and Snapshotting models """ + """ Performs the back up of models at each save iteration, and the restoring of models from + their back up location. + + Parameters + ---------- + model_dir: str + The folder that contains the model to be backed up + model_name: str + The name of the model that is to be backed up + """ def __init__(self, model_dir, model_name): logger.debug("Initializing %s: (model_dir: '%s', model_name: '%s')", self.__class__.__name__, model_dir, model_name) @@ -23,8 +31,22 @@ def __init__(self, model_dir, model_name): self.model_name = model_name logger.debug("Initialized %s", self.__class__.__name__) - def check_valid(self, filename, for_restore=False): - """ Check if the passed in filename is valid for a backup operation """ + def _check_valid(self, filename, for_restore=False): + """ Check if the passed in filename is valid for a backup or restore operation. + + Parameters + ---------- + filename: str + The filename that is to be checked for backup or restore + for_restore: bool, optional + ``True`` if the checks are to be performed for restoring a model, ``False`` if the + checks are to be performed for backing up a model. 
Default: ``False`` + + Returns + ------- + bool + ``True`` if the given file is valid for a backup/restore operation otherwise ``False`` + """ fullpath = os.path.join(self.model_dir, filename) if not filename.startswith(self.model_name): # Any filename that does not start with the model name are invalid @@ -45,19 +67,38 @@ def check_valid(self, filename, for_restore=False): return retval @staticmethod - def backup_model(fullpath): - """ Backup Model File - Fullpath should be the path to an h5.py file or a state.json file """ - backupfile = fullpath + ".bk" - logger.verbose("Backing up: '%s' to '%s'", fullpath, backupfile) + def backup_model(full_path): + """ Backup a model file. + + The backed up file is saved with the original filename in the original location with `.bk` + appended to the end of the name. + + Parameters + ---------- + full_path: str + The full path to a `.h5` model file or a `.json` state file + """ + backupfile = full_path + ".bk" if os.path.exists(backupfile): os.remove(backupfile) - if os.path.exists(fullpath): - os.rename(fullpath, backupfile) + if os.path.exists(full_path): + logger.verbose("Backing up: '%s' to '%s'", full_path, backupfile) + os.rename(full_path, backupfile) def snapshot_models(self, iterations): - """ Take a snapshot of the model at current state and back up """ - logger.info("Saving snapshot") + """ Take a snapshot of the model at the current state and back it up. + + The snapshot is a copy of the model folder located in the same root location + as the original model file, with the number of iterations appended to the end + of the folder name. + + Parameters + ---------- + iterations: int + The number of iterations that the model has trained when performing the snapshot. 
+ """ + print("") # New line so log message doesn't append to last loss output + logger.verbose("Saving snapshot") snapshot_dir = "{}_snapshot_{}_iters".format(self.model_dir, iterations) if os.path.isdir(snapshot_dir): @@ -66,7 +107,7 @@ def snapshot_models(self, iterations): dst = str(get_folder(snapshot_dir)) for filename in os.listdir(self.model_dir): - if not self.check_valid(filename, for_restore=False): + if not self._check_valid(filename, for_restore=False): logger.debug("Not snapshotting file: '%s'", filename) continue srcfile = os.path.join(self.model_dir, filename) @@ -74,26 +115,33 @@ def snapshot_models(self, iterations): copyfunc = copytree if os.path.isdir(srcfile) else copyfile logger.debug("Saving snapshot: '%s' > '%s'", srcfile, dstfile) copyfunc(srcfile, dstfile) - logger.info("Saved snapshot") + logger.info("Saved snapshot (%s iterations)", iterations) def restore(self): """ Restores a model from backup. - This will place all existing models/logs into a folder named: - - "_archived_" - Copy all .bk files to replace original files - Remove logs from after the restore session_id from the logs folder """ - archive_dir = self.move_archived() - self.restore_files() - self.restore_logs(archive_dir) - - def move_archived(self): - """ Move archived files to archived folder and return archived folder name """ + + The original model files are migrated into a folder within the original model folder + named `_archived_`. The `.bk` backup files are then moved to + the location of the previously existing model files. Logs that were generated after the + the last backup was taken are removed. """ + archive_dir = self._move_archived() + self._restore_files() + self._restore_logs(archive_dir) + + def _move_archived(self): + """ Move archived files to the archived folder. 
+ + Returns + ------- + str + The name of the generated archive folder + """ logger.info("Archiving existing model files...") now = datetime.now().strftime("%Y%m%d_%H%M%S") archive_dir = os.path.join(self.model_dir, "{}_archived_{}".format(self.model_name, now)) os.mkdir(archive_dir) for filename in os.listdir(self.model_dir): - if not self.check_valid(filename, for_restore=False): + if not self._check_valid(filename, for_restore=False): logger.debug("Not moving file to archived: '%s'", filename) continue logger.verbose("Moving '%s' to archived model folder: '%s'", filename, archive_dir) @@ -103,11 +151,11 @@ def move_archived(self): logger.verbose("Archived existing model files") return archive_dir - def restore_files(self): + def _restore_files(self): """ Restore files from .bk """ logger.info("Restoring models from backup...") for filename in os.listdir(self.model_dir): - if not self.check_valid(filename, for_restore=True): + if not self._check_valid(filename, for_restore=True): logger.debug("Not restoring file: '%s'", filename) continue dstfile = os.path.splitext(filename)[0] @@ -117,11 +165,17 @@ def restore_files(self): copyfile(src, dst) logger.verbose("Restored models from backup") - def restore_logs(self, archive_dir): - """ Restore the log files since before archive """ + def _restore_logs(self, archive_dir): + """ Restores the log files up to and including the last backup. 
+ + Parameters + ---------- + archive_dir: str + The full path to the model's archive folder + """ logger.info("Restoring Logs...") - session_names = self.get_session_names() - log_dirs = self.get_log_dirs(archive_dir, session_names) + session_names = self._get_session_names() + log_dirs = self._get_log_dirs(archive_dir, session_names) for log_dir in log_dirs: src = os.path.join(archive_dir, log_dir) dst = os.path.join(self.model_dir, log_dir) @@ -129,8 +183,8 @@ def restore_logs(self, archive_dir): copytree(src, dst) logger.verbose("Restored Logs") - def get_session_names(self): - """ Get the existing session names from state file """ + def _get_session_names(self): + """ Get the existing session names from a state file. """ serializer = get_serializer("json") state_file = os.path.join(self.model_dir, "{}_state.{}".format(self.model_name, serializer.file_extension)) @@ -140,8 +194,21 @@ def get_session_names(self): logger.debug("Session to restore: %s", session_names) return session_names - def get_log_dirs(self, archive_dir, session_names): - """ Get the session logdir paths in the archive folder """ + def _get_log_dirs(self, archive_dir, session_names): + """ Get the session log directory paths in the archive folder. 
+ + Parameters + ---------- + archive_dir: str + The full path to the model's archive folder + session_names: list + The name of the training sessions that exist for the model + + Returns + ------- + list + The full paths to the log folders + """ archive_logs = os.path.join(archive_dir, "{}_logs".format(self.model_name)) paths = [os.path.join(dirpath.replace(archive_dir, "")[1:], folder) for dirpath, dirnames, _ in os.walk(archive_logs) diff --git a/lib/model/initializers.py b/lib/model/initializers.py index 7d4f28b93d..c436342284 100644 --- a/lib/model/initializers.py +++ b/lib/model/initializers.py @@ -9,13 +9,61 @@ import tensorflow as tf from keras import backend as K from keras import initializers -from keras.utils.generic_utils import get_custom_objects +from keras.utils import get_custom_objects from lib.utils import get_backend logger = logging.getLogger(__name__) # pylint: disable=invalid-name +def compute_fans(shape, data_format='channels_last'): + """Computes the number of input and output units for a weight shape. + + Ported directly from Keras as the location moves between keras and tensorflow-keras + + Parameters + ---------- + shape: tuple + shape tuple of integers + data_format: str + Image data format to use for convolution kernels. Note that all kernels in Keras are + standardized on the `"channels_last"` ordering (even when inputs are set to + `"channels_first"`). + + Returns + ------- + tuple + A tuple of scalars, `(fan_in, fan_out)`. + + Raises + ------ + ValueError + In case of invalid `data_format` argument. + """ + if len(shape) == 2: + fan_in = shape[0] + fan_out = shape[1] + elif len(shape) in {3, 4, 5}: + # Assuming convolution kernels (1D, 2D or 3D). + # Theano kernel shape: (depth, input_depth, ...) 
+ # Tensorflow kernel shape: (..., input_depth, depth) + if data_format == 'channels_first': + receptive_field_size = np.prod(shape[2:]) + fan_in = shape[1] * receptive_field_size + fan_out = shape[0] * receptive_field_size + elif data_format == 'channels_last': + receptive_field_size = np.prod(shape[:-2]) + fan_in = shape[-2] * receptive_field_size + fan_out = shape[-1] * receptive_field_size + else: + raise ValueError('Invalid data_format: ' + data_format) + else: + # No specific assumptions. + fan_in = np.sqrt(np.prod(shape)) + fan_out = np.sqrt(np.prod(shape)) + return fan_in, fan_out + + class ICNR(initializers.Initializer): # pylint: disable=invalid-name """ ICNR initializer for checkerboard artifact free sub pixel convolution @@ -23,8 +71,9 @@ class ICNR(initializers.Initializer): # pylint: disable=invalid-name ---------- initializer: :class:`keras.initializers.Initializer` The initializer used for sub kernels (orthogonal, glorot uniform, etc.) - scale: int - scaling factor of sub pixel convolution (up sampling from 8x8 to 16x16 is scale 2) + scale: int, optional + scaling factor of sub pixel convolution (up sampling from 8x8 to 16x16 is scale 2). + Default: `2` Returns ------- @@ -68,42 +117,16 @@ def __call__(self, shape, dtype="float32"): self.initializer = initializers.deserialize(self.initializer) var_x = self.initializer(new_shape, dtype) var_x = K.permute_dimensions(var_x, [2, 0, 1, 3]) - var_x = self._resize_nearest_neighbour(var_x, - (shape[0] * self.scale, shape[1] * self.scale)) + var_x = K.resize_images(var_x, + self.scale, + self.scale, + "channels_last", + interpolation="nearest") var_x = self._space_to_depth(var_x) var_x = K.permute_dimensions(var_x, [1, 2, 0, 3]) - logger.debug("Output: %s", var_x) + logger.debug("Output shape: %s", var_x.shape) return var_x - def _resize_nearest_neighbour(self, input_tensor, size): - """ Resize a tensor using nearest neighbor interpolation. 
- - Notes - ----- - Tensorflow has a bug that resizes the image incorrectly if :attr:`align_corners` is not set - to ``True``. Keras Backend does not set this flag, so we explicitly call the Tensorflow - operation for non-amd backends. - - Parameters - ---------- - input_tensor: tensor - The tensor to be resized - tuple: int - The (`h`, `w`) that the tensor should be resized to (used for non-amd backends only) - - Returns - ------- - tensor - The input tensor resized to the given size - """ - if get_backend() == "amd": - retval = K.resize_images(input_tensor, self.scale, self.scale, "channels_last", - interpolation="nearest") - else: - retval = tf.image.resize_nearest_neighbor(input_tensor, size=size, align_corners=True) - logger.debug("Input Tensor: %s, Output Tensor: %s", input_tensor, retval) - return retval - def _space_to_depth(self, input_tensor): """ Space to depth implementation. @@ -129,8 +152,8 @@ def _space_to_depth(self, input_tensor): retval = K.reshape(K.permute_dimensions(reshaped, [0, 1, 3, 2, 4, 5]), (batch, new_height, new_width, -1)) else: - retval = tf.space_to_depth(input_tensor, block_size=self.scale, data_format="NHWC") - logger.debug("Input Tensor: %s, Output Tensor: %s", input_tensor, retval) + retval = tf.nn.space_to_depth(input_tensor, block_size=self.scale, data_format="NHWC") + logger.debug("Input shape: %s, Output shape: %s", input_tensor.shape, retval.shape) return retval def get_config(self): @@ -158,11 +181,15 @@ class ConvolutionAware(initializers.Initializer): Parameters ---------- - eps_std: float + eps_std: float, optional The Standard deviation for the random normal noise used to break symmetry in the inverse - Fourier transform. + Fourier transform. Default: 0.05 seed: int, optional Used to seed the random generator. Default: ``None`` + initialized: bool, optional + This should always be set to ``False``. 
To avoid Keras re-calculating the values every time + the model is loaded, this parameter is internally set on first time initialization. + Default: ``False`` Returns ------- @@ -172,20 +199,14 @@ class ConvolutionAware(initializers.Initializer): References ---------- Armen Aghajanyan, https://arxiv.org/abs/1702.06295 - - Notes - ----- - Convolutional Aware Initialization takes a long time. Keras model loading loads a model, - performs initialization and then loads weights, which is an unnecessary waste of time. - init defaults to False so that this is bypassed when loading a saved model passing zeros. """ - def __init__(self, eps_std=0.05, seed=None, init=False): - self._init = init + def __init__(self, eps_std=0.05, seed=None, initialized=False): self.eps_std = eps_std self.seed = seed self.orthogonal = initializers.Orthogonal() self.he_uniform = initializers.he_uniform() + self.initialized = initialized def __call__(self, shape, dtype=None): """ Call function for the ICNR initializer. @@ -202,20 +223,18 @@ def __call__(self, shape, dtype=None): tensor The modified kernel weights """ - dtype = K.floatx() if dtype is None else dtype - if self._init: - logger.info("Calculating Convolution Aware Initializer for shape: %s", shape) - else: - logger.debug("Bypassing Convolutional Aware Initializer for saved model") - # Dummy in he_uniform just in case there aren't any weighs being loaded - # and it needs some kind of initialization + # TODO Tensorflow appears to pass in a :class:`tensorflow.python.framework.dtypes.DType` + # object which causes this to error, so currently just reverts to default dtype if a string + # is not passed in. 
+ if self.initialized: # Avoid re-calculating initializer when loading a saved model return self.he_uniform(shape, dtype=dtype) - + dtype = K.floatx() if not isinstance(dtype, str) else dtype + logger.info("Calculating Convolution Aware Initializer for shape: %s", shape) rank = len(shape) if self.seed is not None: np.random.seed(self.seed) - fan_in, _ = initializers._compute_fans(shape) # pylint:disable=protected-access + fan_in, _ = compute_fans(shape) # pylint:disable=protected-access variance = 2 / fan_in if rank == 3: @@ -243,6 +262,7 @@ def __call__(self, shape, dtype=None): correct_ifft = np.fft.irfftn else: + self.initialized = True return K.variable(self.orthogonal(shape), dtype=dtype) kernel_fourier_shape = correct_fft(np.zeros(kernel_shape)).shape @@ -252,10 +272,13 @@ def __call__(self, shape, dtype=None): randoms = np.random.normal(0, self.eps_std, basis.shape[:-2] + kernel_shape) init = correct_ifft(basis, kernel_shape) + randoms init = self._scale_filters(init, variance) + self.initialized = True return K.variable(init.transpose(transpose_dimensions), dtype=dtype, name="conv_aware") def _create_basis(self, filters_size, filters, size, dtype): """ Create the basis for convolutional aware initialization """ + logger.debug("filters_size: %s, filters: %s, size: %s, dtype: %s", + filters_size, filters, size, dtype) if size == 1: return np.random.normal(0.0, self.eps_std, (filters_size, filters, size)) nbb = filters // size + 1 @@ -288,10 +311,9 @@ def get_config(self): dict The configuration for ICNR Initialization """ - return { - "eps_std": self.eps_std, - "seed": self.seed - } + return dict(eps_std=self.eps_std, + seed=self.seed, + initialized=self.initialized) # Update initializers into Keras custom objects diff --git a/lib/model/layers.py b/lib/model/layers.py index 206b4ea857..574414d764 100644 --- a/lib/model/layers.py +++ b/lib/model/layers.py @@ -9,17 +9,17 @@ import tensorflow as tf import keras.backend as K -from keras.engine import InputSpec, 
Layer -from keras.utils import conv_utils -from keras.utils.generic_utils import get_custom_objects -from keras.layers.pooling import _GlobalPooling2D +from keras.layers import InputSpec, Layer +from keras.utils import get_custom_objects from lib.utils import get_backend if get_backend() == "amd": from lib.plaidml_utils import pad + from keras.utils import conv_utils # pylint:disable=ungrouped-imports else: from tensorflow import pad + from tensorflow.python.keras.utils import conv_utils class PixelShuffler(Layer): @@ -63,10 +63,13 @@ class PixelShuffler(Layer): """ def __init__(self, size=(2, 2), data_format=None, **kwargs): super().__init__(**kwargs) - self.data_format = K.normalize_data_format(data_format) - self.size = conv_utils.normalize_tuple(size, 2, "size") + if get_backend() == "amd": + self.data_format = K.normalize_data_format(data_format) + else: + self.data_format = conv_utils.normalize_data_format(data_format) + self.size = conv_utils.normalize_tuple(size, 2, 'size') - def call(self, inputs, **kwargs): + def call(self, inputs, **kwargs): # pylint:disable=unused-argument """This is where the layer's logic lives. Parameters @@ -74,7 +77,7 @@ def call(self, inputs, **kwargs): inputs: tensor Input tensor, or list/tuple of input tensors kwargs: dict - Additional keyword arguments + Additional keyword arguments. Unused Returns ------- @@ -237,7 +240,10 @@ def __init__(self, scale_factor=2, data_format=None, **kwargs): super(SubPixelUpscaling, self).__init__(**kwargs) self.scale_factor = scale_factor - self.data_format = K.normalize_data_format(data_format) + if get_backend() == "amd": + self.data_format = K.normalize_data_format(data_format) + else: + self.data_format = conv_utils.normalize_data_format(data_format) def build(self, input_shape): """Creates the layer weights. @@ -250,9 +256,9 @@ def build(self, input_shape): Keras tensor (future input to layer) or ``list``/``tuple`` of Keras tensors to reference for weight shape computations. 
""" - pass + pass # pylint: disable=unnecessary-pass - def call(self, input_tensor, mask=None): # pylint:disable=unused-argument,arguments-differ + def call(self, inputs, **kwargs): # pylint:disable=unused-argument """This is where the layer's logic lives. Parameters @@ -260,14 +266,14 @@ def call(self, input_tensor, mask=None): # pylint:disable=unused-argument,argum inputs: tensor Input tensor, or list/tuple of input tensors kwargs: dict - Additional keyword arguments + Additional keyword arguments. Unused Returns ------- tensor A tensor or list/tuple of tensors """ - retval = self._depth_to_space(input_tensor, self.scale_factor, self.data_format) + retval = self._depth_to_space(inputs, self.scale_factor, self.data_format) return retval def compute_output_shape(self, input_shape): @@ -305,17 +311,17 @@ def _depth_to_space(cls, ipt, scale, data_format=None): data_format = K.image_data_format() data_format = data_format.lower() ipt = cls._preprocess_conv2d_input(ipt, data_format) - out = tf.depth_to_space(ipt, scale) + out = tf.nn.depth_to_space(ipt, scale) out = cls._postprocess_conv2d_output(out, data_format) return out @staticmethod - def _postprocess_conv2d_output(input_tensor, data_format): + def _postprocess_conv2d_output(inputs, data_format): """Transpose and cast the output from conv2d if needed. 
Parameters ---------- - input_tensor: tensor + inputs: tensor The input that requires transposing and casting data_format: str `"channels_last"` or `"channels_first"` @@ -327,19 +333,19 @@ def _postprocess_conv2d_output(input_tensor, data_format): """ if data_format == "channels_first": - input_tensor = tf.transpose(input_tensor, (0, 3, 1, 2)) + inputs = tf.transpose(inputs, (0, 3, 1, 2)) if K.floatx() == "float64": - input_tensor = tf.cast(input_tensor, "float64") - return input_tensor + inputs = tf.cast(inputs, "float64") + return inputs @staticmethod - def _preprocess_conv2d_input(input_tensor, data_format): + def _preprocess_conv2d_input(inputs, data_format): """Transpose and cast the input before the conv2d. Parameters ---------- - input_tensor: tensor + inputs: tensor The input that requires transposing and casting data_format: str `"channels_last"` or `"channels_first"` @@ -349,14 +355,14 @@ def _preprocess_conv2d_input(input_tensor, data_format): tensor The transposed and cast input tensor """ - if K.dtype(input_tensor) == "float64": - input_tensor = tf.cast(input_tensor, "float32") + if K.dtype(inputs) == "float64": + inputs = tf.cast(inputs, "float32") if data_format == "channels_first": # Tensorflow uses the last dimension as channel dimension, instead of the 2nd one. # Theano input shape: (samples, input_depth, rows, cols) # Tensorflow input shape: (samples, rows, cols, input_depth) - input_tensor = tf.transpose(input_tensor, (0, 2, 3, 1)) - return input_tensor + inputs = tf.transpose(inputs, (0, 2, 3, 1)) + return inputs def get_config(self): """Returns the config of the layer. 
@@ -394,8 +400,12 @@ class ReflectionPadding2D(Layer): The standard Keras Layer keyword arguments (if any) """ def __init__(self, stride=2, kernel_size=5, **kwargs): + if isinstance(stride, (tuple, list)): + assert len(stride) == 2 and stride[0] == stride[1] + stride = stride[0] self.stride = stride self.kernel_size = kernel_size + self.input_spec = None super().__init__(**kwargs) def build(self, input_shape): @@ -446,7 +456,7 @@ def compute_output_shape(self, input_shape): input_shape[2] + padding_width, input_shape[3]) - def call(self, x, mask=None): # pylint:disable=unused-argument,arguments-differ + def call(self, var_x, mask=None): # pylint:disable=unused-argument,arguments-differ """This is where the layer's logic lives. Parameters @@ -479,7 +489,7 @@ def call(self, x, mask=None): # pylint:disable=unused-argument,arguments-differ padding_left = padding_width // 2 padding_right = padding_width - padding_left - return pad(x, + return pad(var_x, [[0, 0], [padding_top, padding_bot], [padding_left, padding_right], @@ -507,10 +517,54 @@ class name. These are handled by `Network` (one layer of abstraction above). return dict(list(base_config.items()) + list(config.items())) +class _GlobalPooling2D(Layer): + """Abstract class for different global pooling 2D layers. + + From keras as access to pooling is trickier in tensorflow.keras + """ + def __init__(self, data_format=None, **kwargs): + super(_GlobalPooling2D, self).__init__(**kwargs) + if get_backend() == "amd": + self.data_format = K.normalize_data_format(data_format) + else: + self.data_format = conv_utils.normalize_data_format(data_format) + self.input_spec = InputSpec(ndim=4) + + def compute_output_shape(self, input_shape): + """ Compute the output shape based on the input shape. 
+ + Parameters + ---------- + input_shape: tuple + The input shape to the layer + """ + if self.data_format == 'channels_last': + return (input_shape[0], input_shape[3]) + return (input_shape[0], input_shape[1]) + + def call(self, inputs, **kwargs): + """ Override to call the layer. + + Parameters + ---------- + inputs: Tensor + The input to the layer + kwargs: dict + Additional keyword arguments + """ + raise NotImplementedError + + def get_config(self): + """ Set the Keras config """ + config = {'data_format': self.data_format} + base_config = super(_GlobalPooling2D, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + class GlobalMinPooling2D(_GlobalPooling2D): """Global minimum pooling operation for spatial data. """ - def call(self, inputs): + def call(self, inputs, **kwargs): """This is where the layer's logic lives. Parameters @@ -535,7 +589,7 @@ def call(self, inputs): class GlobalStdDevPooling2D(_GlobalPooling2D): """Global standard deviation pooling operation for spatial data. """ - def call(self, inputs): + def call(self, inputs, **kwargs): """This is where the layer's logic lives. Parameters @@ -557,7 +611,7 @@ def call(self, inputs): return pooled -class L2_normalize(Layer): # Pylint:disable=invalid-name +class L2_normalize(Layer): # pylint:disable=invalid-name """ Normalizes a tensor w.r.t. the L2 norm alongside the specified axis. 
Parameters diff --git a/lib/model/losses.py b/lib/model/losses_plaid.py similarity index 53% rename from lib/model/losses.py rename to lib/model/losses_plaid.py index 706e6f7546..621589aa02 100644 --- a/lib/model/losses.py +++ b/lib/model/losses_plaid.py @@ -5,40 +5,17 @@ import logging -import keras.backend as K +from keras import backend as K + import numpy as np import tensorflow as tf -from lib.utils import get_backend - -if get_backend() == "amd": - from plaidml.op import extract_image_patches - from lib.plaidml_utils import pad -else: - from tensorflow import extract_image_patches # pylint: disable=ungrouped-imports - from tensorflow import pad +from plaidml.op import extract_image_patches -logger = logging.getLogger(__name__) # pylint: disable=invalid-name +from lib.plaidml_utils import pad +from lib.utils import FaceswapError - -def mask_loss_wrapper(loss_func, preprocessing_func=None): - """ A wrapper for mask loss that can perform pre-processing on the input prior to calling the - loss function. - - Parameters - ---------- - loss_func: class or function - The actual loss function to use - preprocessing_func: function - The pre-processing function to use. 
Should take a Keras Input as it's only argument - """ - def func(y_true, y_pred): - """ Process input if a processing function has been passed, otherwise just return loss """ - if preprocessing_func is not None: - y_true = K.reshape(y_true, [-1] + list(K.int_shape(y_pred)[1:])) - y_true = preprocessing_func(y_true) - return loss_func(y_true, y_pred) - return func +logger = logging.getLogger(__name__) # pylint:disable=invalid-name class DSSIMObjective(): @@ -96,7 +73,6 @@ def __init__(self, k_1=0.01, k_2=0.03, kernel_size=3, max_value=1.0): self.c_1 = (self.k_1 * self.max_value) ** 2 self.c_2 = (self.k_2 * self.max_value) ** 2 self.dim_ordering = K.image_data_format() - self.backend = K.backend() @staticmethod def __int_shape(input_tensor): @@ -225,152 +201,140 @@ def extract_image_patches(self, input_tensor, k_sizes, s_sizes, return patches -# <<< START: from Dfaker >>> # -def PenalizedLoss(mask, loss_func, # pylint: disable=invalid-name - mask_prop=1.0, mask_scaling=1.0, preprocessing_func=None): - """ Plaidml and Tensorflow Penalized Loss function. +class PenalizedLoss(): # pylint:disable=too-few-public-methods + """ Penalized Loss function. Applies the given loss function just to the masked area of the image. Parameters ---------- - mask: input tensor - The mask for the current image loss_func: function The actual loss function to use mask_prop: float, optional The amount of mask propagation. Default: `1.0` - mask_scaling: float, optional - For multi-decoder output the target mask will likely be at full size scaling, so this is - the scaling factor to reduce the mask by. Default: `1.0` - preprocessing_func: function, optional - If preprocessing is required on the input mask, then this should be the function to use. - The function should take a Keras Input as it's only argument. Set to ``None`` if no - preprocessing is to be performed. 
Default: ``None`` """ - def _scale_mask(mask, scaling): - """ Scale the input mask to be the same size as the input face + def __init__(self, loss_func, mask_prop=1.0): + self._loss_func = loss_func + self._mask_prop = mask_prop + + def __call__(self, y_true, y_pred): + """ Apply the loss function to the masked area of the image. Parameters ---------- - mask: input tensor - The mask for the current image - scaling: float - The amount to scale the input mask by + y_true: tensor or variable + The ground truth value. This should contain the mask in the 4th channel that will be + split off for penalizing. + y_pred: tensor or variable + The predicted value Returns ------- tensor - The resized input mask + The Loss value """ - if scaling != 1.0: - size = round(1 / scaling) - mask = K.pool2d(mask, - pool_size=(size, size), - strides=(size, size), - padding="valid", - data_format=K.image_data_format(), - pool_mode="avg") - logger.debug("resized tensor: %s", mask) - return mask - - mask = _scale_mask(mask, mask_scaling) - if preprocessing_func is not None: - mask = preprocessing_func(mask) - mask_as_k_inv_prop = 1 - mask_prop - mask = (mask * mask_prop) + mask_as_k_inv_prop + mask = self._prepare_mask(K.expand_dims(y_true[..., -1], axis=-1)) + y_true = y_true[..., :-1] + n_true = y_true * mask + n_pred = y_pred * mask + if isinstance(self._loss_func, DSSIMObjective): + # Extract Image Patches in SSIM requires that y_pred be of a known shape, so + # specifically reshape the tensor. + n_pred = K.reshape(n_pred, K.int_shape(y_pred)) + return self._loss_func(n_true, n_pred) + + def _prepare_mask(self, mask): + """ Prepare the masks for calculating loss - def _inner_loss(y_true, y_pred): - """ Apply the loss function to the masked area of the image. 
- - Parameters + Parameters ---------- - y_true: tensor or variable - The ground truth value - y_pred: tensor or variable - The predicted value + mask: :class:`numpy.ndarray` + The masks for the current batch Returns ------- tensor - The Loss value - - Notes - ----- - Branching because TensorFlow's broadcasting is wonky and plaidML's concatenate is - implemented inefficiently. + The prepared mask for applying to loss """ - if K.backend() == "plaidml.keras.backend": - n_true = y_true * mask - n_pred = y_pred * mask - else: - n_true = K.concatenate([y_true[:, :, :, i:i+1] * mask for i in range(3)], axis=-1) - n_pred = K.concatenate([y_pred[:, :, :, i:i+1] * mask for i in range(3)], axis=-1) - return loss_func(n_true, n_pred) - return _inner_loss -# <<< END: from Dfaker >>> # + mask_as_k_inv_prop = 1 - self._mask_prop + mask = (mask * self._mask_prop) + mask_as_k_inv_prop + return mask -def generalized_loss(y_true, y_pred, alpha=1.0, beta=1.0/255.0): - """ Generalized function used to return a large variety of mathematical loss functions. +class GeneralizedLoss(): # pylint:disable=too-few-public-methods + """ Generalized function used to return a large variety of mathematical loss functions. The primary benefit is a smooth, differentiable version of L1 loss. + References + ---------- + Barron, J. A More General Robust Loss Function - https://arxiv.org/pdf/1701.03077.pdf + + Example + ------- + >>> a=1.0, x>>c , c=1.0/255.0 # will give a smoothly differentiable version of L1 / MAE loss + >>> a=1.999999 (limit as a->2), beta=1.0/255.0 # will give L2 / RMSE loss + Parameters ---------- - y_true: tensor or variable - The ground truth value - y_pred: tensor or variable - The predicted value alpha: float, optional Penalty factor. Larger number give larger weight to large deviations. Default: `1.0` beta: float, optional Scale factor used to adjust to the input scale (i.e. inputs of mean `1e-4` or `256`). 
Default: `1.0/255.0` + """ + def __init__(self, alpha=1.0, beta=1.0/255.0): + self.alpha = alpha + self.beta = beta - Returns - ------- - tensor - The loss value from the results of function(y_pred - y_true) + def __call__(self, y_true, y_pred): + """ Call the Generalized Loss Function - References - ---------- - Barron, J. A More General Robust Loss Function - https://arxiv.org/pdf/1701.03077.pdf + Parameters + ---------- + y_true: tensor or variable + The ground truth value + y_pred: tensor or variable + The predicted value - Example - ------- - >>> a=1.0, x>>c , c=1.0/255.0 # will give a smoothly differentiable version of L1 / MAE loss - >>> a=1.999999 (limit as a->2), beta=1.0/255.0 # will give L2 / RMSE loss - """ - diff = y_pred - y_true - second = (K.pow(K.pow(diff/beta, 2.) / K.abs(2.-alpha) + 1., (alpha/2.)) - 1.) - loss = (K.abs(2.-alpha)/alpha) * second - loss = K.mean(loss, axis=-1) * beta - return loss + Returns + ------- + tensor + The loss value from the results of function(y_pred - y_true) + """ + diff = y_pred - y_true + second = (K.pow(K.pow(diff/self.beta, 2.) / K.abs(2. - self.alpha) + 1., + (self.alpha / 2.)) - 1.) + loss = (K.abs(2. - self.alpha)/self.alpha) * second + loss = K.mean(loss, axis=-1) * self.beta + return loss -def l_inf_norm(y_true, y_pred): - """ Calculate the L-inf norm as a loss function. +class LInfNorm(): # pylint:disable=too-few-public-methods + """ Calculate the L-inf norm as a loss function. """ - Parameters - ---------- - y_true: tensor or variable - The ground truth value - y_pred: tensor or variable - The predicted value + def __call__(self, y_true, y_pred): + """ Call the L-inf norm loss function. 
- Returns - ------- - tensor - The loss value - """ - diff = K.abs(y_true - y_pred) - max_loss = K.max(diff, axis=(1, 2), keepdims=True) - loss = K.mean(max_loss, axis=-1) - return loss + Parameters + ---------- + y_true: tensor or variable + The ground truth value + y_pred: tensor or variable + The predicted value + + Returns + ------- + tensor + The loss value + """ + diff = K.abs(y_true - y_pred) + max_loss = K.max(diff, axis=(1, 2), keepdims=True) + loss = K.mean(max_loss, axis=-1) + return loss -def gradient_loss(y_true, y_pred): +class GradientLoss(): # pylint:disable=too-few-public-methods """ Gradient Loss Function. Calculates the first and second order gradient difference between pixels of an image in the x @@ -378,25 +342,44 @@ def gradient_loss(y_true, y_pred): image and the difference is taken. When used as a loss, its minimization will result in predicted images approaching the same level of sharpness / blurriness as the ground truth. - Parameters - ---------- - y_true: tensor or variable - The ground truth value - y_pred: tensor or variable - The predicted value - - Returns - ------- - tensor - The loss value - References ---------- TV+TV2 Regularization with Non-Convex Sparseness-Inducing Penalty for Image Restoration, Chengwu Lu & Hua Huang, 2014 - http://downloads.hindawi.com/journals/mpe/2014/790547.pdf """ + def __init__(self): + self.generalized_loss = GeneralizedLoss(alpha=1.9999) + + def __call__(self, y_true, y_pred): + """ Call the gradient loss function. 
- def _diff_x(img): + Parameters + ---------- + y_true: tensor or variable + The ground truth value + y_pred: tensor or variable + The predicted value + + Returns + ------- + tensor + The loss value + """ + tv_weight = 1.0 + tv2_weight = 1.0 + loss = 0.0 + loss += tv_weight * (self.generalized_loss(self._diff_x(y_true), self._diff_x(y_pred)) + + self.generalized_loss(self._diff_y(y_true), self._diff_y(y_pred))) + loss += tv2_weight * (self.generalized_loss(self._diff_xx(y_true), self._diff_xx(y_pred)) + + self.generalized_loss(self._diff_yy(y_true), self._diff_yy(y_pred)) + + self.generalized_loss(self._diff_xy(y_true), self._diff_xy(y_pred)) + * 2.) + loss = loss / (tv_weight + tv2_weight) + # TODO simplify to use MSE instead + return loss + + @classmethod + def _diff_x(cls, img): """ X Difference """ x_left = img[:, :, 1:2, :] - img[:, :, 0:1, :] x_inner = img[:, :, 2:, :] - img[:, :, :-2, :] @@ -404,7 +387,8 @@ def _diff_x(img): x_out = K.concatenate([x_left, x_inner, x_right], axis=2) return x_out * 0.5 - def _diff_y(img): + @classmethod + def _diff_y(cls, img): """ Y Difference """ y_top = img[:, 1:2, :, :] - img[:, 0:1, :, :] y_inner = img[:, 2:, :, :] - img[:, :-2, :, :] @@ -412,7 +396,8 @@ def _diff_y(img): y_out = K.concatenate([y_top, y_inner, y_bot], axis=1) return y_out * 0.5 - def _diff_xx(img): + @classmethod + def _diff_xx(cls, img): """ X-X Difference """ x_left = img[:, :, 1:2, :] + img[:, :, 0:1, :] x_inner = img[:, :, 2:, :] + img[:, :, :-2, :] @@ -420,7 +405,8 @@ def _diff_xx(img): x_out = K.concatenate([x_left, x_inner, x_right], axis=2) return x_out - 2.0 * img - def _diff_yy(img): + @classmethod + def _diff_yy(cls, img): """ Y-Y Difference """ y_top = img[:, 1:2, :, :] + img[:, 0:1, :, :] y_inner = img[:, 2:, :, :] + img[:, :-2, :, :] @@ -428,7 +414,8 @@ def _diff_yy(img): y_out = K.concatenate([y_top, y_inner, y_bot], axis=1) return y_out - 2.0 * img - def _diff_xy(img): + @classmethod + def _diff_xy(cls, img): """ X-Y Difference """ # xout1 
top_left = img[:, 1:2, 1:2, :] + img[:, 0:1, 0:1, :] @@ -466,175 +453,112 @@ def _diff_xy(img): xy_out2 = K.concatenate([xy_left, xy_mid, xy_right], axis=2) return (xy_out1 - xy_out2) * 0.25 - tv_weight = 1.0 - tv2_weight = 1.0 - loss = 0.0 - loss += tv_weight * (generalized_loss(_diff_x(y_true), _diff_x(y_pred), alpha=1.9999) + - generalized_loss(_diff_y(y_true), _diff_y(y_pred), alpha=1.9999)) - loss += tv2_weight * (generalized_loss(_diff_xx(y_true), _diff_xx(y_pred), alpha=1.9999) + - generalized_loss(_diff_yy(y_true), _diff_yy(y_pred), alpha=1.9999) + - generalized_loss(_diff_xy(y_true), _diff_xy(y_pred), alpha=1.9999) * 2.) - loss = loss / (tv_weight + tv2_weight) - # TODO simplify to use MSE instead - return loss - - -def scharr_edges(image, magnitude): - """ Returns a tensor holding modified Scharr edge maps. - - Parameters - ---------- - image: tensor - Image tensor with shape [batch_size, h, w, d] and type float32. The image(s) must be 2x2 - or larger. - magnitude: bool - Boolean to determine if the edge magnitude or edge direction is returned - - Returns - ------- - tensor - Tensor holding edge maps for each channel. Returns a tensor with shape `[batch_size, h, w, - d, 2]` where the last two dimensions hold `[[dy[0], dx[0]], [dy[1], dx[1]], ..., [dy[d-1], - dx[d-1]]]` calculated using the Scharr filter. - """ - # Define vertical and horizontal Scharr filters. 
- static_image_shape = image.shape.dims if get_backend() == "amd" else image.get_shape() - image_shape = K.shape(image) - - # 5x5 modified Scharr kernel ( reshape to (5,5,1,2) ) - matrix = np.array([[[[0.00070, 0.00070]], - [[0.00520, 0.00370]], - [[0.03700, 0.00000]], - [[0.00520, -0.0037]], - [[0.00070, -0.0007]]], - [[[0.00370, 0.00520]], - [[0.11870, 0.11870]], - [[0.25890, 0.00000]], - [[0.11870, -0.1187]], - [[0.00370, -0.0052]]], - [[[0.00000, 0.03700]], - [[0.00000, 0.25890]], - [[0.00000, 0.00000]], - [[0.00000, -0.2589]], - [[0.00000, -0.0370]]], - [[[-0.0037, 0.00520]], - [[-0.1187, 0.11870]], - [[-0.2589, 0.00000]], - [[-0.1187, -0.1187]], - [[-0.0037, -0.0052]]], - [[[-0.0007, 0.00070]], - [[-0.0052, 0.00370]], - [[-0.0370, 0.00000]], - [[-0.0052, -0.0037]], - [[-0.0007, -0.0007]]]]) - num_kernels = [2] - kernels = K.constant(matrix, dtype='float32') - kernels = K.tile(kernels, [1, 1, image_shape[-1], 1]) - - # Use depth-wise convolution to calculate edge maps per channel. - # Output tensor has shape [batch_size, h, w, d * num_kernels]. - pad_sizes = [[0, 0], [2, 2], [2, 2], [0, 0]] - padded = pad(image, pad_sizes, mode='REFLECT') - output = K.depthwise_conv2d(padded, kernels) - - if not magnitude: # direction of edges - # Reshape to [batch_size, h, w, d, num_kernels]. - shape = K.concatenate([image_shape, num_kernels], axis=0) - output = K.reshape(output, shape=shape) - output.set_shape(static_image_shape.concatenate(num_kernels)) - output = tf.atan(K.squeeze(output[:, :, :, :, 0] / output[:, :, :, :, 1], axis=None)) - # magnitude of edges -- unified x & y edges don't work well with Neural Networks - return output - - -def gmsd_loss(y_true, y_pred): +class GMSDLoss(): # pylint:disable=too-few-public-methods """ Gradient Magnitude Similarity Deviation Loss. 
Improved image quality metric over MS-SSIM with easier calculations - Parameters - ---------- - y_true: tensor or variable - The ground truth value - y_pred: tensor or variable - The predicted value - - Returns - ------- - tensor - The loss value - References ---------- http://www4.comp.polyu.edu.hk/~cslzhang/IQA/GMSD/GMSD.htm https://arxiv.org/ftp/arxiv/papers/1308/1308.3052.pdf - """ - true_edge = scharr_edges(y_true, True) - pred_edge = scharr_edges(y_pred, True) - ephsilon = 0.0025 - upper = 2.0 * true_edge * pred_edge - lower = K.square(true_edge) + K.square(pred_edge) - gms = (upper + ephsilon) / (lower + ephsilon) - gmsd = K.std(gms, axis=(1, 2, 3), keepdims=True) - gmsd = K.squeeze(gmsd, axis=-1) - return gmsd - -# Gaussian Blur is here as it is only used for losses. -# It was previously kept in lib/model/masks but the import of keras backend -# breaks plaidml -def gaussian_blur(radius=2.0): - """ Apply gaussian blur to an input. - - Used for blurring mask in training. - - Parameters - ---------- - radius: float, optional - The kernel radius for applying gaussian blur. Default: `2.0` - - Returns - ------- - tensor - The input tensor with gaussian blurring applied - - References - ---------- - https://github.com/iperov/DeepFaceLab - """ - def _gaussian(var_x, radius, sigma): - """ Obtain the gaussian kernel. """ - return np.exp(-(float(var_x) - float(radius)) ** 2 / (2 * sigma ** 2)) - - def _make_kernel(sigma): - """ Make the gaussian kernel. """ - kernel_size = max(3, int(2 * 2 * sigma + 1)) - mean = np.floor(0.5 * kernel_size) - kernel_1d = np.array([_gaussian(x, mean, sigma) for x in range(kernel_size)]) - np_kernel = np.outer(kernel_1d, kernel_1d).astype(dtype=K.floatx()) - kernel = np_kernel / np.sum(np_kernel) - return kernel + def __call__(self, y_true, y_pred): + """ Return the Gradient Magnitude Similarity Deviation Loss. 
- gauss_kernel = _make_kernel(radius) - gauss_kernel = gauss_kernel[:, :, np.newaxis, np.newaxis] + Parameters + ---------- + y_true: tensor or variable + The ground truth value + y_pred: tensor or variable + The predicted value - def func(input_tensor): - """ Apply gaussian blurring to the input tensor + Returns + ------- + tensor + The loss value + """ + raise FaceswapError("GMSD Loss is not currently compatible with PlaidML. Please select a " + "different Loss method.") + + true_edge = self._scharr_edges(y_true, True) + pred_edge = self._scharr_edges(y_pred, True) + ephsilon = 0.0025 + upper = 2.0 * true_edge * pred_edge + lower = K.square(true_edge) + K.square(pred_edge) + gms = (upper + ephsilon) / (lower + ephsilon) + gmsd = K.std(gms, axis=(1, 2, 3), keepdims=True) + gmsd = K.squeeze(gmsd, axis=-1) + return gmsd + + @classmethod + def _scharr_edges(cls, image, magnitude): + """ Returns a tensor holding modified Scharr edge maps. Parameters ---------- - input_tensor: tensor - The input to have gaussian blurring applied. + image: tensor + Image tensor with shape [batch_size, h, w, d] and type float32. The image(s) must be + 2x2 or larger. + magnitude: bool + Boolean to determine if the edge magnitude or edge direction is returned Returns ------- tensor - The input with gaussian blurring applied + Tensor holding edge maps for each channel. Returns a tensor with shape `[batch_size, h, + w, d, 2]` where the last two dimensions hold `[[dy[0], dx[0]], [dy[1], dx[1]], ..., + [dy[d-1], dx[d-1]]]` calculated using the Scharr filter. """ - inputs = [input_tensor[:, :, :, i:i + 1] for i in range(K.int_shape(input_tensor)[-1])] - outputs = [K.conv2d(inp, K.constant(gauss_kernel), strides=(1, 1), padding="same") - for inp in inputs] - return K.concatenate(outputs, axis=-1) - return func + + # Define vertical and horizontal Scharr filters. 
+ # TODO PlaidML: AttributeError: 'Value' object has no attribute 'get_shape' + static_image_shape = image.get_shape() + image_shape = K.shape(image) + + # 5x5 modified Scharr kernel ( reshape to (5,5,1,2) ) + matrix = np.array([[[[0.00070, 0.00070]], + [[0.00520, 0.00370]], + [[0.03700, 0.00000]], + [[0.00520, -0.0037]], + [[0.00070, -0.0007]]], + [[[0.00370, 0.00520]], + [[0.11870, 0.11870]], + [[0.25890, 0.00000]], + [[0.11870, -0.1187]], + [[0.00370, -0.0052]]], + [[[0.00000, 0.03700]], + [[0.00000, 0.25890]], + [[0.00000, 0.00000]], + [[0.00000, -0.2589]], + [[0.00000, -0.0370]]], + [[[-0.0037, 0.00520]], + [[-0.1187, 0.11870]], + [[-0.2589, 0.00000]], + [[-0.1187, -0.1187]], + [[-0.0037, -0.0052]]], + [[[-0.0007, 0.00070]], + [[-0.0052, 0.00370]], + [[-0.0370, 0.00000]], + [[-0.0052, -0.0037]], + [[-0.0007, -0.0007]]]]) + num_kernels = [2] + kernels = K.constant(matrix, dtype='float32') + kernels = K.tile(kernels, [1, 1, image_shape[-1], 1]) + + # Use depth-wise convolution to calculate edge maps per channel. + # Output tensor has shape [batch_size, h, w, d * num_kernels]. + pad_sizes = [[0, 0], [2, 2], [2, 2], [0, 0]] + padded = pad(image, pad_sizes, mode='REFLECT') + output = K.depthwise_conv2d(padded, kernels) + + if not magnitude: # direction of edges + # Reshape to [batch_size, h, w, d, num_kernels]. 
+ shape = K.concatenate([image_shape, num_kernels], axis=0) + output = K.reshape(output, shape=shape) + output.set_shape(static_image_shape.concatenate(num_kernels)) + output = tf.atan(K.squeeze(output[:, :, :, :, 0] / output[:, :, :, :, 1], axis=None)) + # magnitude of edges -- unified x & y edges don't work well with Neural Networks + return output diff --git a/lib/model/losses_tf.py b/lib/model/losses_tf.py new file mode 100644 index 0000000000..3d676e7db5 --- /dev/null +++ b/lib/model/losses_tf.py @@ -0,0 +1,556 @@ +#!/usr/bin/env python3 +""" Custom Loss Functions for faceswap.py """ + +from __future__ import absolute_import + +import logging + +import numpy as np +import tensorflow as tf +from tensorflow.python.keras.engine import compile_utils + +from keras import backend as K + +logger = logging.getLogger(__name__) # pylint:disable=invalid-name + + +class DSSIMObjective(tf.keras.losses.Loss): + """ DSSIM Loss Function + + Difference of Structural Similarity (DSSIM loss function). Clipped between 0 and 0.5 + + Parameters + ---------- + k_1: float, optional + Parameter of the SSIM. Default: `0.01` + k_2: float, optional + Parameter of the SSIM. Default: `0.03` + kernel_size: int, optional + Size of the sliding window Default: `3` + max_value: float, optional + Max value of the output. Default: `1.0` + + Notes + ------ + You should add a regularization term like a l2 loss in addition to this one. 
+ + References + ---------- + https://github.com/keras-team/keras-contrib/blob/master/keras_contrib/losses/dssim.py + + MIT License + + Copyright (c) 2017 Fariz Rahman + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + """ + def __init__(self, k_1=0.01, k_2=0.03, kernel_size=3, max_value=1.0): + super().__init__(name="DSSIMObjective") + self.kernel_size = kernel_size + self.k_1 = k_1 + self.k_2 = k_2 + self.max_value = max_value + self.c_1 = (self.k_1 * self.max_value) ** 2 + self.c_2 = (self.k_2 * self.max_value) ** 2 + self.dim_ordering = K.image_data_format() + + @staticmethod + def __int_shape(input_tensor): + """ Returns the shape of tensor or variable as a tuple of int or None entries. + + Parameters + ---------- + input_tensor: tensor or variable + The input to return the shape for + + Returns + ------- + tuple + A tuple of integers (or None entries) + """ + return K.int_shape(input_tensor) + + def call(self, y_true, y_pred): + """ Call the DSSIM Loss Function. 
+ + Parameters + ---------- + y_true: tensor or variable + The ground truth value + y_pred: tensor or variable + The predicted value + + Returns + ------- + tensor + The DSSIM Loss value + + Notes + ----- + There are additional parameters for this function. some of the 'modes' for edge behavior + do not yet have a gradient definition in the Theano tree and cannot be used for learning + """ + + kernel = [self.kernel_size, self.kernel_size] + y_true = K.reshape(y_true, [-1] + list(self.__int_shape(y_pred)[1:])) + y_pred = K.reshape(y_pred, [-1] + list(self.__int_shape(y_pred)[1:])) + patches_pred = self.extract_image_patches(y_pred, + kernel, + kernel, + 'valid', + self.dim_ordering) + patches_true = self.extract_image_patches(y_true, + kernel, + kernel, + 'valid', + self.dim_ordering) + + # Get mean + u_true = K.mean(patches_true, axis=-1) + u_pred = K.mean(patches_pred, axis=-1) + # Get variance + var_true = K.var(patches_true, axis=-1) + var_pred = K.var(patches_pred, axis=-1) + # Get standard deviation + covar_true_pred = K.mean( + patches_true * patches_pred, axis=-1) - u_true * u_pred + + ssim = (2 * u_true * u_pred + self.c_1) * ( + 2 * covar_true_pred + self.c_2) + denom = (K.square(u_true) + K.square(u_pred) + self.c_1) * ( + var_pred + var_true + self.c_2) + ssim /= denom # no need for clipping, c_1 + c_2 make the denorm non-zero + return K.mean((1.0 - ssim) / 2.0) + + @staticmethod + def _preprocess_padding(padding): + """Convert keras padding to tensorflow padding. + + Parameters + ---------- + padding: string, + `"same"` or `"valid"`. + + Returns + ------- + str + `"SAME"` or `"VALID"`. + + Raises + ------ + ValueError + If `padding` is invalid. 
+ """ + if padding == 'same': + padding = 'SAME' + elif padding == 'valid': + padding = 'VALID' + else: + raise ValueError('Invalid padding:', padding) + return padding + + def extract_image_patches(self, input_tensor, k_sizes, s_sizes, + padding='same', data_format='channels_last'): + """ Extract the patches from an image. + + Parameters + ---------- + input_tensor: tensor + The input image + k_sizes: tuple + 2-d tuple with the kernel size + s_sizes: tuple + 2-d tuple with the strides size + padding: str, optional + `"same"` or `"valid"`. Default: `"same"` + data_format: str, optional. + `"channels_last"` or `"channels_first"`. Default: `"channels_last"` + + Returns + ------- + The (k_w, k_h) patches extracted + Tensorflow ==> (batch_size, w, h, k_w, k_h, c) + Theano ==> (batch_size, w, h, c, k_w, k_h) + """ + kernel = [1, k_sizes[0], k_sizes[1], 1] + strides = [1, s_sizes[0], s_sizes[1], 1] + padding = self._preprocess_padding(padding) + if data_format == 'channels_first': + input_tensor = K.permute_dimensions(input_tensor, (0, 2, 3, 1)) + patches = tf.image.extract_patches(input_tensor, kernel, strides, [1, 1, 1, 1], padding) + return patches + + +class PenalizedLoss(tf.keras.losses.Loss): + """ Penalized Loss function. + + Applies the given loss function just to the masked area of the image. + + Parameters + ---------- + loss_func: function + The actual loss function to use + mask_prop: float, optional + The amount of mask propagation. Default: `1.0` + """ + def __init__(self, loss_func, mask_prop=1.0): + super().__init__(name="penalized_loss") + self._loss_func = compile_utils.LossesContainer(loss_func) + self._mask_prop = mask_prop + + def call(self, y_true, y_pred): + """ Apply the loss function to the masked area of the image. + + Parameters + ---------- + y_true: tensor or variable + The ground truth value. This should contain the mask in the 4th channel that will be + split off for penalizing. 
+ y_pred: tensor or variable + The predicted value + + Returns + ------- + tensor + The Loss value + """ + mask = self._prepare_mask(K.expand_dims(y_true[..., -1], axis=-1)) + y_true = y_true[..., :-1] + n_true = K.concatenate([y_true[:, :, :, i:i+1] * mask for i in range(3)], axis=-1) + n_pred = K.concatenate([y_pred[:, :, :, i:i+1] * mask for i in range(3)], axis=-1) + return self._loss_func(n_true, n_pred) + + def _prepare_mask(self, mask): + """ Prepare the masks for calculating loss + + Parameters + ---------- + mask: :class:`numpy.ndarray` + The masks for the current batch + + Returns + ------- + tensor + The prepared mask for applying to loss + """ + mask_as_k_inv_prop = 1 - self._mask_prop + mask = (mask * self._mask_prop) + mask_as_k_inv_prop + return mask + + +class GeneralizedLoss(tf.keras.losses.Loss): + """ Generalized function used to return a large variety of mathematical loss functions. + + The primary benefit is a smooth, differentiable version of L1 loss. + + References + ---------- + Barron, J. A More General Robust Loss Function - https://arxiv.org/pdf/1701.03077.pdf + + Example + ------- + >>> a=1.0, x>>c , c=1.0/255.0 # will give a smoothly differentiable version of L1 / MAE loss + >>> a=1.999999 (limit as a->2), beta=1.0/255.0 # will give L2 / RMSE loss + + Parameters + ---------- + alpha: float, optional + Penalty factor. Larger number give larger weight to large deviations. Default: `1.0` + beta: float, optional + Scale factor used to adjust to the input scale (i.e. inputs of mean `1e-4` or `256`). 
+ Default: `1.0/255.0` + """ + def __init__(self, alpha=1.0, beta=1.0/255.0): + super().__init__(name="generalized_loss") + self.alpha = alpha + self.beta = beta + + def call(self, y_true, y_pred): + """ Call the Generalized Loss Function + + Parameters + ---------- + y_true: tensor or variable + The ground truth value + y_pred: tensor or variable + The predicted value + + Returns + ------- + tensor + The loss value from the results of function(y_pred - y_true) + """ + diff = y_pred - y_true + second = (K.pow(K.pow(diff/self.beta, 2.) / K.abs(2. - self.alpha) + 1., + (self.alpha / 2.)) - 1.) + loss = (K.abs(2. - self.alpha)/self.alpha) * second + loss = K.mean(loss, axis=-1) * self.beta + return loss + + +class LInfNorm(tf.keras.losses.Loss): + """ Calculate the L-inf norm as a loss function. """ + + def call(self, y_true, y_pred): + """ Call the L-inf norm loss function. + + Parameters + ---------- + y_true: tensor or variable + The ground truth value + y_pred: tensor or variable + The predicted value + + Returns + ------- + tensor + The loss value + """ + diff = K.abs(y_true - y_pred) + max_loss = K.max(diff, axis=(1, 2), keepdims=True) + loss = K.mean(max_loss, axis=-1) + return loss + + +class GradientLoss(tf.keras.losses.Loss): + """ Gradient Loss Function. + + Calculates the first and second order gradient difference between pixels of an image in the x + and y dimensions. These gradients are then compared between the ground truth and the predicted + image and the difference is taken. When used as a loss, its minimization will result in + predicted images approaching the same level of sharpness / blurriness as the ground truth. 
+ + References + ---------- + TV+TV2 Regularization with Non-Convex Sparseness-Inducing Penalty for Image Restoration, + Chengwu Lu & Hua Huang, 2014 - http://downloads.hindawi.com/journals/mpe/2014/790547.pdf + """ + def __init__(self): + super().__init__(name="generalized_loss") + self.generalized_loss = GeneralizedLoss(alpha=1.9999) + + def call(self, y_true, y_pred): + """ Call the gradient loss function. + + Parameters + ---------- + y_true: tensor or variable + The ground truth value + y_pred: tensor or variable + The predicted value + + Returns + ------- + tensor + The loss value + """ + tv_weight = 1.0 + tv2_weight = 1.0 + loss = 0.0 + loss += tv_weight * (self.generalized_loss(self._diff_x(y_true), self._diff_x(y_pred)) + + self.generalized_loss(self._diff_y(y_true), self._diff_y(y_pred))) + loss += tv2_weight * (self.generalized_loss(self._diff_xx(y_true), self._diff_xx(y_pred)) + + self.generalized_loss(self._diff_yy(y_true), self._diff_yy(y_pred)) + + self.generalized_loss(self._diff_xy(y_true), self._diff_xy(y_pred)) + * 2.) 
+ loss = loss / (tv_weight + tv2_weight) + # TODO simplify to use MSE instead + return loss + + @classmethod + def _diff_x(cls, img): + """ X Difference """ + x_left = img[:, :, 1:2, :] - img[:, :, 0:1, :] + x_inner = img[:, :, 2:, :] - img[:, :, :-2, :] + x_right = img[:, :, -1:, :] - img[:, :, -2:-1, :] + x_out = K.concatenate([x_left, x_inner, x_right], axis=2) + return x_out * 0.5 + + @classmethod + def _diff_y(cls, img): + """ Y Difference """ + y_top = img[:, 1:2, :, :] - img[:, 0:1, :, :] + y_inner = img[:, 2:, :, :] - img[:, :-2, :, :] + y_bot = img[:, -1:, :, :] - img[:, -2:-1, :, :] + y_out = K.concatenate([y_top, y_inner, y_bot], axis=1) + return y_out * 0.5 + + @classmethod + def _diff_xx(cls, img): + """ X-X Difference """ + x_left = img[:, :, 1:2, :] + img[:, :, 0:1, :] + x_inner = img[:, :, 2:, :] + img[:, :, :-2, :] + x_right = img[:, :, -1:, :] + img[:, :, -2:-1, :] + x_out = K.concatenate([x_left, x_inner, x_right], axis=2) + return x_out - 2.0 * img + + @classmethod + def _diff_yy(cls, img): + """ Y-Y Difference """ + y_top = img[:, 1:2, :, :] + img[:, 0:1, :, :] + y_inner = img[:, 2:, :, :] + img[:, :-2, :, :] + y_bot = img[:, -1:, :, :] + img[:, -2:-1, :, :] + y_out = K.concatenate([y_top, y_inner, y_bot], axis=1) + return y_out - 2.0 * img + + @classmethod + def _diff_xy(cls, img): + """ X-Y Difference """ + # xout1 + top_left = img[:, 1:2, 1:2, :] + img[:, 0:1, 0:1, :] + inner_left = img[:, 2:, 1:2, :] + img[:, :-2, 0:1, :] + bot_left = img[:, -1:, 1:2, :] + img[:, -2:-1, 0:1, :] + xy_left = K.concatenate([top_left, inner_left, bot_left], axis=1) + + top_mid = img[:, 1:2, 2:, :] + img[:, 0:1, :-2, :] + mid_mid = img[:, 2:, 2:, :] + img[:, :-2, :-2, :] + bot_mid = img[:, -1:, 2:, :] + img[:, -2:-1, :-2, :] + xy_mid = K.concatenate([top_mid, mid_mid, bot_mid], axis=1) + + top_right = img[:, 1:2, -1:, :] + img[:, 0:1, -2:-1, :] + inner_right = img[:, 2:, -1:, :] + img[:, :-2, -2:-1, :] + bot_right = img[:, -1:, -1:, :] + img[:, -2:-1, -2:-1, :] 
+ xy_right = K.concatenate([top_right, inner_right, bot_right], axis=1) + + # Xout2 + top_left = img[:, 0:1, 1:2, :] + img[:, 1:2, 0:1, :] + inner_left = img[:, :-2, 1:2, :] + img[:, 2:, 0:1, :] + bot_left = img[:, -2:-1, 1:2, :] + img[:, -1:, 0:1, :] + xy_left = K.concatenate([top_left, inner_left, bot_left], axis=1) + + top_mid = img[:, 0:1, 2:, :] + img[:, 1:2, :-2, :] + mid_mid = img[:, :-2, 2:, :] + img[:, 2:, :-2, :] + bot_mid = img[:, -2:-1, 2:, :] + img[:, -1:, :-2, :] + xy_mid = K.concatenate([top_mid, mid_mid, bot_mid], axis=1) + + top_right = img[:, 0:1, -1:, :] + img[:, 1:2, -2:-1, :] + inner_right = img[:, :-2, -1:, :] + img[:, 2:, -2:-1, :] + bot_right = img[:, -2:-1, -1:, :] + img[:, -1:, -2:-1, :] + xy_right = K.concatenate([top_right, inner_right, bot_right], axis=1) + + xy_out1 = K.concatenate([xy_left, xy_mid, xy_right], axis=2) + xy_out2 = K.concatenate([xy_left, xy_mid, xy_right], axis=2) + return (xy_out1 - xy_out2) * 0.25 + + +class GMSDLoss(tf.keras.losses.Loss): + """ Gradient Magnitude Similarity Deviation Loss. + + Improved image quality metric over MS-SSIM with easier calculations + + References + ---------- + http://www4.comp.polyu.edu.hk/~cslzhang/IQA/GMSD/GMSD.htm + https://arxiv.org/ftp/arxiv/papers/1308/1308.3052.pdf + """ + + def call(self, y_true, y_pred): + """ Return the Gradient Magnitude Similarity Deviation Loss. 
+ + + Parameters + ---------- + y_true: tensor or variable + The ground truth value + y_pred: tensor or variable + The predicted value + + Returns + ------- + tensor + The loss value + """ + true_edge = self._scharr_edges(y_true, True) + pred_edge = self._scharr_edges(y_pred, True) + ephsilon = 0.0025 + upper = 2.0 * true_edge * pred_edge + lower = K.square(true_edge) + K.square(pred_edge) + gms = (upper + ephsilon) / (lower + ephsilon) + gmsd = K.std(gms, axis=(1, 2, 3), keepdims=True) + gmsd = K.squeeze(gmsd, axis=-1) + return gmsd + + @classmethod + def _scharr_edges(cls, image, magnitude): + """ Returns a tensor holding modified Scharr edge maps. + + Parameters + ---------- + image: tensor + Image tensor with shape [batch_size, h, w, d] and type float32. The image(s) must be + 2x2 or larger. + magnitude: bool + Boolean to determine if the edge magnitude or edge direction is returned + + Returns + ------- + tensor + Tensor holding edge maps for each channel. Returns a tensor with shape `[batch_size, h, + w, d, 2]` where the last two dimensions hold `[[dy[0], dx[0]], [dy[1], dx[1]], ..., + [dy[d-1], dx[d-1]]]` calculated using the Scharr filter. + """ + + # Define vertical and horizontal Scharr filters. 
+ static_image_shape = image.get_shape() + image_shape = K.shape(image) + + # 5x5 modified Scharr kernel ( reshape to (5,5,1,2) ) + matrix = np.array([[[[0.00070, 0.00070]], + [[0.00520, 0.00370]], + [[0.03700, 0.00000]], + [[0.00520, -0.0037]], + [[0.00070, -0.0007]]], + [[[0.00370, 0.00520]], + [[0.11870, 0.11870]], + [[0.25890, 0.00000]], + [[0.11870, -0.1187]], + [[0.00370, -0.0052]]], + [[[0.00000, 0.03700]], + [[0.00000, 0.25890]], + [[0.00000, 0.00000]], + [[0.00000, -0.2589]], + [[0.00000, -0.0370]]], + [[[-0.0037, 0.00520]], + [[-0.1187, 0.11870]], + [[-0.2589, 0.00000]], + [[-0.1187, -0.1187]], + [[-0.0037, -0.0052]]], + [[[-0.0007, 0.00070]], + [[-0.0052, 0.00370]], + [[-0.0370, 0.00000]], + [[-0.0052, -0.0037]], + [[-0.0007, -0.0007]]]]) + num_kernels = [2] + kernels = K.constant(matrix, dtype='float32') + kernels = K.tile(kernels, [1, 1, image_shape[-1], 1]) + + # Use depth-wise convolution to calculate edge maps per channel. + # Output tensor has shape [batch_size, h, w, d * num_kernels]. + pad_sizes = [[0, 0], [2, 2], [2, 2], [0, 0]] + padded = tf.pad(image, pad_sizes, mode='REFLECT') + output = K.depthwise_conv2d(padded, kernels) + + if not magnitude: # direction of edges + # Reshape to [batch_size, h, w, d, num_kernels]. + shape = K.concatenate([image_shape, num_kernels], axis=0) + output = K.reshape(output, shape=shape) + output.set_shape(static_image_shape.concatenate(num_kernels)) + output = tf.atan(K.squeeze(output[:, :, :, :, 0] / output[:, :, :, :, 1], axis=None)) + # magnitude of edges -- unified x & y edges don't work well with Neural Networks + return output diff --git a/lib/model/memory_saving_gradients.py b/lib/model/memory_saving_gradients.py deleted file mode 100644 index 8a893a2cb0..0000000000 --- a/lib/model/memory_saving_gradients.py +++ /dev/null @@ -1,439 +0,0 @@ -#!/usr/bin/env python3 -""" Memory saving gradients. 
-Adapted from: https://github.com/openai/gradient-checkpointing - -The MIT License - -Copyright (c) 2018 OpenAI (http://openai.com) - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-""" - -import contextlib -import logging -import time -import sys - -import numpy as np -import tensorflow as tf -import tensorflow.contrib.graph_editor as ge # pylint: disable=no-name-in-module -from toposort import toposort - - -logger = logging.getLogger(__name__) # pylint: disable=invalid-name -sys.setrecursionlimit(10000) -# refers back to current module if we decide to split helpers out -util = sys.modules[__name__] - -# getting rid of "WARNING:tensorflow:VARIABLES collection name is deprecated" -setattr(tf.GraphKeys, "VARIABLES", "variables") - -# save original gradients since tf.gradient could be monkey-patched to point -# to our version -from tensorflow.python.ops import gradients as tf_grads_lib # pylint: disable=no-name-in-module -tf_gradients = tf_grads_lib.gradients - -MIN_CHECKPOINT_NODE_SIZE = 1024 # use lower value during testing - - -# specific versions we can use to do process-wide replacement of tf.gradients -def gradients_speed(ys, xs, grad_ys=None, **kwargs): - return gradients(ys, xs, grad_ys, checkpoints='speed', **kwargs) - - -def gradients_memory(ys, xs, grad_ys=None, **kwargs): - return gradients(ys, xs, grad_ys, checkpoints='memory', **kwargs) - - -def gradients_collection(ys, xs, grad_ys=None, **kwargs): - return gradients(ys, xs, grad_ys, checkpoints='collection', **kwargs) - - -def gradients(ys, xs, # pylint: disable: too-many-statements, too-many-branches - grad_ys=None, checkpoints='collection', **kwargs): - ''' - Authors: Tim Salimans & Yaroslav Bulatov - - memory efficient gradient implementation inspired by "Training Deep Nets with Sublinear Memory - Cost" by Chen et al. 
2016 (https://arxiv.org/abs/1604.06174) - - ys,xs,grad_ys,kwargs are the arguments to standard tensorflow tf.gradients - (https://www.tensorflow.org/versions/r0.12/api_docs/python/train.html#gradients) - - 'checkpoints' can either be - - a list consisting of tensors from the forward pass of the neural net - that we should re-use when calculating the gradients in the backward pass - all other tensors that do not appear in this list will be re-computed - - a string specifying how this list should be determined. currently we support - - 'speed': checkpoint all outputs of convolutions and matmuls. these ops are usually - the most expensive, so checkpointing them maximizes the running speed - (this is a good option if nonlinearities, concats, batchnorms, etc are - taking up a lot of memory) - - 'memory': try to minimize the memory usage - (currently using a very simple strategy that identifies a number of - bottleneck tensors in the graph to checkpoint) - - 'collection': look for a tensorflow collection named 'checkpoints', which holds the - tensors to checkpoint - ''' - - # print("Calling memsaving gradients with", checkpoints) - if not isinstance(ys, list): - ys = [ys] - if not isinstance(xs, list): - xs = [xs] - - bwd_ops = ge.get_backward_walk_ops([y.op for y in ys], - inclusive=True) - - debug_print("bwd_ops: {}".format(bwd_ops)) - - # forward ops are all ops that are candidates for recomputation - fwd_ops = ge.get_forward_walk_ops([x.op for x in xs], - inclusive=True, - within_ops=bwd_ops) - debug_print("fwd_ops: {}".format(fwd_ops)) - - # exclude ops with no inputs - fwd_ops = [op for op in fwd_ops if op.inputs] - - # don't recompute xs, remove variables - xs_ops = _to_ops(xs) - fwd_ops = [op for op in fwd_ops if op not in xs_ops] - fwd_ops = [op for op in fwd_ops if '/assign' not in op.name] - fwd_ops = [op for op in fwd_ops if '/Assign' not in op.name] - fwd_ops = [op for op in fwd_ops if '/read' not in op.name] - ts_all = ge.filter_ts(fwd_ops, True) # get the 
tensors - ts_all = [t for t in ts_all if '/read' not in t.name] - ts_all = set(ts_all) - set(xs) - set(ys) - - # construct list of tensors to checkpoint during forward pass, if not - # given as input - if type(checkpoints) is not list: - if checkpoints == 'collection': - checkpoints = tf.get_collection('checkpoints') - - elif checkpoints == 'speed': - # checkpoint all expensive ops to maximize running speed - checkpoints = ge.filter_ts_from_regex(fwd_ops, 'conv2d|Conv|MatMul') - - elif checkpoints == 'memory': - - # remove very small tensors and some weird ops - def fixdims(t): # tf.Dimension values are not compatible with int, convert manually - try: - return [int(e if e.value is not None else 64) for e in t] - except: - return [0] # unknown shape - ts_all = [t for t in ts_all if np.prod(fixdims(t.shape)) > MIN_CHECKPOINT_NODE_SIZE] - ts_all = [t for t in ts_all if 'L2Loss' not in t.name] - ts_all = [t for t in ts_all if 'entropy' not in t.name] - ts_all = [t for t in ts_all if 'FusedBatchNorm' not in t.name] - ts_all = [t for t in ts_all if 'Switch' not in t.name] - ts_all = [t for t in ts_all if 'dropout' not in t.name] - # DV: FP16_FIX - need to add 'Cast' layer here to make it work for FP16 - ts_all = [t for t in ts_all if 'Cast' not in t.name] - - # filter out all tensors that are inputs of the backward graph - with util.capture_ops() as bwd_ops: - tf_gradients(ys, xs, grad_ys, **kwargs) - - bwd_inputs = [t for op in bwd_ops for t in op.inputs] - # list of tensors in forward graph that is in input to bwd graph - ts_filtered = list(set(bwd_inputs).intersection(ts_all)) - debug_print("Using tensors {}".format(ts_filtered)) - - # try two slightly different ways of getting bottlenecks tensors - # to checkpoint - for ts in [ts_filtered, ts_all]: - - # get all bottlenecks in the graph - bottleneck_ts = [] - for t in ts: - b = set(ge.get_backward_walk_ops(t.op, inclusive=True, within_ops=fwd_ops)) - f = set(ge.get_forward_walk_ops(t.op, inclusive=False, 
within_ops=fwd_ops)) - # check that there are not shortcuts - b_inp = set([inp for op in b for inp in op.inputs]).intersection(ts_all) - f_inp = set([inp for op in f for inp in op.inputs]).intersection(ts_all) - if not set(b_inp).intersection(f_inp) and len(b_inp)+len(f_inp) >= len(ts_all): - bottleneck_ts.append(t) # we have a bottleneck! - else: - debug_print("Rejected bottleneck candidate and ops {}".format( - [t] + list(set(ts_all) - set(b_inp) - set(f_inp)))) - - # success? or try again without filtering? - if len(bottleneck_ts) >= np.sqrt(len(ts_filtered)): # enough bottlenecks found! - break - - if not bottleneck_ts: - raise Exception('unable to find bottleneck tensors! please provide checkpoint ' - 'nodes manually, or use checkpoints="speed".') - - # sort the bottlenecks - bottlenecks_sorted_lists = tf_toposort(bottleneck_ts, within_ops=fwd_ops) - sorted_bottlenecks = [t for ts in bottlenecks_sorted_lists for t in ts] - - # save an approximately optimal number ~ sqrt(N) - N = len(ts_filtered) - if len(bottleneck_ts) <= np.ceil(np.sqrt(N)): - checkpoints = sorted_bottlenecks - else: - step = int(np.ceil(len(bottleneck_ts) / np.sqrt(N))) - checkpoints = sorted_bottlenecks[step::step] - - else: - raise Exception('%s is unsupported input for "checkpoints"' % (checkpoints,)) - - checkpoints = list(set(checkpoints).intersection(ts_all)) - - # at this point automatic selection happened and checkpoints is list of nodes - assert isinstance(checkpoints, list) - - debug_print("Checkpoint nodes used: {}".format(checkpoints)) - # better error handling of special cases - # xs are already handled as checkpoint nodes, so no need to include them - xs_intersect_checkpoints = set(xs).intersection(set(checkpoints)) - if xs_intersect_checkpoints: - debug_print("Warning, some input nodes are also checkpoint nodes: {}".format( - xs_intersect_checkpoints)) - ys_intersect_checkpoints = set(ys).intersection(set(checkpoints)) - debug_print("ys: {}, checkpoints:{}, intersect: 
{}".format( - ys, checkpoints, ys_intersect_checkpoints)) - # saving an output node (ys) gives no benefit in memory while creating - # new edge cases, exclude them - if ys_intersect_checkpoints: - debug_print("Warning, some output nodes are also checkpoints nodes: {}".format( - format_ops(ys_intersect_checkpoints))) - - # remove initial and terminal nodes from checkpoints list if present - checkpoints = list(set(checkpoints) - set(ys) - set(xs)) - - # check that we have some nodes to checkpoint - if not checkpoints: - raise Exception('no checkpoints nodes found or given as input! ') - - # disconnect dependencies between checkpointed tensors - checkpoints_disconnected = {} - for x in checkpoints: - if x.op and x.op.name is not None: - grad_node = tf.stop_gradient(x, name=x.op.name+"_sg") - else: - grad_node = tf.stop_gradient(x) - checkpoints_disconnected[x] = grad_node - - # partial derivatives to the checkpointed tensors and xs - ops_to_copy = fast_backward_ops(seed_ops=[y.op for y in ys], - stop_at_ts=checkpoints, within_ops=fwd_ops) - debug_print("Found {} ops to copy within fwd_ops {}, seed {}, stop_at {}".format( - len(ops_to_copy), fwd_ops, [r.op for r in ys], checkpoints)) - debug_print("ops_to_copy = {}".format(ops_to_copy)) - debug_print("Processing list {}".format(ys)) - _, info = ge.copy_with_input_replacements(ge.sgv(ops_to_copy), {}) - for origin_op, op in info._transformed_ops.items(): - op._set_device(origin_op.node_def.device) - copied_ops = info._transformed_ops.values() - debug_print("Copied {} to {}".format(ops_to_copy, copied_ops)) - ge.reroute_ts(checkpoints_disconnected.values(), - checkpoints_disconnected.keys(), - can_modify=copied_ops) - debug_print("Rewired {} in place of {} restricted to {}".format( - checkpoints_disconnected.values(), checkpoints_disconnected.keys(), copied_ops)) - - # get gradients with respect to current boundary + original x's - copied_ys = [info._transformed_ops[y.op]._outputs[0] for y in ys] - boundary = 
list(checkpoints_disconnected.values()) - dv = tf_gradients(ys=copied_ys, xs=boundary+xs, grad_ys=grad_ys, **kwargs) - debug_print("Got gradients {}".format(dv)) - debug_print("for %s", copied_ys) - debug_print("with respect to {}".format(boundary+xs)) - - inputs_to_do_before = [y.op for y in ys] - if grad_ys is not None: - inputs_to_do_before += grad_ys - wait_to_do_ops = list(copied_ops) + [g.op for g in dv if g is not None] - my_add_control_inputs(wait_to_do_ops, inputs_to_do_before) - - # partial derivatives to the checkpointed nodes - # dictionary of "node: backprop" for nodes in the boundary - d_checkpoints = {r: dr for r, dr in zip(checkpoints_disconnected.keys(), - dv[:len(checkpoints_disconnected)])} - # partial derivatives to xs (usually the params of the neural net) - d_xs = dv[len(checkpoints_disconnected):] - - # incorporate derivatives flowing through the checkpointed nodes - checkpoints_sorted_lists = tf_toposort(checkpoints, within_ops=fwd_ops) - for ts in checkpoints_sorted_lists[::-1]: - debug_print("Processing list {}".format(ts)) - checkpoints_other = [r for r in checkpoints if r not in ts] - checkpoints_disconnected_other = [checkpoints_disconnected[r] for r in checkpoints_other] - - # copy part of the graph below current checkpoint node, stopping at - # other checkpoints nodes - ops_to_copy = fast_backward_ops(within_ops=fwd_ops, - seed_ops=[r.op for r in ts], - stop_at_ts=checkpoints_other) - debug_print("Found {} ops to copy within {}, seed {}, stop_at {}".format( - len(ops_to_copy), fwd_ops, [r.op for r in ts], checkpoints_other)) - debug_print("ops_to_copy = {}".format(ops_to_copy)) - if not ops_to_copy: # we're done! 
- break - _, info = ge.copy_with_input_replacements(ge.sgv(ops_to_copy), {}) - for origin_op, op in info._transformed_ops.items(): - op._set_device(origin_op.node_def.device) - copied_ops = info._transformed_ops.values() - debug_print("Copied {} to {}".format(ops_to_copy, copied_ops)) - ge.reroute_ts(checkpoints_disconnected_other, checkpoints_other, can_modify=copied_ops) - debug_print("Rewired %s in place of %s restricted to %s", - checkpoints_disconnected_other, checkpoints_other, copied_ops) - - # gradient flowing through the checkpointed node - boundary = [info._transformed_ops[r.op]._outputs[0] for r in ts] - substitute_backprops = [d_checkpoints[r] for r in ts] - dv = tf_gradients(boundary, - checkpoints_disconnected_other+xs, - grad_ys=substitute_backprops, **kwargs) - debug_print("Got gradients {}".format(dv)) - debug_print("for {}".format(boundary)) - debug_print("with respect to {}".format(checkpoints_disconnected_other+xs)) - debug_print("with boundary backprop substitutions {}".format(substitute_backprops)) - - inputs_to_do_before = [d_checkpoints[r].op for r in ts] - wait_to_do_ops = list(copied_ops) + [g.op for g in dv if g is not None] - my_add_control_inputs(wait_to_do_ops, inputs_to_do_before) - - # partial derivatives to the checkpointed nodes - for r, dr in zip(checkpoints_other, dv[:len(checkpoints_other)]): - if dr is not None: - if d_checkpoints[r] is None: - d_checkpoints[r] = dr - else: - d_checkpoints[r] += dr - - def _unsparsify(var_x): - if not isinstance(var_x, tf.IndexedSlices): - return var_x - assert var_x.dense_shape is not None, \ - "memory_saving_gradients encountered sparse gradients of unknown shape" - indices = var_x.indices - while indices.shape.ndims < var_x.values.shape.ndims: - indices = tf.expand_dims(indices, -1) - return tf.scatter_nd(indices, var_x.values, var_x.dense_shape) - - # partial derivatives to xs (usually the params of the neural net) - d_xs_new = dv[len(checkpoints_other):] - for j in range(len(xs)): - if 
d_xs_new[j] is not None: - if d_xs[j] is None: - d_xs[j] = _unsparsify(d_xs_new[j]) - else: - d_xs[j] += _unsparsify(d_xs_new[j]) - - return d_xs - - -def tf_toposort(ts_inp, within_ops=None): - """ Tensorflow topological sort """ - all_ops = ge.get_forward_walk_ops([x.op for x in ts_inp], within_ops=within_ops) - - deps = {} - for tf_op in all_ops: - for outp in tf_op.outputs: - deps[outp] = set(tf_op.inputs) - sorted_ts = toposort(deps) - - # only keep the tensors from our original list - ts_sorted_lists = [] - for lst in sorted_ts: - keep = list(set(lst).intersection(ts_inp)) - if keep: - ts_sorted_lists.append(keep) - return ts_sorted_lists - - -def fast_backward_ops(within_ops, seed_ops, stop_at_ts): - """ Fast backward ops """ - bwd_ops = set(ge.get_backward_walk_ops(seed_ops, stop_at_ts=stop_at_ts)) - ops = bwd_ops.intersection(within_ops).difference([t.op for t in stop_at_ts]) - return list(ops) - - -@contextlib.contextmanager -def capture_ops(): - """Decorator to capture ops created in the block. - with capture_ops() as ops: - # create some ops - print(ops) # => prints ops created. 
- """ - - micros = int(time.time()*10**6) - scope_name = str(micros) - op_list = [] - with tf.name_scope(scope_name): - yield op_list - - graph = tf.get_default_graph() - op_list.extend(ge.select_ops(scope_name+"/.*", graph=graph)) - - -def _to_op(tensor_or_op): - """ Convert to op """ - if hasattr(tensor_or_op, "op"): - return tensor_or_op.op - return tensor_or_op - - -def _to_ops(iterable): - """ Convert to ops """ - if not _is_iterable(iterable): - return iterable - return [_to_op(i) for i in iterable] - - -def _is_iterable(obj): - """ Check if object is iterable """ - try: - _ = iter(obj) - except Exception: # pylint: disable=broad-except - return False - return True - - -def debug_print(msg, *args): - """ Debug logging """ - formatted_args = [format_ops(arg) for arg in args] - logger.debug("%s: %s", msg, formatted_args) - - -def format_ops(ops, sort_outputs=True): - """Helper method for printing ops. Converts Tensor/Operation op to op.name, - rest to str(op).""" - - if hasattr(ops, '__iter__') and not isinstance(ops, str): - lst = [(op.name if hasattr(op, "name") else str(op)) for op in ops] - if sort_outputs: - return sorted(lst) - return lst - return ops.name if hasattr(ops, "name") else str(ops) - - -def my_add_control_inputs(wait_to_do_ops, inputs_to_do_before): - """ Add control inputs """ - for tf_op in wait_to_do_ops: - ctl_inp = [i for i in inputs_to_do_before - if tf_op.control_inputs is None or i not in tf_op.control_inputs] - ge.add_control_inputs(tf_op, ctl_inp) diff --git a/lib/model/nn_blocks.py b/lib/model/nn_blocks.py index 91a66afb88..498a989efb 100644 --- a/lib/model/nn_blocks.py +++ b/lib/model/nn_blocks.py @@ -3,11 +3,10 @@ import logging -from keras.layers import Add, Concatenate, SeparableConv2D, UpSampling2D -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.convolutional import Conv2D -from keras.layers.core import Activation +from keras.layers import (Activation, Add, Concatenate, Conv2D as KConv2D, LeakyReLU, 
+ SeparableConv2D, UpSampling2D) from keras.initializers import he_uniform, VarianceScaling + from .initializers import ICNR, ConvolutionAware from .layers import PixelShuffler, ReflectionPadding2D from .normalization import InstanceNormalization @@ -15,476 +14,584 @@ logger = logging.getLogger(__name__) # pylint: disable=invalid-name -class NNBlocks(): - """ Blocks that are often used for multiple models are stored here for easy access. +_CONFIG = dict() +_NAMES = dict() + - This class is always brought in as ``self.blocks`` in all model plugins so that all models - have access to them. +def set_config(configuration): + """ Set the global configuration parameters from the user's config file. - The parameters passed into this class should ultimately originate from the user's training - configuration file, rather than being hard-coded at the plugin level. + These options are used when creating layers for new models. Parameters ---------- - use_icnr_init: bool, Optional - ``True`` if ICNR initialization should be used rather than the default. Default: ``False`` - use_convaware_init: bool, Optional - ``True`` if Convolutional Aware initialization should be used rather than the default. - Default: ``False`` - use_reflect_padding: bool, Optional - ``True`` if Reflect Padding initialization should be used rather than the padding. + configuration: dict + The configuration options that exist in the training configuration files that pertain + specifically to Custom Faceswap Layers. The keys should be: `icnr_init`, `conv_aware_init` + and 'reflect_padding' + """ + global _CONFIG # pylint:disable=global-statement + _CONFIG = configuration + logger.debug("Set NNBlock configuration to: %s", _CONFIG) + + +def _get_name(name): + """ Return unique layer name for requested block. 
+ + As blocks can be used multiple times, auto appends an integer to the end of the requested + name to keep all block names unique + + Parameters + ---------- + name: str + The requested name for the layer + + Returns + ------- + str + The unique name for this layer + """ + global _NAMES # pylint:disable=global-statement + _NAMES[name] = _NAMES.setdefault(name, -1) + 1 + name = "{}_{}".format(name, _NAMES[name]) + logger.debug("Generating block name: %s", name) + return name + + +# << CONVOLUTIONS >> +class Conv2D(KConv2D): # pylint:disable=too-few-public-methods + """ A standard Keras Convolution 2D layer with parameters updated to be more appropriate for + Faceswap architecture. + + Parameters are the same, with the same defaults, as a standard :class:`keras.layers.Conv2D` + except where listed below. The default initializer is updated to `he_uniform` or `convolutional + aware` based on user configuration settings. + + Parameters + ---------- + padding: str, optional + One of `"valid"` or `"same"` (case-insensitive). Default: `"same"`. Note that `"same"` is + slightly inconsistent across backends with `strides` != 1, as described + `here `_. + check_icnr_init: `bool`, optional + ``True`` if the user configuration options should be checked to apply ICNR initialization + to the layer. This should only be passed in from :class:`UpscaleBlock` layers. Default: ``False`` - first_run: bool, Optional - ``True`` if a model is being created for the first time, ``False`` if a model is being - resumed. Used to prevent Convolutional Aware weights from being calculated when a model - is being reloaded. 
Default: ``True`` """ - def __init__(self, - use_icnr_init=False, - use_convaware_init=False, - use_reflect_padding=False, - first_run=True): - logger.debug("Initializing %s: (use_icnr_init: %s, use_convaware_init: %s, " - "use_reflect_padding: %s, first_run: %s)", - self.__class__.__name__, use_icnr_init, use_convaware_init, - use_reflect_padding, first_run) - self.names = dict() - self.first_run = first_run - self.use_icnr_init = use_icnr_init - self.use_convaware_init = use_convaware_init - self.use_reflect_padding = use_reflect_padding - if self.use_convaware_init and self.first_run: - logger.info("Using Convolutional Aware Initialization. Model generation will take a " - "few minutes...") - logger.debug("Initialized %s", self.__class__.__name__) - - def _get_name(self, name): - """ Return unique layer name for requested block. - - As blocks can be used multiple times, auto appends an integer to the end of the requested - name to keep all block names unique + def __init__(self, *args, padding="same", check_icnr_init=False, **kwargs): + if kwargs.get("name", None) is None: + kwargs["name"] = _get_name("conv2d_{}".format(args[0])) + initializer = self._get_default_initializer(kwargs.pop("kernel_initializer", None)) + if check_icnr_init and _CONFIG["icnr_init"]: + initializer = ICNR(initializer=initializer) + logger.debug("Using ICNR Initializer: %s", initializer) + super().__init__(*args, padding=padding, kernel_initializer=initializer, **kwargs) + + @classmethod + def _get_default_initializer(cls, initializer): + """ Returns a default initializer of Convolutional Aware or he_uniform for convolutional + layers. Parameters ---------- - name: str - The requested name for the layer + initializer: :class:`keras.initializers.Initializer` or None + The initializer that has been passed into the model. If this value is ``None`` then a + default initializer will be returned based on the configuration choices, otherwise + the given initializer will be returned. 
Returns ------- - str - The unique name for this layer + :class:`keras.initializers.Initializer` + The kernel initializer to use for this convolutional layer. Either the original given + initializer, he_uniform or convolutional aware (if selected in config options) """ - self.names[name] = self.names.setdefault(name, -1) + 1 - name = "{}_{}".format(name, self.names[name]) - logger.debug("Generating block name: %s", name) - return name - - def _set_default_initializer(self, kwargs): - """ Sets the default initializer for convolution 2D and Seperable convolution 2D layers - to Convolutional Aware or he_uniform. - - if a specific initializer has been passed in from the model plugin, then the specified - initializer will be used rather than the default. - - Parameters - ---------- - kwargs: dict - The keyword arguments for the current layer - - Returns - ------- - dict - The keyword arguments for the current layer with the initializer updated to - the select default value - """ - if "kernel_initializer" in kwargs: - logger.debug("Using model specified initializer: %s", kwargs["kernel_initializer"]) - return kwargs - if self.use_convaware_init: - default = ConvolutionAware() - if self.first_run: - # Indicate the Convolutional Aware should be calculated on first run - default._init = True # pylint:disable=protected-access + if initializer is None: + retval = ConvolutionAware() if _CONFIG["conv_aware_init"] else he_uniform() + logger.debug("Set default kernel_initializer: %s", retval) else: - default = he_uniform() - if kwargs.get("kernel_initializer", None) != default: - kwargs["kernel_initializer"] = default - logger.debug("Set default kernel_initializer to: %s", kwargs["kernel_initializer"]) - return kwargs - - @staticmethod - def _switch_kernel_initializer(kwargs, initializer): - """ Switch the initializer in the given kwargs to the given initializer and return the - previous initializer to caller. 
- - For residual blocks and up-scaling, user selected initializer methods should replace those - set by the model. This method updates the initializer for the layer, and returns the - original initializer so that it can be set back to the layer's key word arguments for - subsequent layers where the initializer should not be switched. + retval = initializer + logger.debug("Using model supplied initializer: %s", retval) + return retval + + +class Conv2DOutput(): # pylint:disable=too-few-public-methods + """ A Convolution 2D layer that separates out the activation layer to explicitly set the data + type on the activation to float 32 to fully support mixed precision training. + + The Convolution 2D layer uses default parameters to be more appropriate for Faceswap + architecture. + + Parameters are the same, with the same defaults, as a standard :class:`keras.layers.Conv2D` + except where listed below. The default initializer is updated to he_uniform or convolutional + aware based on user config settings. + + Parameters + ---------- + filters: int + The dimensionality of the output space (i.e. the number of output filters in the + convolution) + kernel_size: int or tuple/list of 2 ints + The height and width of the 2D convolution window. Can be a single integer to specify the + same value for all spatial dimensions. + activation: str, optional + The activation function to apply to the output. Default: `"sigmoid"` + padding: str, optional + One of `"valid"` or `"same"` (case-insensitive). Default: `"same"`. Note that `"same"` is + slightly inconsistent across backends with `strides` != 1, as described + `here `_. 
+ kwargs: dict + Any additional Keras standard layer keyword arguments to pass to the Convolutional 2D layer + """ + def __init__(self, filters, kernel_size, activation="sigmoid", padding="same", **kwargs): + self._name = kwargs.pop("name") if "name" in kwargs else _get_name( + "conv_output_{}".format(filters)) + self._filters = filters + self._kernel_size = kernel_size + self._activation = activation + self._padding = padding + self._kwargs = kwargs + + def __call__(self, inputs): + """ Call the Faceswap Convolutional Output Layer. Parameters ---------- - kwargs: dict - The keyword arguments for the current layer - initializer: keras or faceswap initializer class - The initializer that should replace the current initializer that exists in keyword - arguments + inputs: Tensor + The input to the layer Returns ------- - keras or faceswap initializer class - The original initializer that existed in the given keyword arguments + Tensor + The output tensor from the Convolution 2D Layer """ - original = kwargs.get("kernel_initializer", None) - kwargs["kernel_initializer"] = initializer - logger.debug("Switched kernel_initializer from %s to %s", original, initializer) - return original + var_x = Conv2D(self._filters, + self._kernel_size, + padding=self._padding, + name="{}_conv2d".format(self._name), + **self._kwargs)(inputs) + var_x = Activation(self._activation, dtype="float32", name=self._name)(var_x) + return var_x + + +class Conv2DBlock(): # pylint:disable=too-few-public-methods + """ A standard Convolution 2D layer which applies user specified configuration to the + layer. + + Adds reflection padding if it has been selected by the user, and other post-processing + if requested by the plugin. - def conv2d(self, input_tensor, filters, kernel_size, strides=(1, 1), padding="same", **kwargs): - """ A standard Convolution 2D layer with correct initialization. + Adds instance normalization if requested. Adds a LeakyReLU if a residual block follows. 
- This layer creates a convolution kernel that is convolved with the layer input to produce - a tensor of outputs. + Parameters + ---------- + filters: int + The dimensionality of the output space (i.e. the number of output filters in the + convolution) + kernel_size: int, optional + An integer or tuple/list of 2 integers, specifying the height and width of the 2D + convolution window. Can be a single integer to specify the same value for all spatial + dimensions. Default: 5 + strides: tuple or int, optional + An integer or tuple/list of 2 integers, specifying the strides of the convolution along the + height and width. Can be a single integer to specify the same value for all spatial + dimensions. Default: `2` + padding: ["valid", "same"], optional + The padding to use. NB: If reflect padding has been selected in the user configuration + options, then this argument will be ignored in favor of reflect padding. Default: `"same"` + use_instance_norm: bool, optional + ``True`` if instance normalization should be applied after the convolutional layer. + Default: ``False`` + res_block_follows: bool, optional + If a residual block will follow this layer, then this should be set to ``True`` to add a + leaky ReLu after the convolutional layer. 
Default: ``False`` + kwargs: dict + Any additional Keras standard layer keyword arguments to pass to the Convolutional 2D layer + """ + def __init__(self, + filters, + kernel_size=5, + strides=2, + padding="same", + use_instance_norm=False, + res_block_follows=False, + **kwargs): + self._name = kwargs.pop("name") if "name" in kwargs else _get_name( + "conv_{}".format(filters)) + logger.debug("name: %s, filters: %s, kernel_size: %s, strides: %s, padding: %s, " + "use_instance_norm: %s, res_block_follows: %s, kwargs: %s)", + self._name, filters, kernel_size, strides, padding, use_instance_norm, + res_block_follows, kwargs) + self._use_reflect_padding = _CONFIG["reflect_padding"] + + self._filters = filters + self._kernel_size = kernel_size + self._strides = strides + self._padding = "valid" if self._use_reflect_padding else padding + self._kwargs = kwargs + self._use_instance_norm = use_instance_norm + self._res_block_follows = res_block_follows + + def __call__(self, inputs): + """ Call the Faceswap Convolutional Layer. Parameters ---------- - input_tensor: tensor - The input tensor to the layer - filters: int - The dimensionality of the output space (i.e. the number of output filters in the - convolution) - kernel_size: int - An integer or tuple/list of 2 integers, specifying the height and width of the 2D - convolution window. Can be a single integer to specify the same value for all spatial - dimensions - strides: tuple, optional - An integer or tuple/list of 2 integers, specifying the strides of the convolution along - the height and width. Can be a single integer to specify the same value for all spatial - dimensions. Default: `(1, 1)` - padding: ["valid", "same"], optional - The padding to use. 
Default: `"same"` - kwargs: dict - Any additional Keras standard layer keyword arguments + inputs: Tensor + The input to the layer Returns ------- - tensor + Tensor The output tensor from the Convolution 2D Layer """ - logger.debug("input_tensor: %s, filters: %s, kernel_size: %s, strides: %s, padding: %s, " - "kwargs: %s)", input_tensor, filters, kernel_size, strides, padding, kwargs) - if kwargs.get("name", None) is None: - kwargs["name"] = self._get_name("conv2d_{}".format(input_tensor.shape[1])) - kwargs = self._set_default_initializer(kwargs) - var_x = Conv2D(filters, kernel_size, - strides=strides, - padding=padding, - **kwargs)(input_tensor) + if self._use_reflect_padding: + inputs = ReflectionPadding2D(stride=self._strides, + kernel_size=self._kernel_size, + name="{}_reflectionpadding2d".format(self._name))(inputs) + var_x = Conv2D(self._filters, + self._kernel_size, + strides=self._strides, + padding=self._padding, + name="{}_conv2d".format(self._name), + **self._kwargs)(inputs) + if self._use_instance_norm: + var_x = InstanceNormalization(name="{}_instancenorm".format(self._name))(var_x) + if not self._res_block_follows: + var_x = LeakyReLU(0.1, name="{}_leakyrelu".format(self._name))(var_x) return var_x - # <<< Original Model Blocks >>> # - def conv(self, input_tensor, filters, kernel_size=5, strides=2, padding="same", - use_instance_norm=False, res_block_follows=False, **kwargs): - """ A standard Convolution 2D layer which applies user specified configuration to the - layer. - Adds reflection padding if it has been selected by the user, and other post-processing - if requested by the plugin. +class SeparableConv2DBlock(): # pylint:disable=too-few-public-methods + """ Seperable Convolution Block. + + Parameters + ---------- + filters: int + The dimensionality of the output space (i.e. 
the number of output filters in the + convolution) + kernel_size: int, optional + An integer or tuple/list of 2 integers, specifying the height and width of the 2D + convolution window. Can be a single integer to specify the same value for all spatial + dimensions. Default: 5 + strides: tuple or int, optional + An integer or tuple/list of 2 integers, specifying the strides of the convolution along + the height and width. Can be a single integer to specify the same value for all spatial + dimensions. Default: `2` + kwargs: dict + Any additional Keras standard layer keyword arguments to pass to the Separable + Convolutional 2D layer + """ + def __init__(self, filters, kernel_size=5, strides=2, **kwargs): + self._name = _get_name("separableconv2d_{}".format(filters)) + logger.debug("name: %s, filters: %s, kernel_size: %s, strides: %s, kwargs: %s)", + self._name, filters, kernel_size, strides, kwargs) + + self._filters = filters + self._kernel_size = kernel_size + self._strides = strides + + initializer = self._get_default_initializer(kwargs.pop("kernel_initializer", None)) + kwargs["kernel_initializer"] = initializer + self._kwargs = kwargs + + @classmethod + def _get_default_initializer(cls, initializer): + """ Returns a default initializer of Convolutional Aware or he_uniform for convolutional + layers. Parameters ---------- - input_tensor: tensor - The input tensor to the layer - filters: int - The dimensionality of the output space (i.e. the number of output filters in the - convolution) - kernel_size: int, optional - An integer or tuple/list of 2 integers, specifying the height and width of the 2D - convolution window. Can be a single integer to specify the same value for all spatial - dimensions. Default: 5 - strides: tuple or int, optional - An integer or tuple/list of 2 integers, specifying the strides of the convolution along - the height and width. Can be a single integer to specify the same value for all spatial - dimensions. 
Default: `2` - padding: ["valid", "same"], optional - The padding to use. Default: `"same"` - use_instance_norm: bool, optional - ``True`` if instance normalization should be applied after the convolutional layer. - Default: ``False`` - res_block_follows: bool, optional - If a residual block will follow this layer, then this should be set to `True` to add - a leaky ReLu after the convolutional layer. Default: ``False`` - kwargs: dict - Any additional Keras standard layer keyword arguments + initializer: :class:`keras.initializers.Initializer` or None + The initializer that has been passed into the model. If this value is ``None`` then a + default initializer will be returned based on the configuration choices, otherwise + the given initializer will be returned. Returns ------- - tensor - The output tensor from the Convolution 2D Layer + :class:`keras.initializers.Initializer` + The kernel initializer to use for this convolutional layer. Either the original given + initializer, he_uniform or convolutional aware (if selected in config options) """ - logger.debug("input_tensor: %s, filters: %s, kernel_size: %s, strides: %s, " - "use_instance_norm: %s, kwargs: %s)", input_tensor, filters, kernel_size, - strides, use_instance_norm, kwargs) - name = self._get_name("conv_{}".format(input_tensor.shape[1])) - if self.use_reflect_padding: - input_tensor = ReflectionPadding2D( - stride=strides, - kernel_size=kernel_size, - name="{}_reflectionpadding2d".format(name))(input_tensor) - padding = "valid" - var_x = self.conv2d(input_tensor, filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - name="{}_conv2d".format(name), - **kwargs) - if use_instance_norm: - var_x = InstanceNormalization(name="{}_instancenorm".format(name))(var_x) - if not res_block_follows: - var_x = LeakyReLU(0.1, name="{}_leakyrelu".format(name))(var_x) - return var_x - - def upscale(self, input_tensor, filters, kernel_size=3, padding="same", - use_instance_norm=False, 
res_block_follows=False, scale_factor=2, **kwargs): - """ An upscale layer for sub-pixel up-scaling. + if initializer is None: + retval = ConvolutionAware() if _CONFIG["conv_aware_init"] else he_uniform() + logger.debug("Set default kernel_initializer: %s", retval) + else: + retval = initializer + logger.debug("Using model supplied initializer: %s", retval) + return retval - Adds reflection padding if it has been selected by the user, and other post-processing - if requested by the plugin. + def __call__(self, inputs): + """ Call the Faceswap Separable Convolutional 2D Block. Parameters ---------- - input_tensor: tensor - The input tensor to the layer - filters: int - The dimensionality of the output space (i.e. the number of output filters in the - convolution) - kernel_size: int, optional - An integer or tuple/list of 2 integers, specifying the height and width of the 2D - convolution window. Can be a single integer to specify the same value for all spatial - dimensions. Default: 3 - padding: ["valid", "same"], optional - The padding to use. Default: `"same"` - use_instance_norm: bool, optional - ``True`` if instance normalization should be applied after the convolutional layer. - Default: ``False`` - res_block_follows: bool, optional - If a residual block will follow this layer, then this should be set to `True` to add - a leaky ReLu after the convolutional layer. Default: ``False`` - scale_factor: int, optional - The amount to upscale the image. 
Default: `2` - kwargs: dict - Any additional Keras standard layer keyword arguments + inputs: Tensor + The input to the layer Returns ------- - tensor - The output tensor from the Upscale layer + Tensor + The output tensor from the Upscale Layer """ - logger.debug("input_tensor: %s, filters: %s, kernel_size: %s, use_instance_norm: %s, " - "kwargs: %s)", input_tensor, filters, kernel_size, use_instance_norm, kwargs) - name = self._get_name("upscale_{}".format(input_tensor.shape[1])) - if self.use_reflect_padding: - input_tensor = ReflectionPadding2D( - stride=1, - kernel_size=kernel_size, - name="{}_reflectionpadding2d".format(name))(input_tensor) - padding = "valid" - kwargs = self._set_default_initializer(kwargs) - if self.use_icnr_init: - original_init = self._switch_kernel_initializer( - kwargs, - ICNR(initializer=kwargs["kernel_initializer"])) - var_x = self.conv2d(input_tensor, filters * scale_factor * scale_factor, - kernel_size=kernel_size, - padding=padding, - name="{}_conv2d".format(name), - **kwargs) - if self.use_icnr_init: - self._switch_kernel_initializer(kwargs, original_init) - if use_instance_norm: - var_x = InstanceNormalization(name="{}_instancenorm".format(name))(var_x) - if not res_block_follows: - var_x = LeakyReLU(0.1, name="{}_leakyrelu".format(name))(var_x) - var_x = PixelShuffler(name="{}_pixelshuffler".format(name), size=scale_factor)(var_x) + var_x = SeparableConv2D(self._filters, + kernel_size=self._kernel_size, + strides=self._strides, + padding="same", + name="{}_seperableconv2d".format(self._name), + **self._kwargs)(inputs) + var_x = Activation("relu", name="{}_relu".format(self._name))(var_x) return var_x - # <<< DLight Model Blocks >>> # - def upscale2x(self, input_tensor, filters, - kernel_size=3, padding="same", interpolation="bilinear", res_block_follows=False, - sr_ratio=0.5, scale_factor=2, fast=False, **kwargs): - """ Custom hybrid upscale layer for sub-pixel up-scaling. 
- Most of up-scaling is approximating lighting gradients which can be accurately achieved - using linear fitting. This layer attempts to improve memory consumption by splitting - with bilinear and convolutional layers so that the sub-pixel update will get details - whilst the bilinear filter will get lighting. +# << UPSCALING >> + +class UpscaleBlock(): # pylint:disable=too-few-public-methods + """ An upscale layer for sub-pixel up-scaling. + + Adds reflection padding if it has been selected by the user, and other post-processing + if requested by the plugin. + + Parameters + ---------- + filters: int + The dimensionality of the output space (i.e. the number of output filters in the + convolution) + kernel_size: int, optional + An integer or tuple/list of 2 integers, specifying the height and width of the 2D + convolution window. Can be a single integer to specify the same value for all spatial + dimensions. Default: 3 + padding: ["valid", "same"], optional + The padding to use. NB: If reflect padding has been selected in the user configuration + options, then this argument will be ignored in favor of reflect padding. Default: `"same"` + scale_factor: int, optional + The amount to upscale the image. Default: `2` + use_instance_norm: bool, optional + ``True`` if instance normalization should be applied after the convolutional layer. + Default: ``False`` + res_block_follows: bool, optional + If a residual block will follow this layer, then this should be set to ``True`` to add + a leaky ReLu after the convolutional layer. Default: ``False`` + kwargs: dict + Any additional Keras standard layer keyword arguments to pass to the Convolutional 2D layer + """ - Adds reflection padding if it has been selected by the user, and other post-processing - if requested by the plugin. 
+ def __init__(self, + filters, + kernel_size=3, + padding="same", + scale_factor=2, + use_instance_norm=False, + res_block_follows=False, + **kwargs): + self._name = _get_name("upscale_{}".format(filters)) + logger.debug("name: %s. filters: %s, kernel_size: %s, padding: %s, scale_factor: %s, " + "use_instance_norm: %s, res_block_follows: %s, kwargs: %s)", + self._name, filters, kernel_size, padding, scale_factor, use_instance_norm, + res_block_follows, kwargs) + + self._filters = filters + self._kernel_size = kernel_size + self._padding = padding + self._scale_factor = scale_factor + self._use_instance_norm = use_instance_norm + self._res_block_follows = res_block_follows + self._kwargs = kwargs + + def __call__(self, inputs): + """ Call the Faceswap Convolutional Layer. Parameters ---------- - input_tensor: tensor - The input tensor to the layer - filters: int - The dimensionality of the output space (i.e. the number of output filters in the - convolution) - kernel_size: int, optional - An integer or tuple/list of 2 integers, specifying the height and width of the 2D - convolution window. Can be a single integer to specify the same value for all spatial - dimensions. Default: 3 - padding: ["valid", "same"], optional - The padding to use. Default: `"same"` - interpolation: ["nearest", "bilinear"], optional - Interpolation to use for up-sampling. Default: `"bilinear"` - res_block_follows: bool, optional - If a residual block will follow this layer, then this should be set to `True` to add - a leaky ReLu after the convolutional layer. Default: ``False`` - scale_factor: int, optional - The amount to upscale the image. Default: `2` - sr_ratio: float, optional - The proportion of super resolution (pixel shuffler) filters to use. Non-fast mode only. - Default: `0.5` - kwargs: dict - Any additional Keras standard layer keyword arguments - fast: bool, optional - Use a faster up-scaling method that may appear more rugged. 
Default: ``False`` + inputs: Tensor + The input to the layer Returns ------- - tensor - The output tensor from the Upscale layer + Tensor + The output tensor from the Upscale Layer """ - name = self._get_name("upscale2x_{}".format("fast" if fast else "hyb")) - var_x = input_tensor - if not fast: - sr_filters = int(filters * sr_ratio) - filters = filters - sr_filters - var_x_sr = self.upscale(var_x, filters, - kernel_size=kernel_size, - padding=padding, - scale_factor=scale_factor, - res_block_follows=res_block_follows, - **kwargs) - - if fast or (not fast and filters > 0): - var_x2 = self.conv2d(var_x, filters, - kernel_size=3, - padding=padding, - name="{}_conv2d".format(name), - **kwargs) - var_x2 = UpSampling2D(size=(scale_factor, scale_factor), - interpolation=interpolation, - name="{}_upsampling2D".format(name))(var_x2) - if fast: - var_x1 = self.upscale(var_x, filters, - kernel_size=kernel_size, - padding=padding, - scale_factor=scale_factor, - res_block_follows=res_block_follows, **kwargs) - var_x = Add()([var_x2, var_x1]) - else: - var_x = Concatenate(name="{}_concatenate".format(name))([var_x_sr, var_x2]) - else: - var_x = var_x_sr + var_x = Conv2DBlock(self._filters * self._scale_factor * self._scale_factor, + self._kernel_size, + strides=(1, 1), + padding=self._padding, + use_instance_norm=self._use_instance_norm, + res_block_follows=self._res_block_follows, + name="{}_conv2d".format(self._name), + check_icnr_init=_CONFIG["icnr_init"], + **self._kwargs)(inputs) + var_x = PixelShuffler(name="{}_pixelshuffler".format(self._name), + size=self._scale_factor)(var_x) return var_x - # <<< DFaker Model Blocks >>> # - def res_block(self, input_tensor, filters, kernel_size=3, padding="same", **kwargs): - """ Residual block. + +class Upscale2xBlock(): # pylint:disable=too-few-public-methods + """ Custom hybrid upscale layer for sub-pixel up-scaling. + + Most of up-scaling is approximating lighting gradients which can be accurately achieved + using linear fitting. 
This layer attempts to improve memory consumption by splitting + with bilinear and convolutional layers so that the sub-pixel update will get details + whilst the bilinear filter will get lighting. + + Adds reflection padding if it has been selected by the user, and other post-processing + if requested by the plugin. + + Parameters + ---------- + filters: int + The dimensionality of the output space (i.e. the number of output filters in the + convolution) + kernel_size: int, optional + An integer or tuple/list of 2 integers, specifying the height and width of the 2D + convolution window. Can be a single integer to specify the same value for all spatial + dimensions. Default: 3 + padding: ["valid", "same"], optional + The padding to use. Default: `"same"` + interpolation: ["nearest", "bilinear"], optional + Interpolation to use for up-sampling. Default: `"bilinear"` + res_block_follows: bool, optional + If a residual block will follow this layer, then this should be set to ``True`` to add + a leaky ReLu after the convolutional layer. Default: ``False`` + scale_factor: int, optional + The amount to upscale the image. Default: `2` + sr_ratio: float, optional + The proportion of super resolution (pixel shuffler) filters to use. Non-fast mode only. + Default: `0.5` + fast: bool, optional + Use a faster up-scaling method that may appear more rugged. 
Default: ``False`` + kwargs: dict + Any additional Keras standard layer keyword arguments to pass to the Convolutional 2D layer + """ + def __init__(self, filters, kernel_size=3, padding="same", interpolation="bilinear", + res_block_follows=False, sr_ratio=0.5, scale_factor=2, fast=False, **kwargs): + self._name = _get_name("upscale2x_{}_{}".format(filters, "fast" if fast else "hyb")) + + self._fast = fast + self._filters = filters if self._fast else filters - int(filters * sr_ratio) + self._kernel_size = kernel_size + self._padding = padding + self._interpolation = interpolation + self._res_block_follows = res_block_follows + self._scale_factor = scale_factor + self._kwargs = kwargs + + def __call__(self, inputs): + """ Call the Faceswap Upscale 2x Layer. Parameters ---------- - input_tensor: tensor - The input tensor to the layer - filters: int - The dimensionality of the output space (i.e. the number of output filters in the - convolution) - kernel_size: int, optional - An integer or tuple/list of 2 integers, specifying the height and width of the 2D - convolution window. Can be a single integer to specify the same value for all spatial - dimensions. Default: 3 - padding: ["valid", "same"], optional - The padding to use. 
Default: `"same"` - kwargs: dict - Any additional Keras standard layer keyword arguments + inputs: Tensor + The input to the layer Returns ------- - tensor - The output tensor from the Upscale layer + Tensor + The output tensor from the Upscale Layer """ - logger.debug("input_tensor: %s, filters: %s, kernel_size: %s, kwargs: %s)", - input_tensor, filters, kernel_size, kwargs) - name = self._get_name("residual_{}".format(input_tensor.shape[1])) - var_x = LeakyReLU(alpha=0.2, name="{}_leakyrelu_0".format(name))(input_tensor) - if self.use_reflect_padding: - var_x = ReflectionPadding2D(stride=1, - kernel_size=kernel_size, - name="{}_reflectionpadding2d_0".format(name))(var_x) - padding = "valid" - var_x = self.conv2d(var_x, filters, - kernel_size=kernel_size, - padding=padding, - name="{}_conv2d_0".format(name), - **kwargs) - var_x = LeakyReLU(alpha=0.2, name="{}_leakyrelu_1".format(name))(var_x) - if self.use_reflect_padding: - var_x = ReflectionPadding2D(stride=1, - kernel_size=kernel_size, - name="{}_reflectionpadding2d_1".format(name))(var_x) - padding = "valid" - if not self.use_convaware_init: - original_init = self._switch_kernel_initializer(kwargs, VarianceScaling( - scale=0.2, - mode="fan_in", - distribution="uniform")) - var_x = self.conv2d(var_x, filters, - kernel_size=kernel_size, - padding=padding, - **kwargs) - if not self.use_convaware_init: - self._switch_kernel_initializer(kwargs, original_init) - var_x = Add()([var_x, input_tensor]) - var_x = LeakyReLU(alpha=0.2, name="{}_leakyrelu_3".format(name))(var_x) + var_x = inputs + if not self._fast: + var_x_sr = UpscaleBlock(self._filters, + kernel_size=self._kernel_size, + padding=self._padding, + scale_factor=self._scale_factor, + res_block_follows=self._res_block_follows, + **self._kwargs)(var_x) + if self._fast or (not self._fast and self._filters > 0): + var_x2 = Conv2D(self._filters, 3, + padding=self._padding, + name="{}_conv2d".format(self._name), + **self._kwargs)(var_x) + var_x2 = 
UpSampling2D(size=(self._scale_factor, self._scale_factor), + interpolation=self._interpolation, + name="{}_upsampling2D".format(self._name))(var_x2) + if self._fast: + var_x1 = UpscaleBlock(self._filters, + kernel_size=self._kernel_size, + padding=self._padding, + scale_factor=self._scale_factor, + res_block_follows=self._res_block_follows, + **self._kwargs)(var_x) + var_x = Add()([var_x2, var_x1]) + else: + var_x = Concatenate(name="{}_concatenate".format(self._name))([var_x_sr, var_x2]) + else: + var_x = var_x_sr return var_x - # <<< Unbalanced Model Blocks >>> # - def conv_sep(self, input_tensor, filters, kernel_size=5, strides=2, **kwargs): - """ Seperable Convolution Layer. + +# << OTHER BLOCKS >> +class ResidualBlock(): # pylint:disable=too-few-public-methods + """ Residual block from dfaker. + + Parameters + ---------- + filters: int + The dimensionality of the output space (i.e. the number of output filters in the + convolution) + kernel_size: int, optional + An integer or tuple/list of 2 integers, specifying the height and width of the 2D + convolution window. Can be a single integer to specify the same value for all spatial + dimensions. Default: 3 + padding: ["valid", "same"], optional + The padding to use. Default: `"same"` + kwargs: dict + Any additional Keras standard layer keyword arguments to pass to the Convolutional 2D layer + + Returns + ------- + tensor + The output tensor from the Upscale layer + """ + def __init__(self, filters, kernel_size=3, padding="same", **kwargs): + self._name = _get_name("residual_{}".format(filters)) + logger.debug("name: %s, filters: %s, kernel_size: %s, padding: %s, kwargs: %s)", + self._name, filters, kernel_size, padding, kwargs) + self._use_reflect_padding = _CONFIG["reflect_padding"] + + self._filters = filters + self._kernel_size = kernel_size + self._padding = "valid" if self._use_reflect_padding else padding + self._kwargs = kwargs + + def __call__(self, inputs): + """ Call the Faceswap Residual Block. 
Parameters ---------- - input_tensor: tensor - The input tensor to the layer - filters: int - The dimensionality of the output space (i.e. the number of output filters in the - convolution) - kernel_size: int, optional - An integer or tuple/list of 2 integers, specifying the height and width of the 2D - convolution window. Can be a single integer to specify the same value for all spatial - dimensions. Default: 5 - strides: tuple or int, optional - An integer or tuple/list of 2 integers, specifying the strides of the convolution along - the height and width. Can be a single integer to specify the same value for all spatial - dimensions. Default: `2` - kwargs: dict - Any additional Keras standard layer keyword arguments + inputs: Tensor + The input to the layer Returns ------- - tensor - The output tensor from the Upscale layer + Tensor + The output tensor from the Upscale Layer """ - logger.debug("input_tensor: %s, filters: %s, kernel_size: %s, strides: %s, kwargs: %s)", - input_tensor, filters, kernel_size, strides, kwargs) - name = self._get_name("separableconv2d_{}".format(input_tensor.shape[1])) - kwargs = self._set_default_initializer(kwargs) - var_x = SeparableConv2D(filters, - kernel_size=kernel_size, - strides=strides, - padding="same", - name="{}_seperableconv2d".format(name), - **kwargs)(input_tensor) - var_x = Activation("relu", name="{}_relu".format(name))(var_x) + var_x = LeakyReLU(alpha=0.2, name="{}_leakyrelu_0".format(self._name))(inputs) + if self._use_reflect_padding: + var_x = ReflectionPadding2D(stride=1, + kernel_size=self._kernel_size, + name="{}_reflectionpadding2d_0".format(self._name))(var_x) + var_x = Conv2D(self._filters, + kernel_size=self._kernel_size, + padding=self._padding, + name="{}_conv2d_0".format(self._name), + **self._kwargs)(var_x) + var_x = LeakyReLU(alpha=0.2, name="{}_leakyrelu_1".format(self._name))(var_x) + if self._use_reflect_padding: + var_x = ReflectionPadding2D(stride=1, + kernel_size=self._kernel_size, + 
name="{}_reflectionpadding2d_1".format(self._name))(var_x) + + kwargs = {key: val for key, val in self._kwargs.items() if key != "kernel_initializer"} + if not _CONFIG["conv_aware_init"]: + kwargs["kernel_initializer"] = VarianceScaling(scale=0.2, + mode="fan_in", + distribution="uniform") + var_x = Conv2D(self._filters, + kernel_size=self._kernel_size, + padding=self._padding, + name="{}_conv2d_1".format(self._name), + **kwargs)(var_x) + + var_x = Add()([var_x, inputs]) + var_x = LeakyReLU(alpha=0.2, name="{}_leakyrelu_3".format(self._name))(var_x) return var_x diff --git a/lib/model/normalization.py b/lib/model/normalization.py index 60036fdd94..270b761924 100644 --- a/lib/model/normalization.py +++ b/lib/model/normalization.py @@ -4,10 +4,10 @@ import sys import inspect -from keras.engine import Layer, InputSpec +from keras.layers import Layer, InputSpec from keras import initializers, regularizers, constraints from keras import backend as K -from keras.utils.generic_utils import get_custom_objects +from keras.utils import get_custom_objects class InstanceNormalization(Layer): @@ -50,9 +50,8 @@ class InstanceNormalization(Layer): ---------- - Layer Normalization - https://arxiv.org/abs/1607.06450 - - Instance Normalization: The Missing Ingredient for Fast Stylization - - https://arxiv.org/abs/1607.08022 - + - Instance Normalization: The Missing Ingredient for Fast Stylization - \ + https://arxiv.org/abs/1607.08022 """ def __init__(self, axis=None, @@ -162,6 +161,20 @@ def call(self, inputs, training=None): # pylint:disable=arguments-differ,unused return normed def get_config(self): + """Returns the config of the layer. + + A layer config is a Python dictionary (serializable) containing the configuration of a + layer. The same layer can be reinstated later (without its trained weights) from this + configuration. + + The configuration of a layer does not include connectivity information, nor the layer + class name. 
These are handled by `Network` (one layer of abstraction above). + + Returns + -------- + dict + A python dictionary containing the layer configuration + """ config = { "axis": self.axis, "epsilon": self.epsilon, diff --git a/lib/model/optimizers.py b/lib/model/optimizers.py deleted file mode 100644 index e2b50e4040..0000000000 --- a/lib/model/optimizers.py +++ /dev/null @@ -1,146 +0,0 @@ -#!/usr/bin/env python3 -""" Optimizers for faceswap.py """ -# Naming convention inherited from Keras so ignore invalid names -# pylint:disable=invalid-name - -import logging - -from keras import backend as K -from keras.optimizers import Adam as KerasAdam - -logger = logging.getLogger(__name__) # pylint: disable=invalid-name - - -class Adam(KerasAdam): - """Adapted Keras Adam Optimizer to allow support of calculations on CPU for Tensorflow. - - Default parameters follow those provided in the original paper. Adapted from - https://github.com/iperov/DeepFaceLab - - Parameters - ---------- - lr: float, optional - >= `0`. Learning rate. Default: `0.001` - beta_1: float, optional - `0` < beta < `1` Generally close to `1`. Default: `0.9` - beta_2: float, optional - `0` < beta < `1`. Generally close to `1`. Default: `0.999` - epsilon: float, optional - >= `0`. Fuzz factor. If ``None``, defaults to `K.epsilon()`. Default: ``None`` - decay: float, optional - >= 0. Learning rate decay over each update. Default: `0` - amsgrad: bool, optional - ``True`` to apply the AMSGrad variant of this algorithm from the paper "On the Convergence - of Adam and Beyond" otherwise ``False``. Default: ``False`` - cpu_mode: bool, optional - Set to ``True`` to perform some of the calculations on CPU for Nvidia backends, otherwise - ``False``. 
Default: ``False`` - kwargs: dict - Any additional standard Keras optimizer keyword arguments - - References - ---------- - - Adam - A Method for Stochastic Optimization - https://arxiv.org/abs/1412.6980v8 - - - On the Convergence of Adam and Beyond - https://openreview.net/forum?id=ryQu7f-RZ - """ - - def __init__(self, - lr=0.001, - beta_1=0.9, - beta_2=0.999, - epsilon=None, - decay=0., - amsgrad=False, - cpu_mode=False, - **kwargs): - super().__init__(lr, beta_1, beta_2, epsilon, decay, **kwargs) - self.cpu_mode = self._set_cpu_mode(cpu_mode) - - @staticmethod - def _set_cpu_mode(cpu_mode): - """ Sets the CPU mode to False if not using Tensorflow, otherwise the given value. - - Parameters - ---------- - cpu_mode: bool - Set to ``True`` to perform some of the calculations on CPU for Nvidia backends, - otherwise ``False``. - - Returns - ------- - bool - ``True`` if some calculations should be performed on CPU otherwise ``False`` - """ - retval = False if K.backend() != "tensorflow" else cpu_mode - logger.debug("Optimizer CPU Mode set to %s", retval) - return retval - - def get_updates(self, loss, params): - """ Obtain the optimizer loss updates. - - Parameters - ---------- - loss: list - List of tensors - - params: list - List of tensors - - Returns - ------- - list - List of tensors - """ - grads = self.get_gradients(loss, params) - self.updates = [K.update_add(self.iterations, 1)] - - lr = self.lr - if self.initial_decay > 0: - lr = lr * (1. / (1. + self.decay * K.cast(self.iterations, - K.dtype(self.decay)))) - - t = K.cast(self.iterations, K.floatx()) + 1 - lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) / - (1. - K.pow(self.beta_1, t))) - - # Pass off to CPU if requested - if self.cpu_mode: - with K.tf.device("/cpu:0"): - ms, vs, vhats = self._update_1(params) - else: - ms, vs, vhats = self._update_1(params) - - self.weights = [self.iterations] + ms + vs + vhats - - for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats): - m_t = (self.beta_1 * m) + (1. 
- self.beta_1) * g - v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g) - if self.amsgrad: - vhat_t = K.maximum(vhat, v_t) - p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon) - self.updates.append(K.update(vhat, vhat_t)) - else: - p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon) - - self.updates.append(K.update(m, m_t)) - self.updates.append(K.update(v, v_t)) - new_p = p_t - - # Apply constraints. - if getattr(p, 'constraint', None) is not None: - new_p = p.constraint(new_p) - - self.updates.append(K.update(p, new_p)) - return self.updates - - def _update_1(self, params): - """ Perform the first update. Run under CPU context if running on Tensorflow and CPU mode - is enabled, otherwise run on the default device. """ - ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - if self.amsgrad: - vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - else: - vhats = [K.zeros(1) for _ in params] - return ms, vs, vhats diff --git a/lib/model/session.py b/lib/model/session.py index 35586adc66..09d9c39036 100644 --- a/lib/model/session.py +++ b/lib/model/session.py @@ -3,26 +3,32 @@ import logging +import numpy as np import tensorflow as tf +# pylint:disable=no-name-in-module,import-error from keras.layers import Activation -from tensorflow.python import errors_impl as tf_error # pylint:disable=no-name-in-module from keras.models import load_model as k_load_model, Model -import numpy as np -from lib.utils import get_backend, FaceswapError +from lib.utils import get_backend logger = logging.getLogger(__name__) # pylint:disable=invalid-name class KSession(): - """ Handles the settings of backend sessions. + """ Handles the settings of backend sessions for inference models. This class acts as a wrapper for various :class:`keras.Model()` functions, ensuring that - actions performed on a model are handled consistently within the correct graph. 
+ actions performed on a model are handled consistently and can be performed in parallel in + separate threads. This is an early implementation of this class, and should be expanded out over time with relevant `AMD`, `CPU` and `NVIDIA` backend methods. + Notes + ----- + The documentation refers to :mod:`keras`. This is a pseudonym for either :mod:`keras` or + :mod:`tensorflow.keras` depending on the backend in use. + Parameters ---------- name: str @@ -30,53 +36,55 @@ class KSession(): model_path: str The path to the keras model file model_kwargs: dict, optional - Any kwargs that need to be passed to :func:`keras.models.load_models()`. Default: None + Any kwargs that need to be passed to :func:`keras.models.load_models()`. Default: ``None`` allow_growth: bool, optional - Enable the Tensorflow GPU allow_growth configuration option. This option prevents " - Tensorflow from allocating all of the GPU VRAM, but can lead to higher fragmentation and " - slower performance. Default: False + Enable the Tensorflow GPU allow_growth configuration option. This option prevents + Tensorflow from allocating all of the GPU VRAM, but can lead to higher fragmentation and + slower performance. Default: ``False`` + exclude_gpus: list, optional + A list of indices correlating to connected GPUs that Tensorflow should not use. Pass + ``None`` to not exclude any GPUs. 
Default: ``None`` + """ - def __init__(self, name, model_path, model_kwargs=None, allow_growth=False): + def __init__(self, name, model_path, model_kwargs=None, allow_growth=False, exclude_gpus=None): logger.trace("Initializing: %s (name: %s, model_path: %s, model_kwargs: %s, " - "allow_growth: %s)", - self.__class__.__name__, name, model_path, model_kwargs, allow_growth) + "allow_growth: %s, exclude_gpus)", self.__class__.__name__, name, model_path, + model_kwargs, allow_growth, exclude_gpus) self._name = name - self._session = self._set_session(allow_growth) + self._backend = get_backend() + self._set_session(allow_growth, exclude_gpus) self._model_path = model_path - self._model_kwargs = model_kwargs + self._model_kwargs = dict() if not model_kwargs else model_kwargs self._model = None logger.trace("Initialized: %s", self.__class__.__name__,) def predict(self, feed, batch_size=None): - """ Get predictions from the model in the correct session. + """ Get predictions from the model. - This method is a wrapper for :func:`keras.predict()` function. + This method is a wrapper for :func:`keras.predict()` function. For Tensorflow backends + this is a straight call to the predict function. For PlaidML backends, this attempts + to optimize the inference batch sizes to reduce the number of kernels that need to be + compiled. Parameters ---------- feed: numpy.ndarray or list - The feed to be provided to the model as input. This should be a ``numpy.ndarray`` - for single inputs or a ``list`` of ``numpy.ndarrays`` for multiple inputs. + The feed to be provided to the model as input. This should be a :class:`numpy.ndarray` + for single inputs or a `list` of :class:`numpy.ndarray` objects for multiple inputs. 
""" - if self._session is None: - if batch_size is None: - return self._model.predict(feed) + if self._backend == "amd" and batch_size is not None: return self._amd_predict_with_optimized_batchsizes(feed, batch_size) - - with self._session.as_default(): # pylint: disable=not-context-manager - with self._session.graph.as_default(): - return self._model.predict(feed, batch_size=batch_size) + return self._model.predict(feed, batch_size=batch_size) def _amd_predict_with_optimized_batchsizes(self, feed, batch_size): - """ Minimizes the amount of kernels to be compiled when using - the ``Amd`` backend with varying batchsizes while trying to keep - the batchsize as high as possible. + """ Minimizes the amount of kernels to be compiled when using the ``amd`` backend with + varying batch sizes while trying to keep the batchsize as high as possible. Parameters ---------- feed: numpy.ndarray or list The feed to be provided to the model as input. This should be a ``numpy.ndarray`` - for single inputs or a ``list`` of ``numpy.ndarrays`` for multiple inputs. + for single inputs or a ``list`` of ``numpy.ndarray`` objects for multiple inputs. batch_size: int The upper batchsize to use. """ @@ -99,52 +107,64 @@ def _amd_predict_with_optimized_batchsizes(self, feed, batch_size): return np.concatenate(results) return [np.concatenate(x) for x in zip(*results)] - def _set_session(self, allow_growth): - """ Sets the session and graph. + def _set_session(self, allow_growth, exclude_gpus): + """ Sets the backend session options. + + For AMD backend this does nothing. - If the backend is AMD then this does nothing and the global ``Keras`` ``Session`` - is used + For CPU backends, this hides any GPUs from Tensorflow. + + For Nvidia backends, this hides any GPUs that Tensorflow should not use and applies + any allow growth settings + + Parameters + ---------- + allow_growth: bool, optional + Enable the Tensorflow GPU allow_growth configuration option. 
This option prevents + Tensorflow from allocating all of the GPU VRAM, but can lead to higher fragmentation + and slower performance. Default: False + exclude_gpus: list, optional + A list of indices correlating to connected GPUs that Tensorflow should not use. Pass + ``None`` to not exclude any GPUs. Default: ``None`` """ - if get_backend() == "amd": - return None - - self.graph = tf.Graph() - config = tf.ConfigProto() - if allow_growth and get_backend() == "nvidia": - config.gpu_options.allow_growth = True - try: - session = tf.Session(graph=tf.Graph(), config=config) - except tf_error.InternalError as err: - if "driver version is insufficient" in str(err): - msg = ("Your Nvidia Graphics Driver is insufficient for running Faceswap. " - "Please upgrade to the latest version.") - raise FaceswapError(msg) from err - raise err - logger.debug("Created tf.session: (graph: %s, session: %s, config: %s)", - session.graph, session, config) - return session + if self._backend == "amd": + return + if self._backend == "cpu": + logger.verbose("Hiding GPUs from Tensorflow") + tf.config.set_visible_devices([], "GPU") + return + + gpus = tf.config.list_physical_devices('GPU') + if exclude_gpus: + gpus = [gpu for idx, gpu in enumerate(gpus) if idx not in exclude_gpus] + logger.debug("Filtering devices to: %s", gpus) + tf.config.set_visible_devices(gpus, "GPU") + + if allow_growth: + for gpu in gpus: + logger.info("Setting allow growth for GPU: %s", gpu) + tf.config.experimental.set_memory_growth(gpu, True) def load_model(self): - """ Loads a model within the correct session. + """ Loads a model. This method is a wrapper for :func:`keras.models.load_model()`. Loads a model and its - weights from :attr:`model_path`. Any additional ``kwargs`` to be passed to - :func:`keras.models.load_model()` should also be defined during initialization of the - class. + weights from :attr:`model_path` defined during initialization of this class. 
Any additional + ``kwargs`` to be passed to :func:`keras.models.load_model()` should also be defined during + initialization of the class. + + For Tensorflow backends, the `make_predict_function` method is called on the model to make + it thread safe. """ logger.verbose("Initializing plugin model: %s", self._name) - if self._session is None: - self._model = k_load_model(self._model_path, **self._model_kwargs) - else: - with self._session.as_default(): # pylint: disable=not-context-manager - with self._session.graph.as_default(): - self._model = k_load_model(self._model_path, **self._model_kwargs) + self._model = k_load_model(self._model_path, compile=False, **self._model_kwargs) + if self._backend != "amd": + self._model.make_predict_function() def define_model(self, function): - """ Defines a given model in the correct session. + """ Defines a model from the given function. - This method acts as a wrapper for :class:`keras.models.Model()` to ensure that the model - is defined within it's own graph. + This method acts as a wrapper for :class:`keras.models.Model()`. Parameters ---------- @@ -153,39 +173,34 @@ def define_model(self, function): ``outputs``. The function that generates these results should be passed in, NOT the results themselves, as the function needs to be executed within the correct context. """ - if self._session is None: - self._model = Model(*function()) - else: - with self._session.as_default(): # pylint: disable=not-context-manager - with self._session.graph.as_default(): - self._model = Model(*function()) + self._model = Model(*function()) def load_model_weights(self): """ Load model weights for a defined model inside the correct session. This method is a wrapper for :class:`keras.load_weights()`. Once a model has been defined - in :func:`define_model()` this method can be called to load its weights in the correct - graph from the :attr:`model_path` defined during initialization of this class. 
+ in :func:`define_model()` this method can be called to load its weights from the + :attr:`model_path` defined during initialization of this class. + + For Tensorflow backends, the `make_predict_function` method is called on the model to make + it thread safe. """ logger.verbose("Initializing plugin model: %s", self._name) - if self._session is None: - self._model.load_weights(self._model_path) - else: - with self._session.as_default(): # pylint: disable=not-context-manager - with self._session.graph.as_default(): - self._model.load_weights(self._model_path) + self._model.load_weights(self._model_path) + if self._backend != "amd": + self._model.make_predict_function() def append_softmax_activation(self, layer_index=-1): """ Append a softmax activation layer to a model Occasionally a softmax activation layer needs to be added to a model's output. - This is a convenience fuction to append this layer to the loaded model. + This is a convenience function to append this layer to the loaded model. Parameters ---------- layer_index: int, optional The layer index of the model to select the output from to use as an input to the - softmax activation layer. Default: -1 (The final layer of the model) + softmax activation layer. Default: `-1` (The final layer of the model) """ logger.debug("Appending Softmax Activation to model: (layer_index: %s)", layer_index) softmax = Activation("softmax", name="softmax")(self._model.layers[layer_index].output) diff --git a/lib/plaidml_tools.py b/lib/plaidml_tools.py index 738388c42c..f5392a17ef 100644 --- a/lib/plaidml_tools.py +++ b/lib/plaidml_tools.py @@ -1,102 +1,136 @@ #!/usr/bin python3 -""" PlaidML tools +""" PlaidML tools. - Must be kept separate from keras as the keras backend needs to be set from this module +Statistics and setup for PlaidML on AMD devices. + +This module must be kept separate from Keras, and be called prior to any Keras import, as the +plaidML Keras backend is set from this module. 
""" import json import logging import os +import sys import plaidml _INIT = False _LOGGER = None +_EXCLUDE_DEVICES = [] class PlaidMLStats(): - """ Stats for plaidML """ - def __init__(self, loglevel="INFO", log=True): + """ Handles the initialization of PlaidML and the returning of GPU information for connected + cards from the PlaidML library. + + This class is initialized early in Faceswap's Launch process from :func:`setup_plaidml`, with + statistics made available from :class:`~lib.gpu_stats.GPUStats` + + Parameters + --------- + log_level: str, optional + The requested Faceswap log level. Also dictates the level that PlaidML logging is set at. + Default:`"INFO"` + log: bool, optional + Whether this class should output to the logger. If statistics are being accessed during a + crash, then the logger may not be available, so this gives the option to turn logging off + in those kinds of situations. Default:``True`` + """ + def __init__(self, log_level="INFO", log=True): if not _INIT and log: - # Logger is held internally, as we don't want to log - # when obtaining system stats on crash + # Logger held internally, as we don't want to log when obtaining system stats on crash global _LOGGER # pylint:disable=global-statement - _LOGGER = logging.getLogger(__name__) # pylint:disable=invalid-name - _LOGGER.debug("Initializing: %s: (loglevel: %s, log: %s)", - self.__class__.__name__, loglevel, log) - self.initialize(loglevel) - self.ctx = plaidml.Context() - self.supported_devices = self.get_supported_devices() - self.devices = self.get_all_devices() - - self.device_details = [json.loads(device.details.decode()) for device in self.devices] + _LOGGER = logging.getLogger(__name__) + _LOGGER.debug("Initializing: %s: (log_level: %s, log: %s)", + self.__class__.__name__, log_level, log) + self._initialize(log_level) + self._ctx = plaidml.Context() + self._supported_devices = self._get_supported_devices() + self._devices = self._get_all_devices() + + self._device_details = 
[json.loads(device.details.decode()) + for device in self._devices if device.details] + if self._devices and not self.active_devices: + self._load_active_devices() if _LOGGER: _LOGGER.debug("Initialized: %s", self.__class__.__name__) # PROPERTIES + @property + def devices(self): + """list: The :class:`plaidml._DeviceConfig` objects for GPUs that PlaidML has + discovered. """ + return self._devices + @property def active_devices(self): - """ Return the active device IDs """ - return [idx for idx, d_id in enumerate(self.ids) if d_id in plaidml.settings.device_ids] + """ list: List of device indices for active GPU devices. """ + return [idx for idx, d_id in enumerate(self._ids) + if d_id in plaidml.settings.device_ids and idx not in _EXCLUDE_DEVICES] @property def device_count(self): - """ Return count of PlaidML Devices """ - return len(self.devices) + """ int: The total number of GPU Devices discovered. """ + return len(self._devices) @property def drivers(self): - """ Return all PlaidML device drivers """ - return [device.get("driverVersion", "No Driver Found") for device in self.device_details] + """ list: The driver versions for each GPU device that PlaidML has discovered. """ + return [device.get("driverVersion", "No Driver Found") for device in self._device_details] @property def vram(self): - """ Return Total VRAM for all PlaidML Devices """ + """ list: The VRAM of each GPU device that PlaidML has discovered.
""" return [int(device.get("globalMemSize", 0)) / (1024 * 1024) - for device in self.device_details] - - @property - def max_alloc(self): - """ Return Maximum allowed VRAM allocation for all PlaidML Devices """ - return [int(device.get("maxMemAllocSize", 0)) / (1024 * 1024) - for device in self.device_details] - - @property - def ids(self): - """ Return all PlaidML Device IDs """ - return [device.id.decode() for device in self.devices] + for device in self._device_details] @property def names(self): - """ Return all PlaidML Device Names """ + """ list: The name of each GPU device that PlaidML has discovered. """ return ["{} - {} ({})".format( device.get("vendor", "unknown"), device.get("name", "unknown"), - "supported" if idx in self.supported_indices else "experimental") - for idx, device in enumerate(self.device_details)] + "supported" if idx in self._supported_indices else "experimental") + for idx, device in enumerate(self._device_details)] @property - def supported_indices(self): - """ Return the indices from self.devices of GPUs categorized as supported """ + def _ids(self): + """ list: The device identification for each GPU device that PlaidML has discovered. """ + return [device.id.decode() for device in self._devices] + + @property + def _experimental_indices(self): + """ list: The indices corresponding to :attr:`_ids` of GPU devices marked as + "experimental". """ retval = [idx for idx, device in enumerate(self.devices) - if device in self.supported_devices] + if device not in self._supported_indices] if _LOGGER: _LOGGER.debug(retval) return retval @property - def experimental_indices(self): - """ Return the indices from self.devices of GPUs categorized as experimental """ - retval = [idx for idx, device in enumerate(self.devices) - if device not in self.supported_devices] + def _supported_indices(self): + """ list: The indices corresponding to :attr:`_ids` of GPU devices marked as + "supported". 
""" + retval = [idx for idx, device in enumerate(self._devices) + if device in self._supported_devices] if _LOGGER: _LOGGER.debug(retval) return retval # INITIALIZATION - def initialize(self, loglevel): - """ Initialize PlaidML """ + def _initialize(self, log_level): + """ Initialize PlaidML. + + Set PlaidML to use Faceswap's logger, and set the logging level + + Parameters + ---------- + log_level: str, optional + The requested Faceswap log level. Also dictates the level that PlaidML logging is set + at. + """ global _INIT # pylint:disable=global-statement if _INIT: if _LOGGER: @@ -104,15 +138,15 @@ def initialize(self, loglevel): return if _LOGGER: _LOGGER.debug("Initializing PlaidML") - self.set_plaidml_logger() - self.set_verbosity(loglevel) + self._set_plaidml_logger() + self._set_verbosity(log_level) _INIT = True if _LOGGER: _LOGGER.debug("Initialized PlaidML") - @staticmethod - def set_plaidml_logger(): - """ Set PlaidMLs default logger to Faceswap Logger and prevent propagation """ + @classmethod + def _set_plaidml_logger(cls): + """ Set PlaidMLs default logger to Faceswap Logger and prevent propagation. """ if _LOGGER: _LOGGER.debug("Setting PlaidML Default Logger") plaidml.DEFAULT_LOG_HANDLER = logging.getLogger("plaidml_root") @@ -120,15 +154,20 @@ def set_plaidml_logger(): if _LOGGER: _LOGGER.debug("Set PlaidML Default Logger") - @staticmethod - def set_verbosity(loglevel): - """ Set the PlaidML Verbosity """ + @classmethod + def _set_verbosity(cls, log_level): + """ Set the PlaidML logging verbosity + + log_level: str + The requested Faceswap log level. Also dictates the level that PlaidML logging is set + at. 
+ """ if _LOGGER: - _LOGGER.debug("Setting PlaidML Loglevel: %s", loglevel) - if isinstance(loglevel, int): - numeric_level = loglevel + _LOGGER.debug("Setting PlaidML Loglevel: %s", log_level) + if isinstance(log_level, int): + numeric_level = log_level else: - numeric_level = getattr(logging, loglevel.upper(), None) + numeric_level = getattr(logging, log_level.upper(), None) if numeric_level < 10: # DEBUG Logging plaidml._internal_set_vlog(1) # pylint:disable=protected-access @@ -139,55 +178,72 @@ def set_verbosity(loglevel): # WARNING Logging plaidml.quiet() - def get_supported_devices(self): - """ Return a list of supported devices """ + def _get_supported_devices(self): + """ Obtain GPU devices from PlaidML that are marked as "supported". + + Returns + ------- + list + The :class:`plaidml._DeviceConfig` objects for GPUs that PlaidML has discovered. + """ experimental_setting = plaidml.settings.experimental plaidml.settings.experimental = False - devices, _ = plaidml.devices(self.ctx, limit=100, return_all=True) + devices = plaidml.devices(self._ctx, limit=100, return_all=True)[0] plaidml.settings.experimental = experimental_setting supported = [device for device in devices - if json.loads(device.details.decode()).get("type", "cpu").lower() == "gpu"] + if device.details + and json.loads(device.details.decode()).get("type", "cpu").lower() == "gpu"] if _LOGGER: _LOGGER.debug(supported) return supported - def get_all_devices(self): - """ Return list of supported and experimental devices """ + def _get_all_devices(self): + """ Obtain all available (experimental and supported) GPU devices from PlaidML. + + Returns + ------- + list + The :class:`plaidml._DeviceConfig` objects for GPUs that PlaidML has discovered. 
+ """ experimental_setting = plaidml.settings.experimental plaidml.settings.experimental = True - devices, _ = plaidml.devices(self.ctx, limit=100, return_all=True) + devices, _ = plaidml.devices(self._ctx, limit=100, return_all=True) plaidml.settings.experimental = experimental_setting - experimental = [device for device in devices - if json.loads(device.details.decode()).get("type", "cpu").lower() == "gpu"] + experi = [device for device in devices + if device.details + and json.loads(device.details.decode()).get("type", "cpu").lower() == "gpu"] if _LOGGER: - _LOGGER.debug("Experimental Devices: %s", experimental) - all_devices = experimental + self.supported_devices + _LOGGER.debug("Experimental Devices: %s", experi) + all_devices = experi + self._supported_devices if _LOGGER: _LOGGER.debug(all_devices) return all_devices - def load_active_devices(self): - """ Load settings from PlaidML.settings.usersettings or select biggest gpu """ + def _load_active_devices(self): + """ If the plaidml user configuration settings exist, then set the default GPU from the + settings file, otherwise set the GPU to be the one with most VRAM. """ if not os.path.exists(plaidml.settings.user_settings): # pylint:disable=no-member if _LOGGER: _LOGGER.debug("Setting largest PlaidML device") - self.set_largest_gpu() + self._set_largest_gpu() else: if _LOGGER: _LOGGER.debug("Setting PlaidML devices from user_settings") - def set_largest_gpu(self): - """ Get a supported GPU with largest VRAM. If no supported, get largest experimental """ - category = "supported" if self.supported_devices else "experimental" + def _set_largest_gpu(self): + """ Set the default GPU to be a supported device with the most available VRAM. If no + supported device is available, then set the GPU to be an experimental device with the + most VRAM available. 
""" + category = "supported" if self._supported_devices else "experimental" if _LOGGER: _LOGGER.debug("Obtaining largest %s device", category) - indices = getattr(self, "{}_indices".format(category)) + indices = getattr(self, "_{}_indices".format(category)) if not indices: _LOGGER.error("Failed to automatically detect your GPU.") _LOGGER.error("Please run `plaidml-setup` to set up your GPU.") - exit() + sys.exit(1) max_vram = max([self.vram[idx] for idx in indices]) if _LOGGER: _LOGGER.debug("Max VRAM: %s", max_vram) @@ -196,7 +252,7 @@ def set_largest_gpu(self): if _LOGGER: _LOGGER.debug("GPU IDX: %s", gpu_idx) - selected_gpu = self.ids[gpu_idx] + selected_gpu = self._ids[gpu_idx] if _LOGGER: _LOGGER.info("Setting GPU to largest available %s device. If you want to override " "this selection, run `plaidml-setup` from the command line.", category) @@ -205,13 +261,27 @@ def set_largest_gpu(self): plaidml.settings.device_ids = [selected_gpu] -def setup_plaidml(loglevel): - """ Setup plaidml for AMD Cards """ +def setup_plaidml(log_level, exclude_devices): + """ Setup PlaidML for AMD Cards. + + Sets the Keras backend to PlaidML, loads the plaidML backend and makes GPU Device information + from PlaidML available to :class:`~lib.gpu_stats.GPUStats`. + + + Parameters + ---------- + log_level: str + Faceswap's log level. Used for setting the log level inside PlaidML + exclude_devices: list + A list of integers of device IDs that should not be used by Faceswap + """ logger = logging.getLogger(__name__) # pylint:disable=invalid-name logger.info("Setting up for PlaidML") logger.verbose("Setting Keras Backend to PlaidML") + # Add explicitly excluded devices to list. 
The contents have already been checked in GPUStats + if exclude_devices: + _EXCLUDE_DEVICES.extend(int(idx) for idx in exclude_devices) os.environ["KERAS_BACKEND"] = "plaidml.keras.backend" - plaid = PlaidMLStats(loglevel) - plaid.load_active_devices() - logger.info("Using GPU: %s", [plaid.ids[i] for i in plaid.active_devices]) + plaid = PlaidMLStats(log_level) + logger.info("Using GPU(s): %s", [plaid.names[i] for i in plaid.active_devices]) logger.info("Successfully set up for PlaidML") diff --git a/lib/training_data.py b/lib/training_data.py index ba6cba79ed..e6bb5af9a6 100644 --- a/lib/training_data.py +++ b/lib/training_data.py @@ -31,56 +31,53 @@ class TrainingDataGenerator(): model_output_shapes: list A list of tuples defining the output shapes from the model, in the order that the outputs are returned. The tuples should be in (`height`, `width`, `channels`) format. - training_opts: dict - This is a dictionary of model training options as defined in - :mod:`plugins.train.model._base`. These options will be defined by the user from the - provided cli options or from the model ``config.ini``. At a minimum this ``dict`` should - contain the following keys: - - * **coverage_ratio** (`float`) - The ratio of the training image to be trained on. \ - Dictates how much of the image will be cropped out. E.G: a coverage ratio of 0.625 \ - will result in cropping a 160px box from a 256px image (256 * 0.625 = 160). - - * **augment_color** (`bool`) - ``True`` if color is to be augmented, otherwise ``False`` \ - - * **no_flip** (`bool`) - ``True`` if the image shouldn't be randomly flipped as part of \ - augmentation, otherwise ``False`` - - * **warp_to_landmarks** (`bool`) - ``True`` if the random warp method should warp to \ - similar landmarks from the other side, ``False`` if the standard random warp method \ - should be used. If ``True`` then the additional key ``landmarks`` must be provided. + coverage_ratio: float + The ratio of the training image to be trained on. 
Dictates how much of the image will be + cropped out. E.G: a coverage ratio of 0.625 will result in cropping a 160px box from a + 256px image (:math:`256 * 0.625 = 160`). + augment_color: bool + ``True`` if color is to be augmented, otherwise ``False`` + no_flip: bool + ``True`` if the image shouldn't be randomly flipped as part of augmentation, otherwise + ``False`` + warp_to_landmarks: bool + ``True`` if the random warp method should warp to similar landmarks from the other side, + ``False`` if the standard random warp method should be used. If ``True`` then + the key `landmarks` must be provided in the alignments dictionary. + alignments: dict + A dictionary containing landmarks and masks if these are required for training: * **landmarks** (`dict`, `optional`). Required if :attr:`warp_to_landmarks` is \ ``True``. Returning dictionary has a key of **side** (`str`) the value of which is a \ - `dict` of {**filename** (`str`): **68 point landmarks** (`numpy.ndarray`)}. + `dict` of {**filename** (`str`): **68 point landmarks** (:class:`numpy.ndarray`)}. * **masks** (`dict`, `optional`). Required if :attr:`penalized_mask_loss` or \ :attr:`learn_mask` is ``True``. Returning dictionary has a key of **side** (`str`) the \ value of which is a `dict` of {**filename** (`str`): :class:`lib.faces_detect.Mask`}. - config: dict - The configuration ``dict`` generated from :file:`config.train.ini` containing the trainer \ + The configuration `dict` generated from :file:`config.train.ini` containing the trainer \ plugin configuration options. 
""" - def __init__(self, model_input_size, model_output_shapes, training_opts, config): + def __init__(self, model_input_size, model_output_shapes, coverage_ratio, augment_color, + no_flip, warp_to_landmarks, alignments, config): logger.debug("Initializing %s: (model_input_size: %s, model_output_shapes: %s, " - "training_opts: %s, landmarks: %s, masks: %s, config: %s)", + "coverage_ratio: %s, augment_color: %s, no_flip: %s, warp_to_landmarks: %s, " + "alignments: %s, config: %s)", self.__class__.__name__, model_input_size, model_output_shapes, - {key: val - for key, val in training_opts.items() if key not in ("landmarks", "masks")}, - {key: len(val) - for key, val in training_opts.get("landmarks", dict()).items()}, - {key: len(val) for key, val in training_opts.get("masks", dict()).items()}, - config) + coverage_ratio, augment_color, no_flip, warp_to_landmarks, + list(alignments.keys()), config) self._config = config self._model_input_size = model_input_size self._model_output_shapes = model_output_shapes - self._training_opts = training_opts - self._landmarks = self._training_opts.get("landmarks", None) - self._masks = self._training_opts.get("masks", None) + self._coverage_ratio = coverage_ratio + self._augment_color = augment_color + self._no_flip = no_flip + self._warp_to_landmarks = warp_to_landmarks + self._landmarks = alignments.get("landmarks", None) + self._masks = alignments.get("masks", None) self._nearest_landmarks = {} - # Batchsize and processing class are set when this class is called by a batcher + # Batchsize and processing class are set when this class is called by a feeder # from lib.training_data self._batchsize = 0 self._processing = None @@ -99,8 +96,8 @@ def minibatch_ab(self, images, batchsize, side, images: list A list of image paths that will be used to compile the final augmented data from. batchsize: int - The batchsize for this iterator. Images will be returned in ``numpy.ndarray`` s of - this size from the iterator. 
+ The batchsize for this iterator. Images will be returned in :class:`numpy.ndarray` + objects of this size from the iterator. side: {'a' or 'b'} The side of the model that this iterator is for. do_shuffle: bool, optional @@ -117,27 +114,27 @@ def minibatch_ab(self, images, batchsize, side, Yields ------ dict - The following items are contained in each ``dict`` yielded from this iterator: + The following items are contained in each `dict` yielded from this iterator: - * **feed** (`numpy.ndarray`) - The feed for the model. The array returned is in the \ - format (`batchsize`, `height`, `width`, `channels`). This is the :attr:`x` parameter \ - for :func:`keras.models.model.train_on_batch`. + * **feed** (:class:`numpy.ndarray`) - The feed for the model. The array returned is \ + in the format (`batchsize`, `height`, `width`, `channels`). This is the :attr:`x` \ + parameter for :func:`keras.models.model.train_on_batch`. - * **targets** (`list`) - A list of 4-dimensional ``numpy.ndarray`` s in the order \ - and size of each output of the model as defined in :attr:`model_output_shapes`. the \ - format of these arrays will be (`batchsize`, `height`, `width`, `3`). This is \ - the :attr:`y` parameter for :func:`keras.models.model.train_on_batch` **NB:** \ - masks are not included in the ``targets`` list. If required for feeding into the \ - Keras model, they will need to be added to this list in \ - :mod:`plugins.train.trainer._base` from the ``masks`` key. + * **targets** (`list`) - A list of 4-dimensional :class:`numpy.ndarray` objects in \ + the order and size of each output of the model as defined in \ + :attr:`model_output_shapes`. the format of these arrays will be (`batchsize`, \ + `height`, `width`, `3`). This is the :attr:`y` parameter for \ + :func:`keras.models.model.train_on_batch` **NB:** masks are not included in the \ + `targets` list. 
If required for feeding into the Keras model, they will need to be \ + added to this list in :mod:`plugins.train.trainer._base` from the `masks` key. - * **masks** (`numpy.ndarray`) - A 4-dimensional array containing the target masks in \ - the format (`batchsize`, `height`, `width`, `1`). + * **masks** (:class:`numpy.ndarray`) - A 4-dimensional array containing the target \ + masks in the format (`batchsize`, `height`, `width`, `1`). - * **samples** (`numpy.ndarray`) - A 4-dimensional array containing the samples for \ - feeding to the model's predict function for generating preview and time-lapse \ + * **samples** (:class:`numpy.ndarray`) - A 4-dimensional array containing the samples \ + for feeding to the model's predict function for generating preview and time-lapse \ samples. The array will be in the format (`batchsize`, `height`, `width`, \ - `channels`). **NB:** This item will only exist in the ``dict`` if :attr:`is_preview` \ + `channels`). **NB:** This item will only exist in the `dict` if :attr:`is_preview` \ or :attr:`is_timelapse` is ``True`` """ logger.debug("Queue batches: (image_count: %s, batchsize: %s, side: '%s', do_shuffle: %s, " @@ -148,7 +145,7 @@ def minibatch_ab(self, images, batchsize, side, is_preview or is_timelapse, self._model_input_size, self._model_output_shapes, - self._training_opts.get("coverage_ratio", 0.625), + self._coverage_ratio, self._config) args = (images, side, do_shuffle, batchsize) batcher = BackgroundGenerator(self._minibatch, thread_count=2, args=args) @@ -203,7 +200,7 @@ def _process_batch(self, filenames, side): self._processing.initialize(batch.shape[1]) # Get Landmarks prior to manipulating the image - if self._training_opts["warp_to_landmarks"]: + if self._warp_to_landmarks: batch_src_pts = self._get_landmarks(filenames, side) batch_dst_pts = self._get_closest_match(filenames, side, batch_src_pts) warp_kwargs = dict(batch_src_points=batch_src_pts, @@ -212,12 +209,12 @@ def _process_batch(self, filenames, side): 
warp_kwargs = dict() # Color Augmentation of the image only - if self._training_opts["augment_color"]: + if self._augment_color: batch[..., :3] = self._processing.color_adjust(batch[..., :3]) # Random Transform and flip batch = self._processing.transform(batch) - if not self._training_opts["no_flip"]: + if not self._no_flip: batch = self._processing.random_flip(batch) # Add samples to output if this is for display @@ -229,7 +226,7 @@ def _process_batch(self, filenames, side): # Random Warp # TODO change masks to have a input mask and a warped target mask processed["feed"] = [self._processing.warp(batch[..., :3], - self._training_opts["warp_to_landmarks"], + self._warp_to_landmarks, **warp_kwargs)] logger.trace("Processed batch: (filenames: %s, side: '%s', processed: %s)", @@ -276,7 +273,7 @@ def _resize_masks(target_size, masks): def _get_landmarks(self, filenames, side): """ Obtains the 68 Point Landmarks for the images in this batch. This is only called if - config item ``warp_to_landmarks`` is ``True``. If the landmarks for an image cannot be + config :attr:`_warp_to_landmarks` is ``True``. If the landmarks for an image cannot be found, then an error is raised. """ logger.trace("Retrieving landmarks: (filenames: %s, side: '%s')", filenames, side) src_points = [self._landmarks[side].get(filename, None) for filename in filenames] @@ -298,7 +295,7 @@ def _get_landmarks(self, filenames, side): return np.array(src_points) def _get_closest_match(self, filenames, side, batch_src_points): - """ Only called if the config item ``warp_to_landmarks`` is ``True``. Gets the closest + """ Only called if the :attr:`_warp_to_landmarks` is ``True``. Gets the closest matched 68 point landmarks from the opposite training set. """ logger.trace("Retrieving closest matched landmarks: (filenames: '%s', src_points: '%s'", filenames, batch_src_points) @@ -346,10 +343,10 @@ class ImageAugmentation(): are returned. The tuples should be in (`height`, `width`, `channels`) format. 
coverage_ratio: float The ratio of the training image to be trained on. Dictates how much of the image will be - cropped out. E.G: a coverage ratio of 0.625 will result in cropping a 160px box from a " - "256px image (256 * 0.625 = 160). + cropped out. E.G: a coverage ratio of 0.625 will result in cropping a 160px box from a + 256px image (:math:`256 * 0.625 = 160`) config: dict - The configuration ``dict`` generated from :file:`config.train.ini` containing the trainer \ + The configuration `dict` generated from :file:`config.train.ini` containing the trainer plugin configuration options. Attributes @@ -359,7 +356,7 @@ class ImageAugmentation(): image size in order to cache certain augmentation operations (see :func:`initialize`) is_display: bool Flag to indicate whether these augmentations are for time-lapses/preview images (``True``) - or standard training data (``False)`` + or standard training data (``False``) """ def __init__(self, batchsize, is_display, input_size, output_shapes, coverage_ratio, config): logger.debug("Initializing %s: (batchsize: %s, is_display: %s, input_size: %s, " @@ -390,8 +387,8 @@ def initialize(self, training_size): """ Initializes the caching of constants for use in various image augmentations. The training image size is not known prior to loading the images from disk and commencing - training, so it cannot be set in the ``__init__`` method. When the first training batch is - loaded this function should be called to initialize the class and perform various + training, so it cannot be set in the :func:`__init__` method. When the first training batch + is loaded this function should be called to initialize the class and perform various calculations based on this input size to cache certain constants for image augmentation calculations. 
@@ -448,7 +445,7 @@ def get_targets(self, batch): Parameters ---------- - batch: numpy.ndarray + batch: :class:`numpy.ndarray` This should be a 4-dimensional array of training images in the format (`batchsize`, `height`, `width`, `channels`). Targets should be requested after performing image transformations but prior to performing warps. @@ -458,16 +455,16 @@ def get_targets(self, batch): dict The following keys will be within the returned dictionary: - * **targets** (`list`) - A list of 4-dimensional ``numpy.ndarray`` s in the order \ - and size of each output of the model as defined in :attr:`output_shapes`. The \ + * **targets** (`list`) - A list of 4-dimensional :class:`numpy.ndarray` s in the \ + order and size of each output of the model as defined in :attr:`output_shapes`. The \ format of these arrays will be (`batchsize`, `height`, `width`, `3`). **NB:** \ - masks are not included in the ``targets`` list. If masks are to be included in the \ - output they will be returned as their own item from the ``masks`` key. + masks are not included in the `targets` list. If masks are to be included in the \ + output they will be returned as their own item from the `masks` key. - * **masks** (`numpy.ndarray`) - A 4-dimensional array containing the target masks in \ - the format (`batchsize`, `height`, `width`, `1`). + * **masks** (:class:`numpy.ndarray`) - A 4-dimensional array containing the target \ + masks in the format (`batchsize`, `height`, `width`, `1`). """ - logger.trace("Compiling targets") + logger.trace("Compiling targets: batch shape: %s", batch.shape) slices = self._constants["tgt_slices"] target_batch = [np.array([cv2.resize(image[slices, slices, :], (size, size), @@ -487,8 +484,8 @@ def get_targets(self, batch): def _separate_target_mask(target_batch): """ Return the batch and the batch of final masks - Returns the targets as a list of 4-dimensional ``numpy.ndarray`` s of shape (`batchsize`, - `height`, `width`, 3). 
+ Returns the targets as a list of 4-dimensional :class:`numpy.ndarray` s of shape + (`batchsize`, `height`, `width`, `3`). The target masks are returned as its own item and is the 4th channel of the final target output. @@ -507,13 +504,13 @@ def color_adjust(self, batch): Parameters ---------- - batch: numpy.ndarray + batch: :class:`numpy.ndarray` The batch should be a 4-dimensional array of shape (`batchsize`, `height`, `width`, `3`) and in `BGR` format. Returns ---------- - numpy.ndarray + :class:`numpy.ndarray` A 4-dimensional array of the same shape as :attr:`batch` with color augmentation applied. """ @@ -575,13 +572,13 @@ def transform(self, batch): Parameters ---------- - batch: numpy.ndarray + batch: :class:`numpy.ndarray` The batch should be a 4-dimensional array of shape (`batchsize`, `height`, `width`, `channels`) and in `BGR` format. Returns ---------- - numpy.ndarray + :class:`numpy.ndarray` A 4-dimensional array of the same shape as :attr:`batch` with transformation applied. """ if self.is_display: @@ -625,13 +622,13 @@ def random_flip(self, batch): Parameters ---------- - batch: numpy.ndarray + batch: :class:`numpy.ndarray` The batch should be a 4-dimensional array of shape (`batchsize`, `height`, `width`, `channels`) and in `BGR` format. Returns ---------- - numpy.ndarray + :class:`numpy.ndarray` A 4-dimensional array of the same shape as :attr:`batch` with transformation applied. """ if not self.is_display: @@ -647,7 +644,7 @@ def warp(self, batch, to_landmarks=False, **kwargs): Parameters ---------- - batch: numpy.ndarray + batch: :class:`numpy.ndarray` The batch should be a 4-dimensional array of shape (`batchsize`, `height`, `width`, `3`) and in `BGR` format. 
to_landmarks: bool, optional @@ -657,15 +654,15 @@ def warp(self, batch, to_landmarks=False, **kwargs): kwargs: dict If :attr:`to_landmarks` is ``True`` the following additional kwargs must be passed in: - * **batch_src_points** (`numpy.ndarray`) - A batch of 68 point landmarks for the \ - source faces. This is a 3-dimensional array in the shape (`batchsize`, `68`, `2`). + * **batch_src_points** (:class:`numpy.ndarray`) - A batch of 68 point landmarks for \ + the source faces. This is a 3-dimensional array in the shape (`batchsize`, `68`, `2`). - * **batch_dst_points** (`numpy.ndarray`) - A batch of randomly chosen closest match \ - destination faces landmarks. This is a 3-dimensional array in the shape (`batchsize`, \ - `68`, `2`). + * **batch_dst_points** (:class:`numpy.ndarray`) - A batch of randomly chosen closest \ + match destination faces landmarks. This is a 3-dimensional array in the shape \ + (`batchsize`, `68`, `2`). Returns ---------- - numpy.ndarray + :class:`numpy.ndarray` A 4-dimensional array of the same shape as :attr:`batch` with warping applied. 
""" if to_landmarks: @@ -719,10 +716,10 @@ def _random_warp_landmarks(self, batch, batch_src_points, batch_dst_points): grid_z = np.array([griddata(dst, src, (grids[0], grids[1]), method="linear") for src, dst in zip(batch_src, batch_dst)]) - maps = grid_z.reshape(self._batchsize, - self._training_size, - self._training_size, - 2).astype("float32") + maps = grid_z.reshape((self._batchsize, + self._training_size, + self._training_size, + 2)).astype("float32") warped_batch = np.array([cv2.remap(image, map_[..., 1], map_[..., 0], diff --git a/lib/utils.py b/lib/utils.py index b74760a4e6..6ff2ed2d46 100644 --- a/lib/utils.py +++ b/lib/utils.py @@ -1,6 +1,7 @@ #!/usr/bin python3 """ Utilities available across all scripts """ +import importlib import json import logging import os @@ -24,26 +25,39 @@ ".ts", ".vob"] -class Backend(): +class _Backend(): # pylint:disable=too-few-public-methods """ Return the backend from config/.faceswap of from the `FACESWAP_BACKEND` Environment Variable. If file doesn't exist and a variable hasn't been set, create the config file. """ def __init__(self): - self.backends = {"1": "amd", "2": "cpu", "3": "nvidia"} - self.config_file = self.get_config_file() - self.backend = self.get_backend() - - @staticmethod - def get_config_file(): - """ Return location of config file """ + self._backends = {"1": "amd", "2": "cpu", "3": "nvidia"} + self._config_file = self._get_config_file() + self.backend = self._get_backend() + + @classmethod + def _get_config_file(cls): + """ Obtain the location of the main Faceswap configuration file. + + Returns + ------- + str + The path to the Faceswap configuration file + """ pypath = os.path.dirname(os.path.realpath(sys.argv[0])) config_file = os.path.join(pypath, "config", ".faceswap") return config_file - def get_backend(self): + def _get_backend(self): """ Return the backend from either the `FACESWAP_BACKEND` Environment Variable or from - the :loc:`config/.faceswap` configuration file. 
""" + the :file:`config/.faceswap` configuration file. If neither of these exist, prompt the user + to select a backend. + + Returns + ------- + str + The backend configuration in use by Faceswap + """ # Check if environment variable is set, if so use that if "FACESWAP_BACKEND" in os.environ: fs_backend = os.environ["FACESWAP_BACKEND"].lower() @@ -53,26 +67,31 @@ def get_backend(self): # Intercept for sphinx docs build if sys.argv[0].endswith("sphinx-build"): return "nvidia" - if not os.path.isfile(self.config_file): - self.configure_backend() + if not os.path.isfile(self._config_file): + self._configure_backend() while True: try: - with open(self.config_file, "r") as cnf: + with open(self._config_file, "r") as cnf: config = json.load(cnf) break except json.decoder.JSONDecodeError: - self.configure_backend() + self._configure_backend() continue fs_backend = config.get("backend", None) - if fs_backend is None or fs_backend.lower() not in self.backends.values(): - fs_backend = self.configure_backend() + if fs_backend is None or fs_backend.lower() not in self._backends.values(): + fs_backend = self._configure_backend() if current_process().name == "MainProcess": print("Setting Faceswap backend to {}".format(fs_backend.upper())) return fs_backend.lower() - def configure_backend(self): - """ Configure the backend if config file doesn't exist or there is a - problem with the file """ + def _configure_backend(self): + """ Get user input to select the backend that Faceswap should use. + + Returns + ------- + str + The backend configuration in use by Faceswap + """ print("First time configuration. Please select the required backend") while True: selection = input("1: AMD, 2: CPU, 3: NVIDIA: ") @@ -80,24 +99,57 @@ def configure_backend(self): print("'{}' is not a valid selection. 
Please try again".format(selection)) continue break - fs_backend = self.backends[selection].lower() + fs_backend = self._backends[selection].lower() config = {"backend": fs_backend} - with open(self.config_file, "w") as cnf: + with open(self._config_file, "w") as cnf: json.dump(config, cnf) - print("Faceswap config written to: {}".format(self.config_file)) + print("Faceswap config written to: {}".format(self._config_file)) return fs_backend -_FS_BACKEND = Backend().backend +_FS_BACKEND = _Backend().backend def get_backend(): - """ Return the faceswap backend """ + """ Get the backend that Faceswap is currently configured to use. + + Returns + ------- + str + The backend configuration in use by Faceswap + """ return _FS_BACKEND +def set_backend(backend): + """ Override the configured backend with the given backend. + + Parameters + ---------- + backend: ["amd", "cpu", "nvidia"] + The backend to set faceswap to + """ + global _FS_BACKEND # pylint:disable=global-statement + _FS_BACKEND = backend.lower() + + def get_folder(path, make_folder=True): - """ Return a path to a folder, creating it if it doesn't exist """ + """ Return a path to a folder, creating it if it doesn't exist + + Parameters + ---------- + path: str + The path to the folder to obtain + make_folder: bool, optional + ``True`` if the folder should be created if it does not already exist, ``False`` if the + folder should not be created + + Returns + ------- + :class:`pathlib.Path` or `None` + The path to the requested folder. If `make_folder` is set to ``False`` and the requested + path does not exist, then ``None`` is returned + """ logger = logging.getLogger(__name__) # pylint:disable=invalid-name logger.debug("Requested path: '%s'", path) output_dir = Path(path) @@ -110,7 +162,18 @@ def get_folder(path, make_folder=True): def get_image_paths(directory): - """ Return a list of images that reside in a folder """ + """ Obtain a list of full paths that reside within a folder. 
+ + Parameters + ---------- + directory: str + The folder that contains the images to be returned + + Returns + ------- + list + The list of full paths to the images contained within the given folder + """ logger = logging.getLogger(__name__) # pylint:disable=invalid-name image_extensions = _image_extensions dir_contents = list() @@ -134,8 +197,19 @@ def get_image_paths(directory): def convert_to_secs(*args): - """ converts a time to second. Either convert_to_secs(min, secs) or - convert_to_secs(hours, minutes, secs). """ + """ Convert a time to seconds. + + Parameters + ---------- + args: tuple + 2 or 3 ints. If 2 ints are supplied, then (`minutes`, `seconds`) is implied. If 3 ints are + supplied then (`hours`, `minutes`, `seconds`) is implied. + + Returns + ------- + int + The given time converted to seconds + """ logger = logging.getLogger(__name__) # pylint:disable=invalid-name logger.debug("from time: %s", args) retval = 0.0 @@ -150,7 +224,24 @@ def convert_to_secs(*args): def full_path_split(path): - """ Split a given path into all of it's separate components """ + """ Split a full path to a location into all of its separate components. 
+ + Parameters + ---------- + path: str + The full path to be split + + Returns + ------- + list + The full path split into a separate item for each part + + Example + ------- + >>> path = "/foo/baz/bar" + >>> full_path_split(path) + >>> ["foo", "baz", "bar"] + """ logger = logging.getLogger(__name__) # pylint:disable=invalid-name allparts = list() while True: @@ -167,47 +258,47 @@ def full_path_split(path): return allparts -def backup_file(directory, filename): - """ Backup a given file by appending .bk to the end """ - logger = logging.getLogger(__name__) # pylint:disable=invalid-name - logger.trace("Backing up: '%s'", filename) - origfile = os.path.join(directory, filename) - backupfile = origfile + '.bk' - if os.path.exists(backupfile): - logger.trace("Removing existing file: '%s'", backup_file) - os.remove(backupfile) - if os.path.exists(origfile): - logger.trace("Renaming: '%s' to '%s'", origfile, backup_file) - os.rename(origfile, backupfile) - - -def set_system_verbosity(loglevel): - """ Set the verbosity level of tensorflow and suppresses - future and deprecation warnings from any modules - From: - https://stackoverflow.com/questions/35911252/disable-tensorflow-debugging-information - Can be set to: - 0 - all logs shown - 1 - filter out INFO logs - 2 - filter out WARNING logs - 3 - filter out ERROR logs """ +def set_system_verbosity(log_level): + """ Set the verbosity level of tensorflow and suppresses future and deprecation warnings from + any modules + + Parameters + ---------- + log_level: str + The requested Faceswap log level + + References + ---------- + https://stackoverflow.com/questions/35911252/disable-tensorflow-debugging-information + Can be set to: + 0: all logs shown. 1: filter out INFO logs. 2: filter out WARNING logs. 3: filter out ERROR + logs. 
+ """ logger = logging.getLogger(__name__) # pylint:disable=invalid-name from lib.logger import get_loglevel # pylint:disable=import-outside-toplevel - numeric_level = get_loglevel(loglevel) - loglevel = "2" if numeric_level > 15 else "0" - logger.debug("System Verbosity level: %s", loglevel) - os.environ['TF_CPP_MIN_LOG_LEVEL'] = loglevel - if loglevel != '0': + numeric_level = get_loglevel(log_level) + log_level = "2" if numeric_level > 15 else "0" + logger.debug("System Verbosity level: %s", log_level) + os.environ['TF_CPP_MIN_LOG_LEVEL'] = log_level + if log_level != '0': for warncat in (FutureWarning, DeprecationWarning, UserWarning): warnings.simplefilter(action='ignore', category=warncat) -def deprecation_warning(func_name, additional_info=None): - """ Log at warning level that a function will be removed in future """ +def deprecation_warning(function, additional_info=None): + """ Log at warning level that a function will be removed in a future update. + + Parameters + ---------- + function: str + The function that will be deprecated. + additional_info: str, optional + Any additional information to display with the deprecation message. 
Default: ``None`` + """ logger = logging.getLogger(__name__) # pylint:disable=invalid-name - logger.debug("func_name: %s, additional_info: %s", func_name, additional_info) - msg = "{} has been deprecated and will be removed from a future update.".format(func_name) + logger.debug("func_name: %s, additional_info: %s", function, additional_info) + msg = "{} has been deprecated and will be removed from a future update.".format(function) if additional_info is not None: msg += " {}".format(additional_info) logger.warning(msg) @@ -215,7 +306,22 @@ def deprecation_warning(func_name, additional_info=None): def camel_case_split(identifier): """ Split a camel case name - from: https://stackoverflow.com/questions/29916065 """ + + Parameters + ---------- + identifier: str + The camel case text to be split + + Returns + ------- + list + A list of the given identifier split into its constituent parts + + + References + ---------- + https://stackoverflow.com/questions/29916065 + """ matches = finditer( ".+?(?:(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)", identifier) @@ -223,7 +329,14 @@ def camel_case_split(identifier): def safe_shutdown(got_error=False): - """ Close queues, threads and processes in event of crash """ + """ Close all tracked queues and threads in event of crash or on shut down. + + Parameters + ---------- + got_error: bool, optional + ``True`` if this function is being called as the result of raised error, otherwise + ``False``. Default: ``False`` + """ logger = logging.getLogger(__name__) # pylint:disable=invalid-name logger.debug("Safely shutting down") from lib.queue_manager import queue_manager # pylint:disable=import-outside-toplevel @@ -233,97 +346,107 @@ def safe_shutdown(got_error=False): class FaceswapError(Exception): - """ Faceswap Error for handling specific errors with useful information """ + """ Faceswap Error for handling specific errors with useful information.
+ + Raises + ------ + FaceswapError + on a captured error + """ pass # pylint:disable=unnecessary-pass -class GetModel(): - """ Check for models in their cache path - If available, return the path, if not available, get, unzip and install model +class GetModel(): # Pylint:disable=too-few-public-methods + """ Check for models in their cache path. - model_filename: The name of the model to be loaded (see notes below) - cache_dir: The model cache folder of the current plugin calling this class - IE: The folder that holds the model to be loaded. - git_model_id: The second digit in the github tag that identifies this model. - See https://github.com/deepfakes-models/faceswap-models for more - information + If available, return the path, if not available, get, unzip and install model - NB: Models must have a certain naming convention: - IE: <model_name>_v<model_version_number>.<extension> - EG: s3fd_v1.pb + Parameters + ---------- + model_filename: str or list + The name of the model to be loaded (see notes below) + cache_dir: str + The model cache folder of the current plugin calling this class. IE: The folder that holds + the model to be loaded. + git_model_id: int + The second digit in the github tag that identifies this model. See + https://github.com/deepfakes-models/faceswap-models for more information - Multiple models can exist within the model_filename. They should be passed as a list - and follow the same naming convention as above. Any differences in filename should - occur AFTER the version number. - IE: [<model_name>_v<model_version_number><differentiating_information>.<extension>] - EG: [mtcnn_det_v1.1.py, mtcnn_det_v1.2.py, mtcnn_det_v1.3.py] - [resnet_ssd_v1.caffemodel, resnet_ssd_v1.prototext] - """ + Notes + ------ + Models must have a certain naming convention: `<model_name>_v<model_version_number>.<extension>` + (eg: `s3fd_v1.pb`). + + Multiple models can exist within the model_filename. They should be passed as a list and follow + the same naming convention as above.
Any differences in filename should occur AFTER the version + number: `<model_name>_v<model_version_number><differentiating_information>.<extension>` (eg: + `["mtcnn_det_v1.1.py", "mtcnn_det_v1.2.py", "mtcnn_det_v1.3.py"]`, `["resnet_ssd_v1.caffemodel" + ,"resnet_ssd_v1.prototext"]` + """ def __init__(self, model_filename, cache_dir, git_model_id): - self.logger = logging.getLogger(__name__) # pylint:disable=invalid-name + self.logger = logging.getLogger(__name__) if not isinstance(model_filename, list): model_filename = [model_filename] - self.model_filename = model_filename - self.cache_dir = cache_dir - self.git_model_id = git_model_id - self.url_base = "https://github.com/deepfakes-models/faceswap-models/releases/download" - self.chunk_size = 1024 # Chunk size for downloading and unzipping - self.retries = 6 - self.get() - self.model_path = self._model_path + self._model_filename = model_filename + self._cache_dir = cache_dir + self._git_model_id = git_model_id + self._url_base = "https://github.com/deepfakes-models/faceswap-models/releases/download" + self._chunk_size = 1024 # Chunk size for downloading and unzipping + self._retries = 6 + self._get() @property def _model_full_name(self): - """ Return the model full name from the filename(s) """ - common_prefix = os.path.commonprefix(self.model_filename) + """ str: The full model name from the filename(s). """ + common_prefix = os.path.commonprefix(self._model_filename) retval = os.path.splitext(common_prefix)[0] self.logger.trace(retval) return retval @property def _model_name(self): - """ Return the model name from the model full name """ + """ str: The model name from the model's full name. """ retval = self._model_full_name[:self._model_full_name.rfind("_")] self.logger.trace(retval) return retval @property def _model_version(self): - """ Return the model version from the model full name """ + """ int: The model's version number from the model full name.
""" retval = int(self._model_full_name[self._model_full_name.rfind("_") + 2:]) self.logger.trace(retval) return retval @property - def _model_path(self): - """ Return the model path(s) in the cache folder """ - retval = [os.path.join(self.cache_dir, fname) for fname in self.model_filename] + def model_path(self): + """ str or list: The model path(s) in the cache folder. """ + retval = [os.path.join(self._cache_dir, fname) for fname in self._model_filename] retval = retval[0] if len(retval) == 1 else retval self.logger.trace(retval) return retval @property def _model_zip_path(self): - """ Full path to downloaded zip file """ - retval = os.path.join(self.cache_dir, "{}.zip".format(self._model_full_name)) + """ str: The full path to downloaded zip file. """ + retval = os.path.join(self._cache_dir, "{}.zip".format(self._model_full_name)) self.logger.trace(retval) return retval @property def _model_exists(self): - """ Check model(s) exist """ - if isinstance(self._model_path, list): - retval = all(os.path.exists(pth) for pth in self._model_path) + """ bool: ``True`` if the model exists in the cache folder otherwise ``False``. """ + if isinstance(self.model_path, list): + retval = all(os.path.exists(pth) for pth in self.model_path) else: - retval = os.path.exists(self._model_path) + retval = os.path.exists(self.model_path) self.logger.trace(retval) return retval @property def _plugin_section(self): - """ Get the plugin section from the config_dir """ - path = os.path.normpath(self.cache_dir) + """ str: The plugin section from the config_dir """ + path = os.path.normpath(self._cache_dir) split = path.split(os.sep) retval = split[split.index("plugins") + 1] self.logger.trace(retval) @@ -331,7 +454,7 @@ def _plugin_section(self): @property def _url_section(self): - """ Return the section ID in github for this plugin type """ + """ int: The section ID in github for this plugin type.
""" sections = dict(extract=1, train=2, convert=3) retval = sections[self._plugin_section] self.logger.trace(retval) @@ -339,33 +462,34 @@ def _url_section(self): @property def _url_download(self): - """ Base URL for models """ - tag = "v{}.{}.{}".format(self._url_section, self.git_model_id, self._model_version) - retval = "{}/{}/{}.zip".format(self.url_base, tag, self._model_full_name) + """ str: Base download URL for models. """ + tag = "v{}.{}.{}".format(self._url_section, self._git_model_id, self._model_version) + retval = "{}/{}/{}.zip".format(self._url_base, tag, self._model_full_name) self.logger.trace("Download url: %s", retval) return retval @property def _url_partial_size(self): - """ Return how many bytes have already been downloaded """ + """ int: How many bytes have already been downloaded. """ zip_file = self._model_zip_path retval = os.path.getsize(zip_file) if os.path.exists(zip_file) else 0 self.logger.trace(retval) return retval - def get(self): - """ Check the model exists, if not, download and unzip into location """ + def _get(self): + """ Check the model exists, if not, download the model, unzip it and place it in the + model's cache folder. """ if self._model_exists: - self.logger.debug("Model exists: %s", self._model_path) + self.logger.debug("Model exists: %s", self.model_path) return - self.download_model() - self.unzip_model() + self._download_model() + self._unzip_model() os.remove(self._model_zip_path) - def download_model(self): - """ Download model zip to cache folder """ + def _download_model(self): + """ Download the model zip from github to the cache folder.
""" self.logger.info("Downloading model: '%s' from: %s", self._model_name, self._url_download) - for attempt in range(self.retries): + for attempt in range(self._retries): try: downloaded_size = self._url_partial_size req = urllib.request.Request(self._url_download) @@ -374,24 +498,32 @@ def download_model(self): response = urllib.request.urlopen(req, timeout=10) self.logger.debug("header info: {%s}", response.info()) self.logger.debug("Return Code: %s", response.getcode()) - self.write_zipfile(response, downloaded_size) + self._write_zipfile(response, downloaded_size) break except (socket_error, socket_timeout, urllib.error.HTTPError, urllib.error.URLError) as err: - if attempt + 1 < self.retries: + if attempt + 1 < self._retries: self.logger.warning("Error downloading model (%s). Retrying %s of %s...", - str(err), attempt + 2, self.retries) + str(err), attempt + 2, self._retries) else: self.logger.error("Failed to download model. Exiting. (Error: '%s', URL: " "'%s')", str(err), self._url_download) self.logger.info("You can try running again to resume the download.") self.logger.info("Alternatively, you can manually download the model from: %s " "and unzip the contents to: %s", - self._url_download, self.cache_dir) + self._url_download, self._cache_dir) sys.exit(1) - def write_zipfile(self, response, downloaded_size): - """ Write the model zip file to disk """ + def _write_zipfile(self, response, downloaded_size): + """ Write the model zip file to disk. + + Parameters + ---------- + response: :class:`urllib.request.urlopen` + The response from the model download task + downloaded_size: int + The amount of bytes downloaded so far + """ length = int(response.getheader("content-length")) + downloaded_size if length == downloaded_size: self.logger.info("Zip already exists. 
Skipping download") @@ -406,25 +538,31 @@ def write_zipfile(self, response, downloaded_size): if downloaded_size != 0: pbar.update(downloaded_size) while True: - buffer = response.read(self.chunk_size) + buffer = response.read(self._chunk_size) if not buffer: break pbar.update(len(buffer)) out_file.write(buffer) pbar.close() - def unzip_model(self): + def _unzip_model(self): """ Unzip the model file to the cache folder """ self.logger.info("Extracting: '%s'", self._model_name) try: zip_file = zipfile.ZipFile(self._model_zip_path, "r") - self.write_model(zip_file) + self._write_model(zip_file) except Exception as err: # pylint:disable=broad-except self.logger.error("Unable to extract model file: %s", str(err)) sys.exit(1) - def write_model(self, zip_file): - """ Extract files from zip file and write, with progress bar """ + def _write_model(self, zip_file): + """ Extract files from zip file and write, with progress bar. + + Parameters + ---------- + zip_file: :class:`zipfile.ZipFile` + The downloaded model zip file + """ length = sum(f.file_size for f in zip_file.infolist()) fnames = zip_file.namelist() self.logger.debug("Zipfile: Filenames: %s, Total Size: %s", fnames, length) @@ -434,15 +572,85 @@ def write_model(self, zip_file): unit_scale=True, unit_divisor=1024) for fname in fnames: - out_fname = os.path.join(self.cache_dir, fname) + out_fname = os.path.join(self._cache_dir, fname) self.logger.debug("Extracting from: '%s' to '%s'", self._model_zip_path, out_fname) zipped = zip_file.open(fname) with open(out_fname, "wb") as out_file: while True: - buffer = zipped.read(self.chunk_size) + buffer = zipped.read(self._chunk_size) if not buffer: break pbar.update(len(buffer)) out_file.write(buffer) zip_file.close() pbar.close() + + +class KerasFinder(importlib.abc.MetaPathFinder): + """ Importlib Abstract Base Class for intercepting the import of Keras and returning either + Keras (AMD backend) or tensorflow.keras (any other backend).
+ + The Importlib documentation is sparse at best, and real world examples are pretty much + non-existent. Coupled with this, the import ``tensorflow.keras`` does not resolve so we need + to split out to the actual location of Keras within ``tensorflow_core``. This method works, but + it relies on hard coded paths, and is likely to not be the most robust. + + A custom loader is not used, as we can use the standard loader once we have returned the + correct spec. + """ + def __init__(self): + self._logger = logging.getLogger(__name__) + self._backend = get_backend() + self._tf_keras_locations = [["tensorflow_core", "python", "keras", "api", "_v2"], + ["tensorflow", "python", "keras", "api", "_v2"]] + + def find_spec(self, fullname, path, target=None): # pylint:disable=unused-argument + """ Obtain the spec for either keras or tensorflow.keras depending on the backend in use. + + If keras is not passed in as part of the :attr:`fullname` or the path is not ``None`` + (i.e this is a dependency import) then this returns ``None`` to use the standard import + library. 
+ + Parameters + ---------- + fullname: str + The absolute name of the module to be imported + path: str + The search path for the module + target: module object, optional + Inherited from parent but unused + + Returns + ------- + :class:`importlib.ModuleSpec` + The spec for the Keras module to be imported + """ + prefix = fullname.split(".")[0] + suffix = fullname.split(".")[-1] + if prefix != "keras" or path is not None: + return None + self._logger.debug("Importing '%s' as keras for backend: '%s'", + "keras" if self._backend == "amd" else "tf.keras", self._backend) + path = sys.path if path is None else path + for entry in path: + locations = ([os.path.join(entry, *location) + for location in self._tf_keras_locations] + if self._backend != "amd" else [entry]) + for location in locations: + self._logger.debug("Scanning: '%s' for '%s'", location, suffix) + if os.path.isdir(os.path.join(location, suffix)): + filename = os.path.join(location, suffix, "__init__.py") + submodule_locations = [os.path.join(location, suffix)] + else: + filename = os.path.join(location, suffix + ".py") + submodule_locations = None + if not os.path.exists(filename): + continue + retval = importlib.util.spec_from_file_location( + fullname, + filename, + submodule_search_locations=submodule_locations) + self._logger.debug("Found spec: %s", retval) + return retval + self._logger.debug("Spec not found for '%s'. Falling back to default import", fullname) + return None diff --git a/plugins/extract/_base.py b/plugins/extract/_base.py index 005c12e6c0..3c104a247a 100644 --- a/plugins/extract/_base.py +++ b/plugins/extract/_base.py @@ -60,9 +60,9 @@ class Extractor(): https://github.com/deepfakes-models/faceswap-models for more information model_filename: str The name of the model file to be loaded - - Other Parameters - ---------------- + exclude_gpus: list, optional + A list of indices correlating to connected GPUs that Tensorflow should not use. Pass + ``None`` to not exclude any GPUs. 
Default: ``None`` configfile: str, optional Path to a custom configuration ``ini`` file. Default: Use system configfile instance: int, optional @@ -101,12 +101,14 @@ class Extractor(): plugins.extract.pipeline : The extract pipeline that configures and calls all plugins """ - def __init__(self, git_model_id=None, model_filename=None, configfile=None, instance=0): - logger.debug("Initializing %s: (git_model_id: %s, model_filename: %s, instance: %s, " - "configfile: %s, )", self.__class__.__name__, git_model_id, model_filename, - instance, configfile) + def __init__(self, git_model_id=None, model_filename=None, exclude_gpus=None, configfile=None, + instance=0): + logger.debug("Initializing %s: (git_model_id: %s, model_filename: %s, exclude_gpus: %s, " + "configfile: %s, instance: %s, )", self.__class__.__name__, git_model_id, + model_filename, exclude_gpus, configfile, instance) self._instance = instance + self._exclude_gpus = exclude_gpus self.config = _get_config(".".join(self.__module__.split(".")[-2:]), configfile=configfile) """ dict: Config for this plugin, loaded from ``extract.ini`` configfile """ diff --git a/plugins/extract/align/_base.py b/plugins/extract/align/_base.py index e01641872f..d089ab901e 100644 --- a/plugins/extract/align/_base.py +++ b/plugins/extract/align/_base.py @@ -50,13 +50,14 @@ class Aligner(Extractor): # pylint:disable=abstract-method """ def __init__(self, git_model_id=None, model_filename=None, - configfile=None, instance=0, normalize_method=None): + configfile=None, instance=0, normalize_method=None, **kwargs): logger.debug("Initializing %s: (normalize_method: %s)", self.__class__.__name__, normalize_method) super().__init__(git_model_id, model_filename, configfile=configfile, - instance=instance) + instance=instance, + **kwargs) self._normalize_method = None self.set_normalize_method(normalize_method) diff --git a/plugins/extract/align/fan.py b/plugins/extract/align/fan.py index 3be1d14511..79739b09cd 100644 --- 
a/plugins/extract/align/fan.py +++ b/plugins/extract/align/fan.py @@ -5,8 +5,6 @@ """ import cv2 import numpy as np -import keras -from keras import backend as K from lib.model.session import KSession from ._base import Aligner, logger @@ -15,8 +13,8 @@ class Align(Aligner): """ Perform transformation to align and get landmarks """ def __init__(self, **kwargs): - git_model_id = 9 - model_filename = "face-alignment-network_2d4_keras_v1.h5" + git_model_id = 13 + model_filename = "face-alignment-network_2d4_keras_v2.h5" super().__init__(git_model_id=git_model_id, model_filename=model_filename, **kwargs) self.name = "FAN" self.input_size = 256 @@ -29,14 +27,13 @@ def __init__(self, **kwargs): def init_model(self): """ Initialize FAN model """ - model_kwargs = dict(custom_objects={'TorchBatchNorm2D': TorchBatchNorm2D}) self.model = KSession(self.name, self.model_path, - model_kwargs=model_kwargs, - allow_growth=self.config["allow_growth"]) + allow_growth=self.config["allow_growth"], + exclude_gpus=self._exclude_gpus) self.model.load_model() # Feed a placeholder so Aligner is primed for Manual tool - placeholder_shape = (self.batchsize, 3, self.input_size, self.input_size) + placeholder_shape = (self.batchsize, self.input_size, self.input_size, 3) placeholder = np.zeros(placeholder_shape, dtype="float32") self.model.predict(placeholder) @@ -47,7 +44,7 @@ def process_input(self, batch): faces = self.crop(batch) logger.trace("Aligned image around center") faces = self._normalize_faces(faces) - batch["feed"] = np.array(faces, dtype="float32")[..., :3].transpose((0, 3, 1, 2)) / 255.0 + batch["feed"] = np.array(faces, dtype="float32")[..., :3] / 255.0 return batch def get_center_scale(self, detected_faces): @@ -122,8 +119,10 @@ def transform(points, center_scales, resolutions): def predict(self, batch): """ Predict the 68 point landmarks """ logger.debug("Predicting Landmarks") - batch["prediction"] = self.model.predict(batch["feed"])[-1] - logger.trace([pred.shape for pred 
in batch["prediction"]]) + # TODO Remove lazy transpose and change points from predict to use the correct + # order + batch["prediction"] = self.model.predict(batch["feed"])[-1].transpose(0, 3, 1, 2) + logger.trace(batch["prediction"].shape) return batch def process_output(self, batch): @@ -156,77 +155,3 @@ def get_pts_from_predict(self, batch): batch["landmarks"] = self.transform(subpixel_landmarks, batch["center_scale"], resolution) logger.trace("Obtained points from prediction: %s", batch["landmarks"]) - - -class TorchBatchNorm2D(keras.engine.base_layer.Layer): - # pylint:disable=too-many-instance-attributes - """" Required for FAN_keras model """ - def __init__(self, axis=-1, momentum=0.99, epsilon=1e-3, **kwargs): - super(TorchBatchNorm2D, self).__init__(**kwargs) - self.supports_masking = True - self.axis = axis - self.momentum = momentum - self.epsilon = epsilon - self._epsilon_const = K.constant(self.epsilon, dtype='float32') - - self.built = False - self.gamma = None - self.beta = None - self.moving_mean = None - self.moving_variance = None - - def build(self, input_shape): - dim = input_shape[self.axis] - if dim is None: - raise ValueError("Axis {} of input tensor should have a " - "defined dimension but the layer received " - "an input with shape {}." 
- .format(str(self.axis), str(input_shape))) - shape = (dim,) - self.gamma = self.add_weight(shape=shape, - name='gamma', - initializer='ones', - regularizer=None, - constraint=None) - self.beta = self.add_weight(shape=shape, - name='beta', - initializer='zeros', - regularizer=None, - constraint=None) - self.moving_mean = self.add_weight(shape=shape, - name='moving_mean', - initializer='zeros', - trainable=False) - self.moving_variance = self.add_weight(shape=shape, - name='moving_variance', - initializer='ones', - trainable=False) - self.built = True - - def call(self, inputs, **kwargs): - input_shape = K.int_shape(inputs) - - broadcast_shape = [1] * len(input_shape) - broadcast_shape[self.axis] = input_shape[self.axis] - - broadcast_moving_mean = K.reshape(self.moving_mean, broadcast_shape) - broadcast_moving_variance = K.reshape(self.moving_variance, - broadcast_shape) - broadcast_gamma = K.reshape(self.gamma, broadcast_shape) - broadcast_beta = K.reshape(self.beta, broadcast_shape) - invstd = ( - K.ones(shape=broadcast_shape, dtype='float32') - / K.sqrt(broadcast_moving_variance + self._epsilon_const) - ) - - return((inputs - broadcast_moving_mean) - * invstd - * broadcast_gamma - + broadcast_beta) - - def get_config(self): - config = {'axis': self.axis, - 'momentum': self.momentum, - 'epsilon': self.epsilon} - base_config = super(TorchBatchNorm2D, self).get_config() - return dict(list(base_config.items()) + list(config.items())) diff --git a/plugins/extract/detect/_base.py b/plugins/extract/detect/_base.py index 3cf7b3fa5c..3fc281628d 100644 --- a/plugins/extract/detect/_base.py +++ b/plugins/extract/detect/_base.py @@ -58,13 +58,14 @@ class Detector(Extractor): # pylint:disable=abstract-method """ def __init__(self, git_model_id=None, model_filename=None, - configfile=None, instance=0, rotation=None, min_size=0): + configfile=None, instance=0, rotation=None, min_size=0, **kwargs): logger.debug("Initializing %s: (rotation: %s, min_size: %s)", 
self.__class__.__name__, rotation, min_size) super().__init__(git_model_id, model_filename, configfile=configfile, - instance=instance) + instance=instance, + **kwargs) self.rotation = self._get_rotation_angles(rotation) self.min_size = min_size diff --git a/plugins/extract/detect/mtcnn.py b/plugins/extract/detect/mtcnn.py index 4e623aa628..f2783f9d48 100644 --- a/plugins/extract/detect/mtcnn.py +++ b/plugins/extract/detect/mtcnn.py @@ -4,10 +4,9 @@ from __future__ import absolute_import, division, print_function import cv2 -from keras.layers import Conv2D, Dense, Flatten, Input, MaxPool2D, Permute, PReLU - import numpy as np - +# pylint:disable=import-error +from keras.layers import Conv2D, Dense, Flatten, Input, MaxPool2D, Permute, PReLU from lib.model.session import KSession from ._base import Detector, logger @@ -54,7 +53,10 @@ def validate_kwargs(self): def init_model(self): """ Initialize S3FD Model""" - self.model = MTCNN(self.model_path, self.config["allow_growth"], **self.kwargs) + self.model = MTCNN(self.model_path, + self.config["allow_growth"], + self._exclude_gpus, + **self.kwargs) def process_input(self, batch): """ Compile the detection image(s) for prediction """ @@ -105,15 +107,18 @@ def process_output(self, batch): class PNet(KSession): - """ Keras PNet model for MTCNN """ - def __init__(self, model_path, allow_growth): - super().__init__("MTCNN-PNet", model_path, allow_growth=allow_growth) + """ Keras P-Net model for MTCNN """ + def __init__(self, model_path, allow_growth, exclude_gpus): + super().__init__("MTCNN-PNet", + model_path, + allow_growth=allow_growth, + exclude_gpus=exclude_gpus) self.define_model(self.model_definition) self.load_model_weights() @staticmethod def model_definition(): - """ Keras PNetwork for MTCNN """ + """ Keras P-Network for MTCNN """ input_ = Input(shape=(None, None, 3)) var_x = Conv2D(10, (3, 3), strides=1, padding='valid', name='conv1')(input_) var_x = PReLU(shared_axes=[1, 2], name='PReLU1')(var_x) @@ -128,15 
+133,18 @@ def model_definition(): class RNet(KSession): - """ Keras RNet model for MTCNN """ - def __init__(self, model_path, allow_growth): - super().__init__("MTCNN-RNet", model_path, allow_growth=allow_growth) + """ Keras R-Net model for MTCNN """ + def __init__(self, model_path, allow_growth, exclude_gpus): + super().__init__("MTCNN-RNet", + model_path, + allow_growth=allow_growth, + exclude_gpus=exclude_gpus) self.define_model(self.model_definition) self.load_model_weights() @staticmethod def model_definition(): - """ Keras RNetwork for MTCNN """ + """ Keras R-Network for MTCNN """ input_ = Input(shape=(24, 24, 3)) var_x = Conv2D(28, (3, 3), strides=1, padding='valid', name='conv1')(input_) var_x = PReLU(shared_axes=[1, 2], name='prelu1')(var_x) @@ -158,15 +166,18 @@ def model_definition(): class ONet(KSession): - """ Keras ONet model for MTCNN """ - def __init__(self, model_path, allow_growth): - super().__init__("MTCNN-ONet", model_path, allow_growth=allow_growth) + """ Keras O-Net model for MTCNN """ + def __init__(self, model_path, allow_growth, exclude_gpus): + super().__init__("MTCNN-ONet", + model_path, + allow_growth=allow_growth, + exclude_gpus=exclude_gpus) self.define_model(self.model_definition) self.load_model_weights() @staticmethod def model_definition(): - """ Keras ONetwork for MTCNN """ + """ Keras O-Network for MTCNN """ input_ = Input(shape=(48, 48, 3)) var_x = Conv2D(32, (3, 3), strides=1, padding='valid', name='conv1')(input_) var_x = PReLU(shared_axes=[1, 2], name='prelu1')(var_x) @@ -192,26 +203,26 @@ def model_definition(): class MTCNN(): """ MTCNN Detector for face alignment """ - # TODO Batching for rnet and onet + # TODO Batching for r-net and o-net - def __init__(self, model_path, allow_growth, minsize, threshold, factor): + def __init__(self, model_path, allow_growth, exclude_gpus, minsize, threshold, factor): """ minsize: minimum faces' size threshold: threshold=[th1, th2, th3], th1-3 are three steps threshold factor: the factor 
used to create a scaling pyramid of face sizes to detect in the image. - pnet, rnet, onet: caffemodel + p-net, r-net, o-net: caffemodel """ - logger.debug("Initializing: %s: (model_path: '%s', allow_growth: %s, minsize: %s, " - "threshold: %s, factor: %s)", self.__class__.__name__, model_path, - allow_growth, minsize, threshold, factor) + logger.debug("Initializing: %s: (model_path: '%s', allow_growth: %s, exclude_gpus: %s, " + "minsize: %s, threshold: %s, factor: %s)", self.__class__.__name__, + model_path, allow_growth, exclude_gpus, minsize, threshold, factor) self.minsize = minsize self.threshold = threshold self.factor = factor - self.pnet = PNet(model_path[0], allow_growth) - self.rnet = RNet(model_path[1], allow_growth) - self.onet = ONet(model_path[2], allow_growth) + self.pnet = PNet(model_path[0], allow_growth, exclude_gpus) + self.rnet = RNet(model_path[1], allow_growth, exclude_gpus) + self.onet = ONet(model_path[2], allow_growth, exclude_gpus) self._pnet_scales = None logger.debug("Initialized: %s", self.__class__.__name__) @@ -238,7 +249,7 @@ def detect_faces(self, batch): def detect_pnet(self, images, height, width): # pylint: disable=too-many-locals - """ first stage - fast proposal network (pnet) to obtain face candidates """ + """ first stage - fast proposal network (p-net) to obtain face candidates """ if self._pnet_scales is None: self._pnet_scales = calculate_scales(height, width, self.minsize, self.factor) rectangles = [[] for _ in range(images.shape[0])] @@ -256,7 +267,7 @@ def detect_pnet(self, images, height, width): cls_prob = np.swapaxes(cls_prob, 1, 2) roi = np.swapaxes(roi, 1, 3) for idx in range(batch_items): - # first index 0 = class score, 1 = one hot repr + # first index 0 = class score, 1 = one hot representation rectangle = detect_face_12net(cls_prob[idx, ...], roi[idx, ...], out_side, @@ -268,7 +279,7 @@ def detect_pnet(self, images, height, width): return [nms(x, 0.7, 'iou') for x in rectangles] def detect_rnet(self, images, 
rectangle_batch, height, width): - """ second stage - refinement of face candidates with rnet """ + """ second stage - refinement of face candidates with r-net """ ret = [] # TODO: batching for idx, rectangles in enumerate(rectangle_batch): @@ -295,7 +306,7 @@ def detect_rnet(self, images, rectangle_batch, height, width): return ret def detect_onet(self, images, rectangle_batch, height, width): - """ third stage - further refinement and facial landmarks positions with onet """ + """ third stage - further refinement and facial landmarks positions with o-net """ ret = list() # TODO: batching for idx, rectangles in enumerate(rectangle_batch): @@ -474,7 +485,7 @@ def filter_face_48net(cls_prob, roi, pts, rectangles, width, height, threshold): def nms(rectangles, threshold, method): # pylint:disable=too-many-locals - """ apply NMS(non-maximum suppression) on ROIs in same scale(matrix version) + """ apply non-maximum suppression on ROIs in same scale(matrix version) Input: rectangles: rectangles[i][0:3] is the position, rectangles[i][4] is score Output: diff --git a/plugins/extract/detect/s3fd.py b/plugins/extract/detect/s3fd.py index cc516fd09a..0b1e7d8daf 100644 --- a/plugins/extract/detect/s3fd.py +++ b/plugins/extract/detect/s3fd.py @@ -8,8 +8,8 @@ from scipy.special import logsumexp import numpy as np -import keras -import keras.backend as K +import keras # pylint:disable=import-error +import keras.backend as K # pylint:disable=import-error from lib.model.session import KSession from ._base import Detector, logger @@ -31,14 +31,18 @@ def __init__(self, **kwargs): def init_model(self): """ Initialize S3FD Model""" confidence = self.config["confidence"] / 100 - model_kwargs = dict(custom_objects=dict(O2K_Add=O2K_Add, - O2K_Slice=O2K_Slice, - O2K_Sum=O2K_Sum, - O2K_Sqrt=O2K_Sqrt, - O2K_Pow=O2K_Pow, - O2K_ConstantLayer=O2K_ConstantLayer, - O2K_Div=O2K_Div)) - self.model = S3fd(self.model_path, model_kwargs, self.config["allow_growth"], confidence) + model_kwargs = 
dict(custom_objects=dict(O2K_Add=AddO2K, + O2K_Slice=SliceO2K, + O2K_Sum=SumO2K, + O2K_Sqrt=SqrtO2K, + O2K_Pow=PowO2K, + O2K_ConstantLayer=ConstantLayerO2K, + O2K_Div=DivO2K)) + self.model = S3fd(self.model_path, + model_kwargs, + self.config["allow_growth"], + self._exclude_gpus, + confidence) def process_input(self, batch): """ Compile the detection image(s) for prediction """ @@ -61,14 +65,42 @@ def process_output(self, batch): # CUSTOM KERAS LAYERS # generated by onnx2keras ################################################################################ -class O2K_ElementwiseLayer(keras.engine.Layer): - def __init__(self, **kwargs): - super(O2K_ElementwiseLayer, self).__init__(**kwargs) - - def call(self, *args): +class ElementwiseLayerO2K(keras.layers.Layer): + """ Custom Keras Element Wise layer generated by onnx2keras. """ + def call(self, inputs, **kwargs): # pylint:disable=unused-argument + """This is where the layer's logic lives. + + Override for layers that inherit from this class. + + Parameters + ---------- + inputs: Input tensor, or list/tuple of input tensors. + The input to the layer + **kwargs: Additional keyword arguments. + Required for parent class but unused + Returns + ------- + A tensor or list/tuple of tensors. + The layer output + """ raise NotImplementedError() - def compute_output_shape(self, input_shape): + def compute_output_shape(self, input_shape): # pylint:disable=no-self-use + """Computes the output shape of the layer. + + Assumes that the layer will be built to match that input shape provided. + + Parameters + ---------- + input_shape: tuple or list of tuples + Shape tuple (tuple of integers) or list of shape tuples (one per output tensor of the + layer). Shape tuples can include ``None`` for free dimensions, instead of an integer. + + Returns + ------- + tuple + An output shape tuple. 
+ """ # TODO: do this nicer ldims = len(input_shape[0]) rdims = len(input_shape[1]) @@ -81,72 +113,152 @@ def compute_output_shape(self, input_shape): return input_shape[0 if lprod > rprod else 1] -class O2K_Add(O2K_ElementwiseLayer): - def call(self, x, *args): - return x[0] + x[1] +class AddO2K(ElementwiseLayerO2K): + """ Custom Keras Add layer generated by onnx2keras. """ + def call(self, inputs, **kwargs): # pylint:disable=unused-argument + """This is where the layer's logic lives. + + Parameters + ---------- + inputs: Input tensor, or list/tuple of input tensors. + The input to the layer + **kwargs: Additional keyword arguments. + Required for parent class but unused + Returns + ------- + A tensor or list/tuple of tensors. + The layer output + """ + return inputs[0] + inputs[1] -class O2K_Slice(keras.engine.Layer): +class SliceO2K(keras.layers.Layer): + """ Custom Keras Slice layer generated by onnx2keras. """ def __init__(self, starts, ends, axes=None, steps=None, **kwargs): self._starts = starts self._ends = ends self._axes = axes self._steps = steps - super(O2K_Slice, self).__init__(**kwargs) + super().__init__(**kwargs) def get_config(self): - config = super(O2K_Slice, self).get_config() + """ Returns the config of the layer. + + A layer config is a Python dictionary (serializable) containing the configuration of a + layer. The same layer can be re-instantiated later (without its trained weights) from this + configuration. The config of a layer does not include connectivity information, nor the + layer class name. These are handled by `Network` (one layer of abstraction above). + + Returns + ------- + dict + The configuration for the layer + """ + config = super().get_config() config.update({ 'starts': self._starts, 'ends': self._ends, 'axes': self._axes, 'steps': self._steps }) return config - def get_slices(self, ndims): + def _get_slices(self, dimensions): + """ Obtain slices for the given number of dimensions. 
+ + Parameters + ---------- + dimensions: int + The number of dimensions to obtain slices for + + Returns + ------- + list + The slices for the given number of dimensions + """ axes = self._axes steps = self._steps if axes is None: - axes = tuple(range(ndims)) + axes = tuple(range(dimensions)) if steps is None: steps = (1,) * len(axes) assert len(axes) == len(steps) == len(self._starts) == len(self._ends) return list(zip(axes, self._starts, self._ends, steps)) def compute_output_shape(self, input_shape): + """Computes the output shape of the layer. + + Assumes that the layer will be built to match that input shape provided. + + Parameters + ---------- + input_shape: tuple or list of tuples + Shape tuple (tuple of integers) or list of shape tuples (one per output tensor of the + layer). Shape tuples can include ``None`` for free dimensions, instead of an integer. + + Returns + ------- + tuple + An output shape tuple. + """ input_shape = list(input_shape) - for ax, start, end, steps in self.get_slices(len(input_shape)): - size = input_shape[ax] - if ax == 0: + for a_x, start, end, steps in self._get_slices(len(input_shape)): + size = input_shape[a_x] + if a_x == 0: raise AttributeError("Can not slice batch axis.") if size is None: if start < 0 or end < 0: raise AttributeError("Negative slices not supported on symbolic axes") logger.warning("Slicing symbolic axis might lead to problems.") - input_shape[ax] = (end - start) // steps + input_shape[a_x] = (end - start) // steps continue if start < 0: start = size - start if end < 0: end = size - end - input_shape[ax] = (min(size, end) - start) // steps + input_shape[a_x] = (min(size, end) - start) // steps return tuple(input_shape) - def call(self, x, *args): - ax_map = dict((x[0], slice(*x[1:])) for x in self.get_slices(K.ndim(x))) - shape = K.int_shape(x) + def call(self, inputs, **kwargs): # pylint:disable=unused-argument + """This is where the layer's logic lives. 
+ + Parameters + ---------- + inputs: Input tensor, or list/tuple of input tensors. + The input to the layer + **kwargs: Additional keyword arguments. + Required for parent class but unused + Returns + ------- + A tensor or list/tuple of tensors. + The layer output + """ + ax_map = dict((x[0], slice(*x[1:])) for x in self._get_slices(K.ndim(inputs))) + shape = K.int_shape(inputs) slices = [(ax_map[a] if a in ax_map else slice(None)) for a in range(len(shape))] - x = x[tuple(slices)] - return x + retval = inputs[tuple(slices)] + return retval -class O2K_ReduceLayer(keras.engine.Layer): +class ReduceLayerO2K(keras.layers.Layer): + """ Custom Keras Reduce layer generated by onnx2keras. """ def __init__(self, axes=None, keepdims=True, **kwargs): self._axes = [axes] if isinstance(axes, int) else axes self._keepdims = bool(keepdims) - super(O2K_ReduceLayer, self).__init__(**kwargs) + super().__init__(**kwargs) def get_config(self): - config = super(O2K_ReduceLayer, self).get_config() + """ Returns the config of the layer. + + A layer config is a Python dictionary (serializable) containing the configuration of a + layer. The same layer can be re-instantiated later (without its trained weights) from this + configuration. The config of a layer does not include connectivity information, nor the + layer class name. These are handled by `Network` (one layer of abstraction above). + + Returns + ------- + dict + The configuration for the layer + """ + config = super().get_config() config.update({ 'axes': self._axes, 'keepdims': self._keepdims @@ -154,6 +266,21 @@ def get_config(self): return config def compute_output_shape(self, input_shape): + """Computes the output shape of the layer. + + Assumes that the layer will be built to match that input shape provided. + + Parameters + ---------- + input_shape: tuple or list of tuples + Shape tuple (tuple of integers) or list of shape tuples (one per output tensor of the + layer). 
Shape tuples can include ``None`` for free dimensions, instead of an integer. + + Returns + ------- + tuple + An output shape tuple. + """ if self._axes is None: return (1,)*len(input_shape) if self._keepdims else tuple() ret = list(input_shape) @@ -164,41 +291,139 @@ def compute_output_shape(self, input_shape): ret.pop(i) return tuple(ret) - def call(self, x, *args): + def call(self, inputs, **kwargs): # pylint:disable=unused-argument + """This is where the layer's logic lives. + + Override for layers which inherit from this class + + Parameters + ---------- + inputs: Input tensor, or list/tuple of input tensors. + The input to the layer + **kwargs: Additional keyword arguments. + Required for parent class but unused + Returns + ------- + A tensor or list/tuple of tensors. + The layer output + """ raise NotImplementedError() -class O2K_Sum(O2K_ReduceLayer): - def call(self, x, *args): - return K.sum(x, self._axes, self._keepdims) - - -class O2K_Sqrt(keras.engine.Layer): - def call(self, x, *args): - return K.sqrt(x) - - -class O2K_Pow(keras.engine.Layer): - def call(self, x, *args): - return K.pow(*x) +class SumO2K(ReduceLayerO2K): + """ Custom Keras Sum layer generated by onnx2keras. """ + def call(self, inputs, **kwargs): # pylint:disable=unused-argument + """This is where the layer's logic lives. + + Parameters + ---------- + inputs: Input tensor, or list/tuple of input tensors. + The input to the layer + **kwargs: Additional keyword arguments. + Required for parent class but unused + Returns + ------- + A tensor or list/tuple of tensors. + The layer output + """ + return K.sum(inputs, self._axes, self._keepdims) + + +class SqrtO2K(keras.layers.Layer): # pylint:disable=too-few-public-methods + """ Custom Keras Square Root layer generated by onnx2keras. """ + def call(self, inputs, **kwargs): # pylint:disable=unused-argument,no-self-use + """This is where the layer's logic lives. + + Parameters + ---------- + inputs: Input tensor, or list/tuple of input tensors. 
+ The input to the layer + **kwargs: Additional keyword arguments. + Required for parent class but unused + Returns + ------- + A tensor or list/tuple of tensors. + The layer output + """ + return K.sqrt(inputs) + + +class PowO2K(keras.layers.Layer): # pylint:disable=too-few-public-methods + """ Custom Keras Power layer generated by onnx2keras. """ + def call(self, inputs, **kwargs): # pylint:disable=unused-argument,no-self-use + """This is where the layer's logic lives. + + Parameters + ---------- + inputs: Input tensor, or list/tuple of input tensors. + The input to the layer + **kwargs: Additional keyword arguments. + Required for parent class but unused + Returns + ------- + A tensor or list/tuple of tensors. + The layer output + """ + return K.pow(*inputs) -class O2K_ConstantLayer(keras.engine.Layer): +class ConstantLayerO2K(keras.layers.Layer): + """ Custom Keras Constant layer generated by onnx2keras. """ def __init__(self, constant_obj, dtype, **kwargs): self._dtype = np.dtype(dtype).name self._constant = np.array(constant_obj, dtype=self._dtype) - super(O2K_ConstantLayer, self).__init__(**kwargs) - - def call(self, *args): - # pylint:disable=arguments-differ + super().__init__(**kwargs) + + def call(self, inputs, **kwargs): # pylint:disable=unused-argument + """This is where the layer's logic lives. + + Parameters + ---------- + inputs: Input tensor, or list/tuple of input tensors. + The input to the layer. Required for parent class but unused + **kwargs: Additional keyword arguments. + Required for parent class but unused + Returns + ------- + A tensor or list/tuple of tensors. + The layer output + """ data = K.constant(self._constant, dtype=self._dtype) return data - def compute_output_shape(self, input_shape): + def compute_output_shape(self, input_shape): # pylint:disable=unused-argument + """Computes the output shape of the layer. + + Assumes that the layer will be built to match that input shape provided. 
+ + Parameters + ---------- + input_shape: tuple or list of tuples + Shape tuple (tuple of integers) or list of shape tuples (one per output tensor of the + layer). Shape tuples can include ``None`` for free dimensions, instead of an integer. + This is unused for a constant layer + + Returns + ------- + tuple + An output shape tuple. + """ return self._constant.shape def get_config(self): - config = super(O2K_ConstantLayer, self).get_config() + """ Returns the config of the layer. + + A layer config is a Python dictionary (serializable) containing the configuration of a + layer. The same layer can be re-instantiated later (without its trained weights) from this + configuration. The config of a layer does not include connectivity information, nor the + layer class name. These are handled by `Network` (one layer of abstraction above). + + Returns + ------- + dict + The configuration for the layer + """ + config = super().get_config() config.update({ 'constant_obj': self._constant, 'dtype': self._dtype @@ -206,18 +431,36 @@ def get_config(self): return config -class O2K_Div(O2K_ElementwiseLayer): - # pylint:disable=arguments-differ - def call(self, x, *args): - return x[0] / x[1] +class DivO2K(ElementwiseLayerO2K): + """ Custom Keras Division layer generated by onnx2keras. """ + def call(self, inputs, **kwargs): # pylint:disable=unused-argument + """This is where the layer's logic lives. + + Parameters + ---------- + inputs: Input tensor, or list/tuple of input tensors. + The input to the layer + **kwargs: Additional keyword arguments. + Required for parent class but unused + Returns + ------- + A tensor or list/tuple of tensors. 
+ The layer output + """ + return inputs[0] / inputs[1] class S3fd(KSession): """ Keras Network """ - def __init__(self, model_path, model_kwargs, allow_growth, confidence): - logger.debug("Initializing: %s: (model_path: '%s', allow_growth: %s)", - self.__class__.__name__, model_path, allow_growth) - super().__init__("S3FD", model_path, model_kwargs=model_kwargs, allow_growth=allow_growth) + def __init__(self, model_path, model_kwargs, allow_growth, exclude_gpus, confidence): + logger.debug("Initializing: %s: (model_path: '%s', model_kwargs: %s, allow_growth: %s, " + "exclude_gpus: %s, confidence: %s)", self.__class__.__name__, model_path, + model_kwargs, allow_growth, exclude_gpus, confidence) + super().__init__("S3FD", + model_path, + model_kwargs=model_kwargs, + allow_growth=allow_growth, + exclude_gpus=exclude_gpus) self.load_model() self.confidence = confidence self.average_img = np.array([104.0, 117.0, 123.0]) @@ -269,21 +512,25 @@ def softmax(inp, axis): return np.exp(inp - logsumexp(inp, axis=axis, keepdims=True)) @staticmethod - def decode(loc, priors): - """Decode locations from predictions using priors to undo - the encoding we did for offset regression at train time. - Args: - loc (tensor): location predictions for loc layers, - Shape: [num_priors,4] - priors (tensor): Prior boxes in center-offset form. - Shape: [num_priors,4]. - variances: (list[float]) Variances of prior boxes - Return: + def decode(location, priors): + """Decode locations from predictions using priors to undo the encoding we did for offset + regression at train time. + + Parameters + ---------- + location: tensor + location predictions for location layers, + priors: tensor + Prior boxes in center-offset form. 
+ + Returns + ------- + :class:`numpy.ndarray` decoded bounding box predictions """ variances = [0.1, 0.2] - boxes = np.concatenate((priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], - priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])), axis=1) + boxes = np.concatenate((priors[:, :2] + location[:, :2] * variances[0] * priors[:, 2:], + priors[:, 2:] * np.exp(location[:, 2:] * variances[1])), axis=1) boxes[:, :2] -= boxes[:, 2:] / 2 boxes[:, 2:] += boxes[:, :2] return boxes diff --git a/plugins/extract/mask/_base.py b/plugins/extract/mask/_base.py index 9c4cfc9ecc..f058b181a7 100644 --- a/plugins/extract/mask/_base.py +++ b/plugins/extract/mask/_base.py @@ -50,12 +50,13 @@ class Masker(Extractor): # pylint:disable=abstract-method """ def __init__(self, git_model_id=None, model_filename=None, configfile=None, - instance=0, image_is_aligned=False): + instance=0, image_is_aligned=False, **kwargs): logger.debug("Initializing %s: (configfile: %s, )", self.__class__.__name__, configfile) super().__init__(git_model_id, model_filename, configfile=configfile, - instance=instance) + instance=instance, + **kwargs) self.input_size = 256 # Override for model specific input_size self.coverage_ratio = 1.0 # Override for model specific coverage_ratio diff --git a/plugins/extract/mask/unet_dfl.py b/plugins/extract/mask/unet_dfl.py index dd60727f31..86aa140035 100644 --- a/plugins/extract/mask/unet_dfl.py +++ b/plugins/extract/mask/unet_dfl.py @@ -32,8 +32,11 @@ def __init__(self, **kwargs): self.batchsize = self.config["batch-size"] def init_model(self): - self.model = KSession(self.name, self.model_path, - model_kwargs=dict(), allow_growth=self.config["allow_growth"]) + self.model = KSession(self.name, + self.model_path, + model_kwargs=dict(), + allow_growth=self.config["allow_growth"], + exclude_gpus=self._exclude_gpus) self.model.load_model() placeholder = np.zeros((self.batchsize, self.input_size, self.input_size, 3), dtype="float32") diff --git 
a/plugins/extract/mask/vgg_clear.py b/plugins/extract/mask/vgg_clear.py index 3b5cb46682..2b1a775429 100644 --- a/plugins/extract/mask/vgg_clear.py +++ b/plugins/extract/mask/vgg_clear.py @@ -33,8 +33,11 @@ def __init__(self, **kwargs): self.batchsize = self.config["batch-size"] def init_model(self): - self.model = KSession(self.name, self.model_path, - model_kwargs=dict(), allow_growth=self.config["allow_growth"]) + self.model = KSession(self.name, + self.model_path, + model_kwargs=dict(), + allow_growth=self.config["allow_growth"], + exclude_gpus=self._exclude_gpus) self.model.load_model() self.model.append_softmax_activation(layer_index=-1) placeholder = np.zeros((self.batchsize, self.input_size, self.input_size, 3), diff --git a/plugins/extract/mask/vgg_obstructed.py b/plugins/extract/mask/vgg_obstructed.py index 487712f00d..03e8776873 100644 --- a/plugins/extract/mask/vgg_obstructed.py +++ b/plugins/extract/mask/vgg_obstructed.py @@ -33,8 +33,11 @@ def __init__(self, **kwargs): self.batchsize = self.config["batch-size"] def init_model(self): - self.model = KSession(self.name, self.model_path, - model_kwargs=dict(), allow_growth=self.config["allow_growth"]) + self.model = KSession(self.name, + self.model_path, + model_kwargs=dict(), + allow_growth=self.config["allow_growth"], + exclude_gpus=self._exclude_gpus) self.model.load_model() self.model.append_softmax_activation(layer_index=-1) placeholder = np.zeros((self.batchsize, self.input_size, self.input_size, 3), diff --git a/plugins/extract/pipeline.py b/plugins/extract/pipeline.py index cadf2b5ba3..cba08356a8 100644 --- a/plugins/extract/pipeline.py +++ b/plugins/extract/pipeline.py @@ -52,6 +52,9 @@ class Extractor(): multiprocess: bool, optional Whether to attempt processing the plugins in parallel. This may get overridden internally depending on the plugin combination. Default: ``False`` + exclude_gpus: list, optional + A list of indices correlating to connected GPUs that Tensorflow should not use. 
Pass + ``None`` to not exclude any GPUs. Default: ``None`` rotate_images: str, optional Used to set the :attr:`plugins.extract.detect.rotation` attribute. Pass in a single number to use increments of that size up to 360, or pass in a ``list`` of ``ints`` to enumerate @@ -74,17 +77,18 @@ class Extractor(): The current phase that the pipeline is running. Used in conjunction with :attr:`passes` and :attr:`final_pass` to indicate to the caller which phase is being processed """ - def __init__(self, detector, aligner, masker, configfile=None, - multiprocess=False, rotate_images=None, min_size=20, - normalize_method=None, image_is_aligned=False): - logger.debug("Initializing %s: (detector: %s, aligner: %s, masker: %s, " - "configfile: %s, multiprocess: %s, rotate_images: %s, min_size: %s, " + def __init__(self, detector, aligner, masker, configfile=None, multiprocess=False, + exclude_gpus=None, rotate_images=None, min_size=20, normalize_method=None, + image_is_aligned=False): + logger.debug("Initializing %s: (detector: %s, aligner: %s, masker: %s, configfile: %s, " + "multiprocess: %s, exclude_gpus: %s, rotate_images: %s, min_size: %s, " "normalize_method: %s, image_is_aligned: %s)", - self.__class__.__name__, detector, aligner, masker, configfile, - multiprocess, rotate_images, min_size, normalize_method, image_is_aligned) + self.__class__.__name__, detector, aligner, masker, configfile, multiprocess, + exclude_gpus, rotate_images, min_size, normalize_method, image_is_aligned) self._instance = _get_instance() masker = [masker] if not isinstance(masker, list) else masker self._flow = self._set_flow(detector, aligner, masker) + self._exclude_gpus = exclude_gpus # We only ever need 1 item in each queue. This is 2 items cached (1 in queue 1 waiting # for queue) at each point. Adding more just stacks RAM with no speed benefit. 
self._queue_size = 1 @@ -505,7 +509,8 @@ def _load_align(self, aligner, configfile, normalize_method): return None aligner_name = aligner.replace("-", "_").lower() logger.debug("Loading Aligner: '%s'", aligner_name) - aligner = PluginLoader.get_aligner(aligner_name)(configfile=configfile, + aligner = PluginLoader.get_aligner(aligner_name)(exclude_gpus=self._exclude_gpus, + configfile=configfile, normalize_method=normalize_method, instance=self._instance) return aligner @@ -517,7 +522,8 @@ def _load_detect(self, detector, rotation, min_size, configfile): return None detector_name = detector.replace("-", "_").lower() logger.debug("Loading Detector: '%s'", detector_name) - detector = PluginLoader.get_detector(detector_name)(rotation=rotation, + detector = PluginLoader.get_detector(detector_name)(exclude_gpus=self._exclude_gpus, + rotation=rotation, min_size=min_size, configfile=configfile, instance=self._instance) @@ -530,7 +536,8 @@ def _load_mask(self, masker, image_is_aligned, configfile): return None masker_name = masker.replace("-", "_").lower() logger.debug("Loading Masker: '%s'", masker_name) - masker = PluginLoader.get_masker(masker_name)(image_is_aligned=image_is_aligned, + masker = PluginLoader.get_masker(masker_name)(exclude_gpus=self._exclude_gpus, + image_is_aligned=image_is_aligned, configfile=configfile, instance=self._instance) return masker diff --git a/plugins/extract/recognition/__init__.py b/plugins/extract/recognition/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lib/vgg_face2_keras.py b/plugins/extract/recognition/vgg_face2_keras.py similarity index 70% rename from lib/vgg_face2_keras.py rename to plugins/extract/recognition/vgg_face2_keras.py index 5c49c8080d..10ca5d7ce9 100644 --- a/lib/vgg_face2_keras.py +++ b/plugins/extract/recognition/vgg_face2_keras.py @@ -2,30 +2,25 @@ """ VGG_Face2 inference and sorting """ import logging -import sys -import os import psutil import cv2 import numpy as np from fastcluster 
import linkage, linkage_vector -from lib.utils import GetModel, FaceswapError + +from lib.model.layers import L2_normalize +from lib.model.session import KSession +from lib.utils import FaceswapError +from plugins.extract._base import Extractor logger = logging.getLogger(__name__) # pylint: disable=invalid-name -class VGGFace2(): +class VGGFace2(Extractor): # pylint:disable=abstract-method """ VGG Face feature extraction. Extracts feature vectors from faces in order to compare similarity. - Parameters - ---------- - backend: ['GPU', 'CPU'] - Whether to run inference on a GPU or on the CPU - loglevel: ['INFO', 'VERBODE', 'DEBUG', 'TRACE'] - The system log level - Notes ----- Input images should be in BGR Order @@ -38,79 +33,35 @@ class VGGFace2(): https://creativecommons.org/licenses/by-nc/4.0/ """ - def __init__(self, backend="GPU", allow_growth=False, loglevel="INFO"): - logger.debug("Initializing %s: (backend: %s, allow_growth: %s, loglevel: %s)", - self.__class__.__name__, backend, allow_growth, loglevel) - backend = backend.upper() + def __init__(self, *args, **kwargs): # pylint:disable=unused-argument + logger.debug("Initializing %s", self.__class__.__name__) git_model_id = 10 model_filename = ["vggface2_resnet50_v2.h5"] + super().__init__(git_model_id=git_model_id, model_filename=model_filename, **kwargs) + self._plugin_type = "recognition" + self.name = "VGG_Face2" self.input_size = 224 # Average image provided in https://github.com/ox-vgg/vgg_face2 - self.average_img = np.array([91.4953, 103.8827, 131.0912]) - - self.model = self._get_model(git_model_id, model_filename, backend, allow_growth) + self._average_img = np.array([91.4953, 103.8827, 131.0912]) logger.debug("Initialized %s", self.__class__.__name__) # <<< GET MODEL >>> # - @staticmethod - def _get_model(git_model_id, model_filename, backend, allow_growth): - """ Check if model is available, if not, download and unzip it - - Parameters - ---------- - git_model_id: int - The second digit in the 
github tag that identifies this model. See - https://github.com/deepfakes-models/faceswap-models for more information - model_filename: str - The name of the model to be loaded (see :class:`lib.utils.GetModel` for more - information) - backend: ['GPU', 'CPU'] - Whether to run inference on a GPU or on the CPU - allow_growth: bool - ``True`` if Tensorflow's allow_growth option should be set, otherwise ``False`` - - See Also - -------- - lib.utils.GetModel: The model downloading and allocation class. - """ - root_path = os.path.abspath(os.path.dirname(sys.argv[0])) - cache_path = os.path.join(root_path, "plugins", "extract", "recognition", ".cache") - model = GetModel(model_filename, cache_path, git_model_id).model_path - if backend == "CPU": - if os.environ.get("KERAS_BACKEND", "") == "plaidml.keras.backend": - logger.info("Switching to tensorflow backend.") - os.environ["KERAS_BACKEND"] = "tensorflow" - - if allow_growth: - # TODO This needs to be centralized. Just a hacky fix to read the allow growth config - # option from the Extraction config file - logger.info("Enabling Tensorflow 'allow_growth' option") - import tensorflow as tf - from keras.backend.tensorflow_backend import set_session - config = tf.ConfigProto() - config.gpu_options.allow_growth = True - config.gpu_options.visible_device_list = "0" - set_session(tf.Session(config=config)) - logger.debug("Set Tensorflow 'allow_growth' option") - - import keras - from lib.model.layers import L2_normalize - if backend == "CPU": - with keras.backend.tf.device("/cpu:0"): - return keras.models.load_model(model, { - "L2_normalize": L2_normalize - }) - else: - return keras.models.load_model(model, { - "L2_normalize": L2_normalize - }) - - def predict(self, face): + def init_model(self): + """ Initialize VGG Face 2 Model. 
""" + model_kwargs = dict(custom_objects={'L2_normalize': L2_normalize}) + self.model = KSession(self.name, + self.model_path, + model_kwargs=model_kwargs, + allow_growth=self.config["allow_growth"], + exclude_gpus=self._exclude_gpus) + self.model.load_model() + + def predict(self, batch): """ Return encodings for given image from vgg_face2. Parameters ---------- - face: numpy.ndarray + batch: numpy.ndarray The face to be fed through the predictor. Should be in BGR channel order Returns @@ -118,9 +69,10 @@ def predict(self, face): numpy.ndarray The encodings for the face """ + face = batch if face.shape[0] != self.input_size: face = self._resize_face(face) - face = face[None, :, :, :3] - self.average_img + face = face[None, :, :, :3] - self._average_img preds = self.model.predict(face) return preds[0, :] diff --git a/plugins/train/_config.py b/plugins/train/_config.py index 7ebce93dd3..8900b536a8 100644 --- a/plugins/train/_config.py +++ b/plugins/train/_config.py @@ -69,7 +69,7 @@ def set_globals(self): "\n\t87.5%% spans from ear to ear." "\n\t100.0%% is a mugshot.") self.add_item( - section=section, title="mask_type", datatype=str, default="none", + section=section, title="mask_type", datatype=str, default="extended", choices=PluginLoader.get_available_extractors("mask", add_none=True), group="mask", gui_radio=True, info="The mask to be used for training. If you have selected 'Learn Mask' or " @@ -143,6 +143,14 @@ def set_globals(self): "sizing. More complex padding schemes can reduce artifacts at the " "border of the image." "\n\t http://www-cs.engr.ccny.cuny.edu/~wolberg/cs470/hw/hw2_pad.txt") + self.add_item( + section=section, title="allow_growth", datatype=bool, default=False, group="network", + fixed=False, + info="[Nvidia Only]. Enable the Tensorflow GPU 'allow_growth' configuration option. " + "This option prevents Tensorflow from allocating all of the GPU VRAM at launch " + "but can lead to higher VRAM fragmentation and slower performance. 
Should only " + "be enabled if you are receiving errors regarding 'cuDNN fails to initialize' " + "when commencing training.") self.add_item( section=section, title="penalized_mask_loss", datatype=bool, default=True, group="loss", @@ -155,7 +163,8 @@ def set_globals(self): default="mae", choices=["mae", "mse", "logcosh", "smooth_loss", "l_inf_norm", "ssim", "gmsd", "pixel_gradient_diff"], - info="\n\t MAE - Mean absolute error will guide reconstructions of each pixel " + info="The loss function to use." + "\n\t MAE - Mean absolute error will guide reconstructions of each pixel " "towards its median value in the training dataset. Robust to outliers but as " "a median, it can potentially ignore some infrequent image types in the dataset." "\n\t MSE - Mean squared error will guide reconstructions of each pixel " @@ -163,7 +172,8 @@ def set_globals(self): "suspectible to outliers and typically produces slightly blurrier results." "\n\t LogCosh - log(cosh(x)) acts similiar to MSE for small errors and to " "MAE for large errors. Like MSE, it is very stable and prevents overshoots " - "when errors are near zero. Like MAE, it is robust to outliers." + "when errors are near zero. Like MAE, it is robust to outliers. NB: Due to a bug " + "in PlaidML, this loss does not work on AMD cards." "\n\t Smooth_L1 --- Modification of the MAE loss to correct two of its " "disadvantages. This loss has improved stability and guidance for small errors." "\n\t L_inf_norm --- The L_inf norm will reduce the largest individual pixel " @@ -174,7 +184,8 @@ def set_globals(self): "statistics of an image. Potentially delivers more realistic looking images." "\n\t GMSD - Gradient Magnitude Similarity Deviation seeks to match " "the global standard deviation of the pixel to pixel differences between two " - "images. Similiar in approach to SSIM." + "images. Similiar in approach to SSIM. NB: This loss does not currently work on " + "AMD cards." 
"\n\t Pixel_Gradient_Difference - Instead of minimizing the difference between " "the absolute value of each pixel in two reference images, compute the pixel to " "pixel spatial difference in each image and then minimize that difference " diff --git a/plugins/train/model/_base.py b/plugins/train/model/_base.py index 2540448f0f..de4dcd9e1d 100644 --- a/plugins/train/model/_base.py +++ b/plugins/train/model/_base.py @@ -1,1000 +1,1133 @@ #!/usr/bin/env python3 -""" Base class for Models. ALL Models should at least inherit from this class +""" +Base class for Models. ALL Models should at least inherit from this class. - When inheriting model_data should be a list of NNMeta objects. - See the class for details. +See :mod:`~plugins.train.model.original` for an annotated example for how to create model plugins. """ import logging import os +import platform import sys import time -from concurrent import futures +from collections import OrderedDict +from contextlib import nullcontext + +import numpy as np +import tensorflow as tf -import keras -from keras import losses +from keras import losses as k_losses from keras import backend as K from keras.layers import Input -from keras.models import load_model, Model -from keras.utils import get_custom_objects, multi_gpu_model +from keras.models import load_model, Model as KModel +from keras.optimizers import Adam from lib.serializer import get_serializer from lib.model.backup_restore import Backup -from lib.model.losses import (DSSIMObjective, PenalizedLoss, gradient_loss, mask_loss_wrapper, - generalized_loss, l_inf_norm, gmsd_loss, gaussian_blur) -from lib.model.nn_blocks import NNBlocks -from lib.model.optimizers import Adam -from lib.utils import deprecation_warning, FaceswapError +from lib.model import losses +from lib.model.nn_blocks import set_config as set_nnblock_config +from lib.utils import get_backend, FaceswapError from plugins.train._config import Config logger = logging.getLogger(__name__) # pylint: 
disable=invalid-name _CONFIG = None +def KerasModel(inputs, outputs, name): # pylint:disable=invalid-name + """ wrapper for :class:`keras.models.Model`. + + There are some minor foibles between Keras 2.2 and the Tensorflow version of Keras, so this + catches potential issues and fixes prior to returning the requested model. + + All models created within plugins should use this method, and should not call keras directly + for a model. + + Parameters + ---------- + inputs: a keras.Input object or list of keras.Input objects. + The input(s) of the model + outputs: keras objects + The output(s) of the model. + name: str + The name of the model. + + Returns + ------- + :class:`keras.models.Model` + A Keras Model + """ + if get_backend() == "amd": + logger.debug("Flattening inputs (%s) and outputs (%s) for AMD", inputs, outputs) + inputs = np.array(inputs).flatten().tolist() + outputs = np.array(outputs).flatten().tolist() + logger.debug("Flattened inputs (%s) and outputs (%s)", inputs, outputs) + return KModel(inputs, outputs, name=name) + + class ModelBase(): - """ Base class that all models should inherit from """ - def __init__(self, - model_dir, - gpus=1, - configfile=None, - snapshot_interval=0, - no_logs=False, - warp_to_landmarks=False, - augment_color=True, - no_flip=False, - training_image_size=256, - alignments_paths=None, - preview_scale=100, - input_shape=None, - encoder_dim=None, - trainer="original", - pingpong=False, - memory_saving_gradients=False, - optimizer_savings=False, - predict=False): - logger.debug("Initializing ModelBase (%s): (model_dir: '%s', gpus: %s, configfile: %s, " - "snapshot_interval: %s, no_logs: %s, warp_to_landmarks: %s, augment_color: " - "%s, no_flip: %s, training_image_size, %s, alignments_paths: %s, " - "preview_scale: %s, input_shape: %s, encoder_dim: %s, trainer: %s, " - "pingpong: %s, memory_saving_gradients: %s, optimizer_savings: %s, " - "predict: %s)", - self.__class__.__name__, model_dir, gpus, configfile, 
snapshot_interval, - no_logs, warp_to_landmarks, augment_color, no_flip, training_image_size, - alignments_paths, preview_scale, input_shape, encoder_dim, trainer, pingpong, - memory_saving_gradients, optimizer_savings, predict) - - self.predict = predict - self.model_dir = model_dir - self.vram_savings = VRAMSavings(pingpong, optimizer_savings, memory_saving_gradients) - - self.backup = Backup(self.model_dir, self.name) - self.gpus = gpus - self.configfile = configfile - self.input_shape = input_shape - self.encoder_dim = encoder_dim - self.trainer = trainer - - self.load_config() # Load config if plugin has not already referenced it - - self.state = State(self.model_dir, - self.name, - self.config_changeable_items, - no_logs, - self.vram_savings.pingpong, - training_image_size) - - self.blocks = NNBlocks(use_icnr_init=self.config["icnr_init"], - use_convaware_init=self.config["conv_aware_init"], - use_reflect_padding=self.config["reflect_padding"], - first_run=self.state.first_run) - - self.is_legacy = False - self.rename_legacy() - self.load_state_info() - - self.networks = dict() # Networks for the model - self.predictors = dict() # Predictors for model - self.history = dict() # Loss history per save iteration) - - # Training information specific to the model should be placed in this - # dict for reference by the trainer. 
- self.training_opts = {"alignments": alignments_paths, - "preview_scaling": preview_scale / 100, - "warp_to_landmarks": warp_to_landmarks, - "augment_color": augment_color, - "no_flip": no_flip, - "pingpong": self.vram_savings.pingpong, - "snapshot_interval": snapshot_interval, - "training_size": self.state.training_size, - "no_logs": self.state.current_session["no_logs"], - "coverage_ratio": self.calculate_coverage_ratio(), - "mask_type": self.config["mask_type"], - "mask_blur_kernel": self.config["mask_blur_kernel"], - "mask_threshold": self.config["mask_threshold"], - "learn_mask": (self.config["learn_mask"] and - self.config["mask_type"] is not None), - "penalized_mask_loss": (self.config["penalized_mask_loss"] and - self.config["mask_type"] is not None)} - logger.debug("training_opts: %s", self.training_opts) - - if self.multiple_models_in_folder: - deprecation_warning("Support for multiple model types within the same folder", - additional_info="Please split each model into separate folders to " - "avoid issues in future.") - - self.build() + """ Base class that all model plugins should inherit from. + + Parameters + ---------- + model_dir: str + The full path to the model save location + arguments: :class:`argparse.Namespace` + The arguments that were passed to the train or convert process as generated from + Faceswap's command line arguments + training_image_size: int, optional + The size of the training images in the training folder. Default: `256` + predict: bool, optional + ``True`` if the model is being loaded for inference, ``False`` if the model is being loaded + for training. Default: ``False`` + + Attributes + ---------- + input_shape: tuple or list + A `tuple` of `ints` defining the shape of the faces that the model takes as input. This + should be overridden by model plugins in their :func:`__init__` function. If the input size + is the same for both sides of the model, then this can be a single 3 dimensional `tuple`. 
+ If the inputs have different sizes for `"A"` and `"B"` this should be a `list` of 2 3 + dimensional shape `tuples`, 1 for each side respectively. + trainer: str + Currently there is only one trainer available (`"original"`), so at present this attribute + can be ignored. If/when more trainers are added, then this attribute should be overridden + with the trainer name that a model requires in the model plugin's + :func:`__init__` function. + """ + def __init__(self, model_dir, arguments, training_image_size=256, predict=False): + logger.debug("Initializing ModelBase (%s): (model_dir: '%s', arguments: %s, " + "training_image_size: %s, predict: %s)", + self.__class__.__name__, model_dir, arguments, training_image_size, predict) + + self.input_shape = None # Must be set within the plugin after initializing + self.trainer = "original" # Override for plugin specific trainer + + self._args = arguments + self._is_predict = predict + self._model = None + + self._configfile = arguments.configfile if hasattr(arguments, "configfile") else None + self._load_config() + + if self.config["penalized_mask_loss"] and self.config["mask_type"] is None: + raise FaceswapError("Penalized Mask Loss has been selected but you have not chosen a " + "Mask to use. Please select a mask or disable Penalized Mask " + "Loss.") + + self._io = _IO(self, model_dir, self._is_predict) + self._check_multiple_models() + + self._settings = _Settings(self._args, self.config["allow_growth"], self._is_predict) + self._state = State(model_dir, + self.name, + self._config_changeable_items, + False if self._is_predict else self._args.no_logs, + training_image_size) + logger.debug("Initialized ModelBase (%s)", self.__class__.__name__) @property - def config_section(self): - """ The section name for loading config """ - retval = ".".join(self.__module__.split(".")[-2:]) - logger.debug(retval) + def model(self): + """:class:`Keras.models.Model`: The compiled model for this plugin. 
""" + return self._model + + @property + def command_line_arguments(self): + """ :class:`argparse.Namespace`: The command line arguments passed to the model plugin from + either the train or convert script """ + return self._args + + @property + def coverage_ratio(self): + """ float: The ratio of the training image to crop out and train on. """ + coverage_ratio = self.config.get("coverage", 62.5) / 100 + logger.debug("Requested coverage_ratio: %s", coverage_ratio) + cropped_size = (self._state.training_size * coverage_ratio) // 2 * 2 + retval = cropped_size / self._state.training_size + logger.debug("Final coverage_ratio: %s", retval) return retval + @property + def model_dir(self): + """str: The full path to the model folder location. """ + return self._io._model_dir # pylint:disable=protected-access + @property def config(self): - """ Return config dict for current plugin """ + """ dict: The configuration dictionary for current plugin, as set by the user's + configuration settings. """ global _CONFIG # pylint: disable=global-statement if not _CONFIG: - model_name = self.config_section + model_name = self._config_section logger.debug("Loading config for: %s", model_name) - _CONFIG = Config(model_name, configfile=self.configfile).config_dict + _CONFIG = Config(model_name, configfile=self._configfile).config_dict return _CONFIG - @property - def config_changeable_items(self): - """ Return the dict of config items that can be updated after the model - has been created """ - return Config(self.config_section, configfile=self.configfile).changeable_items - @property def name(self): - """ Set the model name based on the subclass """ + """ str: The name of this model based on the plugin name. 
""" basename = os.path.basename(sys.modules[self.__module__].__file__) - retval = os.path.splitext(basename)[0].lower() - logger.debug("model name: '%s'", retval) - return retval - - @property - def models_exist(self): - """ Return if all files exist and clear session """ - retval = all([os.path.isfile(model.filename) for model in self.networks.values()]) - logger.debug("Pre-existing models exist: %s", retval) - return retval - - @property - def multiple_models_in_folder(self): - """ Return true if there are multiple model types in the same folder, else false """ - model_files = [fname for fname in os.listdir(str(self.model_dir)) if fname.endswith(".h5")] - retval = False if not model_files else os.path.commonprefix(model_files) == "" - logger.debug("model_files: %s, retval: %s", model_files, retval) - return retval + return os.path.splitext(basename)[0].lower() @property def output_shapes(self): - """ Return the output shapes from the main AutoEncoder """ - out = list() - for predictor in self.predictors.values(): - out.extend([K.int_shape(output)[-3:] for output in predictor.outputs]) - break # Only get output from one autoencoder. Shapes are the same - return [tuple(shape) for shape in out] + """ list: A list of list of shape tuples for the outputs of the model with the batch + dimension removed. The outer list contains 2 sub-lists (one for each side "a" and "b"). + The inner sub-lists contain the output shapes for that side. """ + shapes = [tuple(K.int_shape(output)[-3:]) for output in self._model.outputs] + return [shapes[:len(shapes) // 2], shapes[len(shapes) // 2:]] @property - def output_shape(self): - """ The output shape of the model (shape of largest face output) """ - return self.output_shapes[self.largest_face_index] + def iterations(self): + """ int: The total number of iterations that the model has trained. 
""" + return self._state.iterations + # Private properties @property - def largest_face_index(self): - """ Return the index from model.outputs of the largest face - Required for multi-output model prediction. The largest face - is assumed to be the final output - """ - sizes = [shape[1] for shape in self.output_shapes if shape[2] == 3] - if not sizes: - return None - max_face = max(sizes) - retval = [idx for idx, shape in enumerate(self.output_shapes) - if shape[1] == max_face and shape[2] == 3][0] - logger.debug(retval) - return retval + def _config_section(self): + """ str: The section name for the current plugin for loading configuration options from the + config file. """ + return ".".join(self.__module__.split(".")[-2:]) @property - def largest_mask_index(self): - """ Return the index from model.outputs of the largest mask - Required for multi-output model prediction. The largest face - is assumed to be the final output - """ - sizes = [shape[1] for shape in self.output_shapes if shape[2] == 1] - if not sizes: - return None - max_mask = max(sizes) - retval = [idx for idx, shape in enumerate(self.output_shapes) - if shape[1] == max_mask and shape[2] == 1][0] - logger.debug(retval) - return retval + def _config_changeable_items(self): + """ dict: The configuration options that can be updated after the model has already been + created. """ + return Config(self._config_section, configfile=self._configfile).changeable_items @property - def feed_mask(self): - """ bool: ``True`` if the model expects a mask to be fed into input otherwise ``False`` """ - return self.config["mask_type"] is not None and (self.config["learn_mask"] or - self.config["penalized_mask_loss"]) + def state(self): + """:class:`State`: The state settings for the current plugin. 
""" + return self._state - def load_config(self): - """ Load the global config for reference in self.config """ + def _load_config(self): + """ Load the global config for reference in :attr:`config` and set the faceswap blocks + configuration options in `lib.model.nn_blocks` """ global _CONFIG # pylint: disable=global-statement if not _CONFIG: - model_name = self.config_section + model_name = self._config_section logger.debug("Loading config for: %s", model_name) - _CONFIG = Config(model_name, configfile=self.configfile).config_dict + _CONFIG = Config(model_name, configfile=self._configfile).config_dict - def calculate_coverage_ratio(self): - """ Coverage must be a ratio, leading to a cropped shape divisible by 2 """ - coverage_ratio = self.config.get("coverage", 62.5) / 100 - logger.debug("Requested coverage_ratio: %s", coverage_ratio) - cropped_size = (self.state.training_size * coverage_ratio) // 2 * 2 - coverage_ratio = cropped_size / self.state.training_size - logger.debug("Final coverage_ratio: %s", coverage_ratio) - return coverage_ratio + nn_block_keys = ['icnr_init', 'conv_aware_init', 'reflect_padding'] + set_nnblock_config({key: _CONFIG.pop(key) + for key in nn_block_keys}) + + def _check_multiple_models(self): + """ Check whether multiple models exist in the model folder, and that no models exist that + were trained with a different plugin than the requested plugin. 
+ + Raises + ------ + FaceswapError + If multiple model files, or models for a different plugin from that requested exists + within the model folder + """ + multiple_models = self._io.multiple_models_in_folder + if multiple_models is None: + logger.debug("Contents of model folder are valid") + return + + if len(multiple_models) == 1: + msg = ("You have requested to train with the '{}' plugin, but a model file for the " + "'{}' plugin already exists in the folder '{}'.\nPlease select a different " + "model folder.".format(self.name, multiple_models[0], self.model_dir)) + else: + msg = ("There are multiple plugin types ('{}') stored in the model folder '{}'. This " + "is not supported.\nPlease split the model files into their own folders before " + "proceeding".format("', '".join(multiple_models), self.model_dir)) + raise FaceswapError(msg) def build(self): - """ Build the model. Override for custom build methods """ - self.add_networks() - self.load_models(swapped=False) - inputs = self.get_inputs() - try: - self.build_autoencoders(inputs) - except ValueError as err: - if "must be from the same graph" in str(err).lower(): - msg = ("There was an error loading saved weights. This is most likely due to " - "model corruption during a previous save." - "\nYou should restore weights from a snapshot or from backup files. " - "You can use the 'Restore' Tool to restore from backup.") - raise FaceswapError(msg) from err - if "multi_gpu_model" in str(err).lower(): - raise FaceswapError(str(err)) from err - raise err - self.log_summary() - self.compile_predictors(initialize=True) - - def get_inputs(self): - """ Return the inputs for the model """ + """ Build the model and assign to :attr:`model`. + + Within the defined strategy scope, either builds the model from scratch or loads an + existing model if one exists. 
+ + If running inference, then the model is built only for the required side to perform the + swap function, otherwise the model is then compiled with the optimizer and chosen + loss function(s). + + Finally, a model summary is outputted to the logger at verbose level. + """ + self._update_legacy_models() + with self._settings.strategy_scope(): + if self._io.model_exists: + model = self._io._load() # pylint:disable=protected-access + if self._is_predict: + inference = _Inference(model, self._args.swap_model) + self._model = inference.model + else: + self._model = model + else: + self._validate_input_shape() + inputs = self._get_inputs() + self._model = self.build_model(inputs) + if not self._is_predict: + self._compile_model() + self._output_summary() + + def _update_legacy_models(self): + """ Load weights from legacy split models into new unified model, archiving old model files + to a new folder. """ + if self._legacy_mapping() is None: + return + if not all(os.path.isfile(os.path.join(self.model_dir, fname)) + for fname in self._legacy_mapping()): + return + archive_dir = "{}_TF1_Archived".format(self.model_dir) + if os.path.exists(archive_dir): + raise FaceswapError("We need to update your model files for use with Tensorflow 2.x, " + "but the archive folder already exists. 
Please remove the " + "following folder to continue: '{}'".format(archive_dir)) + + logger.info("Updating legacy models for Tensorflow 2.x") + logger.info("Your Tensorflow 1.x models will be archived in the following location: '%s'", + archive_dir) + os.rename(self.model_dir, archive_dir) + os.mkdir(self.model_dir) + new_model = self.build_model(self._get_inputs()) + for model_name, layer_name in self._legacy_mapping().items(): + logger.info("Updating legacy weights from '%s'...", model_name) + old_model = load_model(os.path.join(archive_dir, model_name), compile=False) + layer = [layer for layer in new_model.layers if layer.name == layer_name] + if not layer: + continue + layer = layer[0] + layer.set_weights(old_model.get_weights()) + filename = self._io._filename # pylint:disable=protected-access + logger.info("Saving Tensorflow 2.x model to '%s'", filename) + new_model.save(filename) + self._state.save() + + def _validate_input_shape(self): + """ Validate that the input shape is either a single shape tuple of 3 dimensions or + a list of 2 shape tuples of 3 dimensions. """ + assert len(self.input_shape) in (2, 3), "Input shape should either be a single 3 " \ + "dimensional shape tuple for use in both sides of the model, or a list of 2 3 " \ + "dimensional shape tuples for use in the 'A' and 'B' sides of the model" + if len(self.input_shape) == 2: + assert [len(shape) == 3 for shape in self.input_shape], "All input shapes should " \ + "have 3 dimensions" + + def _get_inputs(self): + """ Obtain the standardized inputs for the model. + + The inputs will be returned for the "A" and "B" sides in the shape as defined by + :attr:`input_shape`. + + Returns + ------- + list + A list of :class:`keras.layers.Input` tensors. This will be a list of 2 tensors (one + for each side) each of shapes :attr:`input_shape`. 
+ """ logger.debug("Getting inputs") - inputs = [Input(shape=self.input_shape, name="face_in")] - output_network = [network for network in self.networks.values() if network.is_output][0] - if self.feed_mask: - # TODO penalized mask doesn't have a mask output, so we can't use output shapes - # mask should always be last output..this needs to be a rule - mask_shape = output_network.output_shapes[-1] - inputs.append(Input(shape=(mask_shape[1:-1] + (1,)), name="mask_in")) - logger.debug("Got inputs: %s", inputs) + if len(self.input_shape) == 3: + input_shapes = [self.input_shape, self.input_shape] + else: + input_shapes = self.input_shape + inputs = [Input(shape=shape, name="face_in_{}".format(side)) + for side, shape in zip(("a", "b"), input_shapes)] + logger.debug("inputs: %s", inputs) return inputs - def build_autoencoders(self, inputs): - """ Override for Model Specific autoencoder builds + def build_model(self, inputs): + """ Override for Model Specific autoencoder builds. - Inputs is defined in self.get_inputs() and is standardized for all models - if will generally be in the order: - [face (the input for image), - mask (the input for mask if it is used)] + Parameters + ---------- + inputs: list + A list of :class:`keras.layers.Input` tensors. This will be a list of 2 tensors (one + for each side) each of shapes :attr:`input_shape`. """ raise NotImplementedError - def add_networks(self): - """ Override to add neural networks """ - raise NotImplementedError + def _output_summary(self): + """ Output the summary of the model and all sub-models to the verbose logger. """ + self._model.summary(print_fn=lambda x: logger.verbose("%s", x)) + for layer in self._model.layers: + if isinstance(layer, KModel): + layer.summary(print_fn=lambda x: logger.verbose("%s", x)) - def load_state_info(self): - """ Load the input shape from state file if it exists """ - logger.debug("Loading Input Shape from State file") - if not self.state.inputs: - logger.debug("No input shapes saved. 
Using model config") - return - if not self.state.face_shapes: - logger.warning("Input shapes stored in State file, but no matches for 'face'." - "Using model config") - return - input_shape = self.state.face_shapes[0] - logger.debug("Setting input shape from state file: %s", input_shape) - self.input_shape = input_shape - - def add_network(self, network_type, side, network, is_output=False): - """ Add a NNMeta object """ - logger.debug("network_type: '%s', side: '%s', network: '%s', is_output: %s", - network_type, side, network, is_output) - filename = "{}_{}".format(self.name, network_type.lower()) - name = network_type.lower() - if side: - side = side.lower() - filename += "_{}".format(side.upper()) - name += "_{}".format(side) - filename += ".h5" - logger.debug("name: '%s', filename: '%s'", name, filename) - self.networks[name] = NNMeta(str(self.model_dir / filename), - network_type, - side, - network, - is_output) - - def add_predictor(self, side, model): - """ Add a predictor to the predictors dictionary """ - logger.debug("Adding predictor: (side: '%s', model: %s)", side, model) - if self.gpus > 1: - logger.debug("Converting to multi-gpu: side %s", side) - model = multi_gpu_model(model, self.gpus) - self.predictors[side] = model - if not self.state.inputs: - self.store_input_shapes(model) - - def store_input_shapes(self, model): - """ Store the input and output shapes to state """ - logger.debug("Adding input shapes to state for model") - inputs = {tensor.name: K.int_shape(tensor)[-3:] for tensor in model.inputs} - if not any(inp for inp in inputs.keys() if inp.startswith("face")): - raise ValueError("No input named 'face' was found. Check your input naming. 
" - "Current input names: {}".format(inputs)) - # Make sure they are all ints so that it can be json serialized - inputs = {key: tuple(int(i) for i in val) for key, val in inputs.items()} - self.state.inputs = inputs - logger.debug("Added input shapes: %s", self.state.inputs) - - def reset_pingpong(self): - """ Reset the models for pingpong training """ - logger.debug("Resetting models") - - # Clear models and graph - self.predictors = dict() - K.clear_session() - - # Load Models for current training run - for model in self.networks.values(): - model.network = Model.from_config(model.config) - model.network.set_weights(model.weights) - - inputs = self.get_inputs() - self.build_autoencoders(inputs) - self.compile_predictors(initialize=False) - logger.debug("Reset models") - - def compile_predictors(self, initialize=True): - """ Compile the predictors """ - logger.debug("Compiling Predictors") - learning_rate = self.config.get("learning_rate", 5e-5) - optimizer = self.get_optimizer(lr=learning_rate, beta_1=0.5, beta_2=0.999) - - for side, model in self.predictors.items(): - loss = Loss(model.inputs, model.outputs) - model.compile(optimizer=optimizer, loss=loss.funcs) - if initialize: - self.state.add_session_loss_names(side, loss.names) - self.history[side] = list() - logger.debug("Compiled Predictors. Losses: %s", loss.names) - - def get_optimizer(self, lr=5e-5, beta_1=0.5, beta_2=0.999): # pylint: disable=invalid-name - """ Build and return Optimizer """ - opt_kwargs = dict(lr=lr, beta_1=beta_1, beta_2=beta_2) - if (self.config.get("clipnorm", False) and - keras.backend.backend() != "plaidml.keras.backend"): - # NB: Clip-norm is ballooning VRAM usage, which is not expected behavior - # and may be a bug in Keras/Tensorflow. - # PlaidML has a bug regarding the clip-norm parameter - # See: https://github.com/plaidml/plaidml/issues/228 - # Workaround by simply removing it. - # TODO: Remove this as soon it is fixed in PlaidML. 
- opt_kwargs["clipnorm"] = 1.0 - logger.debug("Optimizer kwargs: %s", opt_kwargs) - return Adam(**opt_kwargs, cpu_mode=self.vram_savings.optimizer_savings) - - def converter(self, swap): - """ Converter for autoencoder models """ - logger.debug("Getting Converter: (swap: %s)", swap) - side = "a" if swap else "b" - model = self.predictors[side] - if self.predict: - # Must compile the model to be thread safe - model._make_predict_function() # pylint: disable=protected-access - retval = model.predict - logger.debug("Got Converter: %s", retval) + def save(self): + """ Save the model to disk. + + Saves the serialized model, with weights, to the folder location specified when + initializing the plugin. If loss has dropped on both sides of the model, then + a backup is taken. + """ + self._io._save() # pylint:disable=protected-access + + def snapshot(self): + """ Creates a snapshot of the model folder to the models parent folder, with the number + of iterations completed appended to the end of the model name. """ + self._io._snapshot() # pylint:disable=protected-access + + def _compile_model(self): + """ Compile the model to include the Optimizer and Loss Function(s). """ + logger.debug("Compiling Model") + optimizer = self._get_optimizer() + loss = _Loss(self._model.inputs, self._model.outputs) + self._model.compile(optimizer=optimizer, loss=loss.functions) + if not self._is_predict: + self._state.add_session_loss_names(loss.names) + logger.debug("Compiled Model: %s", self._model) + + def _get_optimizer(self): + """ Return a Keras Adam Optimizer with user selected parameters. + + Returns + ------- + :class:`keras.optimizers.Adam` + An Adam Optimizer with the given user settings + + Notes + ----- + Clip-norm is ballooning VRAM usage, which is not expected behavior and may be a bug in + Keras/Tensorflow. + + PlaidML has a bug regarding the clip-norm parameter See: + https://github.com/plaidml/plaidml/issues/228. 
We workaround by simply not adding this + parameter for AMD backend users. + """ + kwargs = dict(beta_1=0.5, beta_2=0.99) + + learning_rate = "lr" if get_backend() == "amd" else "learning_rate" + kwargs[learning_rate] = self.config.get("learning_rate", 5e-5) + + clipnorm = self.config.get("clipnorm", False) + if clipnorm and (self._args.distributed or self._args.mixed_precision): + logger.warning("Clipnorm has been selected, but is unsupported when using distributed " + "or mixed_precision training, so has been disabled. If you wish to " + "enable clipnorm, then you must disable these options.") + clipnorm = False + if clipnorm and get_backend() == "amd": + # TODO add clipnorm in for plaidML when it is fixed upstream. Still not fixed in + # release 0.7.0. + logger.warning("Due to a bug in plaidML, clipnorm cannot be used on AMD backends so " + "has been disabled") + clipnorm = False + if clipnorm: + kwargs["clipnorm"] = 1.0 + + retval = Adam(**kwargs) + if self._settings.use_mixed_precision: + retval = self._settings.LossScaleOptimizer(retval, loss_scale="dynamic") + logger.debug("Optimizer: %s, kwargs: %s", retval, kwargs) return retval + def _legacy_mapping(self): # pylint:disable=no-self-use + """ The mapping of separate model files to single model layers for transferring of legacy + weights. + + Returns + ------- + dict or ``None`` + Dictionary of original H5 filenames for legacy models mapped to new layer names or + ``None`` if the model did not exist in Faceswap prior to Tensorflow 2 + """ + return None + + def add_history(self, loss): + """ Add the current iteration's loss history to :attr:`_io.history`. + + Called from the trainer after each iteration, for tracking loss drop over time between + save iterations. + + Parameters + ---------- + loss: list + The loss values for the A and B side for the current iteration. This should be the + collated loss values for each side. 
+ """ + self._io.history[0].append(loss[0]) + self._io.history[1].append(loss[1]) + + +class _IO(): + """ Model saving and loading functions. + + Handles the loading and saving of the plugin model from disk as well as the model backup and + snapshot functions. + + Parameters + ---------- + plugin: :class:`Model` + The parent plugin class that owns the IO functions. + model_dir: str + The full path to the model save location + is_predict: bool + ``True`` if the model is being loaded for inference. ``False`` if the model is being loaded + for training. + """ + def __init__(self, plugin, model_dir, is_predict): + self._plugin = plugin + self._is_predict = is_predict + self._model_dir = model_dir + self._history = [[], []] # Loss histories per save iteration + self._backup = Backup(self._model_dir, self._plugin.name) + @property - def iterations(self): - "Get current training iteration number" - return self.state.iterations - - def map_models(self, swapped): - """ Map the models for A/B side for swapping """ - logger.debug("Map models: (swapped: %s)", swapped) - models_map = {"a": dict(), "b": dict()} - sides = ("a", "b") if not swapped else ("b", "a") - for network in self.networks.values(): - if network.side == sides[0]: - models_map["a"][network.type] = network.filename - if network.side == sides[1]: - models_map["b"][network.type] = network.filename - logger.debug("Mapped models: (models_map: %s)", models_map) - return models_map - - def log_summary(self): - """ Verbose log the model summaries """ - if self.predict: - return - for side in sorted(list(self.predictors.keys())): - logger.verbose("[%s %s Summary]:", self.name.title(), side.upper()) - self.predictors[side].summary(print_fn=lambda x: logger.verbose("%s", x)) - for name, nnmeta in self.networks.items(): - if nnmeta.side is not None and nnmeta.side != side: - continue - logger.verbose("%s:", name.title()) - nnmeta.network.summary(print_fn=lambda x: logger.verbose("%s", x)) - - def do_snapshot(self): - """ 
Perform a model snapshot """ - logger.debug("Performing snapshot") - self.backup.snapshot_models(self.iterations) - logger.debug("Performed snapshot") + def _filename(self): + """str: The filename for this model.""" + return os.path.join(self._model_dir, "{}.h5".format(self._plugin.name)) - def load_models(self, swapped): - """ Load models from file """ - logger.debug("Load model: (swapped: %s)", swapped) + @property + def model_exists(self): + """ bool: ``True`` if a model of the type being loaded exists within the model folder + location otherwise ``False``. + """ + return os.path.isfile(self._filename) - if not self.models_exist and not self.predict: - logger.info("Creating new '%s' model in folder: '%s'", self.name, self.model_dir) - return None - if not self.models_exist and self.predict: - logger.error("Model could not be found in folder '%s'. Exiting", self.model_dir) - exit(0) - - if not self.is_legacy or not self.predict: - K.clear_session() - model_mapping = self.map_models(swapped) - for network in self.networks.values(): - if not network.side: - is_loaded = network.load() - else: - is_loaded = network.load(fullpath=model_mapping[network.side][network.type]) - if not is_loaded: - break - if is_loaded: - logger.info("Loaded model from disk: '%s'", self.model_dir) - return is_loaded - - def save_models(self): - """ Backup and save the models """ + @property + def history(self): + """ list: list of loss histories per side for the current save iteration. 
""" + return self._history + + @property + def multiple_models_in_folder(self): + """ :list: or ``None`` If there are multiple model types in the requested folder, or model + types that don't correspond to the requested plugin type, then returns the list of plugin + names that exist in the folder, otherwise returns ``None`` """ + plugins = [fname.replace(".h5", "") + for fname in os.listdir(self._model_dir) + if fname.endswith(".h5")] + test_names = plugins + [self._plugin.name] + test = False if not test_names else os.path.commonprefix(test_names) == "" + retval = None if not test else plugins + logger.debug("plugin name: %s, plugins: %s, test result: %s, retval: %s", + self._plugin.name, plugins, test, retval) + return retval + + def _load(self): + """ Loads the model from disk + + If the predict function is to be called and the model cannot be found in the model folder + then an error is logged and the process exits. + + When loading the model, the plugin model folder is scanned for custom layers which are + added to Keras' custom objects. + + Returns + ------- + :class:`keras.models.Model` + The saved model loaded from disk + """ + logger.debug("Loading model: %s", self._filename) + if self._is_predict and not self.model_exists: + logger.error("Model could not be found in folder '%s'. Exiting", self._model_dir) + sys.exit(1) + + model = load_model(self._filename, compile=False) + logger.info("Loaded model from disk: '%s'", self._filename) + return model + + def _save(self): + """ Backup and save the model and state file. + + Notes + ----- + The backup function actually backups the model from the previous save iteration rather than + the current save iteration. This is not a bug, but protection against long save times, as + models can get quite large, so renaming the current model file rather than copying it can + save substantial amount of time. 
+ """ logger.debug("Backing up and saving models") - # Insert a new line to avoid spamming the same row as loss output - print("") - save_averages = self.get_save_averages() - backup_func = self.backup.backup_model if self.should_backup(save_averages) else None - if backup_func: - logger.info("Backing up models...") - executor = futures.ThreadPoolExecutor() - save_threads = [executor.submit(network.save, backup_func=backup_func) - for network in self.networks.values()] - save_threads.append(executor.submit(self.state.save, backup_func=backup_func)) - futures.wait(save_threads) - # call result() to capture errors - _ = [thread.result() for thread in save_threads] + print("") # Insert a new line to avoid spamming the same row as loss output + save_averages = self._get_save_averages() + if save_averages and self._should_backup(save_averages): + self._backup.backup_model(self._filename) + # pylint:disable=protected-access + self._backup.backup_model(self._plugin.state._filename) + + self._plugin.model.save(self._filename, include_optimizer=False) + self._plugin.state.save() + msg = "[Saved models]" if save_averages: - lossmsg = ["{}_{}: {:.5f}".format(self.state.loss_names[side][0], - side.capitalize(), - save_averages[side]) - for side in sorted(list(save_averages.keys()))] - msg += " - Average since last save: {}".format(", ".join(lossmsg)) + lossmsg = ["face_{}: {:.5f}".format(side, avg) + for side, avg in zip(("a", "b"), save_averages)] + msg += " - Average loss since last save: {}".format(", ".join(lossmsg)) logger.info(msg) - def get_save_averages(self): + def _get_save_averages(self): """ Return the average loss since the last save iteration and reset historical loss """ logger.debug("Getting save averages") - avgs = dict() - for side, loss in self.history.items(): - if not loss: - logger.debug("No loss in self.history: %s", side) - break - avgs[side] = sum(loss) / len(loss) - self.history[side] = list() # Reset historical loss - logger.debug("Average losses 
since last save: %s", avgs) - return avgs - - def should_backup(self, save_averages): - """ Check whether the loss averages for all losses is the lowest that has been seen. - - This protects against model corruption by only backing up the model - if any of the loss values have fallen. - TODO This is not a perfect system. If the model corrupts on save_iteration - 1 - then model may still backup + if not all(loss for loss in self._history): + logger.debug("No loss in history") + retval = [] + else: + retval = [sum(loss) / len(loss) for loss in self._history] + self._history = [[], []] # Reset historical loss + logger.debug("Average losses since last save: %s", retval) + return retval + + def _should_backup(self, save_averages): + """ Check whether the loss averages for this save iteration is the lowest that has been + seen. + + This protects against model corruption by only backing up the model if both sides have + seen a total fall in loss. + + Notes + ----- + This is by no means a perfect system. If the model corrupts at an iteration close + to a save iteration, then the averages may still be pushed lower than a previous + save average, resulting in backing up a corrupted model. + + Parameters + ---------- + save_averages: list + The average loss for each side for this save iteration """ backup = True + for side, loss in zip(("a", "b"), save_averages): + if not self._plugin.state.lowest_avg_loss.get(side, None): + logger.debug("Set initial save iteration loss average for '%s': %s", side, loss) + self._plugin.state.lowest_avg_loss[side] = loss + continue + backup = loss < self._plugin.state.lowest_avg_loss[side] if backup else backup - if not save_averages: - logger.debug("No save averages. 
Not backing up") - return False + if backup: # Update lowest loss values to the state file + # pylint:disable=unnecessary-comprehension + old_avgs = {key: val for key, val in self._plugin.state.lowest_avg_loss.items()} + self._plugin.state.lowest_avg_loss["a"] = save_averages[0] + self._plugin.state.lowest_avg_loss["b"] = save_averages[1] + logger.debug("Updated lowest historical save iteration averages from: %s to: %s", + old_avgs, self._plugin.state.lowest_avg_loss) - for side, loss in save_averages.items(): - if not self.state.lowest_avg_loss.get(side, None): - logger.debug("Setting initial save iteration loss average for '%s': %s", - side, loss) - self.state.lowest_avg_loss[side] = loss - continue - if backup: - # Only run this if backup is true. All losses must have dropped for a valid backup - backup = self.check_loss_drop(side, loss) + logger.debug("Should backup: %s", backup) + return backup - logger.debug("Lowest historical save iteration loss average: %s", - self.state.lowest_avg_loss) + def _snapshot(self): + """ Perform a model snapshot. - if backup: # Update lowest loss values to the state - for side, avg_loss in save_averages.items(): - logger.debug("Updating lowest save iteration average for '%s': %s", side, avg_loss) - self.state.lowest_avg_loss[side] = avg_loss + Notes + ----- + Snapshot function is called 1 iteration after the model was saved, so that it is built from + the latest save, hence iteration being reduced by 1. + """ + logger.debug("Performing snapshot. 
Iterations: %s", self._plugin.iterations) + self._backup.snapshot_models(self._plugin.iterations - 1) + logger.debug("Performed snapshot") - logger.debug("Backing up: %s", backup) - return backup - def check_loss_drop(self, side, avg): - """ Check whether total loss has dropped since lowest loss """ - if avg < self.state.lowest_avg_loss[side]: - logger.debug("Loss for '%s' has dropped", side) - return True - logger.debug("Loss for '%s' has not dropped", side) - return False - - def rename_legacy(self): - """ Legacy Original, LowMem and IAE models had inconsistent naming conventions - Rename them if they are found and update """ - legacy_mapping = {"iae": [("IAE_decoder.h5", "iae_decoder.h5"), - ("IAE_encoder.h5", "iae_encoder.h5"), - ("IAE_inter_A.h5", "iae_intermediate_A.h5"), - ("IAE_inter_B.h5", "iae_intermediate_B.h5"), - ("IAE_inter_both.h5", "iae_inter.h5")], - "original": [("encoder.h5", "original_encoder.h5"), - ("decoder_A.h5", "original_decoder_A.h5"), - ("decoder_B.h5", "original_decoder_B.h5"), - ("lowmem_encoder.h5", "original_encoder.h5"), - ("lowmem_decoder_A.h5", "original_decoder_A.h5"), - ("lowmem_decoder_B.h5", "original_decoder_B.h5")]} - if self.name not in legacy_mapping.keys(): - return - logger.debug("Renaming legacy files") +class _Settings(): + """ Tensorflow core training settings. - set_lowmem = False - updated = False - for old_name, new_name in legacy_mapping[self.name]: - old_path = os.path.join(str(self.model_dir), old_name) - new_path = os.path.join(str(self.model_dir), new_name) - if os.path.exists(old_path) and not os.path.exists(new_path): - logger.info("Updating legacy model name from: '%s' to '%s'", old_name, new_name) - os.rename(old_path, new_path) - if old_name.startswith("lowmem"): - set_lowmem = True - updated = True + Sets backend tensorflow settings prior to launching the model. 
- if not updated: - logger.debug("No legacy files to rename") - return + Tensorflow 2 uses distribution strategies for multi-GPU/system training. These are context + managers. To enable the code to be more readable, we handle strategies the same way for Nvidia + and AMD backends. PlaidML does not support strategies, but we need to still create a context + manager so that we don't need branching logic. - self.is_legacy = True - logger.debug("Creating state file for legacy model") - self.state.inputs = {"face:0": [64, 64, 3]} - self.state.training_size = 256 - self.state.config["coverage"] = 62.5 - self.state.config["reflect_padding"] = False - self.state.config["mask_type"] = None - self.state.config["mask_blur_kernel"] = 3 - self.state.config["mask_threshold"] = 4 - self.state.config["learn_mask"] = False - self.state.config["lowmem"] = False - self.encoder_dim = 1024 - - if set_lowmem: - logger.debug("Setting encoder_dim and lowmem flag for legacy lowmem model") - self.encoder_dim = 512 - self.state.config["lowmem"] = True - - self.state.replace_config(self.config_changeable_items) - self.state.save() - - -class VRAMSavings(): - """ VRAM Saving training methods """ - def __init__(self, pingpong, optimizer_savings, memory_saving_gradients): - logger.debug("Initializing %s: (pingpong: %s, optimizer_savings: %s, " - "memory_saving_gradients: %s)", self.__class__.__name__, - pingpong, optimizer_savings, memory_saving_gradients) - self.is_plaidml = keras.backend.backend() == "plaidml.keras.backend" - self.pingpong = self.set_pingpong(pingpong) - self.optimizer_savings = self.set_optimizer_savings(optimizer_savings) - self.memory_saving_gradients = self.set_gradient_type(memory_saving_gradients) - logger.debug("Initialized: %s", self.__class__.__name__) + Parameters + ---------- + arguments: :class:`argparse.Namespace` + The arguments that were passed to the train or convert process as generated from + Faceswap's command line arguments + allow_growth: bool + ``True`` if 
the Tensorflow allow_growth parameter should be set otherwise ``False`` + is_predict: bool, optional + ``True`` if the model is being loaded for inference, ``False`` if the model is being loaded + for training. Default: ``False`` + """ + def __init__(self, arguments, allow_growth, is_predict): + logger.debug("Initializing %s: (arguments: %s, allow_growth: %s, is_predict: %s)", + self.__class__.__name__, arguments, allow_growth, is_predict) + self._set_tf_settings(allow_growth, arguments.exclude_gpus) + + use_mixed_precision = not is_predict and arguments.mixed_precision + if use_mixed_precision: + self._mixed_precision = tf.keras.mixed_precision.experimental + else: + self._mixed_precision = None - def set_pingpong(self, pingpong): - """ Disable pingpong for plaidML users """ - if pingpong and self.is_plaidml: - logger.warning("Pingpong training not supported on plaidML. Disabling") - pingpong = False - logger.debug("pingpong: %s", pingpong) - if pingpong: - logger.info("Using Pingpong Training") - return pingpong - - def set_optimizer_savings(self, optimizer_savings): - """ Disable optimizer savings for plaidML users """ - if optimizer_savings and self.is_plaidml == "plaidml.keras.backend": - logger.warning("Optimizer Savings not supported on plaidML. Disabling") - optimizer_savings = False - logger.debug("optimizer_savings: %s", optimizer_savings) - if optimizer_savings: - logger.info("Using Optimizer Savings") - return optimizer_savings - - def set_gradient_type(self, memory_saving_gradients): - """ Monkey-patch Memory Saving Gradients if requested """ - if memory_saving_gradients and self.is_plaidml: - logger.warning("Memory Saving Gradients not supported on plaidML. 
Disabling") - memory_saving_gradients = False - logger.debug("memory_saving_gradients: %s", memory_saving_gradients) - if memory_saving_gradients: - logger.info("Using Memory Saving Gradients") - from lib.model import memory_saving_gradients - K.__dict__["gradients"] = memory_saving_gradients.gradients_memory - return memory_saving_gradients - - -class Loss(): - """ Holds loss names and functions for an Autoencoder """ - def __init__(self, inputs, outputs): - logger.debug("Initializing %s: (inputs: %s, outputs: %s)", - self.__class__.__name__, inputs, outputs) - self.inputs = inputs - self.outputs = outputs - self.names = self.get_loss_names() - self.funcs = self.get_loss_functions() - if len(self.names) > 1: - self.names.insert(0, "total_loss") - logger.debug("Initialized: %s", self.__class__.__name__) + self._use_mixed_precision = self._set_keras_mixed_precision(use_mixed_precision, + bool(arguments.exclude_gpus)) - @property - def loss_dict(self): - """ Return the loss dict """ - loss_dict = dict(mae=losses.mean_absolute_error, - mse=losses.mean_squared_error, - logcosh=losses.logcosh, - smooth_loss=generalized_loss, - l_inf_norm=l_inf_norm, - ssim=DSSIMObjective(), - gmsd=gmsd_loss, - pixel_gradient_diff=gradient_loss) - return loss_dict + distributed = False if not hasattr(arguments, "distributed") else arguments.distributed + self._strategy = self._get_strategy(distributed) + logger.debug("Initialized %s", self.__class__.__name__) @property - def config(self): - """ Return the global _CONFIG variable """ - return _CONFIG + def use_strategy(self): + """ bool: ``True`` if a distribution strategy is to be used otherwise ``False``. 
""" + return self._strategy is not None @property - def mask_preprocessing_func(self): - """ The selected pre-processing function for the mask """ - retval = None - if self.config.get("mask_blur", False): - retval = gaussian_blur(max(1, self.mask_shape[1] // 32)) - logger.debug(retval) - return retval + def use_mixed_precision(self): + """ bool: ``True`` if mixed precision training has been enabled, otherwise ``False``. """ + return self._use_mixed_precision @property - def selected_loss(self): - """ Return the selected loss function """ - retval = self.loss_dict[self.config.get("loss_function", "mae")] - logger.debug(retval) + def LossScaleOptimizer(self): # pylint:disable=invalid-name + """ :class:`tf.keras.mixed_precision.experimental.LossScaleOptimizer`: Shortcut to the loss + scale optimizer for mixed precision training. """ + return self._mixed_precision.LossScaleOptimizer + + @classmethod + def _set_tf_settings(cls, allow_growth, exclude_devices): + """ Specify Devices to place operations on and Allow TensorFlow to manage VRAM growth. + + Enables the Tensorflow allow_growth option if requested in the command line arguments + + Parameters + ---------- + allow_growth: bool + ``True`` if the Tensorflow allow_growth parameter should be set otherwise ``False`` + exclude_devices: list or ``None`` + List of GPU device indices that should not be made available to Tensorflow. 
Pass + ``None`` if all devices should be made available + """ + if get_backend() == "amd": + return # No settings for AMD + if get_backend() == "cpu": + logger.verbose("Hiding GPUs from Tensorflow") + tf.config.set_visible_devices([], "GPU") + return + + if not exclude_devices and not allow_growth: + logger.debug("Not setting any specific Tensorflow settings") + return + + gpus = tf.config.list_physical_devices('GPU') + if exclude_devices: + gpus = [gpu for idx, gpu in enumerate(gpus) if idx not in exclude_devices] + logger.debug("Filtering devices to: %s", gpus) + tf.config.set_visible_devices(gpus, "GPU") + + if allow_growth: + logger.debug("Setting Tensorflow 'allow_growth' option") + for gpu in gpus: + logger.info("Setting allow growth for GPU: %s", gpu) + tf.config.experimental.set_memory_growth(gpu, True) + logger.debug("Set Tensorflow 'allow_growth' option") + + def _set_keras_mixed_precision(self, use_mixed_precision, skip_check): + """ Enable the Keras experimental Mixed Precision API. + + Enables the Keras experimental Mixed Precision API if requested in the user configuration + file. + + Parameters + ---------- + use_mixed_precision: bool + ``True`` if experimental mixed precision support should be enabled for Nvidia GPUs + otherwise ``False``. + skip_check: bool + ``True`` if the mixed precision compatibility check should be skipped, otherwise + ``False``. + + There is a bug in Tensorflow that will cause a failure if + "set_visible_devices" has been set and mixed_precision is enabled. Specifically in + :file:`tensorflow.python.keras.mixed_precision.experimental.device_compatibility_check` + + From doc-string: "if list_local_devices() and tf.config.set_visible_devices() are both + called, TensorFlow will crash. However, GPU names and compute capabilities cannot be + checked without list_local_devices(). + + To get around this, we hack in to set a global parameter to indicate the test has + already been performed. 
This is likely to cause some issues, but not as many as + guaranteed failure when limiting GPU devices + """ + logger.debug("use_mixed_precision: %s, skip_check: %s", use_mixed_precision, skip_check) + if get_backend() != "nvidia" or not use_mixed_precision: + logger.debug("Not enabling 'mixed_precision' (backend: %s, use_mixed_precision: %s)", + get_backend(), use_mixed_precision) + return False + logger.info("Enabling Mixed Precision Training.") + + if skip_check: + # TODO remove this hacky fix to disable mixed precision compatibility testing if/when + # fixed upstream. + # pylint:disable=import-outside-toplevel,protected-access + from tensorflow.python.keras.mixed_precision.experimental import \ + device_compatibility_check + logger.debug("Overriding tensorflow _logged_compatibility_check parameter. Initial " + "value: %s", device_compatibility_check._logged_compatibility_check) + device_compatibility_check._logged_compatibility_check = True + logger.debug("New value: %s", device_compatibility_check._logged_compatibility_check) + + policy = self._mixed_precision.Policy('mixed_float16') + self._mixed_precision.set_policy(policy) + logger.debug("Enabled mixed precision. (Compute dtype: %s, variable_dtype: %s)", + policy.compute_dtype, policy.variable_dtype) + return True + + @classmethod + def _get_strategy(cls, distributed): + """ If we are running on Nvidia backend and the strategy is not `"default"` then return + the correct tensorflow distribution strategy, otherwise return ``None``. + + Notes + ----- + By default Tensorflow defaults mirrored strategy to use the Nvidia NCCL method for + reductions, however this is only available in Linux, so the method used falls back to + `Hierarchical Copy All Reduce` if the OS is not Linux. + + Parameters + ---------- + distributed: bool + ``True`` if Tensorflow mirrored strategy should be used for multiple GPU training. + ``False`` if the default strategy should be used. 
+ + Returns + ------- + :class:`tensorflow.python.distribute.Strategy` or `None` + The request Tensorflow Strategy if the backend is Nvidia and the strategy is not + `"Default"` otherwise ``None`` + """ + if get_backend() != "nvidia": + retval = None + elif distributed: + if platform.system().lower() == "linux": + cross_device_ops = tf.distribute.NcclAllReduce() + else: + cross_device_ops = tf.distribute.HierarchicalCopyAllReduce() + logger.debug("cross_device_ops: %s", cross_device_ops) + retval = tf.distribute.MirroredStrategy(cross_device_ops=cross_device_ops) + else: + retval = tf.distribute.get_strategy() + logger.debug("Using strategy: %s", retval) return retval - @property - def selected_mask_loss(self): - """ Return the selected mask loss function. Currently returns mse - If a processing function has been requested wrap the loss function - in loss wrapper """ - loss_func = self.loss_dict["mse"] - func = self.mask_preprocessing_func - logger.debug("loss_func: %s, func: %s", loss_func, func) - retval = mask_loss_wrapper(loss_func, preprocessing_func=func) + def strategy_scope(self): + """ Return the strategy scope if we have set a strategy, otherwise return a null + context. + + Returns + ------- + :func:`tensorflow.python.distribute.Strategy.scope` or :func:`contextlib.nullcontext` + The tensorflow strategy scope if a strategy is valid in the current scenario. A null + context manager if the strategy is not valid in the current scenario + """ + retval = nullcontext() if self._strategy is None else self._strategy.scope() + logger.debug("Using strategy scope: %s", retval) return retval + +class _Loss(): + """ Holds loss names and functions for an Autoencoder. 
+ + Parameters + ---------- + inputs: list + A list of input tensors to the model in the order ("a", "b") + outputs: list + A list of output tensors to the model in the order ("a", "b") + """ + def __init__(self, inputs, outputs): + logger.debug("Initializing %s: (inputs: %s, outputs: %s)", + self.__class__.__name__, inputs, outputs) + self._loss_dict = dict(mae=k_losses.mean_absolute_error, + mse=k_losses.mean_squared_error, + logcosh=k_losses.logcosh, + smooth_loss=losses.GeneralizedLoss(), + l_inf_norm=losses.LInfNorm(), + ssim=losses.DSSIMObjective(), + gmsd=losses.GMSDLoss(), + pixel_gradient_diff=losses.GradientLoss()) + self._inputs = inputs + self._names = self._get_loss_names(outputs) + self._funcs = self._get_loss_functions() + self._names.insert(0, "total") + logger.debug("Initialized: %s", self.__class__.__name__) + @property - def output_shapes(self): - """ The shapes of the output nodes """ - return [K.int_shape(output)[1:] for output in self.outputs] + def names(self): + """ list: The list of loss names for the model. """ + return self._names @property - def mask_input(self): - """ Return the mask input or None """ - mask_inputs = [inp for inp in self.inputs if inp.name.startswith("mask")] - if not mask_inputs: - return None - return mask_inputs[0] + def functions(self): + """ list: The list of loss functions for the model. 
""" + return self._funcs @property - def mask_shape(self): - """ Return the mask shape """ - if self.mask_input is None: - return None - return K.int_shape(self.mask_input)[1:] - - def get_loss_names(self): - """ Return the loss names based on model output """ - output_names = [output.name for output in self.outputs] - logger.debug("Model output names: %s", output_names) - loss_names = [name[name.find("/") + 1:name.rfind("/")].replace("_out", "") - for name in output_names] - if not all(name.startswith("face") or name.startswith("mask") for name in loss_names): - # Handle incorrectly named/legacy outputs - logger.debug("Renaming loss names from: %s", loss_names) - loss_names = self.update_loss_names() - loss_names = ["{}_loss".format(name) for name in loss_names] - logger.debug(loss_names) - return loss_names - - def update_loss_names(self): - """ Update loss names if named incorrectly or legacy model """ - output_types = ["mask" if shape[-1] == 1 else "face" for shape in self.output_shapes] - loss_names = ["{}{}".format(name, - "" if output_types.count(name) == 1 else "_{}".format(idx)) - for idx, name in enumerate(output_types)] - logger.debug("Renamed loss names to: %s", loss_names) - return loss_names - - def get_loss_functions(self): - """ Set the loss function """ - loss_funcs = [] - for idx, loss_name in enumerate(self.names): - if loss_name.startswith("mask"): - loss_funcs.append(self.selected_mask_loss) - elif self.config["penalized_mask_loss"] and self.config["mask_type"] is not None: - face_size = self.output_shapes[idx][1] - mask_size = self.mask_shape[1] - scaling = face_size / mask_size - logger.debug("face_size: %s mask_size: %s, mask_scaling: %s", - face_size, mask_size, scaling) - loss_funcs.append(PenalizedLoss(self.mask_input, self.selected_loss, - mask_scaling=scaling, - preprocessing_func=self.mask_preprocessing_func)) - else: - loss_funcs.append(self.selected_loss) - logger.debug("%s: %s", loss_name, loss_funcs[-1]) - logger.debug(loss_funcs) 
- return loss_funcs + def _config(self): + """ :dict: The configuration options for this plugin """ + return _CONFIG + @property + def _selected_mask_loss(self): + """ :func:`keras.losses.Loss`: The selected mask loss function. Currently returns mean + standard error as the default function. """ + loss_func = self._loss_dict["mse"] + logger.debug("loss_func: %s", loss_func) + return loss_func -class NNMeta(): - """ Class to hold a neural network and it's meta data + @property + def _mask_inputs(self): + """ list: The list of input tensors to the model that contain the mask. Returns ``None`` + if there is no mask input to the model. """ + mask_inputs = [inp for inp in self._inputs if inp.name.startswith("mask")] + return None if not mask_inputs else mask_inputs - filename: The full path and filename of the model file for this network. - type: The type of network. For networks that can be swapped - The type should be identical for the corresponding - A and B networks, and should be unique for every A/B pair. - Otherwise the type should be completely unique. - side: A, B or None. Used to identify which networks can - be swapped. - network: Define network to this. - is_output: Set to True to indicate that this network is an output to the Autoencoder - """ + @property + def _mask_shapes(self): + """ list: The list of shape tuples for the mask input tensors for the model. Returns + ``None`` if there is no mask input. 
""" + if self._mask_inputs is None: + return None + return [K.int_shape(mask_input) for mask_input in self._mask_inputs] - def __init__(self, filename, network_type, side, network, is_output): - logger.debug("Initializing %s: (filename: '%s', network_type: '%s', side: '%s', " - "network: %s, is_output: %s", self.__class__.__name__, filename, - network_type, side, network, is_output) - self.filename = filename - self.type = network_type.lower() - self.side = side - self.name = self.set_name() - self.network = network - self.is_output = is_output - self.network.name = self.name - self.config = network.get_config() # For pingpong restore - self.weights = network.get_weights() # For pingpong restore - logger.debug("Initialized %s", self.__class__.__name__) + @classmethod + def _get_loss_names(cls, outputs): + """ Name the losses based on model output - @property - def output_shapes(self): - """ Return the output shapes from the stored network """ - return [K.int_shape(output) for output in self.network.outputs] + Notes + ----- + TODO Currently there is an issue in Tensorflow that wraps all outputs in an Identity layer + when running in Eager Execution mode, which means we cannot use the name of the output + layers to name the losses (https://github.com/tensorflow/tensorflow/issues/32180). 
+ With this in mind, losses are named based on their shapes - def set_name(self): - """ Set the network name """ - name = self.type - if self.side: - name += "_{}".format(self.side) - return name + Parameters + ---------- + outputs: list + A list of output tensors from the model plugin - @property - def output_names(self): - """ Return output node names """ - output_names = [output.name for output in self.network.outputs] - if self.is_output and not any(name.startswith("face_out") for name in output_names): - # Saved models break if their layer names are changed, so dummy - # in correct output names for legacy models - output_names = self.get_output_names() - return output_names - - def get_output_names(self): - """ Return the output names based on number of channels and instances """ - output_types = ["mask_out" if K.int_shape(output)[-1] == 1 else "face_out" - for output in self.network.outputs] - output_names = ["{}{}".format(name, - "" if output_types.count(name) == 1 else "_{}".format(idx)) - for idx, name in enumerate(output_types)] - logger.debug("Overridden output_names: %s", output_names) - return output_names - - def load(self, fullpath=None): - """ Load model """ - fullpath = fullpath if fullpath else self.filename - logger.debug("Loading model: '%s'", fullpath) - try: - network = load_model(self.filename, custom_objects=get_custom_objects()) - except ValueError as err: - if str(err).lower().startswith("cannot create group in read only mode"): - self.convert_legacy_weights() - return True - logger.warning("Failed loading existing training data. Generating new models") - logger.debug("Exception: %s", str(err)) - return False - except OSError as err: # pylint: disable=broad-except - logger.warning("Failed loading existing training data. 
Generating new models") - logger.debug("Exception: %s", str(err)) - return False - self.config = network.get_config() - self.network = network # Update network with saved model - self.network.name = self.name - return True + Returns + ------- + list + A list of names for the losses to be applied to the model + """ + # TODO Use output names if/when these are fixed upstream + split_outputs = [outputs[:len(outputs) // 2], outputs[len(outputs) // 2:]] + retval = [] + for side, side_output in zip(("a", "b"), split_outputs): + output_names = [output.name for output in side_output] + output_shapes = [K.int_shape(output)[1:] for output in side_output] + output_types = ["mask" if shape[-1] == 1 else "face" for shape in output_shapes] + logger.debug("side: %s, output names: %s, output_shapes: %s, output_types: %s", + side, output_names, output_shapes, output_types) + retval.extend(["{}_{}{}".format(name, side, + "" if output_types.count(name) == 1 + else "_{}".format(idx)) + for idx, name in enumerate(output_types)]) + logger.debug(retval) + return retval - def save(self, fullpath=None, backup_func=None): - """ Save model """ - fullpath = fullpath if fullpath else self.filename - if backup_func: - backup_func(fullpath) - logger.debug("Saving model: '%s'", fullpath) - self.weights = self.network.get_weights() - self.network.save(fullpath) + def _get_loss_functions(self): + """ Set the loss functions. 
- def convert_legacy_weights(self): - """ Convert legacy weights files to hold the model topology """ - logger.info("Adding model topology to legacy weights file: '%s'", self.filename) - self.network.load_weights(self.filename) - self.save(backup_func=None) - self.network.name = self.type + Returns + ------- + list + A list of loss functions to apply to the model + """ + selected_loss = self._loss_dict[self._config.get("loss_function", "mae")] + loss_funcs = [] + for name in self._names: + if name.startswith("mask"): + loss_funcs.append(self._selected_mask_loss) + elif self._config["penalized_mask_loss"]: + loss_funcs.append(losses.PenalizedLoss(selected_loss)) + else: + loss_funcs.append(selected_loss) + logger.debug("%s: %s", name, loss_funcs[-1]) + logger.debug(loss_funcs) + return loss_funcs class State(): - """ Class to hold the model's current state and autoencoder structure """ - def __init__(self, model_dir, model_name, config_changeable_items, - no_logs, pingpong, training_image_size): + """ Holds state information relating to the plugin's saved model. 
+ + Parameters + ---------- + model_dir: str + The full path to the model save location + model_name: str + The name of the model plugin + config_changeable_items: dict + Configuration options that can be altered when resuming a model, and their current values + no_logs: bool + ``True`` if Tensorboard logs should not be generated, otherwise ``False`` + training_image_size: int + The size of the training images in the training folder + """ + def __init__(self, + model_dir, + model_name, + config_changeable_items, + no_logs, + training_image_size): logger.debug("Initializing %s: (model_dir: '%s', model_name: '%s', " - "config_changeable_items: '%s', no_logs: %s, pingpong: %s, " - "training_image_size: '%s'", self.__class__.__name__, model_dir, model_name, - config_changeable_items, no_logs, pingpong, training_image_size) - self.serializer = get_serializer("json") - filename = "{}_state.{}".format(model_name, self.serializer.file_extension) - self.filename = str(model_dir / filename) - self.name = model_name - self.iterations = 0 - self.session_iterations = 0 - self.training_size = training_image_size - self.sessions = dict() - self.lowest_avg_loss = dict() - self.inputs = dict() - self.config = dict() - self.load(config_changeable_items) - self.session_id = self.new_session_id() - self.create_new_session(no_logs, pingpong, config_changeable_items) + "config_changeable_items: '%s', no_logs: %s, training_image_size: '%s'", + self.__class__.__name__, model_dir, model_name, config_changeable_items, + no_logs, training_image_size) + self._serializer = get_serializer("json") + filename = "{}_state.{}".format(model_name, self._serializer.file_extension) + self._filename = os.path.join(model_dir, filename) + self._name = model_name + self._iterations = 0 + self._training_size = training_image_size + self._sessions = dict() + self._lowest_avg_loss = dict() + self._config = dict() + self._load(config_changeable_items) + self._session_id = self._new_session_id() + 
self._create_new_session(no_logs, config_changeable_items) logger.debug("Initialized %s:", self.__class__.__name__) @property - def face_shapes(self): - """ Return a list of stored face shape inputs """ - return [tuple(val) for key, val in self.inputs.items() if key.startswith("face")] + def loss_names(self): + """ list: The loss names for the current session """ + return self._sessions[self._session_id]["loss_names"] @property - def mask_shapes(self): - """ Return a list of stored mask shape inputs """ - return [tuple(val) for key, val in self.inputs.items() if key.startswith("mask")] + def current_session(self): + """ dict: The state dictionary for the current :attr:`session_id`. """ + return self._sessions[self._session_id] @property - def loss_names(self): - """ Return the loss names for this session """ - return self.sessions[self.session_id]["loss_names"] + def iterations(self): + """ int: The total number of iterations that the model has trained. """ + return self._iterations @property - def current_session(self): - """ Return the current session dict """ - return self.sessions[self.session_id] + def training_size(self): + """ int: The size of the training images in the training folder. """ + return self._training_size @property - def first_run(self): - """ Return True if this is the first run else False """ - return self.session_id == 1 + def lowest_avg_loss(self): + """dict: The lowest average save interval loss seen for each side. """ + return self._lowest_avg_loss - def new_session_id(self): - """ Return new session_id """ - if not self.sessions: + @property + def session_id(self): + """ int: The current training session id. """ + return self._session_id + + def _new_session_id(self): + """ Generate a new session id. Returns 1 if this is a new model, or the last session id + 1 + if it is a pre-existing model. 
+ + Returns + ------- + int + The newly generated session id + """ + if not self._sessions: session_id = 1 else: - session_id = max(int(key) for key in self.sessions.keys()) + 1 + session_id = max(int(key) for key in self._sessions.keys()) + 1 logger.debug(session_id) return session_id - def create_new_session(self, no_logs, pingpong, config_changeable_items): - """ Create a new session """ - logger.debug("Creating new session. id: %s", self.session_id) - self.sessions[self.session_id] = {"timestamp": time.time(), - "no_logs": no_logs, - "pingpong": pingpong, - "loss_names": dict(), - "batchsize": 0, - "iterations": 0, - "config": config_changeable_items} - - def add_session_loss_names(self, side, loss_names): - """ Add the session loss names to the sessions dictionary """ - logger.debug("Adding session loss_names. (side: '%s', loss_names: %s", side, loss_names) - self.sessions[self.session_id]["loss_names"][side] = loss_names - - def add_session_batchsize(self, batchsize): - """ Add the session batchsize to the sessions dictionary """ - logger.debug("Adding session batchsize: %s", batchsize) - self.sessions[self.session_id]["batchsize"] = batchsize + def _create_new_session(self, no_logs, config_changeable_items): + """ Initialize a new session, creating the dictionary entry for the session in + :attr:`_sessions`. + + Parameters + ---------- + no_logs: bool + ``True`` if Tensorboard logs should not be generated, otherwise ``False`` + config_changeable_items: dict + Configuration options that can be altered when resuming a model, and their current + values + """ + logger.debug("Creating new session. id: %s", self._session_id) + self._sessions[self._session_id] = dict(timestamp=time.time(), + no_logs=no_logs, + loss_names=[], + batchsize=0, + iterations=0, + config=config_changeable_items) + + def add_session_loss_names(self, loss_names): + """ Add the session loss names to the sessions dictionary. 
+ + The loss names are used for Tensorboard logging + + Parameters + ---------- + loss_names: list + The list of loss names for this session. + """ + logger.debug("Adding session loss_names: %s", loss_names) + self._sessions[self._session_id]["loss_names"] = loss_names + + def add_session_batchsize(self, batch_size): + """ Add the session batch size to the sessions dictionary. + + Parameters + ---------- + batch_size: int + The batch size for the current training session + """ + logger.debug("Adding session batch size: %s", batch_size) + self._sessions[self._session_id]["batchsize"] = batch_size def increment_iterations(self): - """ Increment total and session iterations """ - self.iterations += 1 - self.sessions[self.session_id]["iterations"] += 1 + """ Increment :attr:`iterations` and session iterations by 1. """ + self._iterations += 1 + self._sessions[self._session_id]["iterations"] += 1 + + def _load(self, config_changeable_items): + """ Load a state file and set the serialized values to the class instance. - def load(self, config_changeable_items): - """ Load state file """ + Updates the model's config with the values stored in the state file. + + Parameters + ---------- + config_changeable_items: dict + Configuration options that can be altered when resuming a model, and their current + values + """ logger.debug("Loading State") - if not os.path.exists(self.filename): + if not os.path.exists(self._filename): logger.info("No existing state file found. 
Generating.") return - state = self.serializer.load(self.filename) - self.name = state.get("name", self.name) - self.sessions = state.get("sessions", dict()) - self.lowest_avg_loss = state.get("lowest_avg_loss", dict()) - self.iterations = state.get("iterations", 0) - self.training_size = state.get("training_size", 256) - self.inputs = state.get("inputs", dict()) - self.config = state.get("config", dict()) + state = self._serializer.load(self._filename) + self._name = state.get("name", self._name) + self._sessions = state.get("sessions", dict()) + self._lowest_avg_loss = state.get("lowest_avg_loss", dict()) + self._iterations = state.get("iterations", 0) + self._training_size = state.get("training_size", 256) + self._config = state.get("config", dict()) logger.debug("Loaded state: %s", state) - self.replace_config(config_changeable_items) + self._replace_config(config_changeable_items) - def save(self, backup_func=None): - """ Save iteration number to state file """ + def save(self): + """ Save the state values to the serialized state file. """ logger.debug("Saving State") - if backup_func: - backup_func(self.filename) - state = {"name": self.name, - "sessions": self.sessions, - "lowest_avg_loss": self.lowest_avg_loss, - "iterations": self.iterations, - "inputs": self.inputs, - "training_size": self.training_size, + state = {"name": self._name, + "sessions": self._sessions, + "lowest_avg_loss": self._lowest_avg_loss, + "iterations": self._iterations, + "training_size": self._training_size, "config": _CONFIG} - self.serializer.save(self.filename, state) + self._serializer.save(self._filename, state) logger.debug("Saved State") - def replace_config(self, config_changeable_items): - """ Replace the loaded config with the one contained within the state file - Check for any fixed=False parameters changes and log info changes + def _replace_config(self, config_changeable_items): + """ Replace the loaded config with the one contained within the state file. 
+ + Check for any `fixed`=``False`` parameter changes and log info changes. + + Update any legacy config items to their current versions. + + Parameters + ---------- + config_changeable_items: dict + Configuration options that can be altered when resuming a model, and their current + values """ global _CONFIG # pylint: disable=global-statement legacy_update = self._update_legacy_config() # Add any new items to state config for legacy purposes for key, val in _CONFIG.items(): - if key not in self.config.keys(): + if key not in self._config.keys(): logger.info("Adding new config item to state file: '%s': '%s'", key, val) - self.config[key] = val - self.update_changed_config_items(config_changeable_items) + self._config[key] = val + self._update_changed_config_items(config_changeable_items) logger.debug("Replacing config. Old config: %s", _CONFIG) - _CONFIG = self.config + _CONFIG = self._config if legacy_update: self.save() logger.debug("Replaced config. New config: %s", _CONFIG) @@ -1026,47 +1159,250 @@ def _update_legacy_config(self): new_items = ["loss_function", "learn_mask", "mask_type"] updated = False for old, new in zip(priors, new_items): - if old not in self.config: + if old not in self._config: logger.debug("Legacy item '%s' not in config. Skipping update", old) continue # dssim_loss > loss_function if old == "dssim_loss": - self.config[new] = "ssim" if self.config[old] else "mae" - del self.config[old] + self._config[new] = "ssim" if self._config[old] else "mae" + del self._config[old] updated = True logger.info("Updated config from legacy dssim format. 
New config loss " - "function: '%s'", self.config[new]) + "function: '%s'", self._config[new]) continue # Add learn mask option and set to True if model has "penalized_mask_loss" specified - if old == "mask_type" and new == "learn_mask" and new not in self.config: - self.config[new] = self.config["mask_type"] is not None + if old == "mask_type" and new == "learn_mask" and new not in self._config: + self._config[new] = self._config["mask_type"] is not None updated = True logger.info("Added new 'learn_mask' config item for this model. Value set to: %s", - self.config[new]) + self._config[new]) continue # Replace removed masks with most similar equivalent - if old == "mask_type" and new == "mask_type" and self.config[old] in ("facehull", - "dfl_full"): - old_mask = self.config[old] - self.config[new] = "components" + if old == "mask_type" and new == "mask_type" and self._config[old] in ("facehull", + "dfl_full"): + old_mask = self._config[old] + self._config[new] = "components" updated = True logger.info("Updated 'mask_type' from '%s' to '%s' for this model", - old_mask, self.config[new]) + old_mask, self._config[new]) logger.debug("State file updated for legacy config: %s", updated) return updated - def update_changed_config_items(self, config_changeable_items): - """ Update any parameters which are not fixed and have been changed """ + def _update_changed_config_items(self, config_changeable_items): + """ Update any parameters which are not fixed and have been changed. 
+ + Parameters + ---------- + config_changeable_items: dict + Configuration options that can be altered when resuming a model, and their current + values + """ if not config_changeable_items: logger.debug("No changeable parameters have been updated") return for key, val in config_changeable_items.items(): - old_val = self.config[key] + old_val = self._config[key] if old_val == val: continue - self.config[key] = val + self._config[key] = val logger.info("Config item: '%s' has been updated from '%s' to '%s'", key, old_val, val) + + +class _Inference(): # pylint:disable=too-few-public-methods + """ Calculates required layers and compiles a saved model for inference. + + Parameters + ---------- + saved_model: :class:`keras.models.Model` + The saved trained Faceswap model + switch_sides: bool + ``True`` if the swap should be performed "B" > "A" ``False`` if the swap should be + "A" > "B" + """ + def __init__(self, saved_model, switch_sides): + logger.debug("Initializing: %s (saved_model: %s, switch_sides: %s)", + self.__class__.__name__, saved_model, switch_sides) + self._config = saved_model.get_config() + input_idx = 1 if switch_sides else 0 + self._output_idx = 0 if switch_sides else 1 + self._input_names = set(self._filter_node(self._config["input_layers"][input_idx])) + + self._inputs = self._get_inputs(saved_model.inputs, input_idx) + self._outputs_dropout = self._get_outputs_dropout() + self._model = self._make_inference_model(saved_model) + logger.debug("Initialized: %s", self.__class__.__name__) + + @property + def model(self): + """ :class:`keras.models.Model`: The Faceswap model, compiled for inference. 
""" + return self._model + + @classmethod + def _filter_node(cls, node): + """ Given in input list of nodes from a :attr:`keras.models.Model.get_config` dictionary, + filters the information out and unravels the dictionary into a more usable format + + Parameters + ---------- + node: list + A node entry from the :attr:`keras.models.Model.get_config` dictionary + + Returns + ------- + list + A squeezed list with only the layer name entries remaining + """ + retval = np.array(node)[..., 0].squeeze().tolist() + return retval if isinstance(retval, list) else [retval] + + @classmethod + def _get_inputs(cls, inputs, input_index): + """ Obtain the inputs for the requested swap direction. + + Parameters + ---------- + inputs: list + The full list of input tensors to the saved faceswap training model + input_index: int + The input index for the requested swap direction + + Returns + ------- + list + List of input tensors to feed the model for the requested swap direction + """ + input_split = len(inputs) // 2 + start_idx = input_split * input_index + retval = inputs[start_idx: start_idx + input_split] + logger.debug("model inputs: %s, input_split: %s, start_idx: %s, inference_inputs: %s", + inputs, input_split, start_idx, retval) + return retval + + def _get_outputs_dropout(self): + """ Obtain the output layer names from the full model that will not be used for inference. 
+ + Returns + ------- + set + The output layer names from the saved Faceswap model that are not used for inference + for the requested swap direction + """ + outputs = self._config["output_layers"] + if get_backend() == "amd": + outputs = [outputs[:len(outputs) // 2], outputs[len(outputs) // 2:]] + side_outputs = set(self._filter_node(outputs)[self._output_idx]) + logger.debug("model outputs: %s, side_outputs: %s", outputs, side_outputs) + outputs_all = {layer + for side in self._filter_node(outputs) + for layer in side} + retval = outputs_all.difference(side_outputs) + logger.debug("outputs dropout: %s", retval) + return retval + + def _make_inference_model(self, saved_model): + """ Extract the sub-models from the saved model that are required for inference. + + Parameters + ---------- + saved_model: :class:`keras.models.Model` + The saved trained Faceswap model + + Returns + ------- + :class:`keras.models.Model` + The model compiled for inference + """ + logger.debug("Compiling inference model. 
saved_model: %s", saved_model) + struct = self._get_filtered_structure() + required_layers = self._get_required_layers(struct) + logger.debug("Compiling model") + layer_dict = {layer.name: layer for layer in saved_model.layers} + compiled_layers = dict() + for name, inbound in struct.items(): + if name not in required_layers: + logger.debug("Skipping unused layer: '%s'", name) + continue + layer = layer_dict[name] + logger.debug("Processing layer '%s': (layer: %s, inbound_nodes: %s)", + name, layer, inbound) + if not inbound: + logger.debug("Adding model inputs %s: %s", self._input_names, self._inputs) + model = layer(self._inputs) + else: + layer_inputs = [compiled_layers[inp] for inp in inbound] + logger.debug("Compiling layer '%s': layer inputs: %s", name, layer_inputs) + model = layer(layer_inputs) + compiled_layers[name] = model + retval = KerasModel(self._inputs, model, name="{}_inference".format(saved_model.name)) + logger.debug("Compiled inference model '%s': %s", retval.name, retval) + return retval + + def _get_filtered_structure(self): + """ Obtain the structure of the full model, filtering out inbound nodes and + layers that are not required for the requested swap destination. + + Input layers to the full model are not returned in the structure. + + Returns + ------- + :class:`collections.OrderedDict` + The layer name as key with the inbound node layer names for each layer as value. 
+ """ + retval = OrderedDict() + for layer in self._config["layers"]: + name = layer["name"] + if not layer["inbound_nodes"]: + logger.debug("Skipping input layer: '%s'", name) + continue + inbound = self._filter_node(layer["inbound_nodes"]) + + if self._input_names.intersection(inbound): + # Strip the input inbound nodes for applying the correct input layer at compile + # time + logger.debug("Stripping inbound nodes for input '%s': %s", name, inbound) + inbound = "" + + if inbound and np.array(layer["inbound_nodes"]).shape[0] == 2: + # if inbound is not populated, then layer is already split at input + logger.debug("Filtering layer with split inbound nodes: '%s': %s", name, inbound) + inbound = inbound[self._output_idx] + inbound = inbound if isinstance(inbound, list) else [inbound] + logger.debug("Filtered inbound nodes for layer '%s': %s", name, inbound) + if name in self._outputs_dropout: + logger.debug("Dropping output layer '%s'", name) + continue + retval[name] = inbound + logger.debug("Model structure: %s", retval) + return retval + + @classmethod + def _get_required_layers(cls, filtered_structure): + """ Parse through the filtered model structure in reverse order to get the required layers + from the faceswap model for creating an inference model. 
+ + Parameters + ---------- + filtered_structure: :class:`OrderedDict` + The full model structure with unused inbound nodes and layers removed + + Returns + ------- + set + The layers from the saved model that are required to build the inference model + """ + retval = set() + for idx, (name, inbound) in enumerate(reversed(filtered_structure.items())): + if idx == 0: + logger.debug("Adding output layer: '%s'", name) + retval.add(name) + if idx != 0 and name not in retval: + logger.debug("Skipping unused layer: '%s'", name) + continue + logger.debug("Adding inbound layers: %s", inbound) + retval.update(inbound) + logger.debug("Required layers: %s", retval) + return retval diff --git a/plugins/train/model/dfaker.py b/plugins/train/model/dfaker.py index d6b19be395..298ede4e03 100644 --- a/plugins/train/model/dfaker.py +++ b/plugins/train/model/dfaker.py @@ -5,51 +5,40 @@ from keras.initializers import RandomNormal from keras.layers import Input -from keras.models import Model as KerasModel -from .original import logger, Model as OriginalModel +from lib.model.nn_blocks import Conv2DOutput, UpscaleBlock, ResidualBlock +from .original import Model as OriginalModel, KerasModel class Model(OriginalModel): - """ Improved Autoeencoder Model """ + """ Dfaker Model """ def __init__(self, *args, **kwargs): - logger.debug("Initializing %s: (args: %s, kwargs: %s", - self.__class__.__name__, args, kwargs) - kwargs["input_shape"] = (64, 64, 3) - kwargs["encoder_dim"] = 1024 - self.kernel_initializer = RandomNormal(0, 0.02) super().__init__(*args, **kwargs) - logger.debug("Initialized %s", self.__class__.__name__) + self.input_shape = (64, 64, 3) + self.encoder_dim = 1024 + self.kernel_initializer = RandomNormal(0, 0.02) - def decoder(self): + def decoder(self, side): """ Decoder Network """ input_ = Input(shape=(8, 8, 512)) var_x = input_ - var_x = self.blocks.upscale(var_x, 512, res_block_follows=True) - var_x = self.blocks.res_block(var_x, 512, 
kernel_initializer=self.kernel_initializer) - var_x = self.blocks.upscale(var_x, 256, res_block_follows=True) - var_x = self.blocks.res_block(var_x, 256, kernel_initializer=self.kernel_initializer) - var_x = self.blocks.upscale(var_x, 128, res_block_follows=True) - var_x = self.blocks.res_block(var_x, 128, kernel_initializer=self.kernel_initializer) - var_x = self.blocks.upscale(var_x, 64) - var_x = self.blocks.conv2d(var_x, 3, - kernel_size=5, - padding="same", - activation="sigmoid", - name="face_out") + var_x = UpscaleBlock(512, res_block_follows=True)(var_x) + var_x = ResidualBlock(512, kernel_initializer=self.kernel_initializer)(var_x) + var_x = UpscaleBlock(256, res_block_follows=True)(var_x) + var_x = ResidualBlock(256, kernel_initializer=self.kernel_initializer)(var_x) + var_x = UpscaleBlock(128, res_block_follows=True)(var_x) + var_x = ResidualBlock(128, kernel_initializer=self.kernel_initializer)(var_x) + var_x = UpscaleBlock(64)(var_x) + var_x = Conv2DOutput(3, 5, name="face_out_{}".format(side))(var_x) outputs = [var_x] if self.config.get("learn_mask", False): var_y = input_ - var_y = self.blocks.upscale(var_y, 512) - var_y = self.blocks.upscale(var_y, 256) - var_y = self.blocks.upscale(var_y, 128) - var_y = self.blocks.upscale(var_y, 64) - var_y = self.blocks.conv2d(var_y, 1, - kernel_size=5, - padding="same", - activation="sigmoid", - name="mask_out") + var_y = UpscaleBlock(512)(var_y) + var_y = UpscaleBlock(256)(var_y) + var_y = UpscaleBlock(128)(var_y) + var_y = UpscaleBlock(64)(var_y) + var_y = Conv2DOutput(1, 5, name="mask_out_{}".format(side))(var_y) outputs.append(var_y) - return KerasModel([input_], outputs=outputs) + return KerasModel([input_], outputs=outputs, name="decoder_{}".format(side)) diff --git a/plugins/train/model/dfl_h128.py b/plugins/train/model/dfl_h128.py index 887d379937..3afcf408c4 100644 --- a/plugins/train/model/dfl_h128.py +++ b/plugins/train/model/dfl_h128.py @@ -1,65 +1,49 @@ #!/usr/bin/env python3 -""" DeepFakesLab H128 
Model +""" DeepFaceLab H128 Model Based on https://github.com/iperov/DeepFaceLab """ from keras.layers import Dense, Flatten, Input, Reshape -from keras.models import Model as KerasModel -from .original import logger, Model as OriginalModel +from lib.model.nn_blocks import Conv2DOutput, Conv2DBlock, UpscaleBlock +from .original import Model as OriginalModel, KerasModel class Model(OriginalModel): - """ Low Memory version of Original Faceswap Model """ + """ H128 Model from DFL """ def __init__(self, *args, **kwargs): - logger.debug("Initializing %s: (args: %s, kwargs: %s", - self.__class__.__name__, args, kwargs) - - self.configfile = kwargs.get("configfile", None) - kwargs["input_shape"] = (128, 128, 3) - kwargs["encoder_dim"] = 256 if self.config["lowmem"] else 512 - super().__init__(*args, **kwargs) - logger.debug("Initialized %s", self.__class__.__name__) + self.input_shape = (128, 128, 3) + self.encoder_dim = 256 if self.config["lowmem"] else 512 def encoder(self): """ DFL H128 Encoder """ input_ = Input(shape=self.input_shape) - var_x = input_ - var_x = self.blocks.conv(var_x, 128) - var_x = self.blocks.conv(var_x, 256) - var_x = self.blocks.conv(var_x, 512) - var_x = self.blocks.conv(var_x, 1024) + var_x = Conv2DBlock(128)(input_) + var_x = Conv2DBlock(256)(var_x) + var_x = Conv2DBlock(512)(var_x) + var_x = Conv2DBlock(1024)(var_x) var_x = Dense(self.encoder_dim)(Flatten()(var_x)) var_x = Dense(8 * 8 * self.encoder_dim)(var_x) var_x = Reshape((8, 8, self.encoder_dim))(var_x) - var_x = self.blocks.upscale(var_x, self.encoder_dim) + var_x = UpscaleBlock(self.encoder_dim)(var_x) return KerasModel(input_, var_x) - def decoder(self): + def decoder(self, side): """ DFL H128 Decoder """ input_ = Input(shape=(16, 16, self.encoder_dim)) - # Face var_x = input_ - var_x = self.blocks.upscale(var_x, self.encoder_dim) - var_x = self.blocks.upscale(var_x, self.encoder_dim // 2) - var_x = self.blocks.upscale(var_x, self.encoder_dim // 4) - var_x = self.blocks.conv2d(var_x, 
3, - kernel_size=5, - padding="same", - activation="sigmoid", - name="face_out") + var_x = UpscaleBlock(self.encoder_dim)(var_x) + var_x = UpscaleBlock(self.encoder_dim // 2)(var_x) + var_x = UpscaleBlock(self.encoder_dim // 4)(var_x) + var_x = Conv2DOutput(3, 5, name="face_out_{}".format(side))(var_x) outputs = [var_x] if self.config.get("learn_mask", False): var_y = input_ - var_y = self.blocks.upscale(var_y, self.encoder_dim) - var_y = self.blocks.upscale(var_y, self.encoder_dim // 2) - var_y = self.blocks.upscale(var_y, self.encoder_dim // 4) - var_y = self.blocks.conv2d(var_y, 1, - kernel_size=5, - padding="same", - activation="sigmoid", - name="mask_out") + var_y = UpscaleBlock(self.encoder_dim)(var_y) + var_y = UpscaleBlock(self.encoder_dim // 2)(var_y) + var_y = UpscaleBlock(self.encoder_dim // 4)(var_y) + var_y = Conv2DOutput(1, 5, name="mask_out_{}".format(side))(var_y) outputs.append(var_y) - return KerasModel(input_, outputs=outputs) + return KerasModel(input_, outputs=outputs, name="decoder_{}".format(side)) diff --git a/plugins/train/model/dfl_sae.py b/plugins/train/model/dfl_sae.py index 4d2212125d..ba8a5f761a 100644 --- a/plugins/train/model/dfl_sae.py +++ b/plugins/train/model/dfl_sae.py @@ -1,37 +1,27 @@ #!/usr/bin/env python3 -""" DeepFakesLab SAE Model +""" DeepFaceLab SAE Model Based on https://github.com/iperov/DeepFaceLab """ import numpy as np from keras.layers import Concatenate, Dense, Flatten, Input, Reshape -from keras.models import Model as KerasModel -from ._base import ModelBase, logger +from lib.model.nn_blocks import Conv2DOutput, Conv2DBlock, ResidualBlock, UpscaleBlock + +from ._base import ModelBase, KerasModel class Model(ModelBase): - """ Low Memory version of Original Faceswap Model """ + """ SAE Model from DFL """ def __init__(self, *args, **kwargs): - logger.debug("Initializing %s: (args: %s, kwargs: %s", - self.__class__.__name__, args, kwargs) - - self.configfile = kwargs.get("configfile", None) - kwargs["input_shape"] = 
(self.config["input_size"], self.config["input_size"], 3) - super().__init__(*args, **kwargs) - logger.debug("Initialized %s", self.__class__.__name__) - - @property - def architecture(self): - """ Return the architecture used from config """ - return self.config["architecture"].lower() - - @property - def use_mask(self): - """ Return True if a mask has been set else false """ - return self.config.get("learn_mask", False) + self.input_shape = (self.config["input_size"], self.config["input_size"], 3) + self.architecture = self.config["architecture"].lower() + self.use_mask = self.config.get("learn_mask", False) + self.multiscale_count = 3 if self.config["multiscale_decoder"] else 1 + self.encoder_dim = self.config["encoder_dims"] + self.decoder_dim = self.config["decoder_dims"] @property def ae_dims(self): @@ -41,148 +31,110 @@ def ae_dims(self): retval = 256 if self.architecture == "liae" else 512 return retval - @property - def multiscale_count(self): - """ Return 3 if multiscale decoder is set else 1 """ - retval = 3 if self.config["multiscale_decoder"] else 1 - return retval + def build_model(self, inputs): + """ Build the DFL-SAE Model """ + encoder = getattr(self, "encoder_{}".format(self.architecture))() + enc_output_shape = encoder.output_shape[1:] + encoder_a = encoder(inputs[0]) + encoder_b = encoder(inputs[1]) - def add_networks(self): - """ Add the DFL SAE Networks """ - logger.debug("Adding networks") - # Encoder - self.add_network("encoder", None, getattr(self, "encoder_{}".format(self.architecture))()) - - # Intermediate if self.architecture == "liae": - self.add_network("intermediate", "b", self.inter_liae()) - self.add_network("intermediate", None, self.inter_liae()) - - # Decoder - decoder_sides = [None] if self.architecture == "liae" else ["a", "b"] - for side in decoder_sides: - self.add_network("decoder", side, self.decoder(), is_output=True) - logger.debug("Added networks") - - def build_autoencoders(self, inputs): - """ Initialize DFL SAE 
model """ - logger.debug("Initializing model") - getattr(self, "build_{}_autoencoder".format(self.architecture))(inputs) - logger.debug("Initialized model") - - def build_liae_autoencoder(self, inputs): - """ Build the LIAE Autoencoder """ - for side in ("a", "b"): - encoder = self.networks["encoder"].network(inputs[0]) - if side == "a": - intermediate = Concatenate()([self.networks["intermediate"].network(encoder), - self.networks["intermediate"].network(encoder)]) - else: - intermediate = Concatenate()([self.networks["intermediate_b"].network(encoder), - self.networks["intermediate"].network(encoder)]) - output = self.networks["decoder"].network(intermediate) - autoencoder = KerasModel(inputs, output) - self.add_predictor(side, autoencoder) - - def build_df_autoencoder(self, inputs): - """ Build the DF Autoencoder """ - for side in ("a", "b"): - logger.debug("Adding Autoencoder. Side: %s", side) - decoder = self.networks["decoder_{}".format(side)].network - output = decoder(self.networks["encoder"].network(inputs[0])) - autoencoder = KerasModel(inputs, output) - self.add_predictor(side, autoencoder) + inter_both = self.inter_liae("both", enc_output_shape) + int_output_shape = (np.array(inter_both.output_shape[1:]) * (1, 1, 2)).tolist() + + inter_a = Concatenate()([inter_both(encoder_a), inter_both(encoder_a)]) + inter_b = Concatenate()([self.inter_liae("b", enc_output_shape)(encoder_b), + inter_both(encoder_b)]) + + decoder = self.decoder("both", int_output_shape) + outputs = [decoder(inter_a), decoder(inter_b)] + else: + outputs = [self.decoder("a", enc_output_shape)(encoder_a), + self.decoder("b", enc_output_shape)(encoder_b)] + autoencoder = KerasModel(inputs, + outputs, + name="{}_{}".format(self.name, self.architecture)) + return autoencoder def encoder_df(self): """ DFL SAE DF Encoder Network""" input_ = Input(shape=self.input_shape) - dims = self.input_shape[-1] * self.config["encoder_dims"] + dims = self.input_shape[-1] * self.encoder_dim lowest_dense_res 
= self.input_shape[0] // 16 - var_x = input_ - var_x = self.blocks.conv(var_x, dims) - var_x = self.blocks.conv(var_x, dims * 2) - var_x = self.blocks.conv(var_x, dims * 4) - var_x = self.blocks.conv(var_x, dims * 8) + var_x = Conv2DBlock(dims)(input_) + var_x = Conv2DBlock(dims * 2)(var_x) + var_x = Conv2DBlock(dims * 4)(var_x) + var_x = Conv2DBlock(dims * 8)(var_x) var_x = Dense(self.ae_dims)(Flatten()(var_x)) var_x = Dense(lowest_dense_res * lowest_dense_res * self.ae_dims)(var_x) var_x = Reshape((lowest_dense_res, lowest_dense_res, self.ae_dims))(var_x) - var_x = self.blocks.upscale(var_x, self.ae_dims) - return KerasModel(input_, var_x) + var_x = UpscaleBlock(self.ae_dims)(var_x) + return KerasModel(input_, var_x, name="encoder_df") def encoder_liae(self): """ DFL SAE LIAE Encoder Network """ input_ = Input(shape=self.input_shape) - dims = self.input_shape[-1] * self.config["encoder_dims"] - var_x = input_ - var_x = self.blocks.conv(var_x, dims) - var_x = self.blocks.conv(var_x, dims * 2) - var_x = self.blocks.conv(var_x, dims * 4) - var_x = self.blocks.conv(var_x, dims * 8) + dims = self.input_shape[-1] * self.encoder_dim + var_x = Conv2DBlock(dims)(input_) + var_x = Conv2DBlock(dims * 2)(var_x) + var_x = Conv2DBlock(dims * 4)(var_x) + var_x = Conv2DBlock(dims * 8)(var_x) var_x = Flatten()(var_x) - return KerasModel(input_, var_x) + return KerasModel(input_, var_x, name="encoder_liae") - def inter_liae(self): + def inter_liae(self, side, input_shape): """ DFL SAE LIAE Intermediate Network """ - input_ = Input(shape=self.networks["encoder"].output_shapes[0][1:]) + input_ = Input(shape=input_shape) lowest_dense_res = self.input_shape[0] // 16 var_x = input_ var_x = Dense(self.ae_dims)(var_x) var_x = Dense(lowest_dense_res * lowest_dense_res * self.ae_dims * 2)(var_x) var_x = Reshape((lowest_dense_res, lowest_dense_res, self.ae_dims * 2))(var_x) - var_x = self.blocks.upscale(var_x, self.ae_dims * 2) - return KerasModel(input_, var_x) + var_x = 
UpscaleBlock(self.ae_dims * 2)(var_x) + return KerasModel(input_, var_x, name="intermediate_{}".format(side)) - def decoder(self): + def decoder(self, side, input_shape): """ DFL SAE Decoder Network""" - if self.architecture == "liae": - input_shape = np.array(self.networks["intermediate"].output_shapes[0][1:]) * (1, 1, 2) - else: - input_shape = self.networks["encoder"].output_shapes[0][1:] input_ = Input(shape=input_shape) - outputs = list() + outputs = [] - dims = self.input_shape[-1] * self.config["decoder_dims"] + dims = self.input_shape[-1] * self.decoder_dim var_x = input_ - var_x1 = self.blocks.upscale(var_x, dims * 8, res_block_follows=True) - var_x1 = self.blocks.res_block(var_x1, dims * 8) - var_x1 = self.blocks.res_block(var_x1, dims * 8) + var_x1 = UpscaleBlock(dims * 8, res_block_follows=True)(var_x) + var_x1 = ResidualBlock(dims * 8)(var_x1) + var_x1 = ResidualBlock(dims * 8)(var_x1) if self.multiscale_count >= 3: - outputs.append(self.blocks.conv2d(var_x1, 3, - kernel_size=5, - padding="same", - activation="sigmoid", - name="face_out_32")) - - var_x2 = self.blocks.upscale(var_x1, dims * 4, res_block_follows=True) - var_x2 = self.blocks.res_block(var_x2, dims * 4) - var_x2 = self.blocks.res_block(var_x2, dims * 4) + outputs.append(Conv2DOutput(3, 5, name="face_out_32_{}".format(side))(var_x1)) + + var_x2 = UpscaleBlock(dims * 4, res_block_follows=True)(var_x1) + var_x2 = ResidualBlock(dims * 4)(var_x2) + var_x2 = ResidualBlock(dims * 4)(var_x2) if self.multiscale_count >= 2: - outputs.append(self.blocks.conv2d(var_x2, 3, - kernel_size=5, - padding="same", - activation="sigmoid", - name="face_out_64")) - - var_x3 = self.blocks.upscale(var_x2, dims * 2, res_block_follows=True) - var_x3 = self.blocks.res_block(var_x3, dims * 2) - var_x3 = self.blocks.res_block(var_x3, dims * 2) - - outputs.append(self.blocks.conv2d(var_x3, 3, - kernel_size=5, - padding="same", - activation="sigmoid", - name="face_out_128")) + outputs.append(Conv2DOutput(3, 5, 
name="face_out_64_{}".format(side))(var_x2)) + + var_x3 = UpscaleBlock(dims * 2, res_block_follows=True)(var_x2) + var_x3 = ResidualBlock(dims * 2)(var_x3) + var_x3 = ResidualBlock(dims * 2)(var_x3) + + outputs.append(Conv2DOutput(3, 5, name="face_out_128_{}".format(side))(var_x3)) if self.use_mask: var_y = input_ - var_y = self.blocks.upscale(var_y, self.config["decoder_dims"] * 8) - var_y = self.blocks.upscale(var_y, self.config["decoder_dims"] * 4) - var_y = self.blocks.upscale(var_y, self.config["decoder_dims"] * 2) - var_y = self.blocks.conv2d(var_y, 1, - kernel_size=5, - padding="same", - activation="sigmoid", - name="mask_out") + var_y = UpscaleBlock(self.decoder_dim * 8)(var_y) + var_y = UpscaleBlock(self.decoder_dim * 4)(var_y) + var_y = UpscaleBlock(self.decoder_dim * 2)(var_y) + var_y = Conv2DOutput(1, 5, name="mask_out_{}".format(side))(var_y) outputs.append(var_y) - return KerasModel(input_, outputs=outputs) + return KerasModel(input_, outputs=outputs, name="decoder_{}".format(side)) + + def _legacy_mapping(self): + """ The mapping of legacy separate model names to single model names """ + mappings = dict(df={"{}_encoder.h5".format(self.name): "encoder_df", + "{}_decoder_A.h5".format(self.name): "decoder_a", + "{}_decoder_B.h5".format(self.name): "decoder_b"}, + liae={"{}_encoder.h5".format(self.name): "encoder_liae", + "{}_intermediate_B.h5".format(self.name): "intermediate_both", + "{}_intermediate.h5".format(self.name): "intermediate_b", + "{}_decoder.h5".format(self.name): "decoder_both"}) + return mappings[self.config["architecture"]] diff --git a/plugins/train/model/dlight.py b/plugins/train/model/dlight.py index 3b9fc9b346..b68595c8f2 100644 --- a/plugins/train/model/dlight.py +++ b/plugins/train/model/dlight.py @@ -8,53 +8,31 @@ DeepHomage for lots of testing """ -from keras.layers import Dense, Flatten, Input, Reshape, AveragePooling2D, LeakyReLU -from keras.layers import UpSampling2D -from keras.layers.core import Dropout -from 
keras.layers.merge import Concatenate -from keras.layers.normalization import BatchNormalization -from keras.models import Model as KerasModel +from keras.layers import (AveragePooling2D, BatchNormalization, Concatenate, Dense, Dropout, + Flatten, Input, Reshape, LeakyReLU, UpSampling2D) +from lib.model.nn_blocks import (Conv2DOutput, Conv2DBlock, ResidualBlock, UpscaleBlock, + Upscale2xBlock) from lib.utils import FaceswapError -from ._base import logger -from .original import Model as OriginalModel +from ._base import ModelBase, KerasModel, logger -class Model(OriginalModel): - """ DeLight Autoencoder Model """ +class Model(ModelBase): + """ DLight Autoencoder Model """ def __init__(self, *args, **kwargs): - logger.debug("Initializing %s: (args: %s, kwargs: %s", - self.__class__.__name__, args, kwargs) - - kwargs["input_shape"] = (128, 128, 3) - kwargs["encoder_dim"] = -1 - self.dense_output = None - self.detail_level = None - self.features = None - self.encoder_filters = None - self.encoder_dim = None - self.details = None - self.upscale_ratio = None super().__init__(*args, **kwargs) + self.input_shape = (128, 128, 3) - logger.debug("Initialized %s", self.__class__.__name__) - - def _detail_level_setup(self): - logger.debug('self.config[output_size]: %d', self.config["output_size"]) self.features = dict(lowmem=0, fair=1, best=2)[self.config["features"]] - logger.debug('self.features: %d', self.features) self.encoder_filters = 64 if self.features > 0 else 48 - logger.debug('self.encoder_filters: %d', self.encoder_filters) + bonum_fortunam = 128 self.encoder_dim = {0: 512 + bonum_fortunam, 1: 1024 + bonum_fortunam, 2: 1536 + bonum_fortunam}[self.features] - logger.debug('self.encoder_dim: %d', self.encoder_dim) self.details = dict(fast=0, good=1)[self.config["details"]] - logger.debug('self.details: %d', self.details) - try: self.upscale_ratio = {128: 2, 256: 4, @@ -62,71 +40,50 @@ def _detail_level_setup(self): except KeyError: logger.error("Config error: 
output_size must be one of: 128, 256, or 384.") raise FaceswapError("Config error: output_size must be one of: 128, 256, or 384.") - logger.debug('output_size: %r', self.config["output_size"]) - logger.debug('self.upscale_ratio: %r', self.upscale_ratio) - - def build(self): - self._detail_level_setup() - super().build() - - def add_networks(self): - """ Add the DeLight model weights """ - logger.debug("Adding networks") - self.add_network("decoder", "a", self.decoder_a(), is_output=True) - self.add_network("decoder", "b", - self.decoder_b() if self.details > 0 else self.decoder_b_fast(), - is_output=True) - self.add_network("encoder", None, self.encoder()) - logger.debug("Added networks") - - def compile_predictors(self, **kwargs): # pylint: disable=arguments-differ - self.set_networks_trainable() - super().compile_predictors(**kwargs) - - def set_networks_trainable(self): - """ Set the network state to trainable """ - train_encoder = True - train_decoder_a = True - train_decoder_b = True - - encoder = self.networks['encoder'].network - for layer in encoder.layers: - layer.trainable = train_encoder - - decoder_a = self.networks['decoder_a'].network - for layer in decoder_a.layers: - layer.trainable = train_decoder_a - - decoder_b = self.networks['decoder_b'].network - for layer in decoder_b.layers: - layer.trainable = train_decoder_b + + logger.debug("output_size: %s, features: %s, encoder_filters: %s, encoder_dim: %s, " + " details: %s, upscale_ratio: %s", self.config["output_size"], self.features, + self.encoder_filters, self.encoder_dim, self.details, self.upscale_ratio) + + def build_model(self, inputs): + """ Build the Dlight Model. 
""" + encoder = self.encoder() + encoder_a = encoder(inputs[0]) + encoder_b = encoder(inputs[1]) + + decoder_b = self.decoder_b if self.details > 0 else self.decoder_b_fast + + outputs = [self.decoder_a()(encoder_a), decoder_b()(encoder_b)] + + autoencoder = KerasModel(inputs, outputs, name=self.name) + return autoencoder def encoder(self): """ DeLight Encoder Network """ input_ = Input(shape=self.input_shape) var_x = input_ - var_x1 = self.blocks.conv(var_x, self.encoder_filters // 2) + var_x1 = Conv2DBlock(self.encoder_filters // 2)(var_x) var_x2 = AveragePooling2D()(var_x) var_x2 = LeakyReLU(0.1)(var_x2) var_x = Concatenate()([var_x1, var_x2]) - var_x1 = self.blocks.conv(var_x, self.encoder_filters) + var_x1 = Conv2DBlock(self.encoder_filters)(var_x) var_x2 = AveragePooling2D()(var_x) var_x2 = LeakyReLU(0.1)(var_x2) var_x = Concatenate()([var_x1, var_x2]) - var_x1 = self.blocks.conv(var_x, self.encoder_filters * 2) + var_x1 = Conv2DBlock(self.encoder_filters * 2)(var_x) var_x2 = AveragePooling2D()(var_x) var_x2 = LeakyReLU(0.1)(var_x2) var_x = Concatenate()([var_x1, var_x2]) - var_x1 = self.blocks.conv(var_x, self.encoder_filters * 4) + var_x1 = Conv2DBlock(self.encoder_filters * 4)(var_x) var_x2 = AveragePooling2D()(var_x) var_x2 = LeakyReLU(0.1)(var_x2) var_x = Concatenate()([var_x1, var_x2]) - var_x1 = self.blocks.conv(var_x, self.encoder_filters * 8) + var_x1 = Conv2DBlock(self.encoder_filters * 8)(var_x) var_x2 = AveragePooling2D()(var_x) var_x2 = LeakyReLU(0.1)(var_x2) var_x = Concatenate()([var_x1, var_x2]) @@ -137,7 +94,7 @@ def encoder(self): var_x = Dropout(0.05)(var_x) var_x = Reshape((4, 4, 1024))(var_x) - return KerasModel(input_, var_x) + return KerasModel(input_, var_x, name="encoder") def decoder_a(self): """ DeLight Decoder A(old face) Network """ @@ -149,29 +106,27 @@ def decoder_a(self): var_xy = UpSampling2D(self.upscale_ratio, interpolation='bilinear')(var_xy) var_x = var_xy - var_x = self.blocks.upscale2x(var_x, decoder_a_complexity, 
fast=False) - var_x = self.blocks.upscale2x(var_x, decoder_a_complexity // 2, fast=False) - var_x = self.blocks.upscale2x(var_x, decoder_a_complexity // 4, fast=False) - var_x = self.blocks.upscale2x(var_x, decoder_a_complexity // 8, fast=False) + var_x = Upscale2xBlock(decoder_a_complexity, fast=False)(var_x) + var_x = Upscale2xBlock(decoder_a_complexity // 2, fast=False)(var_x) + var_x = Upscale2xBlock(decoder_a_complexity // 4, fast=False)(var_x) + var_x = Upscale2xBlock(decoder_a_complexity // 8, fast=False)(var_x) - var_x = self.blocks.conv2d(var_x, 3, kernel_size=5, padding="same", - activation="sigmoid", name="face_out") + var_x = Conv2DOutput(3, 5, name="face_out")(var_x) outputs = [var_x] if self.config.get("learn_mask", False): var_y = var_xy # mask decoder - var_y = self.blocks.upscale2x(var_y, mask_complexity, fast=False) - var_y = self.blocks.upscale2x(var_y, mask_complexity // 2, fast=False) - var_y = self.blocks.upscale2x(var_y, mask_complexity // 4, fast=False) - var_y = self.blocks.upscale2x(var_y, mask_complexity // 8, fast=False) + var_y = Upscale2xBlock(mask_complexity, fast=False)(var_y) + var_y = Upscale2xBlock(mask_complexity // 2, fast=False)(var_y) + var_y = Upscale2xBlock(mask_complexity // 4, fast=False)(var_y) + var_y = Upscale2xBlock(mask_complexity // 8, fast=False)(var_y) - var_y = self.blocks.conv2d(var_y, 1, kernel_size=5, padding="same", - activation="sigmoid", name="mask_out") + var_y = Conv2DOutput(1, 5, name="mask_out")(var_y) outputs.append(var_y) - return KerasModel([input_], outputs=outputs) + return KerasModel([input_], outputs=outputs, name="decoder_a") def decoder_b_fast(self): """ DeLight Fast Decoder B(new face) Network """ @@ -182,33 +137,31 @@ def decoder_b_fast(self): var_xy = input_ - var_xy = self.blocks.upscale(var_xy, 512, scale_factor=self.upscale_ratio) + var_xy = UpscaleBlock(512, scale_factor=self.upscale_ratio)(var_xy) var_x = var_xy - var_x = self.blocks.upscale2x(var_x, decoder_b_complexity, fast=True) - 
var_x = self.blocks.upscale2x(var_x, decoder_b_complexity // 2, fast=True) - var_x = self.blocks.upscale2x(var_x, decoder_b_complexity // 4, fast=True) - var_x = self.blocks.upscale2x(var_x, decoder_b_complexity // 8, fast=True) + var_x = Upscale2xBlock(decoder_b_complexity, fast=True)(var_x) + var_x = Upscale2xBlock(decoder_b_complexity // 2, fast=True)(var_x) + var_x = Upscale2xBlock(decoder_b_complexity // 4, fast=True)(var_x) + var_x = Upscale2xBlock(decoder_b_complexity // 8, fast=True)(var_x) - var_x = self.blocks.conv2d(var_x, 3, kernel_size=5, padding="same", - activation="sigmoid", name="face_out") + var_x = Conv2DOutput(3, 5, name="face_out")(var_x) outputs = [var_x] if self.config.get("learn_mask", False): var_y = var_xy # mask decoder - var_y = self.blocks.upscale2x(var_y, mask_complexity, fast=False) - var_y = self.blocks.upscale2x(var_y, mask_complexity // 2, fast=False) - var_y = self.blocks.upscale2x(var_y, mask_complexity // 4, fast=False) - var_y = self.blocks.upscale2x(var_y, mask_complexity // 8, fast=False) + var_y = Upscale2xBlock(mask_complexity, fast=False)(var_y) + var_y = Upscale2xBlock(mask_complexity // 2, fast=False)(var_y) + var_y = Upscale2xBlock(mask_complexity // 4, fast=False)(var_y) + var_y = Upscale2xBlock(mask_complexity // 8, fast=False)(var_y) - var_y = self.blocks.conv2d(var_y, 1, kernel_size=5, padding="same", - activation="sigmoid", name="mask_out") + var_y = Conv2DOutput(1, 5, name="mask_out")(var_y) outputs.append(var_y) - return KerasModel([input_], outputs=outputs) + return KerasModel([input_], outputs=outputs, name="decoder_b_fast") def decoder_b(self): """ DeLight Decoder B(new face) Network """ @@ -219,40 +172,45 @@ def decoder_b(self): var_xy = input_ - var_xy = self.blocks.upscale2x(var_xy, 512, scale_factor=self.upscale_ratio, fast=False) + var_xy = Upscale2xBlock(512, scale_factor=self.upscale_ratio, fast=False)(var_xy) var_x = var_xy - var_x = self.blocks.res_block(var_x, 512, use_bias=True) - var_x = 
self.blocks.res_block(var_x, 512, use_bias=False) - var_x = self.blocks.res_block(var_x, 512, use_bias=False) - var_x = self.blocks.upscale2x(var_x, decoder_b_complexity, fast=False) - var_x = self.blocks.res_block(var_x, decoder_b_complexity, use_bias=True) - var_x = self.blocks.res_block(var_x, decoder_b_complexity, use_bias=False) + var_x = ResidualBlock(512, use_bias=True)(var_x) + var_x = ResidualBlock(512, use_bias=False)(var_x) + var_x = ResidualBlock(512, use_bias=False)(var_x) + var_x = Upscale2xBlock(decoder_b_complexity, fast=False)(var_x) + var_x = ResidualBlock(decoder_b_complexity, use_bias=True)(var_x) + var_x = ResidualBlock(decoder_b_complexity, use_bias=False)(var_x) var_x = BatchNormalization()(var_x) - var_x = self.blocks.upscale2x(var_x, decoder_b_complexity // 2, fast=False) - var_x = self.blocks.res_block(var_x, decoder_b_complexity // 2, use_bias=True) - var_x = self.blocks.upscale2x(var_x, decoder_b_complexity // 4, fast=False) - var_x = self.blocks.res_block(var_x, decoder_b_complexity // 4, use_bias=False) + var_x = Upscale2xBlock(decoder_b_complexity // 2, fast=False)(var_x) + var_x = ResidualBlock(decoder_b_complexity // 2, use_bias=True)(var_x) + var_x = Upscale2xBlock(decoder_b_complexity // 4, fast=False)(var_x) + var_x = ResidualBlock(decoder_b_complexity // 4, use_bias=False)(var_x) var_x = BatchNormalization()(var_x) - var_x = self.blocks.upscale2x(var_x, decoder_b_complexity // 8, fast=False) + var_x = Upscale2xBlock(decoder_b_complexity // 8, fast=False)(var_x) - var_x = self.blocks.conv2d(var_x, 3, kernel_size=5, padding="same", - activation="sigmoid", name="face_out") + var_x = Conv2DOutput(3, 5, name="face_out")(var_x) outputs = [var_x] if self.config.get("learn_mask", False): var_y = var_xy # mask decoder - var_y = self.blocks.upscale2x(var_y, mask_complexity, fast=False) - var_y = self.blocks.upscale2x(var_y, mask_complexity // 2, fast=False) - var_y = self.blocks.upscale2x(var_y, mask_complexity // 4, fast=False) - var_y = 
self.blocks.upscale2x(var_y, mask_complexity // 8, fast=False) + var_y = Upscale2xBlock(mask_complexity, fast=False)(var_y) + var_y = Upscale2xBlock(mask_complexity // 2, fast=False)(var_y) + var_y = Upscale2xBlock(mask_complexity // 4, fast=False)(var_y) + var_y = Upscale2xBlock(mask_complexity // 8, fast=False)(var_y) - var_y = self.blocks.conv2d(var_y, 1, kernel_size=5, padding="same", - activation="sigmoid", name="mask_out") + var_y = Conv2DOutput(1, 5, name="mask_out")(var_y) outputs.append(var_y) - return KerasModel([input_], outputs=outputs) + return KerasModel([input_], outputs=outputs, name="decoder_b") + + def _legacy_mapping(self): + """ The mapping of legacy separate model names to single model names """ + decoder_b = "decoder_b" if self.details > 0 else "decoder_b_fast" + return {"{}_encoder.h5".format(self.name): "encoder", + "{}_decoder_A.h5".format(self.name): "decoder_a", + "{}_decoder_B.h5".format(self.name): decoder_b} diff --git a/plugins/train/model/iae.py b/plugins/train/model/iae.py index 775305e1f7..87559313c5 100644 --- a/plugins/train/model/iae.py +++ b/plugins/train/model/iae.py @@ -2,91 +2,79 @@ """ Improved autoencoder for faceswap """ from keras.layers import Concatenate, Dense, Flatten, Input, Reshape -from keras.models import Model as KerasModel -from ._base import ModelBase, logger +from lib.model.nn_blocks import Conv2DOutput, Conv2DBlock, UpscaleBlock +from ._base import ModelBase, KerasModel class Model(ModelBase): - """ Improved Autoeencoder Model """ + """ Improved Autoencoder Model """ def __init__(self, *args, **kwargs): - logger.debug("Initializing %s: (args: %s, kwargs: %s", - self.__class__.__name__, args, kwargs) - kwargs["input_shape"] = (64, 64, 3) - kwargs["encoder_dim"] = 1024 super().__init__(*args, **kwargs) - logger.debug("Initialized %s", self.__class__.__name__) + self.input_shape = (64, 64, 3) + self.encoder_dim = 1024 - def add_networks(self): - """ Add the IAE model weights """ - logger.debug("Adding 
networks") - self.add_network("encoder", None, self.encoder()) - self.add_network("decoder", None, self.decoder(), is_output=True) - self.add_network("intermediate", "a", self.intermediate()) - self.add_network("intermediate", "b", self.intermediate()) - self.add_network("inter", None, self.intermediate()) - logger.debug("Added networks") + def build_model(self, inputs): + """ Build the IAE Model """ + encoder = self.encoder() + decoder = self.decoder() + inter_a = self.intermediate("a") + inter_b = self.intermediate("b") + inter_both = self.intermediate("both") - def build_autoencoders(self, inputs): - """ Initialize IAE model """ - logger.debug("Initializing model") - decoder = self.networks["decoder"].network - encoder = self.networks["encoder"].network - inter_both = self.networks["inter"].network - for side in ("a", "b"): - inter_side = self.networks["intermediate_{}".format(side)].network - output = decoder(Concatenate()([inter_side(encoder(inputs[0])), - inter_both(encoder(inputs[0]))])) + encoder_a = encoder(inputs[0]) + encoder_b = encoder(inputs[1]) - autoencoder = KerasModel(inputs, output) - self.add_predictor(side, autoencoder) - logger.debug("Initialized model") + outputs = [decoder(Concatenate()([inter_a(encoder_a), inter_both(encoder_a)])), + decoder(Concatenate()([inter_b(encoder_b), inter_both(encoder_b)]))] + + autoencoder = KerasModel(inputs, outputs, name=self.name) + return autoencoder def encoder(self): """ Encoder Network """ input_ = Input(shape=self.input_shape) var_x = input_ - var_x = self.blocks.conv(var_x, 128) - var_x = self.blocks.conv(var_x, 256) - var_x = self.blocks.conv(var_x, 512) - var_x = self.blocks.conv(var_x, 1024) + var_x = Conv2DBlock(128)(var_x) + var_x = Conv2DBlock(256)(var_x) + var_x = Conv2DBlock(512)(var_x) + var_x = Conv2DBlock(1024)(var_x) var_x = Flatten()(var_x) - return KerasModel(input_, var_x) + return KerasModel(input_, var_x, name="encoder") - def intermediate(self): + def intermediate(self, side): """ 
Intermediate Network """ - input_ = Input(shape=(None, 4 * 4 * 1024)) - var_x = input_ - var_x = Dense(self.encoder_dim)(var_x) + input_ = Input(shape=(4 * 4 * 1024)) + var_x = Dense(self.encoder_dim)(input_) var_x = Dense(4 * 4 * int(self.encoder_dim/2))(var_x) var_x = Reshape((4, 4, int(self.encoder_dim/2)))(var_x) - return KerasModel(input_, var_x) + return KerasModel(input_, var_x, name="inter_{}".format(side)) def decoder(self): """ Decoder Network """ input_ = Input(shape=(4, 4, self.encoder_dim)) var_x = input_ - var_x = self.blocks.upscale(var_x, 512) - var_x = self.blocks.upscale(var_x, 256) - var_x = self.blocks.upscale(var_x, 128) - var_x = self.blocks.upscale(var_x, 64) - var_x = self.blocks.conv2d(var_x, 3, - kernel_size=5, - padding="same", - activation="sigmoid", - name="face_out") + var_x = UpscaleBlock(512)(var_x) + var_x = UpscaleBlock(256)(var_x) + var_x = UpscaleBlock(128)(var_x) + var_x = UpscaleBlock(64)(var_x) + var_x = Conv2DOutput(3, 5, name="face_out")(var_x) outputs = [var_x] if self.config.get("learn_mask", False): var_y = input_ - var_y = self.blocks.upscale(var_y, 512) - var_y = self.blocks.upscale(var_y, 256) - var_y = self.blocks.upscale(var_y, 128) - var_y = self.blocks.upscale(var_y, 64) - var_y = self.blocks.conv2d(var_y, 1, - kernel_size=5, - padding="same", - activation="sigmoid", - name="mask_out") + var_y = UpscaleBlock(512)(var_y) + var_y = UpscaleBlock(256)(var_y) + var_y = UpscaleBlock(128)(var_y) + var_y = UpscaleBlock(64)(var_y) + var_y = Conv2DOutput(1, 5, name="mask_out")(var_y) outputs.append(var_y) - return KerasModel(input_, outputs=outputs) + return KerasModel(input_, outputs=outputs, name="decoder") + + def _legacy_mapping(self): + """ The mapping of legacy separate model names to single model names """ + return {"{}_encoder.h5".format(self.name): "encoder", + "{}_intermediate_A.h5".format(self.name): "inter_a", + "{}_intermediate_B.h5".format(self.name): "inter_b", + "{}_inter.h5".format(self.name): "inter_both", 
+ "{}_decoder.h5".format(self.name): "decoder"} diff --git a/plugins/train/model/lightweight.py b/plugins/train/model/lightweight.py index 366e2802d2..ae166fccc1 100644 --- a/plugins/train/model/lightweight.py +++ b/plugins/train/model/lightweight.py @@ -1,61 +1,51 @@ #!/usr/bin/env python3 -""" Original Model +""" Lightweight Model by torzdf + An extremely limited model for training on low-end graphics cards Based on the original https://www.reddit.com/r/deepfakes/ - code sample + contribs """ + code sample + contributions """ from keras.layers import Dense, Flatten, Input, Reshape -from keras.models import Model as KerasModel -from .original import logger, Model as OriginalModel +from lib.model.nn_blocks import Conv2DOutput, Conv2DBlock, UpscaleBlock +from .original import Model as OriginalModel, KerasModel class Model(OriginalModel): """ Lightweight Model for ~2GB Graphics Cards """ def __init__(self, *args, **kwargs): - logger.debug("Initializing %s: (args: %s, kwargs: %s", - self.__class__.__name__, args, kwargs) - - kwargs["input_shape"] = (64, 64, 3) - kwargs["encoder_dim"] = 512 super().__init__(*args, **kwargs) - logger.debug("Initialized %s", self.__class__.__name__) + self.encoder_dim = 512 def encoder(self): """ Encoder Network """ input_ = Input(shape=self.input_shape) var_x = input_ - var_x = self.blocks.conv(var_x, 128) - var_x = self.blocks.conv(var_x, 256) - var_x = self.blocks.conv(var_x, 512) + var_x = Conv2DBlock(128)(var_x) + var_x = Conv2DBlock(256)(var_x) + var_x = Conv2DBlock(512)(var_x) var_x = Dense(self.encoder_dim)(Flatten()(var_x)) var_x = Dense(4 * 4 * 512)(var_x) var_x = Reshape((4, 4, 512))(var_x) - var_x = self.blocks.upscale(var_x, 256) - return KerasModel(input_, var_x) + var_x = UpscaleBlock(256)(var_x) + return KerasModel(input_, var_x, name="encoder") - def decoder(self): + def decoder(self, side): """ Decoder Network """ input_ = Input(shape=(8, 8, 256)) var_x = input_ - var_x = self.blocks.upscale(var_x, 512) - var_x = 
self.blocks.upscale(var_x, 256) - var_x = self.blocks.upscale(var_x, 128) - var_x = self.blocks.conv2d(var_x, 3, - kernel_size=5, - padding="same", - activation="sigmoid", - name="face_out") + var_x = UpscaleBlock(512)(var_x) + var_x = UpscaleBlock(256)(var_x) + var_x = UpscaleBlock(128)(var_x) + var_x = Conv2DOutput(3, 5, activation="sigmoid", name="face_out_{}".format(side))(var_x) outputs = [var_x] if self.config.get("learn_mask", False): var_y = input_ - var_y = self.blocks.upscale(var_y, 512) - var_y = self.blocks.upscale(var_y, 256) - var_y = self.blocks.upscale(var_y, 128) - var_y = self.blocks.conv2d(var_y, 1, - kernel_size=5, - padding="same", - activation="sigmoid", - name="mask_out") + var_y = UpscaleBlock(512)(var_y) + var_y = UpscaleBlock(256)(var_y) + var_y = UpscaleBlock(128)(var_y) + var_y = Conv2DOutput(1, 5, + activation="sigmoid", + name="mask_out_{}".format(side))(var_y) outputs.append(var_y) - return KerasModel(input_, outputs=outputs) + return KerasModel(input_, outputs=outputs, name="decoder_{}".format(side)) diff --git a/plugins/train/model/original.py b/plugins/train/model/original.py index fa79862860..6ee13eef56 100644 --- a/plugins/train/model/original.py +++ b/plugins/train/model/original.py @@ -1,87 +1,163 @@ #!/usr/bin/env python3 """ Original Model - Based on the original https://www.reddit.com/r/deepfakes/ - code sample + contribs """ +Based on the original https://www.reddit.com/r/deepfakes/ code sample + contributions. -from keras.layers import Dense, Flatten, Input, Reshape +This model is heavily documented as it acts as a template that other model plugins can be developed +from. 
+""" +from keras.layers import Dense, Flatten, Reshape, Input -from keras.models import Model as KerasModel - -from ._base import ModelBase, logger +from lib.model.nn_blocks import Conv2DOutput, Conv2DBlock, UpscaleBlock +from ._base import KerasModel, ModelBase class Model(ModelBase): - """ Original Faceswap Model """ - def __init__(self, *args, **kwargs): - logger.debug("Initializing %s: (args: %s, kwargs: %s", - self.__class__.__name__, args, kwargs) + """ Original Faceswap Model. + + This is the original faceswap model and acts as a template for plugin development. - self.configfile = kwargs.get("configfile", None) - if "input_shape" not in kwargs: - kwargs["input_shape"] = (64, 64, 3) - if "encoder_dim" not in kwargs: - kwargs["encoder_dim"] = 512 if self.config["lowmem"] else 1024 + All plugins must define the following attribute override after calling the parent's + :func:`__init__` method: + * :attr:`input_shape` (`tuple` or `list`): a tuple of ints defining the shape of the \ + faces that the model takes as input. If the input size is the same for both sides, this \ + can be a single 3 dimensional tuple. If the inputs have different sizes for "A" and "B" \ + this should be a list of 2 3 dimensional shape tuples, 1 for each side. + + Any additional attributes used exclusively by this model should be defined here, but make sure + that you are not accidentally overriding any existing + :class:`~plugins.train.model._base.ModelBase` attributes. 
+ + Parameters + ---------- + args: varies + The default command line arguments passed in from :class:`~scripts.train.Train` or + :class:`~scripts.train.Convert` + kwargs: varies + The default keyword arguments passed in from :class:`~scripts.train.Train` or + :class:`~scripts.train.Convert` + """ + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - logger.debug("Initialized %s", self.__class__.__name__) - - def add_networks(self): - """ Add the original model weights """ - logger.debug("Adding networks") - self.add_network("decoder", "a", self.decoder(), is_output=True) - self.add_network("decoder", "b", self.decoder(), is_output=True) - self.add_network("encoder", None, self.encoder()) - logger.debug("Added networks") - - def build_autoencoders(self, inputs): - """ Initialize original model """ - logger.debug("Initializing model") - for side in ("a", "b"): - logger.debug("Adding Autoencoder. Side: %s", side) - decoder = self.networks["decoder_{}".format(side)].network - output = decoder(self.networks["encoder"].network(inputs[0])) - autoencoder = KerasModel(inputs, output) - self.add_predictor(side, autoencoder) - logger.debug("Initialized model") + self.input_shape = (64, 64, 3) + self.low_mem = self.config.get("lowmem", False) + self.learn_mask = self.config["learn_mask"] + self.encoder_dim = 512 if self.low_mem else 1024 + + def build_model(self, inputs): + """ Create the model's structure. + + This function is automatically called immediately after :func:`__init__` has been called if + a new model is being created. It is ignored if an existing model is being loaded from disk + as the model structure will be defined in the saved model file. + + The model's final structure is defined here. + + For the original model, An encoder instance is defined, then the same instance is + referenced twice, one for each input "A" and "B" so that the same model is used for + both inputs. 
+ + 2 Decoders are then defined (one for each side) with the encoder instances passed in as + input to the corresponding decoders. + + It is important to note that any models and sub-models should not call + :class:`keras.models.Model` directly, but rather call + :class:`plugins.train.model._base.KerasModel`. This acts as a wrapper for Keras' Model + class, but handles some minor differences which need to be handled between Nvidia and AMD + backends. + + The final output of the model should always call :class:`lib.model.nn_blocks.Conv2DOutput` + so that the correct data type is set for the final activation, to support Mixed Precision + Training. Failure to do so is likely to lead to issues when Mixed Precision is enabled. + + Parameters + ---------- + inputs: list + A list of input tensors for the model. This will be a list of 2 tensors of + shape :attr:`input_shape`, the first for side "a", the second for side "b". + + Returns + ------- + :class:`keras.models.Model` + The output of this function must be a keras model generated from + :class:`plugins.train.model._base.KerasModel`. See Keras documentation for the correct + structure, but note that parameter :attr:`name` is a required rather than an optional + argument in Faceswap. You should assign this to the attribute ``self.name`` that is + automatically generated from the plugin's filename. + """ + input_a = inputs[0] + input_b = inputs[1] + + encoder = self.encoder() + encoder_a = [encoder(input_a)] + encoder_b = [encoder(input_b)] + + outputs = [self.decoder("a")(encoder_a), self.decoder("b")(encoder_b)] + + autoencoder = KerasModel(inputs, outputs, name=self.name) + return autoencoder def encoder(self): - """ Encoder Network """ + """ The original Faceswap Encoder Network. + + The encoder for the original model has it's weights shared between both the "A" and "B" + side of the model, so only one instance is created :func:`build_model`. 
However this same + instance is then used twice (once for A and once for B) meaning that the weights get + shared. + + Returns + ------- + :class:`keras.models.Model` + The Keras encoder model, for sharing between inputs from both sides. + """ input_ = Input(shape=self.input_shape) var_x = input_ - var_x = self.blocks.conv(var_x, 128) - var_x = self.blocks.conv(var_x, 256) - var_x = self.blocks.conv(var_x, 512) - if not self.config.get("lowmem", False): - var_x = self.blocks.conv(var_x, 1024) + var_x = Conv2DBlock(128)(var_x) + var_x = Conv2DBlock(256)(var_x) + var_x = Conv2DBlock(512)(var_x) + if not self.low_mem: + var_x = Conv2DBlock(1024)(var_x) var_x = Dense(self.encoder_dim)(Flatten()(var_x)) var_x = Dense(4 * 4 * 1024)(var_x) var_x = Reshape((4, 4, 1024))(var_x) - var_x = self.blocks.upscale(var_x, 512) - return KerasModel(input_, var_x) + var_x = UpscaleBlock(512)(var_x) + return KerasModel(input_, var_x, name="encoder") + + def decoder(self, side): + """ The original Faceswap Decoder Network. - def decoder(self): - """ Decoder Network """ + The decoders for the original model have separate weights for each side "A" and "B", so two + instances are created in :func:`build_model`, one for each side. + + Parameters + ---------- + side: str + Either `"a` or `"b"`. This is used for naming the decoder model. + + Returns + ------- + :class:`keras.models.Model` + The Keras decoder model. This will be called twice, once for each side. 
+ """ input_ = Input(shape=(8, 8, 512)) var_x = input_ - var_x = self.blocks.upscale(var_x, 256) - var_x = self.blocks.upscale(var_x, 128) - var_x = self.blocks.upscale(var_x, 64) - var_x = self.blocks.conv2d(var_x, 3, - kernel_size=5, - padding="same", - activation="sigmoid", - name="face_out") + var_x = UpscaleBlock(256)(var_x) + var_x = UpscaleBlock(128)(var_x) + var_x = UpscaleBlock(64)(var_x) + var_x = Conv2DOutput(3, 5, name="face_out_{}".format(side))(var_x) outputs = [var_x] - if self.config.get("learn_mask", False): + if self.learn_mask: var_y = input_ - var_y = self.blocks.upscale(var_y, 256) - var_y = self.blocks.upscale(var_y, 128) - var_y = self.blocks.upscale(var_y, 64) - var_y = self.blocks.conv2d(var_y, 1, - kernel_size=5, - padding="same", - activation="sigmoid", - name="mask_out") + var_y = UpscaleBlock(256)(var_y) + var_y = UpscaleBlock(128)(var_y) + var_y = UpscaleBlock(64)(var_y) + var_y = Conv2DOutput(1, 5, name="mask_out_{}".format(side))(var_y) outputs.append(var_y) - return KerasModel(input_, outputs=outputs) + return KerasModel(input_, outputs=outputs, name="decoder_{}".format(side)) + + def _legacy_mapping(self): + """ The mapping of legacy separate model names to single model names """ + return {"{}_encoder.h5".format(self.name): "encoder", + "{}_decoder_A.h5".format(self.name): "decoder_a", + "{}_decoder_B.h5".format(self.name): "decoder_b"} diff --git a/plugins/train/model/realface.py b/plugins/train/model/realface.py index 48df05c59a..948714a6dc 100644 --- a/plugins/train/model/realface.py +++ b/plugins/train/model/realface.py @@ -1,38 +1,34 @@ #!/usr/bin/env python3 """ RealFaceRC1, codenamed 'Pegasus' Based on the original https://www.reddit.com/r/deepfakes/ - code sample + contribs + code sample + contributions Major thanks goes to BryanLyon as it vastly powered by his ideas and insights. Without him it would not be possible to come up with the model. 
Additional thanks: Birb - source of inspiration, great Encoder ideas - Kvrooman - additional couseling on autoencoders and practical advices + Kvrooman - additional counseling on auto-encoders and practical advice """ +import sys from keras.initializers import RandomNormal from keras.layers import Dense, Flatten, Input, Reshape -from keras.models import Model as KerasModel -from ._base import ModelBase, logger + +from lib.model.nn_blocks import Conv2DOutput, Conv2DBlock, ResidualBlock, UpscaleBlock +from ._base import ModelBase, KerasModel, logger class Model(ModelBase): """ RealFace(tm) Faceswap Model """ def __init__(self, *args, **kwargs): - logger.debug("Initializing %s: (args: %s, kwargs: %s", - self.__class__.__name__, args, kwargs) - - self.configfile = kwargs.get("configfile", None) + super().__init__(*args, **kwargs) + self.input_shape = (self.config["input_size"], self.config["input_size"], 3) self.check_input_output() self.dense_width, self.upscalers_no = self.get_dense_width_upscalers_numbers() - kwargs["input_shape"] = (self.config["input_size"], self.config["input_size"], 3) self.kernel_initializer = RandomNormal(0, 0.02) - super().__init__(*args, **kwargs) - logger.debug("Initialized %s", self.__class__.__name__) - @property def downscalers_no(self): - """ Number of downscalers. Don't change! """ + """ Number of downscale blocks. Don't change! 
""" return 4 @property @@ -50,15 +46,15 @@ def check_input_output(self): if not 64 <= self.config["input_size"] <= 128 or self.config["input_size"] % 16 != 0: logger.error("Config error: input_size must be between 64 and 128 and be divisible by " "16.") - exit(1) + sys.exit(1) if not 64 <= self.config["output_size"] <= 256 or self.config["output_size"] % 32 != 0: logger.error("Config error: output_size must be between 64 and 256 and be divisible " "by 32.") - exit(1) + sys.exit(1) logger.debug("Input and output sizes are valid") def get_dense_width_upscalers_numbers(self): - """ Return the dense width and number of upscalers """ + """ Return the dense width and number of upscale blocks """ output_size = self.config["output_size"] sides = [(output_size // 2**n, n) for n in [4, 5] if (output_size // 2**n) < 10] closest = min([x * self._downscale_ratio for x, _ in sides], @@ -68,24 +64,16 @@ def get_dense_width_upscalers_numbers(self): logger.debug("dense_width: %s, upscalers_no: %s", dense_width, upscalers_no) return dense_width, upscalers_no - def add_networks(self): - """ Add the realface model weights """ - logger.debug("Adding networks") - self.add_network("decoder", "a", self.decoder_a(), is_output=True) - self.add_network("decoder", "b", self.decoder_b(), is_output=True) - self.add_network("encoder", None, self.encoder()) - logger.debug("Added networks") - - def build_autoencoders(self, inputs): - """ Initialize realface model """ - logger.debug("Initializing model") - for side in "a", "b": - logger.debug("Adding Autoencoder. Side: %s", side) - decoder = self.networks["decoder_{}".format(side)].network - output = decoder(self.networks["encoder"].network(inputs[0])) - autoencoder = KerasModel(inputs, output) - self.add_predictor(side, autoencoder) - logger.debug("Initialized model") + def build_model(self, inputs): + """ Build the RealFace model. 
""" + encoder = self.encoder() + encoder_a = encoder(inputs[0]) + encoder_b = encoder(inputs[1]) + + outputs = [self.decoder_a()(encoder_a), self.decoder_b()(encoder_b)] + + autoencoder = KerasModel(inputs, outputs, name=self.name) + return autoencoder def encoder(self): """ RealFace Encoder Network """ @@ -95,13 +83,13 @@ def encoder(self): encoder_complexity = self.config["complexity_encoder"] for idx in range(self.downscalers_no - 1): - var_x = self.blocks.conv(var_x, encoder_complexity * 2**idx) - var_x = self.blocks.res_block(var_x, encoder_complexity * 2**idx, use_bias=True) - var_x = self.blocks.res_block(var_x, encoder_complexity * 2**idx, use_bias=True) + var_x = Conv2DBlock(encoder_complexity * 2**idx)(var_x) + var_x = ResidualBlock(encoder_complexity * 2**idx, use_bias=True)(var_x) + var_x = ResidualBlock(encoder_complexity * 2**idx, use_bias=True)(var_x) - var_x = self.blocks.conv(var_x, encoder_complexity * 2**(idx + 1)) + var_x = Conv2DBlock(encoder_complexity * 2**(idx + 1))(var_x) - return KerasModel(input_, var_x) + return KerasModel(input_, var_x, name="encoder") def decoder_b(self): """ RealFace Decoder Network """ @@ -114,23 +102,19 @@ def decoder_b(self): var_xy = Dense(self.config["dense_nodes"])(Flatten()(var_xy)) var_xy = Dense(self.dense_width * self.dense_width * self.dense_filters)(var_xy) var_xy = Reshape((self.dense_width, self.dense_width, self.dense_filters))(var_xy) - var_xy = self.blocks.upscale(var_xy, self.dense_filters) + var_xy = UpscaleBlock(self.dense_filters)(var_xy) var_x = var_xy - var_x = self.blocks.res_block(var_x, self.dense_filters, use_bias=False) + var_x = ResidualBlock(self.dense_filters, use_bias=False)(var_x) decoder_b_complexity = self.config["complexity_decoder"] for idx in range(self.upscalers_no - 2): - var_x = self.blocks.upscale(var_x, decoder_b_complexity // 2**idx) - var_x = self.blocks.res_block(var_x, decoder_b_complexity // 2**idx, use_bias=False) - var_x = self.blocks.res_block(var_x, 
decoder_b_complexity // 2**idx, use_bias=True) - var_x = self.blocks.upscale(var_x, decoder_b_complexity // 2**(idx + 1)) + var_x = UpscaleBlock(decoder_b_complexity // 2**idx)(var_x) + var_x = ResidualBlock(decoder_b_complexity // 2**idx, use_bias=False)(var_x) + var_x = ResidualBlock(decoder_b_complexity // 2**idx, use_bias=True)(var_x) + var_x = UpscaleBlock(decoder_b_complexity // 2**(idx + 1))(var_x) - var_x = self.blocks.conv2d(var_x, 3, - kernel_size=5, - padding="same", - activation="sigmoid", - name="face_out") + var_x = Conv2DOutput(3, 5, name="face_out_b")(var_x) outputs = [var_x] @@ -138,18 +122,14 @@ def decoder_b(self): var_y = var_xy mask_b_complexity = 384 for idx in range(self.upscalers_no-2): - var_y = self.blocks.upscale(var_y, mask_b_complexity // 2**idx) - var_y = self.blocks.upscale(var_y, mask_b_complexity // 2**(idx + 1)) + var_y = UpscaleBlock(mask_b_complexity // 2**idx)(var_y) + var_y = UpscaleBlock(mask_b_complexity // 2**(idx + 1))(var_y) - var_y = self.blocks.conv2d(var_y, 1, - kernel_size=5, - padding="same", - activation="sigmoid", - name="mask_out") + var_y = Conv2DOutput(1, 5, name="mask_out_b")(var_y) outputs += [var_y] - return KerasModel(input_, outputs=outputs) + return KerasModel(input_, outputs=outputs, name="decoder_b") def decoder_a(self): """ RealFace Decoder (A) Network """ @@ -166,21 +146,17 @@ def decoder_a(self): var_xy = Dense(self.dense_width * self.dense_width * dense_filters)(var_xy) var_xy = Reshape((self.dense_width, self.dense_width, dense_filters))(var_xy) - var_xy = self.blocks.upscale(var_xy, dense_filters) + var_xy = UpscaleBlock(dense_filters)(var_xy) var_x = var_xy - var_x = self.blocks.res_block(var_x, dense_filters, use_bias=False) + var_x = ResidualBlock(dense_filters, use_bias=False)(var_x) decoder_a_complexity = int(self.config["complexity_decoder"] / 1.5) for idx in range(self.upscalers_no-2): - var_x = self.blocks.upscale(var_x, decoder_a_complexity // 2**idx) - var_x = self.blocks.upscale(var_x, 
decoder_a_complexity // 2**(idx + 1)) + var_x = UpscaleBlock(decoder_a_complexity // 2**idx)(var_x) + var_x = UpscaleBlock(decoder_a_complexity // 2**(idx + 1))(var_x) - var_x = self.blocks.conv2d(var_x, 3, - kernel_size=5, - padding="same", - activation="sigmoid", - name="face_out") + var_x = Conv2DOutput(3, 5, name="face_out_a")(var_x) outputs = [var_x] @@ -188,15 +164,17 @@ def decoder_a(self): var_y = var_xy mask_a_complexity = 384 for idx in range(self.upscalers_no-2): - var_y = self.blocks.upscale(var_y, mask_a_complexity // 2**idx) - var_y = self.blocks.upscale(var_y, mask_a_complexity // 2**(idx + 1)) + var_y = UpscaleBlock(mask_a_complexity // 2**idx)(var_y) + var_y = UpscaleBlock(mask_a_complexity // 2**(idx + 1))(var_y) - var_y = self.blocks.conv2d(var_y, 1, - kernel_size=5, - padding="same", - activation="sigmoid", - name="mask_out") + var_y = Conv2DOutput(1, 5, name="mask_out_a")(var_y) outputs += [var_y] - return KerasModel(input_, outputs=outputs) + return KerasModel(input_, outputs=outputs, name="decoder_a") + + def _legacy_mapping(self): + """ The mapping of legacy separate model names to single model names """ + return {"{}_encoder.h5".format(self.name): "encoder", + "{}_decoder_A.h5".format(self.name): "decoder_a", + "{}_decoder_B.h5".format(self.name): "decoder_b"} diff --git a/plugins/train/model/unbalanced.py b/plugins/train/model/unbalanced.py index d7e136fdd4..c29b77cd38 100644 --- a/plugins/train/model/unbalanced.py +++ b/plugins/train/model/unbalanced.py @@ -1,147 +1,131 @@ #!/usr/bin/env python3 """ Unbalanced Model Based on the original https://www.reddit.com/r/deepfakes/ - code sample + contribs """ + code sample + contributions """ from keras.initializers import RandomNormal from keras.layers import Dense, Flatten, Input, Reshape, SpatialDropout2D -from keras.models import Model as KerasModel -from .original import logger, Model as OriginalModel +from lib.model.nn_blocks import Conv2DOutput, Conv2DBlock, ResidualBlock, UpscaleBlock 
+from ._base import ModelBase, KerasModel -class Model(OriginalModel): +class Model(ModelBase): """ Unbalanced Faceswap Model """ def __init__(self, *args, **kwargs): - logger.debug("Initializing %s: (args: %s, kwargs: %s", - self.__class__.__name__, args, kwargs) - - self.configfile = kwargs.get("configfile", None) - self.lowmem = self.config.get("lowmem", False) - kwargs["input_shape"] = (self.config["input_size"], self.config["input_size"], 3) - kwargs["encoder_dim"] = 512 if self.lowmem else self.config["nodes"] + super().__init__(*args, **kwargs) + self.input_shape = (self.config["input_size"], self.config["input_size"], 3) + self.low_mem = self.config.get("lowmem", False) + self.encoder_dim = 512 if self.low_mem else self.config["nodes"] self.kernel_initializer = RandomNormal(0, 0.02) - super().__init__(*args, **kwargs) - logger.debug("Initialized %s", self.__class__.__name__) + def build_model(self, inputs): + """ build the Unbalanced Model. """ + encoder = self.encoder() + encoder_a = encoder(inputs[0]) + encoder_b = encoder(inputs[1]) - def add_networks(self): - """ Add the original model weights """ - logger.debug("Adding networks") - self.add_network("decoder", "a", self.decoder_a(), is_output=True) - self.add_network("decoder", "b", self.decoder_b(), is_output=True) - self.add_network("encoder", None, self.encoder()) - logger.debug("Added networks") + outputs = [self.decoder_a()(encoder_a), self.decoder_b()(encoder_b)] + + autoencoder = KerasModel(inputs, outputs, name=self.name) + return autoencoder def encoder(self): """ Unbalanced Encoder """ kwargs = dict(kernel_initializer=self.kernel_initializer) - encoder_complexity = 128 if self.lowmem else self.config["complexity_encoder"] - dense_dim = 384 if self.lowmem else 512 + encoder_complexity = 128 if self.low_mem else self.config["complexity_encoder"] + dense_dim = 384 if self.low_mem else 512 dense_shape = self.input_shape[0] // 16 input_ = Input(shape=self.input_shape) var_x = input_ - var_x = 
self.blocks.conv(var_x, encoder_complexity, use_instance_norm=True, **kwargs) - var_x = self.blocks.conv(var_x, encoder_complexity * 2, use_instance_norm=True, **kwargs) - var_x = self.blocks.conv(var_x, encoder_complexity * 4, **kwargs) - var_x = self.blocks.conv(var_x, encoder_complexity * 6, **kwargs) - var_x = self.blocks.conv(var_x, encoder_complexity * 8, **kwargs) + var_x = Conv2DBlock(encoder_complexity, use_instance_norm=True, **kwargs)(var_x) + var_x = Conv2DBlock(encoder_complexity * 2, use_instance_norm=True, **kwargs)(var_x) + var_x = Conv2DBlock(encoder_complexity * 4, **kwargs)(var_x) + var_x = Conv2DBlock(encoder_complexity * 6, **kwargs)(var_x) + var_x = Conv2DBlock(encoder_complexity * 8, **kwargs)(var_x) var_x = Dense(self.encoder_dim, kernel_initializer=self.kernel_initializer)(Flatten()(var_x)) var_x = Dense(dense_shape * dense_shape * dense_dim, kernel_initializer=self.kernel_initializer)(var_x) var_x = Reshape((dense_shape, dense_shape, dense_dim))(var_x) - return KerasModel(input_, var_x) + return KerasModel(input_, var_x, name="encoder") def decoder_a(self): """ Decoder for side A """ kwargs = dict(kernel_size=5, kernel_initializer=self.kernel_initializer) - decoder_complexity = 320 if self.lowmem else self.config["complexity_decoder_a"] - dense_dim = 384 if self.lowmem else 512 + decoder_complexity = 320 if self.low_mem else self.config["complexity_decoder_a"] + dense_dim = 384 if self.low_mem else 512 decoder_shape = self.input_shape[0] // 16 input_ = Input(shape=(decoder_shape, decoder_shape, dense_dim)) var_x = input_ - var_x = self.blocks.upscale(var_x, decoder_complexity, **kwargs) + var_x = UpscaleBlock(decoder_complexity, **kwargs)(var_x) var_x = SpatialDropout2D(0.25)(var_x) - var_x = self.blocks.upscale(var_x, decoder_complexity, **kwargs) - if self.lowmem: + var_x = UpscaleBlock(decoder_complexity, **kwargs)(var_x) + if self.low_mem: var_x = SpatialDropout2D(0.15)(var_x) else: var_x = SpatialDropout2D(0.25)(var_x) - var_x = 
self.blocks.upscale(var_x, decoder_complexity // 2, **kwargs) - var_x = self.blocks.upscale(var_x, decoder_complexity // 4, **kwargs) - var_x = self.blocks.conv2d(var_x, 3, - kernel_size=5, - padding="same", - activation="sigmoid", - name="face_out") + var_x = UpscaleBlock(decoder_complexity // 2, **kwargs)(var_x) + var_x = UpscaleBlock(decoder_complexity // 4, **kwargs)(var_x) + var_x = Conv2DOutput(3, 5, name="face_out_a")(var_x) outputs = [var_x] if self.config.get("learn_mask", False): var_y = input_ - var_y = self.blocks.upscale(var_y, decoder_complexity) - var_y = self.blocks.upscale(var_y, decoder_complexity) - var_y = self.blocks.upscale(var_y, decoder_complexity // 2) - var_y = self.blocks.upscale(var_y, decoder_complexity // 4) - var_y = self.blocks.conv2d(var_y, 1, - kernel_size=5, - padding="same", - activation="sigmoid", - name="mask_out") + var_y = UpscaleBlock(decoder_complexity)(var_y) + var_y = UpscaleBlock(decoder_complexity)(var_y) + var_y = UpscaleBlock(decoder_complexity // 2)(var_y) + var_y = UpscaleBlock(decoder_complexity // 4)(var_y) + var_y = Conv2DOutput(1, 5, name="mask_out_a")(var_y) outputs.append(var_y) - return KerasModel(input_, outputs=outputs) + return KerasModel(input_, outputs=outputs, name="decoder_a") def decoder_b(self): """ Decoder for side B """ kwargs = dict(kernel_size=5, kernel_initializer=self.kernel_initializer) - dense_dim = 384 if self.lowmem else self.config["complexity_decoder_b"] - decoder_complexity = 384 if self.lowmem else 512 + dense_dim = 384 if self.low_mem else self.config["complexity_decoder_b"] + decoder_complexity = 384 if self.low_mem else 512 decoder_shape = self.input_shape[0] // 16 input_ = Input(shape=(decoder_shape, decoder_shape, dense_dim)) var_x = input_ - if self.lowmem: - var_x = self.blocks.upscale(var_x, decoder_complexity, **kwargs) - var_x = self.blocks.upscale(var_x, decoder_complexity // 2, **kwargs) - var_x = self.blocks.upscale(var_x, decoder_complexity // 4, **kwargs) - var_x = 
self.blocks.upscale(var_x, decoder_complexity // 8, **kwargs) + if self.low_mem: + var_x = UpscaleBlock(decoder_complexity, **kwargs)(var_x) + var_x = UpscaleBlock(decoder_complexity // 2, **kwargs)(var_x) + var_x = UpscaleBlock(decoder_complexity // 4, **kwargs)(var_x) + var_x = UpscaleBlock(decoder_complexity // 8, **kwargs)(var_x) else: - var_x = self.blocks.upscale(var_x, decoder_complexity, - res_block_follows=True, **kwargs) - var_x = self.blocks.res_block(var_x, decoder_complexity, - kernel_initializer=self.kernel_initializer) - var_x = self.blocks.upscale(var_x, decoder_complexity, - res_block_follows=True, **kwargs) - var_x = self.blocks.res_block(var_x, decoder_complexity, - kernel_initializer=self.kernel_initializer) - var_x = self.blocks.upscale(var_x, decoder_complexity // 2, - res_block_follows=True, **kwargs) - var_x = self.blocks.res_block(var_x, decoder_complexity // 2, - kernel_initializer=self.kernel_initializer) - var_x = self.blocks.upscale(var_x, decoder_complexity // 4, **kwargs) - var_x = self.blocks.conv2d(var_x, 3, - kernel_size=5, - padding="same", - activation="sigmoid", - name="face_out") + var_x = UpscaleBlock(decoder_complexity, res_block_follows=True, **kwargs)(var_x) + var_x = ResidualBlock(decoder_complexity, + kernel_initializer=self.kernel_initializer)(var_x) + var_x = UpscaleBlock(decoder_complexity, res_block_follows=True, **kwargs)(var_x) + var_x = ResidualBlock(decoder_complexity, + kernel_initializer=self.kernel_initializer)(var_x) + var_x = UpscaleBlock(decoder_complexity // 2, res_block_follows=True, **kwargs)(var_x) + var_x = ResidualBlock(decoder_complexity // 2, + kernel_initializer=self.kernel_initializer)(var_x) + var_x = UpscaleBlock(decoder_complexity // 4, **kwargs)(var_x) + var_x = Conv2DOutput(3, 5, name="face_out_b")(var_x) outputs = [var_x] if self.config.get("learn_mask", False): var_y = input_ - var_y = self.blocks.upscale(var_y, decoder_complexity) - if not self.lowmem: - var_y = self.blocks.upscale(var_y, 
decoder_complexity) - var_y = self.blocks.upscale(var_y, decoder_complexity // 2) - var_y = self.blocks.upscale(var_y, decoder_complexity // 4) - if self.lowmem: - var_y = self.blocks.upscale(var_y, decoder_complexity // 8) - var_y = self.blocks.conv2d(var_y, 1, - kernel_size=5, - padding="same", - activation="sigmoid", - name="mask_out") + var_y = UpscaleBlock(decoder_complexity)(var_y) + if not self.low_mem: + var_y = UpscaleBlock(decoder_complexity)(var_y) + var_y = UpscaleBlock(decoder_complexity // 2)(var_y) + var_y = UpscaleBlock(decoder_complexity // 4)(var_y) + if self.low_mem: + var_y = UpscaleBlock(decoder_complexity // 8)(var_y) + var_y = Conv2DOutput(1, 5, name="mask_out_b")(var_y) outputs.append(var_y) - return KerasModel(input_, outputs=outputs) + return KerasModel(input_, outputs=outputs, name="decoder_b") + + def _legacy_mapping(self): + """ The mapping of legacy separate model names to single model names """ + return {"{}_encoder.h5".format(self.name): "encoder", + "{}_decoder_A.h5".format(self.name): "decoder_a", + "{}_decoder_B.h5".format(self.name): "decoder_b"} diff --git a/plugins/train/model/villain.py b/plugins/train/model/villain.py index e662f60bd5..f172d5195c 100644 --- a/plugins/train/model/villain.py +++ b/plugins/train/model/villain.py @@ -1,29 +1,24 @@ #!/usr/bin/env python3 """ Original - VillainGuy model - Based on the original https://www.reddit.com/r/deepfakes/ code sample + contribs + Based on the original https://www.reddit.com/r/deepfakes/ code sample + contributions Adapted from a model by VillainGuy (https://github.com/VillainGuy) """ from keras.initializers import RandomNormal from keras.layers import add, Dense, Flatten, Input, Reshape -from keras.models import Model as KerasModel from lib.model.layers import PixelShuffler -from .original import logger, Model as OriginalModel +from lib.model.nn_blocks import (Conv2DOutput, Conv2DBlock, ResidualBlock, SeparableConv2DBlock, + UpscaleBlock) +from .original import Model as 
OriginalModel, KerasModel class Model(OriginalModel): """ Villain Faceswap Model """ def __init__(self, *args, **kwargs): - logger.debug("Initializing %s: (args: %s, kwargs: %s", - self.__class__.__name__, args, kwargs) - - self.configfile = kwargs.get("configfile", None) - kwargs["input_shape"] = (128, 128, 3) - kwargs["encoder_dim"] = 512 if self.config["lowmem"] else 1024 - self.kernel_initializer = RandomNormal(0, 0.02) - super().__init__(*args, **kwargs) - logger.debug("Initialized %s", self.__class__.__name__) + self.input_shape = (128, 128, 3) + self.encoder_dim = 512 if self.low_mem else 1024 + self.kernel_initializer = RandomNormal(0, 0.02) def encoder(self): """ Encoder Network """ @@ -34,59 +29,51 @@ def encoder(self): in_conv_filters = 128 + (self.input_shape[0] - 128) // 4 dense_shape = self.input_shape[0] // 16 - var_x = self.blocks.conv(input_, in_conv_filters, res_block_follows=True, **kwargs) + var_x = Conv2DBlock(in_conv_filters, res_block_follows=True, **kwargs)(input_) tmp_x = var_x res_cycles = 8 if self.config.get("lowmem", False) else 16 for _ in range(res_cycles): - nn_x = self.blocks.res_block(var_x, in_conv_filters, **kwargs) + nn_x = ResidualBlock(in_conv_filters, **kwargs)(var_x) var_x = nn_x # consider adding scale before this layer to scale the residual chain var_x = add([var_x, tmp_x]) - var_x = self.blocks.conv(var_x, 128, **kwargs) + var_x = Conv2DBlock(128, **kwargs)(var_x) var_x = PixelShuffler()(var_x) - var_x = self.blocks.conv(var_x, 128, **kwargs) + var_x = Conv2DBlock(128, **kwargs)(var_x) var_x = PixelShuffler()(var_x) - var_x = self.blocks.conv(var_x, 128, **kwargs) - var_x = self.blocks.conv_sep(var_x, 256, **kwargs) - var_x = self.blocks.conv(var_x, 512, **kwargs) + var_x = Conv2DBlock(128, **kwargs)(var_x) + var_x = SeparableConv2DBlock(256, **kwargs)(var_x) + var_x = Conv2DBlock(512, **kwargs)(var_x) if not self.config.get("lowmem", False): - var_x = self.blocks.conv_sep(var_x, 1024, **kwargs) + var_x = 
SeparableConv2DBlock(1024, **kwargs)(var_x) var_x = Dense(self.encoder_dim, **kwargs)(Flatten()(var_x)) var_x = Dense(dense_shape * dense_shape * 1024, **kwargs)(var_x) var_x = Reshape((dense_shape, dense_shape, 1024))(var_x) - var_x = self.blocks.upscale(var_x, 512, **kwargs) - return KerasModel(input_, var_x) + var_x = UpscaleBlock(512, **kwargs)(var_x) + return KerasModel(input_, var_x, name="encoder") - def decoder(self): + def decoder(self, side): """ Decoder Network """ kwargs = dict(kernel_initializer=self.kernel_initializer) decoder_shape = self.input_shape[0] // 8 input_ = Input(shape=(decoder_shape, decoder_shape, 512)) var_x = input_ - var_x = self.blocks.upscale(var_x, 512, res_block_follows=True, **kwargs) - var_x = self.blocks.res_block(var_x, 512, **kwargs) - var_x = self.blocks.upscale(var_x, 256, res_block_follows=True, **kwargs) - var_x = self.blocks.res_block(var_x, 256, **kwargs) - var_x = self.blocks.upscale(var_x, self.input_shape[0], res_block_follows=True, **kwargs) - var_x = self.blocks.res_block(var_x, self.input_shape[0], **kwargs) - var_x = self.blocks.conv2d(var_x, 3, - kernel_size=5, - padding="same", - activation="sigmoid", - name="face_out") + var_x = UpscaleBlock(512, res_block_follows=True, **kwargs)(var_x) + var_x = ResidualBlock(512, **kwargs)(var_x) + var_x = UpscaleBlock(256, res_block_follows=True, **kwargs)(var_x) + var_x = ResidualBlock(256, **kwargs)(var_x) + var_x = UpscaleBlock(self.input_shape[0], res_block_follows=True, **kwargs)(var_x) + var_x = ResidualBlock(self.input_shape[0], **kwargs)(var_x) + var_x = Conv2DOutput(3, 5, name="face_out_{}".format(side))(var_x) outputs = [var_x] if self.config.get("learn_mask", False): var_y = input_ - var_y = self.blocks.upscale(var_y, 512) - var_y = self.blocks.upscale(var_y, 256) - var_y = self.blocks.upscale(var_y, self.input_shape[0]) - var_y = self.blocks.conv2d(var_y, 1, - kernel_size=5, - padding="same", - activation="sigmoid", - name="mask_out") + var_y = 
UpscaleBlock(512)(var_y) + var_y = UpscaleBlock(256)(var_y) + var_y = UpscaleBlock(self.input_shape[0])(var_y) + var_y = Conv2DOutput(1, 5, name="mask_out_{}".format(side))(var_y) outputs.append(var_y) - return KerasModel(input_, outputs=outputs) + return KerasModel(input_, outputs=outputs, name="decoder_{}".format(side)) diff --git a/plugins/train/model/villain_defaults.py b/plugins/train/model/villain_defaults.py index 68a4fad833..da3af3eecc 100755 --- a/plugins/train/model/villain_defaults.py +++ b/plugins/train/model/villain_defaults.py @@ -43,7 +43,7 @@ _HELPTEXT = ( "A Higher resolution version of the Original Model by VillainGuy.\n" - "Extremely VRAM heavy. Full model requires 9GB+ for batchsize 16\n" + "Extremely VRAM heavy. Don't try to run this if you have a small GPU.\n" ) diff --git a/plugins/train/trainer/_base.py b/plugins/train/trainer/_base.py index 5a61edbade..70f95b6a97 100644 --- a/plugins/train/trainer/_base.py +++ b/plugins/train/trainer/_base.py @@ -3,45 +3,11 @@ this class. At present there is only the :class:`~plugins.train.trainer.original` plugin, so that entirely -inherits from this class. - -This class heavily references the :attr:`plugins.train.model._base.ModelBase.training_opts` -``dict``. The following keys are expected from this ``dict``: - - * **alignments** (`dict`, `optional`) - If training with a mask or the warp to landmarks \ - command line option is selected then this is required, otherwise it can be ``None``. The \ - dictionary should contain 2 keys ("a" and "b") with the values being the path to the \ - alignments file for the corresponding side. - - * **preview_scaling** (`int`) - How much to scale displayed preview image by. - - * **training_size** ('int') - Size of the training images in pixels. - - * **coverage_ratio** ('float') - Ratio of face to be cropped out of the training image. - - * **mask_type** ('str') - The type of mask to select from the alignments file. 
- - * **mask_blur_kernel** ('int') - The size of the kernel to use for gaussian blurring the mask. - - * **mask_threshold** ('int') - The threshold for min/maxing mask to 0/100. - - * **learn_mask** ('bool') - Whether the mask should be trained in the model. - - * **penalized_mask_loss** ('bool') - Whether the mask should be penalized from loss. - - * **no_logs** ('bool') - Whether Tensorboard logging should be disabled. - - * **snapshot_interval** ('int') - How many iterations between model snapshot saves. - - * **warp_to_landmarks** ('bool') - Whether to use random_warp_landmarks instead of random_warp. - - * **augment_color** ('bool') - Whether to use color augmentation. - - * **no_flip** ('bool') - Whether to turn off random horizontal flipping. - - * **pingpong** ('bool') - Train each side separately per save iteration rather than together. +inherits from this class. If further plugins are developed, then common code should be kept here, +with "original" unique code split out to the original plugin. """ +# pylint:disable=too-many-lines import logging import os import time @@ -57,7 +23,7 @@ from lib.faces_detect import DetectedFace from lib.image import read_image_hash_batch from lib.training_data import TrainingDataGenerator -from lib.utils import FaceswapError, get_folder, get_image_paths +from lib.utils import FaceswapError, get_backend, get_folder, get_image_paths from plugins.train._config import Config logger = logging.getLogger(__name__) # pylint: disable=invalid-name @@ -83,7 +49,8 @@ def _get_config(plugin_name, configfile=None): class TrainerBase(): - """ Trainer plugin base Object. + """ Handles the feeding of training images to Faceswap models, the generation of Tensorboard + logs and the creation of sample/time-lapse preview images. All Trainer plugins must inherit from this class. 
@@ -111,69 +78,48 @@ def __init__(self, model, images, batch_size, configfile): self._images = images self._sides = sorted(key for key in self._images.keys()) - self._process_training_opts() - self._pingpong = PingPong(model, self._sides) - - self._batchers = {side: Batcher(side, - images[side], - self._model, - self._use_mask, - batch_size, - self._config) - for side in self._sides} + self._feeder = _Feeder(images, + self._model, + batch_size, + self._config, + self._get_alignments_data()) self._tensorboard = self._set_tensorboard() - self._samples = Samples(self._model, - self._use_mask, - self._model.training_opts["coverage_ratio"], - self._model.training_opts["preview_scaling"]) - self._timelapse = Timelapse(self._model, - self._use_mask, - self._model.training_opts["coverage_ratio"], - self._config.get("preview_images", 14), - self._batchers) + self._samples = _Samples(self._model, + self._model.coverage_ratio, + self._model.command_line_arguments.preview_scale / 100) + self._timelapse = _Timelapse(self._model, + self._model.coverage_ratio, + self._config.get("preview_images", 14), + self._feeder) logger.debug("Initialized %s", self.__class__.__name__) - @property - def pingpong(self): - """ :class:`pingpong`: Ping-pong object for ping-pong memory saving training. """ - return self._pingpong + def _get_alignments_data(self): + """ Extrapolate alignments and masks from the alignments file into a `dict` for the + training data generator. - @property - def _timestamp(self): - """ str: Current time formatted as HOURS:MINUTES:SECONDS """ - return time.strftime("%H:%M:%S") + Returns + ------- + dict: + Includes the key `landmarks` if landmarks are required for training and the key `masks` + if the masks are required for training.
""" + retval = dict() + - @property - def _landmarks_required(self): - """ bool: ``True`` if Landmarks are required otherwise ``False ``""" - retval = self._model.training_opts["warp_to_landmarks"] - logger.debug(retval) - return retval + get_masks = self._model.config["learn_mask"] or self._model.config["penalized_mask_loss"] + if not self._model.command_line_arguments.warp_to_landmarks and not get_masks: + return retval - @property - def _use_mask(self): - """ bool: ``True`` if a mask is required otherwise ``False`` """ - retval = (self._model.training_opts["learn_mask"] or - self._model.training_opts["penalized_mask_loss"]) - logger.debug(retval) - return retval - - def _process_training_opts(self): - """ Extrapolate alignments and masks from the alignments file into - :attr:`_model.training_opts`.""" - logger.debug(self._model.training_opts) - if not self._landmarks_required and not self._use_mask: - return + alignments = _TrainingAlignments(self._model, self._images) - alignments = TrainingAlignments(self._model.training_opts, self._images) - if self._landmarks_required: + if self._model.command_line_arguments.warp_to_landmarks: logger.debug("Adding landmarks to training opts dict") - self._model.training_opts["landmarks"] = alignments.landmarks + retval["landmarks"] = alignments.landmarks - if self._use_mask: + if get_masks: logger.debug("Adding masks to training opts dict") - self._model.training_opts["masks"] = alignments.masks + retval["masks"] = alignments.masks + logger.debug(retval) + return retval def _set_tensorboard(self): """ Set up Tensorboard callback for logging loss. @@ -182,77 +128,48 @@ def _set_tensorboard(self): Returns ------- - dict: - 2 Dictionary keys of "a" and "b" the values of which are the - :class:`tf.keras.callbacks.TensorBoard` objects for the respective sides. + :class:`tf.keras.callbacks.TensorBoard` + Tensorboard object for the current training session.
""" - if self._model.training_opts["no_logs"]: + if self._model.state.current_session["no_logs"]: logger.verbose("TensorBoard logging disabled") return None - if self._pingpong.active: - # Currently TensorBoard uses the tf.session, meaning that VRAM does not - # get cleared when model switching - # TODO find a fix for this - logger.warning("Currently TensorBoard logging is not supported for Ping-Pong " - "training. Session stats and graphing will not be available for this " - "training session.") - return None - logger.debug("Enabling TensorBoard Logging") - tensorboard = dict() - - for side in self._sides: - logger.debug("Setting up TensorBoard Logging. Side: %s", side) - log_dir = os.path.join(str(self._model.model_dir), - "{}_logs".format(self._model.name), - side, - "session_{}".format(self._model.state.session_id)) - tbs = tf.keras.callbacks.TensorBoard(log_dir=log_dir, **self._tensorboard_kwargs) - tbs.set_model(self._model.predictors[side]) - tensorboard[side] = tbs + + logger.debug("Setting up TensorBoard Logging") + log_dir = os.path.join(str(self._model.model_dir), + "{}_logs".format(self._model.name), + "session_{}".format(self._model.state.session_id)) + tensorboard = tf.keras.callbacks.TensorBoard(log_dir=log_dir, + histogram_freq=0, # Must be 0 or hangs + write_graph=get_backend() != "amd", + write_images=False, + update_freq="batch", + profile_batch=0, + embeddings_freq=0, + embeddings_metadata=None) + tensorboard.set_model(self._model.model) + tensorboard.on_train_begin(0) logger.info("Enabled TensorBoard Logging") return tensorboard - @property - def _tensorboard_kwargs(self): - """ dict: The keyword arguments to be passed to :class:`tf.keras.callbacks.TensorBoard`. 
- NB: Tensorflow 1.13 + needs an additional keyword argument which is not valid for earlier - versions """ - kwargs = dict(histogram_freq=0, # Must be 0 or hangs - batch_size=64, - write_graph=True, - write_grads=True) - tf_version = [int(ver) for ver in tf.__version__.split(".") if ver.isdigit()] - logger.debug("Tensorflow version: %s", tf_version) - if tf_version[0] > 1 or (tf_version[0] == 1 and tf_version[1] > 12): - kwargs["update_freq"] = "batch" - if tf_version[0] > 1 or (tf_version[0] == 1 and tf_version[1] > 13): - kwargs["profile_batch"] = 0 - logger.debug(kwargs) - return kwargs - - def __print_loss(self, loss): - """ Outputs the loss for the current iteration to the console. - - Parameters - ---------- - loss: dict - The loss for each side. The dictionary should contain 2 keys ("a" and "b") with the - values being a list of loss values for the current iteration corresponding to - each side. - """ - logger.trace(loss) - output = ["Loss {}: {:.5f}".format(side.capitalize(), loss[side][0]) - for side in sorted(loss.keys())] - output = ", ".join(output) - output = "[{}] [#{:05d}] {}".format(self._timestamp, self._model.iterations, output) - print("\r{}".format(output), end="") - def train_one_step(self, viewer, timelapse_kwargs): """ Running training on a batch of images for each side. Triggered from the training cycle in :class:`scripts.train.Train`. + * Runs a training batch through the model. + + * Outputs the iteration's loss values to the console + + * Logs loss to Tensorboard, if logging is requested. + + * If a preview or time-lapse has been requested, then pushes sample images through the \ + model to generate the previews + + * Creates a snapshot if the total iterations trained so far meet the requested snapshot \ + criteria + Notes ----- As every iteration is called explicitly, the Parameters defined should always be ``None`` @@ -267,85 +184,99 @@ def train_one_step(self, viewer, timelapse_kwargs): not required then this should be ``None``. 
Otherwise all values should be full paths the keys being `input_a`, `input_b`, `output`. """ + self._model.state.increment_iterations() logger.trace("Training one step: (iteration: %s)", self._model.iterations) do_preview = viewer is not None do_timelapse = timelapse_kwargs is not None - snapshot_interval = self._model.training_opts.get("snapshot_interval", 0) + snapshot_interval = self._model.command_line_arguments.snapshot_interval do_snapshot = (snapshot_interval != 0 and - self._model.iterations >= snapshot_interval and - self._model.iterations % snapshot_interval == 0) + self._model.iterations - 1 >= snapshot_interval and + (self._model.iterations - 1) % snapshot_interval == 0) - loss = dict() + model_inputs, model_targets = self._feeder.get_batch() try: - for side, batcher in self._batchers.items(): - if self._pingpong.active and side != self._pingpong.side: - continue - loss[side] = batcher.train_one_batch() - if not do_preview and not do_timelapse: - continue - if do_preview: - batcher.generate_preview(do_preview) - self._samples.images[side] = batcher.compile_sample(None) - if do_timelapse: - self._timelapse.get_sample(side, timelapse_kwargs) + loss = self._model.model.train_on_batch(model_inputs, y=model_targets) + except tf_errors.ResourceExhaustedError as err: + msg = ("You do not have enough GPU memory available to train the selected model at " + "the selected settings. You can try a number of things:" + "\n1) Close any other application that is using your GPU (web browsers are " + "particularly bad for this)." + "\n2) Lower the batchsize (the amount of images fed into the model each " + "iteration)." + "\n3) Try enabling 'Mixed Precision' training." 
+ "\n4) Use a more lightweight model, or select the model's 'LowMem' option " + "(in config) if it has one.") + raise FaceswapError(msg) from err - self._model.state.increment_iterations() + self._log_tensorboard(loss) + loss = self._collate_and_store_loss(loss[1:]) + self._print_loss(loss) - for side, side_loss in loss.items(): - self._store_history(side, side_loss) - self._log_tensorboard(side, side_loss) + if do_snapshot: + self._model.snapshot() - if not self._pingpong.active: - self.__print_loss(loss) - else: - for key, val in loss.items(): - self._pingpong.loss[key] = val - self.__print_loss(self._pingpong.loss) + if do_preview: + self._feeder.generate_preview(do_preview) + self._samples.images = self._feeder.compile_sample(None) + samples = self._samples.show_sample() + if samples is not None: + viewer(samples, + "Training - 'S': Save Now. 'R': Refresh Preview. 'ENTER': Save and Quit") + + if do_timelapse: + self._timelapse.output_timelapse(timelapse_kwargs) - if do_preview: - samples = self._samples.show_sample() - if samples is not None: - viewer(samples, "Training - 'S': Save Now. 'ENTER': Save and Quit") + def _log_tensorboard(self, loss): + """ Log current loss to Tensorboard log files - if do_timelapse: - self._timelapse.output_timelapse() + Parameters + ---------- + loss: list + The list of loss ``floats`` output from the model + """ + if not self._tensorboard: + return + logger.trace("Updating TensorBoard log") + logs = {log[0]: log[1] + for log in zip(self._model.state.loss_names, loss)} + self._tensorboard.on_train_batch_end(self._model.iterations, logs=logs) - if do_snapshot: - self._model.do_snapshot() - except Exception as err: - raise err + def _collate_and_store_loss(self, loss): + """ Collate the loss into totals for each side. - def _store_history(self, side, loss): - """ Store the loss for this step into :attr:`model.history`. + The losses are then summed into a total for each side.
Loss totals are added to + :attr:`model.state._history` to track the loss drop per save iteration for backup purposes. Parameters ---------- - side: {"a", "b"} - The side to store the loss for loss: list - The list of loss ``floats`` for this side + The list of loss ``floats`` for this iteration. + + Returns + ------- + list + List of 2 ``floats`` which is the total loss for each side """ - logger.trace("Updating loss history: '%s'", side) - self._model.history[side].append(loss[0]) # Either only loss or total loss - logger.trace("Updated loss history: '%s'", side) + split = len(loss) // 2 + combined_loss = [sum(loss[:split]), sum(loss[split:])] + self._model.add_history(combined_loss) + logger.trace("original loss: %s, comibed_loss: %s", loss, combined_loss) + return combined_loss - def _log_tensorboard(self, side, loss): - """ Log current loss to Tensorboard log files + def _print_loss(self, loss): + """ Outputs the loss for the current iteration to the console. Parameters ---------- - side: {"a", "b"} - The side to store the loss for loss: list - The list of loss ``floats`` for this side - """ - if not self._tensorboard: - return - logger.trace("Updating TensorBoard log: '%s'", side) - logs = {log[0]: log[1] - for log in zip(self._model.state.loss_names[side], loss)} - self._tensorboard[side].on_batch_end(self._model.state.iterations, logs) - logger.trace("Updated TensorBoard log: '%s'", side) + The loss for each side. List should contain 2 ``floats`` side "a" in position 0 and + side "b" in position 1. + """ + output = ", ".join(["Loss {}: {:.5f}".format(side, side_loss) + for side, side_loss in zip(("A", "B"), loss)]) + timestamp = time.strftime("%H:%M:%S") + output = "[{}] [#{:05d}] {}".format(timestamp, self._model.iterations, output) + print("\r{}".format(output), end="") def clear_tensorboard(self): """ Stop Tensorboard logging.
@@ -355,91 +286,129 @@ def clear_tensorboard(self): """ if not self._tensorboard: return - for side, tensorboard in self._tensorboard.items(): - logger.debug("Ending Tensorboard. Side: '%s'", side) - tensorboard.on_train_end(None) + logger.debug("Ending Tensorboard Session: %s", self._tensorboard) + self._tensorboard.on_train_end(None) -class Batcher(): - """ Handles the processing of a Batch for a single side. +class _Feeder(): + """ Handles the processing of a Batch for training the model and generating samples. Parameters ---------- - side: {"a" or "b"} - The side that this :class:`Batcher` belongs to - images: list - The list of full paths to the training images for this :class:`Batcher` + images: dict + The list of full paths to the training images for this :class:`_Feeder` for each side model: plugin from :mod:`plugins.train.model` The selected model that will be running this trainer - use_mask: bool - ``True`` if a mask is required for training otherwise ``False`` batch_size: int - The size of the batch to be processed at each iteration + The size of the batch to be processed for each side at each iteration config: :class:`lib.config.FaceswapConfig` The configuration for this trainer + alignments: dict + A dictionary containing landmarks and masks if these are required for training for each + side """ - def __init__(self, side, images, model, use_mask, batch_size, config): - logger.debug("Initializing %s: side: '%s', num_images: %s, use_mask: %s, batch_size: %s, " - "config: %s)", - self.__class__.__name__, side, len(images), use_mask, batch_size, config) + def __init__(self, images, model, batch_size, config, alignments): + logger.debug("Initializing %s: num_images: %s, batch_size: %s, config: %s)", + self.__class__.__name__, len(images), batch_size, config) self._model = model - self._use_mask = use_mask - self._side = side self._images = images self._config = config - self._target = None - self._samples = None - self._masks = None - - generator = 
self._load_generator() - self._feed = generator.minibatch_ab(images, batch_size, self._side) - - self._preview_feed = None - self._timelapse_feed = None - self._set_preview_feed() - - def _load_generator(self): - """ Load the :class:`lib.training_data.TrainingDataGenerator` for this batcher """ - logger.debug("Loading generator: %s", self._side) - input_size = self._model.input_shape[0] - output_shapes = self._model.output_shapes + self._alignments = alignments + self._target = dict() + self._samples = dict() + self._masks = dict() + + self._feeds = {side: self._load_generator(idx).minibatch_ab(images[side], batch_size, side) + for idx, side in enumerate(("a", "b"))} + + self._display_feeds = dict(preview=self._set_preview_feed(), timelapse=dict()) + logger.debug("Initialized %s:", self.__class__.__name__) + + def _load_generator(self, output_index): + """ Load the :class:`~lib.training_data.TrainingDataGenerator` for this feeder. + + Parameters + ---------- + output_index: int + The output index from the model to get output shapes for + + Returns + ------- + :class:`~lib.training_data.TrainingDataGenerator` + The training data generator + """ + logger.debug("Loading generator") + input_size = self._model.model.input_shape[output_index][1] + output_shapes = self._model.output_shapes[output_index] logger.debug("input_size: %s, output_shapes: %s", input_size, output_shapes) generator = TrainingDataGenerator(input_size, output_shapes, - self._model.training_opts, + self._model.coverage_ratio, + not self._model.command_line_arguments.no_augment_color, + self._model.command_line_arguments.no_flip, + self._model.command_line_arguments.warp_to_landmarks, + self._alignments, self._config) return generator - def train_one_batch(self): - """ Train on a single batch of images for this :class:`Batcher` + def _set_preview_feed(self): + """ Set the preview feed for this feeder. 
+ + Creates a generator from :class:`lib.training_data.TrainingDataGenerator` specifically + for previews for the feeder. Returns ------- - list - The list of loss values (as ``float``) for this batch + dict + The side ("a" or "b") as key, :class:`~lib.training_data.TrainingDataGenerator` as + value. """ - logger.trace("Training one step: (side: %s)", self._side) - model_inputs, model_targets = self._get_next() - try: - loss = self._model.predictors[self._side].train_on_batch(model_inputs, model_targets) - except tf_errors.ResourceExhaustedError as err: - msg = ("You do not have enough GPU memory available to train the selected model at " - "the selected settings. You can try a number of things:" - "\n1) Close any other application that is using your GPU (web browsers are " - "particularly bad for this)." - "\n2) Lower the batchsize (the amount of images fed into the model each " - "iteration)." - "\n3) Try 'Memory Saving Gradients' and/or 'Optimizer Savings' and/or 'Ping " - "Pong Training'." - "\n4) Use a more lightweight model, or select the model's 'LowMem' option " - "(in config) if it has one.") - raise FaceswapError(msg) from err - loss = loss if isinstance(loss, list) else [loss] - return loss + retval = dict() + for idx, side in enumerate(("a", "b")): + logger.debug("Setting preview feed: (side: '%s')", side) + preview_images = self._config.get("preview_images", 14) + preview_images = min(max(preview_images, 2), 16) + batchsize = min(len(self._images[side]), preview_images) + retval[side] = self._load_generator(idx).minibatch_ab(self._images[side], + batchsize, + side, + do_shuffle=True, + is_preview=True) + logger.debug("Set preview feed. Batchsize: %s", batchsize) + return retval + + def get_batch(self): + """ Get the feed data and the targets for each training side for feeding into the model's + train function. 
+ + Returns + ------- + model_inputs: list + The inputs to the model for each side A and B + model_targets: list + The targets for the model for each side A and B + """ + model_inputs = [] + model_targets = [] + for side in ("a", "b"): + side_inputs, side_targets = self._get_next(side) + if self._model.config["penalized_mask_loss"]: + side_targets = self._compile_masks(side_targets) + if not self._model.config["learn_mask"]: # Remove masks from the model targets + side_targets = side_targets[:-1] + logger.trace("side: %s, input_shapes: %s, target_shapes: %s", + side, [i.shape for i in side_inputs], [i.shape for i in side_targets]) + if get_backend() == "amd": + model_inputs.extend(side_inputs) + model_targets.extend(side_targets) + else: + model_inputs.append(side_inputs) + model_targets.append(side_targets) + return model_inputs, model_targets - def _get_next(self): + def _get_next(self, side): """ Return the next batch from the :class:`lib.training_data.TrainingDataGenerator` for - this batcher ready for feeding into the model. + this feeder ready for feeding into the model. Returns ------- @@ -449,11 +418,40 @@ def _get_next(self): A list of :class:`numpy.ndarray` for comparing the output of the model """ logger.trace("Generating targets") - batch = next(self._feed) - targets_use_mask = self._model.training_opts["learn_mask"] - model_inputs = batch["feed"] + batch["masks"] if self._use_mask else batch["feed"] + batch = next(self._feeds[side]) + targets_use_mask = (self._model.config["learn_mask"] + or self._model.config["penalized_mask_loss"]) model_targets = batch["targets"] + batch["masks"] if targets_use_mask else batch["targets"] - return model_inputs, model_targets + return batch["feed"], model_targets + + @classmethod + def _compile_masks(cls, targets): + """ Compile the masks into the targets for penalized loss. + + Penalized loss expects the target mask to be included for all outputs in the 4th channel + of the targets. 
The final output and final mask are always the last 2 outputs + + Parameters + ---------- + targets: list + The targets for the model, with the mask as the final entry in the list + + Returns + ------- + list + The targets for the model with the mask compiled into the 4th channel. The original + mask is still output as the final item in the list + """ + masks = targets[-1] + for idx, tgt in enumerate(targets[:-1]): + tgt_dim = tgt.shape[1] + if tgt_dim == masks.shape[1]: + add_masks = masks + else: + add_masks = np.array([cv2.resize(mask, (tgt_dim, tgt_dim)) + for mask in masks])[..., None] + targets[idx] = np.concatenate((tgt, add_masks), axis=-1) + return targets def generate_preview(self, do_preview): """ Generate the preview images. @@ -465,32 +463,16 @@ def generate_preview(self, do_preview): should not be generated, in which case currently stored previews should be deleted. """ if not do_preview: - self._samples = None - self._target = None - self._masks = None + self._samples = dict() + self._target = dict() + self._masks = dict() return logger.debug("Generating preview") - batch = next(self._preview_feed) - self._samples = batch["samples"] - self._target = batch["targets"][self._model.largest_face_index] - self._masks = batch["masks"][0] - - def _set_preview_feed(self): - """ Set the preview feed for this batcher. - - Creates a generator from :class:`lib.training_data.TrainingDataGenerator` specifically - for previews for the batcher. - """ - logger.debug("Setting preview feed: (side: '%s')", self._side) - preview_images = self._config.get("preview_images", 14) - preview_images = min(max(preview_images, 2), 16) - batchsize = min(len(self._images), preview_images) - self._preview_feed = self._load_generator().minibatch_ab(self._images, - batchsize, - self._side, - do_shuffle=True, - is_preview=True) - logger.debug("Set preview feed. 
Batchsize: %s", batchsize) + for side in ("a", "b"): + batch = next(self._display_feeds["preview"][side]) + self._samples[side] = batch["samples"] + self._target[side] = batch["targets"][-1] + self._masks[side] = batch["masks"][0] def compile_sample(self, batch_size, samples=None, images=None, masks=None): """ Compile the preview samples for display. @@ -499,18 +481,18 @@ def compile_sample(self, batch_size, samples=None, images=None, masks=None): ---------- batch_size: int The requested batch size for each training iterations - samples: :class:`numpy.ndarray`, optional - The sample images that should be used for creating the preview. If ``None`` then the - samples will be generated from the internal random image generator. - Default: ``None`` - images: :class:`numpy.ndarray`, optional - The target images that should be used for creating the preview. If ``None`` then the - targets will be generated from the internal random image generator. - Default: ``None`` - masks: :class:`numpy.ndarray`, optional - The masks that should be used for creating the preview. If ``None`` then the - masks will be generated from the internal random image generator. - Default: ``None`` + samples: dict, optional + Dictionary for side "a", "b" of :class:`numpy.ndarray`. The sample images that should + be used for creating the preview. If ``None`` then the samples will be generated from + the internal random image generator. Default: ``None`` + images: dict, optional + Dictionary for side "a", "b" of :class:`numpy.ndarray`. The target images that should + be used for creating the preview. If ``None`` then the targets will be generated from + the internal random image generator. Default: ``None`` + masks: dict, optional + Dictionary for side "a", "b" of :class:`numpy.ndarray`. The masks that should be used + for creating the preview. If ``None`` then the masks will be generated from the + internal random image generator. 
Default: ``None`` Returns ------- @@ -520,11 +502,15 @@ def compile_sample(self, batch_size, samples=None, images=None, masks=None): """ num_images = self._config.get("preview_images", 14) num_images = min(batch_size, num_images) if batch_size is not None else num_images - logger.debug("Compiling samples: (side: '%s', samples: %s)", self._side, num_images) - images = images if images is not None else self._target - masks = masks if masks is not None else self._masks - samples = samples if samples is not None else self._samples - retval = [samples[0:num_images], images[0:num_images], masks[0:num_images]] + retval = dict() + for side in ("a", "b"): + logger.debug("Compiling samples: (side: '%s', samples: %s)", side, num_images) + side_images = images[side] if images is not None else self._target[side] + side_masks = masks[side] if masks is not None else self._masks[side] + side_samples = samples[side] if samples is not None else self._samples[side] + retval[side] = [side_samples[0:num_images], + side_images[0:num_images], + side_masks[0:num_images]] return retval def compile_timelapse_sample(self): @@ -532,52 +518,57 @@ def compile_timelapse_sample(self): Returns ------- - list - The list of samples, targets and masks as :class:`numpy.ndarrays` for creating a - time-lapse frame + dict + For sides "a" and "b"; The list of samples, targets and masks as + :class:`numpy.ndarrays` for creating a time-lapse frame """ - batch = next(self._timelapse_feed) - batchsize = len(batch["samples"]) - images = batch["targets"][self._model.largest_face_index] - masks = batch["masks"][0] - sample = self.compile_sample(batchsize, - samples=batch["samples"], - images=images, - masks=masks) + batchsizes = [] + samples = dict() + images = dict() + masks = dict() + for side in ("a", "b"): + batch = next(self._display_feeds["timelapse"][side]) + batchsizes.append(len(batch["samples"])) + samples[side] = batch["samples"] + images[side] = batch["targets"][-1] + masks[side] = batch["masks"][0] 
+ batchsize = min(batchsizes) + sample = self.compile_sample(batchsize, samples=samples, images=images, masks=masks) return sample def set_timelapse_feed(self, images, batch_size): - """ Set the time-lapse feed for this batcher. + """ Set the time-lapse feed for this feeder. Creates a generator from :class:`lib.training_data.TrainingDataGenerator` specifically - for generating time-lapse previews for the batcher. + for generating time-lapse previews for the feeder. Parameters ---------- images: list The list of full paths to the images for creating the time-lapse for this - :class:`Batcher` + :class:`_Feeder` batch_size: int The number of images to be used to create the time-lapse preview. """ - logger.debug("Setting time-lapse feed: (side: '%s', input_images: '%s', batch_size: %s)", - self._side, images, batch_size) - self._timelapse_feed = self._load_generator().minibatch_ab(images[:batch_size], - batch_size, self._side, - do_shuffle=False, - is_timelapse=True) - logger.debug("Set time-lapse feed") - - -class Samples(): + logger.debug("Setting time-lapse feed: (input_images: '%s', batch_size: %s)", + images, batch_size) + for idx, side in enumerate(("a", "b")): + self._display_feeds["timelapse"][side] = self._load_generator(idx).minibatch_ab( + images[side][:batch_size], + batch_size, + side, + do_shuffle=False, + is_timelapse=True) + logger.debug("Set time-lapse feed: %s", self._display_feeds["timelapse"]) + + +class _Samples(): # pylint:disable=too-few-public-methods """ Compile samples for display for preview and time-lapse Parameters ---------- model: plugin from :mod:`plugins.train.model` The selected model that will be running this trainer - use_mask: bool - ``True`` if a mask should be displayed otherwise ``False`` coverage_ratio: float Ratio of face to be cropped out of the training image. 
scaling: float, optional @@ -590,11 +581,11 @@ class Samples(): dictionary should contain 2 keys ("a" and "b") with the values being the training images for generating samples corresponding to each side. """ - def __init__(self, model, use_mask, coverage_ratio, scaling=1.0): - logger.debug("Initializing %s: model: '%s', use_mask: %s, coverage_ratio: %s)", - self.__class__.__name__, model, use_mask, coverage_ratio) + def __init__(self, model, coverage_ratio, scaling=1.0): + logger.debug("Initializing %s: model: '%s', coverage_ratio: %s)", + self.__class__.__name__, model, coverage_ratio) self._model = model - self._use_mask = use_mask + self._display_mask = model.config["learn_mask"] or model.config["penalized_mask_loss"] self.images = dict() self._coverage_ratio = coverage_ratio self._scaling = scaling @@ -608,23 +599,19 @@ def show_sample(self): :class:`numpy.ndarry` A compiled preview image ready for display or saving """ - if len(self.images) != 2: - logger.debug("Ping Pong training - Only one side trained. 
Aborting preview") - return None logger.debug("Showing sample") feeds = dict() figures = dict() headers = dict() - for side, samples in self.images.items(): + for idx, side in enumerate(("a", "b")): + samples = self.images[side] faces = samples[1] - if self._model.input_shape[0] / faces.shape[1] != 1.0: - feeds[side] = self._resize_sample(side, faces, self._model.input_shape[0]) - feeds[side] = feeds[side].reshape((-1, ) + self._model.input_shape) + input_shape = self._model.model.input_shape[idx][1:] + if input_shape[0] / faces.shape[1] != 1.0: + feeds[side] = self._resize_sample(side, faces, input_shape[0]) + feeds[side] = feeds[side].reshape((-1, ) + input_shape) else: feeds[side] = faces - if self._use_mask: - mask = samples[-1] - feeds[side] = [feeds[side], mask] preds = self._get_predictions(feeds["a"], feeds["b"]) @@ -654,12 +641,14 @@ def show_sample(self): logger.debug("Compiled sample") return np.clip(figure * 255, 0, 255).astype('uint8') - @staticmethod - def _resize_sample(side, sample, target_size): + @classmethod + def _resize_sample(cls, side, sample, target_size): """ Resize a given image to the target size. 
Parameters ---------- + side: str + The side ("a" or "b") that the samples are being generated for sample: :class:`numpy.ndarray` The sample to be resized target_size: int @@ -692,19 +681,33 @@ def _get_predictions(self, feed_a, feed_b): List of :class:`numpy.ndarray` of feed images for the "b" side Returns + ------- list: List of :class:`numpy.ndarray` of predictions received from the model """ logger.debug("Getting Predictions") preds = dict() - preds["a_a"] = self._model.predictors["a"].predict(feed_a) - preds["b_a"] = self._model.predictors["b"].predict(feed_a) - preds["a_b"] = self._model.predictors["a"].predict(feed_b) - preds["b_b"] = self._model.predictors["b"].predict(feed_b) - # Get the returned largest image from predictors that emit multiple items - if not isinstance(preds["a_a"], np.ndarray): - for key, val in preds.items(): - preds[key] = val[self._model.largest_face_index] + standard = self._model.model.predict([feed_a, feed_b]) + swapped = self._model.model.predict([feed_b, feed_a]) + + if self._model.config["learn_mask"] and get_backend() == "amd": + # Ravel results for plaidml + split = len(standard) // 2 + standard = [standard[:split], standard[split:]] + swapped = [swapped[:split], swapped[split:]] + + if self._model.config["learn_mask"]: # Add mask to 4th channel of final output + standard = [np.concatenate(side[-2:], axis=-1) for side in standard] + swapped = [np.concatenate(side[-2:], axis=-1) for side in swapped] + else: # Retrieve final output + standard = [side[-1] if isinstance(side, list) else side for side in standard] + swapped = [side[-1] if isinstance(side, list) else side for side in swapped] + + preds["a_a"] = standard[0] + preds["b_b"] = standard[1] + preds["a_b"] = swapped[0] + preds["b_a"] = swapped[1] + logger.debug("Returning predictions: %s", {key: val.shape for key, val in preds.items()}) return preds @@ -716,9 +719,14 @@ def _to_full_frame(self, side, samples, predictions): side: {"a" or "b"} The side that these samples are 
for samples: list - List of :class:`numpy.ndarray` of target images and feed images + List of :class:`numpy.ndarray` of feed images and target images predictions: list List of :class: `numpy.ndarray` of predictions from the model + + Returns + ------- + list + The images resized and collated for display in the preview frame """ logger.debug("side: '%s', number of sample arrays: %s, prediction.shapes: %s)", side, len(samples), [pred.shape for pred in predictions]) @@ -729,7 +737,7 @@ def _to_full_frame(self, side, samples, predictions): if target_size != full_size: frame = self._frame_overlay(full, target_size, (0, 0, 255)) - if self._use_mask: + if self._display_mask: images = self._compile_masked(images, samples[-1]) images = [self._resize_sample(side, image, target_size) for image in images] if target_size != full_size: @@ -739,8 +747,8 @@ def _to_full_frame(self, side, samples, predictions): images = [self._resize_sample(side, image, new_size) for image in images] return images - @staticmethod - def _frame_overlay(images, target_size, color): + @classmethod + def _frame_overlay(cls, images, target_size, color): """ Add a frame overlay to preview images indicating the region of interest. This is the red border that appears in the preview images. @@ -776,16 +784,17 @@ def _frame_overlay(images, target_size, color): logger.debug("Overlayed background. Shape: %s", retval.shape) return retval - @staticmethod - def _compile_masked(faces, masks): + @classmethod + def _compile_masked(cls, faces, masks): """ Add the mask to the faces for masked preview. Places an opaque red layer over areas of the face that are masked out. 
Parameters ---------- - faces: :class:`numpy.ndarray` - The sample faces that are to have the mask applied + faces: list + The :class:`numpy.ndarray` sample faces and predictions that are to have the mask + applied masks: :class:`numpy.ndarray` The masks that are to be applied to the faces @@ -794,19 +803,26 @@ def _compile_masked(faces, masks): list List of :class:`numpy.ndarray` faces with the opaque mask layer applied """ - retval = list() - masks3 = np.tile(1 - np.rint(masks), 3) - for mask in masks3: - mask[np.where((mask == [1., 1., 1.]).all(axis=2))] = [0., 0., 1.] - for previews in faces: - images = np.array([cv2.addWeighted(img, 1.0, masks3[idx], 0.3, 0) - for idx, img in enumerate(previews)]) - retval.append(images) + orig_masks = np.tile(1 - np.rint(masks), 3) + orig_masks[np.where((orig_masks == [1., 1., 1.]).all(axis=3))] = [0., 0., 1.] + + if faces[-1].shape[-1] == 4: # Mask contained in alpha channel of predictions + pred_masks = [np.tile(1 - np.rint(face[..., -1])[..., None], 3) for face in faces[-2:]] + for swap_masks in pred_masks: + swap_masks[np.where((swap_masks == [1., 1., 1.]).all(axis=3))] = [0., 0., 1.] 
+ faces[-2:] = [face[..., :-1] for face in faces[-2:]] + masks3 = [orig_masks, *pred_masks] + else: + masks3 = np.repeat(np.expand_dims(orig_masks, axis=0), 3, axis=0) + + retval = [np.array([cv2.addWeighted(img, 1.0, mask, 0.3, 0) + for img, mask in zip(previews, compiled_masks)]) + for previews, compiled_masks in zip(faces, masks3)] logger.debug("masked shapes: %s", [faces.shape for faces in retval]) return retval - @staticmethod - def _overlay_foreground(backgrounds, foregrounds): + @classmethod + def _overlay_foreground(cls, backgrounds, foregrounds): """ Overlay the preview images into the center of the background images Parameters @@ -877,8 +893,8 @@ def _get_headers(self, side, width): logger.debug("header_box.shape: %s", header_box.shape) return header_box - @staticmethod - def _duplicate_headers(headers, columns): + @classmethod + def _duplicate_headers(cls, headers, columns): """ Duplicate headers for the number of columns displayed for each side. Parameters @@ -900,53 +916,33 @@ def _duplicate_headers(headers, columns): return headers -class Timelapse(): +class _Timelapse(): # pylint:disable=too-few-public-methods """ Create a time-lapse preview image. Parameters ---------- model: plugin from :mod:`plugins.train.model` The selected model that will be running this trainer - use_mask: bool - ``True`` if a mask should be displayed otherwise ``False`` coverage_ratio: float Ratio of face to be cropped out of the training image. scaling: float, optional The amount to scale the final preview image by. Default: `1.0` image_count: int The number of preview images to be displayed in the time-lapse - batchers: dict - The dictionary should contain 2 keys ("a" and "b") with the values being the - :class:`Batcher` for each side. + feeder: dict + The :class:`_Feeder` for generating the time-lapse images. 
""" - def __init__(self, model, use_mask, coverage_ratio, image_count, batchers): - logger.debug("Initializing %s: model: %s, use_mask: %s, coverage_ratio: %s, " - "image_count: %s, batchers: '%s')", self.__class__.__name__, model, - use_mask, coverage_ratio, image_count, batchers) + def __init__(self, model, coverage_ratio, image_count, feeder): + logger.debug("Initializing %s: model: %s, coverage_ratio: %s, image_count: %s, " + "feeder: '%s')", self.__class__.__name__, model, coverage_ratio, + image_count, feeder) self._num_images = image_count - self._samples = Samples(model, use_mask, coverage_ratio) + self._samples = _Samples(model, coverage_ratio) self._model = model - self._batchers = batchers + self._feeder = feeder self._output_file = None logger.debug("Initialized %s", self.__class__.__name__) - def get_sample(self, side, timelapse_kwargs): - """ Compile the time-lapse preview - - Parameters - ---------- - side: {"a" or "b"} - The side that the time-lapse is being generated for - timelapse_kwargs: dict - The keyword arguments for setting up the time-lapse. All values should be full paths - the keys being `input_a`, `input_b`, `output` - """ - logger.debug("Getting time-lapse samples: '%s'", side) - if not self._output_file: - self._setup(**timelapse_kwargs) - self._samples.images[side] = self._batchers[side].compile_timelapse_sample() - logger.debug("Got time-lapse samples: '%s' - %s", side, len(self._samples.images[side])) - def _setup(self, input_a=None, input_b=None, output=None): """ Setup the time-lapse folder locations and the time-lapse feed. 
@@ -971,13 +967,28 @@ def _setup(self, input_a=None, input_b=None, output=None): batchsize = min(len(images["a"]), len(images["b"]), self._num_images) - for side, image_files in images.items(): - self._batchers[side].set_timelapse_feed(image_files, batchsize) + self._feeder.set_timelapse_feed(images, batchsize) logger.debug("Set up time-lapse") - def output_timelapse(self): - """ Write the created time-lapse to the specified output folder. """ + def output_timelapse(self, timelapse_kwargs): + """ Generate the time-lapse samples and output the created time-lapse to the specified + output folder. + + Parameters + ---------- + timelapse_kwargs: dict: + The keyword arguments for setting up the time-lapse. All values should be full paths + the keys being `input_a`, `input_b`, `output` + """ logger.debug("Ouputting time-lapse") + if not self._output_file: + self._setup(**timelapse_kwargs) + + logger.debug("Getting time-lapse samples") + self._samples.images = self._feeder.compile_timelapse_sample() + logger.debug("Got time-lapse samples: %s", + {side: len(images) for side, images in self._samples.images.items()}) + image = self._samples.show_sample() if image is None: return @@ -987,70 +998,24 @@ def output_timelapse(self): logger.debug("Created time-lapse: '%s'", filename) -class PingPong(): - """ Side switcher for ping-pong training (memory saving feature) - - Parameters - ---------- - model: plugin from :mod:`plugins.train.model` - The selected model that will be running this trainer - sides: list - The sorted sides that are to be trained. 
Generally ["a", "b"] - - Attributes - ---------- - side: str - The side that is currently being trained - loss: dict - The loss for each side for ping pong training for the current ping pong session - """ - def __init__(self, model, sides): - logger.debug("Initializing %s: (model: '%s')", self.__class__.__name__, model) - self._model = model - self._sides = sides - self.side = sorted(sides)[0] - self.loss = {side: [0] for side in sides} - logger.debug("Initialized %s", self.__class__.__name__) - - @property - def active(self): - """ bool: ``True`` if Ping Pong training is active otherwise ``False``. """ - return self._model.training_opts.get("pingpong", False) - - def switch(self): - """ Switch ping-pong training from one side of the model to the other """ - if not self.active: - return - retval = [side for side in self._sides if side != self.side][0] - logger.info("Switching training to side %s", retval.title()) - self.side = retval - self._reload_model() - - def _reload_model(self): - """ Clear out the model from VRAM and reload for the next side to be trained with ping-pong - training """ - logger.verbose("Ping-Pong re-loading model") - self._model.reset_pingpong() - - -class TrainingAlignments(): +class _TrainingAlignments(): """ Obtain Landmarks and required mask from alignments file. Parameters ---------- - training_opts: dict - The dictionary of model training options (see module doc-string for information about - contents) + model: plugin from :mod:`plugins.train.model` + The model that will be running this trainer image_list: dict The file paths for the images to be trained on for each side. The dictionary should contain 2 keys ("a" and "b") with the values being a list of full paths corresponding to each side. 
""" - def __init__(self, training_opts, image_list): - logger.debug("Initializing %s: (training_opts: '%s', image counts: %s)", - self.__class__.__name__, training_opts, - {k: len(v) for k, v in image_list.items()}) - self._training_opts = training_opts - self._check_alignments_exist() + def __init__(self, model, image_list): + logger.debug("Initializing %s: (model: %s, image counts: %s)", + self.__class__.__name__, model, {k: len(v) for k, v in image_list.items()}) + self._args = model.command_line_arguments + self._config = model.config + self._training_size = model.state.training_size + self._alignments_paths = self._get_alignments_paths() self._hashes = self._get_image_hashes(image_list) self._detected_faces = self._load_alignments() self._check_all_faces() @@ -1065,6 +1030,35 @@ def landmarks(self): logger.trace(retval) return retval + def _get_alignments_paths(self): + """ Obtain the alignments file paths from the command line arguments passed to the model. + + If the argument does not exist or is empty, then scan the input folder for an alignments + file. + + Returns + ------- + dict + The alignments paths for each of the source and destination faces. Key is the + side, value is the path to the alignments file + + Raises + ------ + FaceswapError + If at least one alignments file does not exist + """ + retval = dict() + for side in ("a", "b"): + alignments_path = getattr(self._args, "alignments_path_{}".format(side)) + if not alignments_path: + image_path = getattr(self._args, "input_{}".format(side)) + alignments_path = os.path.join(image_path, "alignments.fsa") + if not os.path.exists(alignments_path): + raise FaceswapError("Alignments file does not exist: `{}`".format(alignments_path)) + retval[side] = alignments_path + logger.debug("Alignments paths: %s", retval) + return retval + def _transform_landmarks(self, side, detected_faces): """ Transform frame landmarks to their aligned face variant. 
@@ -1082,7 +1076,7 @@ def _transform_landmarks(self, side, detected_faces): """ landmarks = dict() for face in detected_faces.values(): - face.load_aligned(None, size=self._training_opts["training_size"]) + face.load_aligned(None, size=self._training_size) for filename in self._hash_to_filenames(side, face.hash): landmarks[filename] = face.aligned_landmarks return landmarks @@ -1117,29 +1111,16 @@ def _get_masks(self, side, detected_faces): masks = dict() for fhash, face in detected_faces.items(): - mask = face.mask[self._training_opts["mask_type"]] - mask.set_blur_and_threshold(blur_kernel=self._training_opts["mask_blur_kernel"], - threshold=self._training_opts["mask_threshold"]) + mask = face.mask[self._config["mask_type"]] + mask.set_blur_and_threshold(blur_kernel=self._config["mask_blur_kernel"], + threshold=self._config["mask_threshold"]) for filename in self._hash_to_filenames(side, fhash): masks[filename] = mask return masks - # Pre flight checks - def _check_alignments_exist(self): - """ Ensure the alignments files exist prior to running any longer running tasks. - - Raises - ------ - FaceswapError - If at least one alignments file does not exist - """ - for fullpath in self._training_opts["alignments"].values(): - if not os.path.exists(fullpath): - raise FaceswapError("Alignments file does not exist: `{}`".format(fullpath)) - # Hashes for image folders - @staticmethod - def _get_image_hashes(image_list): + @classmethod + def _get_image_hashes(cls, image_list): """ Return the hashes for all images used for training. 
Parameters @@ -1179,7 +1160,7 @@ def _load_alignments(self): """ logger.debug("Loading alignments") retval = dict() - for side, fullpath in self._training_opts["alignments"].items(): + for side, fullpath in self._alignments_paths.items(): logger.debug("side: '%s', path: '%s'", side, fullpath) path, filename = os.path.split(fullpath) alignments = Alignments(path, filename=filename) @@ -1257,7 +1238,7 @@ def _validate_face(self, face, filename, idx, side, side_hashes): FaceswapError If the current face doesn't pass validation """ - mask_type = self._training_opts["mask_type"] + mask_type = self._config["mask_type"] if mask_type is not None and "mask" not in face: msg = ("You have selected a Mask Type in your training configuration options but at " "least one face has no mask stored for it.\nYou should generate the required " diff --git a/requirements_amd.txt b/requirements_amd.txt index ecd1dd97bd..1bc3307a95 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -1,4 +1,3 @@ -r _requirements_base.txt -tensorflow>=1.12.0,<=1.15.3 -plaidml-keras==0.6.4 -plaidml==0.6.4 +tensorflow>=2.2.0,<2.3.0 +plaidml-keras==0.7.0 diff --git a/requirements_cpu.txt b/requirements_cpu.txt index 7c6097cf0d..971f5b50a7 100644 --- a/requirements_cpu.txt +++ b/requirements_cpu.txt @@ -1,2 +1,2 @@ -r _requirements_base.txt -tensorflow>=1.12.0,<=1.15.3 +tensorflow>=2.2.0,<2.3.0 diff --git a/requirements_nvidia.txt b/requirements_nvidia.txt index eff1ede17b..f695054174 100644 --- a/requirements_nvidia.txt +++ b/requirements_nvidia.txt @@ -1,2 +1,2 @@ -r _requirements_base.txt -tensorflow-gpu>=1.12.0,<=1.15.3 +tensorflow-gpu>=2.2.0,<2.3.0 diff --git a/scripts/convert.py b/scripts/convert.py index 659c036b6e..bddfb90b8f 100644 --- a/scripts/convert.py +++ b/scripts/convert.py @@ -11,8 +11,6 @@ import cv2 import numpy as np from tqdm import tqdm -import tensorflow as tf -from keras.backend.tensorflow_backend import set_session from scripts.fsmedia import Alignments, PostProcess, 
finalize from lib.serializer import get_serializer @@ -655,13 +653,10 @@ def __init__(self, in_queue, queue_size, arguments): self._faces_count = 0 self._verify_output = False - if arguments.allow_growth: - self._set_tf_allow_growth() - self._model = self._load_model() - self._output_indices = {"face": self._model.largest_face_index, - "mask": self._model.largest_mask_index} - self._predictor = self._model.converter(self._args.swap_model) + self._sizes = self._get_io_sizes() + self._coverage_ratio = self._model.coverage_ratio + self._thread = self._launch_predictor() logger.debug("Initialized %s: (out_queue: %s)", self.__class__.__name__, self._out_queue) @@ -694,28 +689,33 @@ def verify_output(self): @property def coverage_ratio(self): """ float: The coverage ratio that the model was trained at. """ - return self._model.training_opts["coverage_ratio"] + return self._coverage_ratio @property def has_predicted_mask(self): """ bool: ``True`` if the model was trained to learn a mask, otherwise ``False``. """ - return bool(self._model.state.config.get("learn_mask", False)) + return bool(self._model.config.get("learn_mask", False)) @property def output_size(self): """ int: The size in pixels of the Faceswap model output. """ - return self._model.output_shape[0] + return self._sizes["output"] - @property - def _input_size(self): - """ int: The size in pixels of the Faceswap model input. """ - return self._model.input_shape[0] + def _get_io_sizes(self): + """ Obtain the input size and output size of the model. - @property - def _input_mask(self): - """ :class:`numpy.ndarray`: A dummy mask for inputting to the model. 
""" - mask = np.zeros((1, ) + self._model.state.mask_shapes[0], dtype="float32") - return mask + Returns + ------- + dict + input_size in pixels and output_size in pixels + """ + input_shape = self._model.model.input_shape + input_shape = [input_shape] if not isinstance(input_shape, list) else input_shape + output_shape = self._model.model.output_shape + output_shape = [output_shape] if not isinstance(output_shape, list) else output_shape + retval = dict(input=input_shape[0][1], output=output_shape[-1][1]) + logger.debug(retval) + return retval @staticmethod def _get_batchsize(queue_size): @@ -737,20 +737,6 @@ def _get_batchsize(queue_size): logger.debug("Got batchsize: %s", batchsize) return batchsize - @staticmethod - def _set_tf_allow_growth(): - """ Enables the TensorFlow configuration option "allow_growth". - - TODO Move this temporary fix somewhere more appropriate - """ - # pylint: disable=no-member - logger.debug("Setting Tensorflow 'allow_growth' option") - config = tf.ConfigProto() - config.gpu_options.allow_growth = True - config.gpu_options.visible_device_list = "0" - set_session(tf.Session(config=config)) - logger.debug("Set Tensorflow 'allow_growth' option") - def _load_model(self): """ Load the Faceswap model. 
@@ -764,8 +750,8 @@ def _load_model(self): if not model_dir: raise FaceswapError("{} does not exist.".format(self._args.model_dir)) trainer = self._get_model_name(model_dir) - gpus = 1 if not hasattr(self._args, "gpus") else self._args.gpus - model = PluginLoader.get_model(trainer)(model_dir, gpus, predict=True) + model = PluginLoader.get_model(trainer)(model_dir, self._args, predict=True) + model.build() logger.debug("Loaded Model") return model @@ -901,15 +887,15 @@ def load_aligned(self, item): logger.trace("Loading aligned faces: '%s'", item["filename"]) for detected_face in item["detected_faces"]: detected_face.load_feed_face(item["image"], - size=self._input_size, - coverage_ratio=self.coverage_ratio, + size=self._sizes["input"], + coverage_ratio=self._coverage_ratio, dtype="float32") - if self._input_size == self.output_size: + if self._sizes["input"] == self._sizes["output"]: detected_face.reference = detected_face.feed else: detected_face.load_reference_face(item["image"], - size=self.output_size, - coverage_ratio=self.coverage_ratio, + size=self._sizes["output"], + coverage_ratio=self._coverage_ratio, dtype="float32") logger.trace("Loaded aligned faces: '%s'", item["filename"]) @@ -951,48 +937,21 @@ def _predict(self, feed_faces, batch_size=None): """ logger.trace("Predicting: Batchsize: %s", len(feed_faces)) feed = [feed_faces] - if self._model.feed_mask: - feed.append(np.repeat(self._input_mask, feed_faces.shape[0], axis=0)) logger.trace("Input shape(s): %s", [item.shape for item in feed]) - predicted = self._predictor(feed, batch_size=batch_size) + predicted = self._model.model.predict(feed, batch_size=batch_size) predicted = predicted if isinstance(predicted, list) else [predicted] logger.trace("Output shape(s): %s", [predict.shape for predict in predicted]) - predicted = self._filter_multi_out(predicted) - - # Compile masks into alpha channel or keep raw faces - predicted = np.concatenate(predicted, axis=-1) if len(predicted) == 2 else predicted[0] - 
predicted = predicted.astype("float32") + # Only take last output(s) + if predicted[-1].shape[-1] == 1: # Merge mask to alpha channel + predicted = np.concatenate(predicted[-2:], axis=-1).astype("float32") + else: + predicted = predicted[-1].astype("float32") logger.trace("Final shape: %s", predicted.shape) return predicted - def _filter_multi_out(self, predicted): - """ Filter the model output to just the required image. - - Some models have multi-scale outputs, so just make sure we take the largest - output. - - Parameters - ---------- - predicted: :class:`numpy.ndarray` - The predictions retrieved from the Faceswap model. - - Returns - ------- - :class:`numpy.ndarray` - The predictions with any superfluous outputs removed. - """ - if not predicted: - return predicted - face = predicted[self._output_indices["face"]] - mask_idx = self._output_indices["mask"] - mask = predicted[mask_idx] if mask_idx is not None else None - predicted = [face, mask] if mask is not None else [face] - logger.trace("Filtered output shape(s): %s", [predict.shape for predict in predicted]) - return predicted - def _queue_out_frames(self, batch, swapped_faces): """ Compile the batch back to original frames and put to the Out Queue. 
diff --git a/scripts/extract.py b/scripts/extract.py index 69b96fef34..af66043486 100644 --- a/scripts/extract.py +++ b/scripts/extract.py @@ -58,6 +58,7 @@ def __init__(self, arguments): maskers, configfile=configfile, multiprocess=not self._args.singleprocess, + exclude_gpus=self._args.exclude_gpus, rotate_images=self._args.rotate_images, min_size=self._args.min_size, normalize_method=normalization) diff --git a/scripts/gui.py b/scripts/gui.py index a390f11a4b..6675ba1d55 100644 --- a/scripts/gui.py +++ b/scripts/gui.py @@ -8,7 +8,7 @@ from lib.gui import (TaskBar, CliOptions, CommandNotebook, ConsoleOut, Session, DisplayNotebook, get_images, initialize_images, initialize_config, LastSession, - MainMenuBar, ProcessWrapper, StatusBar) + MainMenuBar, preview_trigger, ProcessWrapper, StatusBar) logger = logging.getLogger(__name__) # pylint: disable=invalid-name @@ -30,6 +30,7 @@ def __init__(self, debug): self.objects = dict() get_images().delete_preview() + preview_trigger().clear() self.protocol("WM_DELETE_WINDOW", self.close_app) self.build_gui() self._last_session = LastSession(self._config) @@ -162,6 +163,7 @@ def close_app(self, *args): # pylint: disable=unused-argument self._last_session.save() get_images().delete_preview() + preview_trigger().clear() self.quit() logger.debug("Closed GUI") sys.exit(0) diff --git a/scripts/train.py b/scripts/train.py index fecc53dfee..4763b4f864 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -9,20 +9,17 @@ from time import sleep import cv2 -import tensorflow as tf -from keras.backend.tensorflow_backend import set_session from lib.image import read_image from lib.keypress import KBHit from lib.multithreading import MultiThread -from lib.utils import (get_folder, get_image_paths, deprecation_warning, FaceswapError, - _image_extensions) +from lib.utils import (get_folder, get_image_paths, FaceswapError, _image_extensions) from plugins.plugin_loader import PluginLoader logger = logging.getLogger(__name__) # pylint: 
disable=invalid-name -class Train(): +class Train(): # pylint:disable=too-few-public-methods """ The Faceswap Training Process. The training process is responsible for training a model on a set of source faces and a set of @@ -42,8 +39,11 @@ def __init__(self, arguments): self._args = arguments self._timelapse = self._set_timelapse() self._images = self._get_images() + self._gui_preview_trigger = os.path.join(os.path.realpath(os.path.dirname(sys.argv[0])), + "lib", "gui", ".cache", ".preview_trigger") self._stop = False self._save_now = False + self._refresh_preview = False self._preview_buffer = dict() self._lock = Lock() @@ -59,20 +59,6 @@ def _image_size(self): logger.debug("Training image size: %s", size) return size - @property - def _alignments_paths(self): - """ dict: The alignments paths for each of the source and destination faces. Key is the - side, value is the path to the alignments file """ - alignments_paths = dict() - for side in ("a", "b"): - alignments_path = getattr(self._args, "alignments_path_{}".format(side)) - if not alignments_path: - image_path = getattr(self._args, "input_{}".format(side)) - alignments_path = os.path.join(image_path, "alignments.fsa") - alignments_paths[side] = alignments_path - logger.debug("Alignments paths: %s", alignments_paths) - return alignments_paths - def _set_timelapse(self): """ Set time-lapse paths if requested. 
@@ -143,21 +129,9 @@ def process(self): """ logger.debug("Starting Training Process") logger.info("Training data directory: %s", self._args.model_dir) - - # TODO Move these args to config and remove these deprecation warnings - if hasattr(self._args, "warp_to_landmarks") and self._args.warp_to_landmarks: - deprecation_warning("`-wl`, ``--warp-to-landmarks``", - additional_info="This option will be available within training " - "config settings (/config/train.ini).") - if hasattr(self._args, "no_augment_color") and self._args.no_augment_color: - deprecation_warning("`-nac`, ``--no-augment-color``", - additional_info="This option will be available within training " - "config settings (/config/train.ini).") thread = self._start_thread() # from lib.queue_manager import queue_manager; queue_manager.debug_monitor(1) - err = self._monitor(thread) - self._end_thread(thread, err) logger.debug("Completed Training Process") @@ -208,16 +182,13 @@ def _training(self): sleep(1) # Let preview instructions flush out to logger logger.debug("Commencing Training") logger.info("Loading data, this may take a while...") - - if self._args.allow_growth: - self._set_tf_allow_growth() model = self._load_model() trainer = self._load_trainer(model) self._run_training_cycle(model, trainer) except KeyboardInterrupt: try: logger.debug("Keyboard Interrupt Caught. 
Saving Weights and exiting") - model.save_models() + model.save() trainer.clear_tensorboard() except KeyboardInterrupt: logger.info("Saving model weights has been cancelled!") @@ -234,25 +205,13 @@ def _load_model(self): The requested model plugin """ logger.debug("Loading Model") - model_dir = get_folder(self._args.model_dir) - configfile = self._args.configfile if hasattr(self._args, "configfile") else None - augment_color = not self._args.no_augment_color + model_dir = str(get_folder(self._args.model_dir)) model = PluginLoader.get_model(self.trainer_name)( model_dir, - gpus=self._args.gpus, - configfile=configfile, - snapshot_interval=self._args.snapshot_interval, - no_logs=self._args.no_logs, - warp_to_landmarks=self._args.warp_to_landmarks, - augment_color=augment_color, - no_flip=self._args.no_flip, + self._args, training_image_size=self._image_size, - alignments_paths=self._alignments_paths, - preview_scale=self._args.preview_scale, - pingpong=self._args.pingpong, - memory_saving_gradients=self._args.memory_saving_gradients, - optimizer_savings=self._args.optimizer_savings, predict=False) + model.build() logger.debug("Loaded Model") return model @@ -297,28 +256,37 @@ def _run_training_cycle(self, model, trainer): else: display_func = None - for iteration in range(0, self._args.iterations): + for iteration in range(1, self._args.iterations + 1): logger.trace("Training iteration: %s", iteration) - save_iteration = iteration % self._args.save_interval == 0 - viewer = display_func if save_iteration or self._save_now else None + save_iteration = iteration % self._args.save_interval == 0 or iteration == 1 + + if save_iteration or self._save_now or self._refresh_preview: + viewer = display_func + else: + viewer = None timelapse = self._timelapse if save_iteration else None trainer.train_one_step(viewer, timelapse) if self._stop: logger.debug("Stop received. 
Terminating") break + + if self._refresh_preview and viewer is not None: + if self._args.redirect_gui: + print("\n") + logger.info("[Preview Updated]") + logger.debug("Removing gui trigger file: %s", self._gui_preview_trigger) + os.remove(self._gui_preview_trigger) + self._refresh_preview = False + if save_iteration: - logger.trace("Save Iteration: (iteration: %s", iteration) - if self._args.pingpong: - model.save_models() - trainer.pingpong.switch() - else: - model.save_models() + logger.debug("Save Iteration: (iteration: %s", iteration) + model.save() elif self._save_now: - logger.trace("Save Requested: (iteration: %s", iteration) - model.save_models() + logger.debug("Save Requested: (iteration: %s", iteration) + model.save() self._save_now = False logger.debug("Training cycle complete") - model.save_models() + model.save() trainer.clear_tensorboard() self._stop = True @@ -331,6 +299,7 @@ def _monitor(self, thread): ``True`` if there has been an error in the background thread otherwise ``False`` """ is_preview = self._args.preview + preview_trigger_set = False logger.debug("Launching Monitor") logger.info("===================================================") logger.info(" Starting") @@ -367,8 +336,13 @@ def _monitor(self, thread): logger.debug("Exit requested") break if is_preview and cv_key == ord("s"): + print("\n") logger.info("Save requested") self._save_now = True + if is_preview and cv_key == ord("r"): + print("\n") + logger.info("Refresh preview requested") + self._refresh_preview = True # Console Monitor if keypress.kbhit(): @@ -380,6 +354,18 @@ def _monitor(self, thread): logger.info("Save requested") self._save_now = True + # GUI Preview trigger update monitor + if self._args.redirect_gui: + if not preview_trigger_set and os.path.isfile(self._gui_preview_trigger): + print("\n") + logger.info("Refresh preview requested") + self._refresh_preview = True + preview_trigger_set = True + + if preview_trigger_set and not self._refresh_preview: + 
logger.debug("Resetting GUI preview trigger") + preview_trigger_set = False + sleep(1) except KeyboardInterrupt: logger.debug("Keyboard Interrupt received") @@ -388,20 +374,6 @@ def _monitor(self, thread): logger.debug("Closed Monitor") return err - @staticmethod - def _set_tf_allow_growth(): - """ Allow TensorFlow to manage VRAM growth. - - Enables the Tensorflow allow_growth option if requested in the command line arguments - """ - # pylint: disable=no-member - logger.debug("Setting Tensorflow 'allow_growth' option") - config = tf.ConfigProto() - config.gpu_options.allow_growth = True - config.gpu_options.visible_device_list = "0" - set_session(tf.Session(config=config)) - logger.debug("Set Tensorflow 'allow_growth' option") - def _show(self, image, name=""): """ Generate the preview and write preview file output. @@ -415,28 +387,28 @@ def _show(self, image, name=""): The name of the image for saving or display purposes. If an empty string is passed then it will automatically be names. 
Default: "" """ - logger.trace("Updating preview: (name: %s)", name) + logger.debug("Updating preview: (name: %s)", name) try: scriptpath = os.path.realpath(os.path.dirname(sys.argv[0])) if self._args.write_image: - logger.trace("Saving preview to disk") + logger.debug("Saving preview to disk") img = "training_preview.jpg" imgfile = os.path.join(scriptpath, img) cv2.imwrite(imgfile, image) # pylint: disable=no-member - logger.trace("Saved preview to: '%s'", img) + logger.debug("Saved preview to: '%s'", img) if self._args.redirect_gui: - logger.trace("Generating preview for GUI") + logger.debug("Generating preview for GUI") img = ".gui_training_preview.jpg" imgfile = os.path.join(scriptpath, "lib", "gui", ".cache", "preview", img) cv2.imwrite(imgfile, image) # pylint: disable=no-member - logger.trace("Generated preview for GUI: '%s'", img) + logger.debug("Generated preview for GUI: '%s'", img) if self._args.preview: - logger.trace("Generating preview for display: '%s'", name) + logger.debug("Generating preview for display: '%s'", name) with self._lock: self._preview_buffer[name] = image - logger.trace("Generated preview for display: '%s'", name) + logger.debug("Generated preview for display: '%s'", name) except Exception as err: logging.error("could not preview sample") raise err - logger.trace("Updated preview: (name: %s)", name) + logger.debug("Updated preview: (name: %s)", name) diff --git a/setup.py b/setup.py index 0a5320ef9f..62f43e80ea 100755 --- a/setup.py +++ b/setup.py @@ -16,8 +16,7 @@ INSTALL_FAILED = False # Revisions of tensorflow-gpu and cuda/cudnn requirements -TENSORFLOW_REQUIREMENTS = {"==1.12.0": ["9.0", "7.2"], - ">=1.13.1,<1.16": ["10.0", "7.4"]} # TF 2.0 Not currently supported +TENSORFLOW_REQUIREMENTS = {">=2.2.0,<2.3.0": ["10.1", "7.6"]} # Mapping of Python packages to their conda names if different from pypi or in non-default channel CONDA_MAPPING = { # "opencv-python": ("opencv", "conda-forge"), # Periodic issues with conda-forge opencv @@ 
-167,10 +166,10 @@ def check_python(self): self.output.info("Installed Python: {0} {1}".format(self.py_version[0], self.py_version[1])) if not (self.py_version[0].split(".")[0] == "3" - and self.py_version[0].split(".")[1] in ("3", "4", "5", "6", "7") + and self.py_version[0].split(".")[1] in ("6", "7", "8") and self.py_version[1] == "64bit") and not self.updater: - self.output.error("Please run this script with Python version 3.3, 3.4, 3.5, 3.6 or " - "3.7 64bit and try again.") + self.output.error("Please run this script with Python version 3.6, 3.7 or 3.8 " + "64bit and try again.") sys.exit(1) def output_runtime_info(self): @@ -186,7 +185,7 @@ def check_pip(self): if self.updater: return try: - import pip # noqa pylint:disable=unused-import + import pip # noqa pylint:disable=unused-import,import-outside-toplevel except ImportError: self.output.error("Import pip failed. Please Install python3-pip and try again") sys.exit(1) @@ -202,7 +201,7 @@ def upgrade_pip(self): pipexe.append("--user") pipexe.append("pip") run(pipexe) - import pip + import pip # pylint:disable=import-outside-toplevel pip_version = pip.__version__ self.output.info("Installed pip: {}".format(pip_version)) @@ -256,7 +255,7 @@ def update_tf_dep(self): return self.output.warning( - "The minimum Tensorflow requirement is 1.12. \n" + "The minimum Tensorflow requirement is 2.2 \n" "Tensorflow currently has no official prebuild for your CUDA, cuDNN " "combination.\nEither install a combination that Tensorflow supports or " "build and install your own tensorflow-gpu.\r\n" @@ -461,7 +460,7 @@ def cuda_check_linux(self): break if not chk: self.output.error("CUDA not found. 
Install and try again.\n" - "Recommended version: CUDA 9.0 cuDNN 7.1.3\n" + "Recommended version: CUDA 10.1 cuDNN 7.6\n" "CUDA: https://developer.nvidia.com/cuda-downloads\n" "cuDNN: https://developer.nvidia.com/rdp/cudnn-download") return @@ -644,6 +643,8 @@ def install_python_packages(self): verbose = pkg.startswith("tensorflow") or self.env.updater if self.conda_installer(pkg, verbose=verbose, channel=channel, conda_only=False): continue + if pkg.startswith("tensorflow-gpu"): + self._tensorflow_dependency_install() self.pip_installer(pkg) def install_conda_packages(self): @@ -687,7 +688,7 @@ def pip_installer(self, package): pipexe = [sys.executable, "-m", "pip"] # hide info/warning and fix cache hang pipexe.extend(["install", "--no-cache-dir"]) - if not self.env.updater: + if not self.env.updater and not package.startswith("tensorflow"): pipexe.append("-qq") # install as user to solve perm restriction if not self.env.is_admin and not self.env.is_virtualenv: @@ -701,6 +702,25 @@ def pip_installer(self, package): self.output.warning("Couldn't install {} with pip. 
" "Please install this package manually".format(package)) + def _tensorflow_dependency_install(self): + """ Install the Cuda/cuDNN dependencies from Conda when tensorflow is not available + in Conda """ + # TODO This will need to be more robust if/when we accept multiple Tensorflow Versions + versions = list(TENSORFLOW_REQUIREMENTS.values())[-1] + condaexe = ["conda", "search"] + pkgs = ["cudatoolkit", "cudnn"] + for pkg in pkgs: + chk = Popen(condaexe + [pkg], shell=True, stdout=PIPE) + available = [line.split() + for line in chk.communicate()[0].decode(self.env.encoding).splitlines() + if line.startswith(pkg)] + compatible = [req for req in available + if (pkg == "cudatoolkit" and req[1].startswith(versions[0])) + or (pkg == "cudnn" and versions[0] in req[2] + and req[1].startswith(versions[1]))] + candidate = "==".join(sorted(compatible, key=lambda x: x[1])[-1][:2]) + self.conda_installer(candidate, verbose=True, conda_only=True) + class Tips(): """ Display installation Tips """ diff --git a/tests/__init__.py b/tests/__init__.py index e69de29bb2..e0783c0b38 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python3 +""" Use custom Importer for importing Keras for tests """ +import sys +from lib.utils import KerasFinder + + +sys.meta_path.insert(0, KerasFinder()) diff --git a/tests/lib/model/initializers_test.py b/tests/lib/model/initializers_test.py index 5d6e85af25..01c44b4b79 100644 --- a/tests/lib/model/initializers_test.py +++ b/tests/lib/model/initializers_test.py @@ -4,15 +4,14 @@ Adapted from Keras tests. 
""" -from keras import initializers as k_initializers from keras import backend as K +from keras import initializers as k_initializers import pytest import numpy as np from lib.model import initializers from lib.utils import get_backend - CONV_SHAPE = (3, 3, 256, 2048) CONV_ID = get_backend().upper() @@ -41,7 +40,7 @@ def test_icnr(tensor_shape): tensor_shape: tuple The shape of the tensor to feed to the initializer """ - fan_in, _ = k_initializers._compute_fans(tensor_shape) # pylint:disable=protected-access + fan_in, _ = initializers.compute_fans(tensor_shape) std = np.sqrt(2. / fan_in) _runner(initializers.ICNR(initializer=k_initializers.he_uniform(), scale=2), tensor_shape, target_mean=0, target_std=std) @@ -56,7 +55,7 @@ def test_convolution_aware(tensor_shape): tensor_shape: tuple The shape of the tensor to feed to the initializer """ - fan_in, _ = k_initializers._compute_fans(tensor_shape) # pylint:disable=protected-access + fan_in, _ = initializers.compute_fans(tensor_shape) std = np.sqrt(2. 
/ fan_in) - _runner(initializers.ConvolutionAware(seed=123, init=True), tensor_shape, + _runner(initializers.ConvolutionAware(seed=123), tensor_shape, target_mean=0, target_std=std) diff --git a/tests/lib/model/layers_test.py b/tests/lib/model/layers_test.py index 9eb4463f83..53362a0253 100644 --- a/tests/lib/model/layers_test.py +++ b/tests/lib/model/layers_test.py @@ -8,13 +8,12 @@ import pytest import numpy as np from keras import Input, Model, backend as K -from keras.utils.generic_utils import has_arg from numpy.testing import assert_allclose -from lib.model import layers +from lib.model import layers, normalization from lib.utils import get_backend - +from tests.utils import has_arg CONV_SHAPE = (3, 3, 256, 2048) CONV_ID = get_backend().upper() @@ -52,7 +51,7 @@ def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None, weights = layer.get_weights() layer.set_weights(weights) - if isinstance(layer, layers.ReflectionPadding2D): + if isinstance(layer, (layers.ReflectionPadding2D, normalization.InstanceNormalization)): layer.build(input_shape) expected_output_shape = layer.compute_output_shape(input_shape) @@ -110,7 +109,7 @@ def test_pixel_shuffler(dummy): # pylint:disable=unused-argument @pytest.mark.skipif(get_backend() == "amd", reason="amd does not support this layer") @pytest.mark.parametrize('dummy', [None], ids=[get_backend().upper()]) def test_subpixel_upscaling(dummy): # pylint:disable=unused-argument - """ Sub Pixel Upscaling layer test """ + """ Sub Pixel up-scaling layer test """ layer_test(layers.SubPixelUpscaling, input_shape=(2, 4, 4, 1024)) diff --git a/tests/lib/model/losses_test.py b/tests/lib/model/losses_test.py index 0276a1b193..5765713f75 100644 --- a/tests/lib/model/losses_test.py +++ b/tests/lib/model/losses_test.py @@ -9,6 +9,7 @@ from numpy.testing import assert_allclose from keras import backend as K +from keras import losses as k_losses from keras.layers import Conv2D from keras.models import Sequential from 
keras.optimizers import Adam @@ -17,27 +18,60 @@ from lib.utils import get_backend -_PARAMS = [(losses.gradient_loss, (1, 5, 6, 7), (1, 5, 6)), - (losses.generalized_loss, (5, 6, 7), (5, 6)), +_PARAMS = [(losses.GeneralizedLoss(), (2, 16, 16)), + (losses.GradientLoss(), (2, 16, 16)), # TODO Make sure these output dimensions are correct - (losses.l_inf_norm, (1, 5, 6, 7), (1, 1, 1)), + (losses.GMSDLoss(), (2, 1, 1)), # TODO Make sure these output dimensions are correct - (losses.gmsd_loss, (1, 5, 6, 7), (1, 1, 1))] -_IDS = ["gradient_loss", "generalized_loss", "l_inf_norm", "gmsd_loss"] + (losses.LInfNorm(), (2, 1, 1))] +_IDS = ["GeneralizedLoss", "GradientLoss", "GMSDLoss", "LInfNorm"] _IDS = ["{}[{}]".format(loss, get_backend().upper()) for loss in _IDS] -@pytest.mark.parametrize(["loss_func", "input_shape", "output_shape"], _PARAMS, ids=_IDS) -def test_objective_shapes(loss_func, input_shape, output_shape): +@pytest.mark.parametrize(["loss_func", "output_shape"], _PARAMS, ids=_IDS) +def test_loss_output(loss_func, output_shape): """ Basic shape tests for loss functions. 
""" - y_a = K.variable(np.random.random(input_shape)) - y_b = K.variable(np.random.random(input_shape)) + if get_backend() == "amd" and isinstance(loss_func, losses.GMSDLoss): + pytest.skip("GMSD Loss is not currently compatible with PlaidML") + y_a = K.variable(np.random.random((2, 16, 16, 3))) + y_b = K.variable(np.random.random((2, 16, 16, 3))) objective_output = loss_func(y_a, y_b) - assert K.eval(objective_output).shape == output_shape + if get_backend() == "amd": + assert K.eval(objective_output).shape == output_shape + else: + output = objective_output.numpy() + assert output.dtype == "float32" and not np.isnan(output) + + +_PLPARAMS = _PARAMS + [(k_losses.mean_absolute_error, (2, 16, 16)), + (k_losses.mean_squared_error, (2, 16, 16)), + (k_losses.logcosh, (2, 16, 16)), + (losses.DSSIMObjective(), ())] +_PLIDS = ["GeneralizedLoss", "GradientLoss", "GMSDLoss", "LInfNorm", "mae", "mse", "logcosh", + "DSSIMObjective"] +_PLIDS = ["{}[{}]".format(loss, get_backend().upper()) for loss in _PLIDS] + + +@pytest.mark.parametrize(["loss_func", "output_shape"], _PLPARAMS, ids=_PLIDS) +def test_penalized_loss(loss_func, output_shape): + """ Test penalized loss wrapper works as expected """ + if get_backend() == "amd": + if isinstance(loss_func, losses.GMSDLoss): + pytest.skip("GMSD Loss is not currently compatible with PlaidML") + if hasattr(loss_func, "__name__") and loss_func.__name__ == "logcosh": + pytest.skip("LogCosh Loss is not currently compatible with PlaidML") + y_a = K.variable(np.random.random((2, 16, 16, 4))) + y_b = K.variable(np.random.random((2, 16, 16, 3))) + p_loss = losses.PenalizedLoss(loss_func) + output = p_loss(y_a, y_b) + if get_backend() == "amd": + assert K.eval(output).shape == output_shape + else: + output = output.numpy() + assert output.dtype == "float32" and not np.isnan(output) @pytest.mark.parametrize('dummy', [None], ids=[get_backend().upper()]) -@pytest.mark.xfail(get_backend() == "amd", reason="plaidML generates NaNs") def 
test_dssim_channels_last(dummy): # pylint:disable=unused-argument """ Basic test for DSSIM Loss """ prev_data = K.image_data_format() diff --git a/tests/lib/model/nn_blocks_test.py b/tests/lib/model/nn_blocks_test.py index ebe8256f8c..bb9676c63b 100644 --- a/tests/lib/model/nn_blocks_test.py +++ b/tests/lib/model/nn_blocks_test.py @@ -12,14 +12,9 @@ from keras import Input, Model, backend as K from numpy.testing import assert_allclose -from lib.model.nn_blocks import NNBlocks +from lib.model import nn_blocks from lib.utils import get_backend -_PARAMS = ["use_icnr_init", "use_convaware_init", "use_reflect_padding"] -_VALUES = list(product([True, False], repeat=len(_PARAMS))) -_IDS = ["{}[{}]".format("|".join([_PARAMS[idx] for idx, b in enumerate(v) if b]), - get_backend().upper()) for v in _VALUES] - def block_test(layer_func, kwargs={}, input_shape=None): """Test routine for faceswap neural network blocks. @@ -61,16 +56,23 @@ def block_test(layer_func, kwargs={}, input_shape=None): return actual_output +_PARAMS = ["use_icnr_init", "use_convaware_init", "use_reflect_padding"] +_VALUES = list(product([True, False], repeat=len(_PARAMS))) +_IDS = ["{}[{}]".format("|".join([_PARAMS[idx] for idx, b in enumerate(v) if b]), + get_backend().upper()) for v in _VALUES] + + @pytest.mark.parametrize(_PARAMS, _VALUES, ids=_IDS) def test_blocks(use_icnr_init, use_convaware_init, use_reflect_padding): """ Test for all blocks contained within the NNBlocks Class """ - cls_ = NNBlocks(use_icnr_init=use_icnr_init, - use_convaware_init=use_convaware_init, - use_reflect_padding=use_reflect_padding) - block_test(cls_.conv2d, input_shape=(2, 5, 5, 128), kwargs=dict(filters=1024, kernel_size=3)) - block_test(cls_.conv, input_shape=(2, 8, 8, 32), kwargs=dict(filters=64)) - block_test(cls_.conv_sep, input_shape=(2, 8, 8, 32), kwargs=dict(filters=64)) - block_test(cls_.upscale, input_shape=(2, 4, 4, 128), kwargs=dict(filters=64)) - block_test(cls_.res_block, input_shape=(2, 2, 2, 64), 
kwargs=dict(filters=64)) - block_test(cls_.upscale2x, input_shape=(2, 4, 4, 128), kwargs=dict(filters=64, fast=False)) - block_test(cls_.upscale2x, input_shape=(2, 4, 4, 128), kwargs=dict(filters=64, fast=True)) + config = dict(icnr_init=use_icnr_init, + conv_aware_init=use_convaware_init, + reflect_padding=use_reflect_padding) + nn_blocks.set_config(config) + block_test(nn_blocks.Conv2DOutput(64, 3), input_shape=(2, 8, 8, 32)) + block_test(nn_blocks.Conv2DBlock(64), input_shape=(2, 8, 8, 32)) + block_test(nn_blocks.SeparableConv2DBlock(64), input_shape=(2, 8, 8, 32)) + block_test(nn_blocks.UpscaleBlock(64), input_shape=(2, 4, 4, 128)) + block_test(nn_blocks.Upscale2xBlock(64, fast=True), input_shape=(2, 4, 4, 128)) + block_test(nn_blocks.Upscale2xBlock(64, fast=False), input_shape=(2, 4, 4, 128)) + block_test(nn_blocks.ResidualBlock(64), input_shape=(2, 4, 4, 64)) diff --git a/tests/lib/model/normalization_test.py b/tests/lib/model/normalization_test.py index 49715b0576..53bf34d92d 100644 --- a/tests/lib/model/normalization_test.py +++ b/tests/lib/model/normalization_test.py @@ -10,7 +10,7 @@ from lib.model import normalization from lib.utils import get_backend -from .layers_test import layer_test +from tests.lib.model.layers_test import layer_test @pytest.mark.parametrize('dummy', [None], ids=[get_backend().upper()]) diff --git a/tests/lib/model/optimizers_test.py b/tests/lib/model/optimizers_test.py index 09c446d953..6a9642117a 100644 --- a/tests/lib/model/optimizers_test.py +++ b/tests/lib/model/optimizers_test.py @@ -8,23 +8,22 @@ from keras import optimizers as k_optimizers from keras.layers import Dense, Activation from keras.models import Sequential -from keras.utils import test_utils -from keras.utils.np_utils import to_categorical import numpy as np from numpy.testing import assert_allclose -from lib.model import optimizers from lib.utils import get_backend +from tests.utils import generate_test_data, to_categorical + def get_test_data(): - """ Obtain 
radomized test data for training """ + """ Obtain randomized test data for training """ np.random.seed(1337) - (x_train, y_train), _ = test_utils.get_test_data(num_train=1000, - num_test=200, - input_shape=(10,), - classification=True, - num_classes=2) + (x_train, y_train), _ = generate_test_data(num_train=1000, + num_test=200, + input_shape=(10,), + classification=True, + num_classes=2) y_train = to_categorical(y_train) return x_train, y_train @@ -34,44 +33,46 @@ def _test_optimizer(optimizer, target=0.75): model = Sequential() model.add(Dense(10, input_shape=(x_train.shape[1],))) - model.add(Activation('relu')) + model.add(Activation("relu")) model.add(Dense(y_train.shape[1])) - model.add(Activation('softmax')) - model.compile(loss='categorical_crossentropy', + model.add(Activation("softmax")) + model.compile(loss="categorical_crossentropy", optimizer=optimizer, - metrics=['accuracy']) + metrics=["accuracy"]) history = model.fit(x_train, y_train, epochs=2, batch_size=16, verbose=0) - # TODO PlaidML fails this test - assert history.history['acc'][-1] >= target + accuracy = "acc" if get_backend() == "amd" else "accuracy" + assert history.history[accuracy][-1] >= target config = k_optimizers.serialize(optimizer) optim = k_optimizers.deserialize(config) new_config = k_optimizers.serialize(optim) - new_config['class_name'] = new_config['class_name'].lower() + new_config["class_name"] = new_config["class_name"].lower() assert config == new_config # Test constraints. + if get_backend() == "amd": + # NB: PlaidML does not support constraints, so this test skipped for AMD backends + return model = Sequential() dense = Dense(10, input_shape=(x_train.shape[1],), kernel_constraint=lambda x: 0. * x + 1., bias_constraint=lambda x: 0. 
* x + 2.,) model.add(dense) - model.add(Activation('relu')) + model.add(Activation("relu")) model.add(Dense(y_train.shape[1])) - model.add(Activation('softmax')) - model.compile(loss='categorical_crossentropy', + model.add(Activation("softmax")) + model.compile(loss="categorical_crossentropy", optimizer=optimizer, - metrics=['accuracy']) + metrics=["accuracy"]) model.train_on_batch(x_train[:10], y_train[:10]) kernel, bias = dense.get_weights() assert_allclose(kernel, 1.) assert_allclose(bias, 2.) -@pytest.mark.parametrize('dummy', [None], ids=[get_backend().upper()]) -@pytest.mark.xfail(get_backend() == "amd", reason="plaidML fails the standard accuracy test") +@pytest.mark.parametrize("dummy", [None], ids=[get_backend().upper()]) def test_adam(dummy): # pylint:disable=unused-argument - """ Test for custom adam optimizer """ - _test_optimizer(optimizers.Adam()) - _test_optimizer(optimizers.Adam(decay=1e-3)) + """ Test for custom Adam optimizer """ + _test_optimizer(k_optimizers.Adam(), target=0.6) + _test_optimizer(k_optimizers.Adam(decay=1e-3), target=0.6) diff --git a/tests/startup_test.py b/tests/startup_test.py index 4389fcacaa..786907f199 100644 --- a/tests/startup_test.py +++ b/tests/startup_test.py @@ -4,15 +4,26 @@ import inspect import pytest + +import keras from keras import backend as K from lib.utils import get_backend +_BACKEND = get_backend() + @pytest.mark.parametrize('dummy', [None], ids=[get_backend().upper()]) def test_backend(dummy): # pylint:disable=unused-argument """ Sanity check to ensure that Keras backend is returning the correct object type. 
""" - backend = get_backend() test_var = K.variable((1, 1, 4, 4)) lib = inspect.getmodule(test_var).__name__.split(".")[0] - assert (backend == "cpu" and lib == "tensorflow") or (backend == "amd" and lib == "plaidml") + assert (_BACKEND == "cpu" and lib == "tensorflow") or (_BACKEND == "amd" and lib == "plaidml") + + +@pytest.mark.parametrize('dummy', [None], ids=[get_backend().upper()]) +def test_keras(dummy): # pylint:disable=unused-argument + """ Sanity check to ensure that tensorflow keras is being used for CPU and standard + keras for AMD. """ + assert ((_BACKEND == "cpu" and keras.__version__.endswith("-tf")) or + (_BACKEND == "amd" and not keras.__version__.endswith("-tf"))) diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 0000000000..248ec0a25b --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 +""" Utils imported from Keras as their location changes between Tensorflow Keras and standard +Keras. Also ensures testing consistency """ +import inspect +import sys + +import numpy as np + + +def generate_test_data(num_train=1000, num_test=500, input_shape=(10,), + output_shape=(2,), + classification=True, num_classes=2): + """Generates test data to train a model on. classification=True overrides output_shape (i.e. + output_shape is set to (1,)) and the output consists in integers in [0, num_classes-1]. + + Otherwise: float output with shape output_shape. 
+ """ + samples = num_train + num_test + if classification: + var_y = np.random.randint(0, num_classes, size=(samples,)) + var_x = np.zeros((samples,) + input_shape, dtype=np.float32) + for i in range(samples): + var_x[i] = np.random.normal(loc=var_y[i], scale=0.7, size=input_shape) + else: + y_loc = np.random.random((samples,)) + var_x = np.zeros((samples,) + input_shape, dtype=np.float32) + var_y = np.zeros((samples,) + output_shape, dtype=np.float32) + for i in range(samples): + var_x[i] = np.random.normal(loc=y_loc[i], scale=0.7, size=input_shape) + var_y[i] = np.random.normal(loc=y_loc[i], scale=0.7, size=output_shape) + + return (var_x[:num_train], var_y[:num_train]), (var_x[num_train:], var_y[num_train:]) + + +def to_categorical(var_y, num_classes=None, dtype='float32'): + """Converts a class vector (integers) to binary class matrix. + E.g. for use with categorical_crossentropy. + + Parameters + ---------- + var_y: int + Class vector to be converted into a matrix (integers from 0 to num_classes). + num_classes: int + Total number of classes. + dtype: str + The data type expected by the input, as a string (`float32`, `float64`, `int32`...) + + Returns + ------- + tensor + A binary matrix representation of the input. The classes axis is placed last. + + Example + ------- + >>> # Consider an array of 5 labels out of a set of 3 classes {0, 1, 2}: + >>> labels + >>> array([0, 2, 1, 2, 0]) + >>> # `to_categorical` converts this into a matrix with as many columns as there are classes. + >>> # The number of rows stays the same. 
+ >>> to_categorical(labels) + >>> array([[ 1., 0., 0.], + >>> [ 0., 0., 1.], + >>> [ 0., 1., 0.], + >>> [ 0., 0., 1.], + >>> [ 1., 0., 0.]], dtype=float32) + """ + var_y = np.array(var_y, dtype='int') + input_shape = var_y.shape + if input_shape and input_shape[-1] == 1 and len(input_shape) > 1: + input_shape = tuple(input_shape[:-1]) + var_y = var_y.ravel() + if not num_classes: + num_classes = np.max(var_y) + 1 + var_n = var_y.shape[0] + categorical = np.zeros((var_n, num_classes), dtype=dtype) + categorical[np.arange(var_n), var_y] = 1 + output_shape = input_shape + (num_classes,) + categorical = np.reshape(categorical, output_shape) + return categorical + + +def has_arg(func, name, accept_all=False): + """Checks if a callable accepts a given keyword argument. + + For Python 2, checks if there is an argument with the given name. + For Python 3, checks if there is an argument with the given name, and also whether this + argument can be called with a keyword (i.e. if it is not a positional-only argument). + + Parameters + ---------- + func: object + Callable to inspect. + name: str + Check if `func` can be called with `name` as a keyword argument. + accept_all: bool, optional + What to return if there is no parameter called `name` but the function accepts a + `**kwargs` argument. Default: ``False`` + + Returns + ------- + bool + Whether `func` accepts a `name` keyword argument. 
+ """ + if sys.version_info < (3,): + arg_spec = inspect.getargspec(func) + if accept_all and arg_spec.keywords is not None: + return True + return (name in arg_spec.args) + elif sys.version_info < (3, 3): + arg_spec = inspect.getfullargspec(func) + if accept_all and arg_spec.varkw is not None: + return True + return (name in arg_spec.args or + name in arg_spec.kwonlyargs) + else: + signature = inspect.signature(func) + parameter = signature.parameters.get(name) + if parameter is None: + if accept_all: + for param in signature.parameters.values(): + if param.kind == inspect.Parameter.VAR_KEYWORD: + return True + return False + return (parameter.kind in (inspect.Parameter.POSITIONAL_OR_KEYWORD, + inspect.Parameter.KEYWORD_ONLY)) diff --git a/tools/alignments/alignments.py b/tools/alignments/alignments.py index 8fd277df73..8100c7b20f 100644 --- a/tools/alignments/alignments.py +++ b/tools/alignments/alignments.py @@ -4,9 +4,8 @@ import logging from .media import AlignmentData -from .jobs import (Check, Dfl, Draw, Extract, Fix, Merge, # noqa pylint: disable=unused-import +from .jobs import (Check, Dfl, Draw, Extract, Merge, # noqa pylint: disable=unused-import Rename, RemoveAlignments, Sort, Spatial, UpdateHashes) -from .jobs_manual import Manual # noqa pylint: disable=unused-import logger = logging.getLogger(__name__) # pylint: disable=invalid-name @@ -58,9 +57,6 @@ def process(self): Launches the selected alignments job. """ - if self.args.job == "manual": - logger.warning("The 'manual' job is deprecated and will be removed from a future " - "update. Please use the new 'manual' tool.") if self.args.job == "update-hashes": job = UpdateHashes elif self.args.job.startswith("remove-"): diff --git a/tools/alignments/cli.py b/tools/alignments/cli.py index 60dd6f9291..793ce0e991 100644 --- a/tools/alignments/cli.py +++ b/tools/alignments/cli.py @@ -25,138 +25,125 @@ def get_argument_list(self): output_opts = " Use the output option (-o) to process results." 
align_eyes = " Can optionally use the align-eyes switch (-ae)." argument_list = list() - argument_list.append({ - "opts": ("-j", "--job"), - "action": Radio, - "type": str, - "choices": ("dfl", "draw", "extract", "fix", "manual", "merge", "missing-alignments", - "missing-frames", "leftover-faces", "multi-faces", "no-faces", - "remove-faces", "remove-frames", "rename", "sort", "spatial", - "update-hashes"), - "required": True, - "help": "R|Choose which action you want to perform. " - "NB: All actions require an alignments file (-a) to be passed in." - "\nL|'dfl': Create an alignments file from faces extracted from DeepFaceLab. " - "Specify 'dfl' as the 'alignments file' entry and the folder containing the " - "dfl faces as the 'faces folder' ('-a dfl -fc '" - "\nL|'draw': Draw landmarks on frames in the selected folder/video. A " - "subfolder will be created within the frames folder to hold the output." + - frames_dir + - "\nL|'extract': Re-extract faces from the source frames/video based on " - "alignment data. This is a lot quicker than re-detecting faces. Can pass in " - "the '-een' (--extract-every-n) parameter to only extract every nth frame." + - frames_and_faces_dir + align_eyes + - # TODO - Remove the fix job after a period of time. Implemented 2019/12/07 - "\nL|'fix': There was a bug when extracting from video which would shift all " - "the faces out by 1 frame. This was a shortlived bug, but this job will fix " - "alignments files that have this issue. NB: Only run this on alignments files " - "that you know need fixing." - "\nL|'manual': Manually view and edit landmarks." + frames_dir + - "\nL|'merge': Merge multiple alignment files into one. Specify a space " - "separated list of alignments files with the -a flag. Optionally specify a " - "faces (-fc) folder to filter the final alignments file to only those faces " - "that appear within the provided folder." - "\nL|'missing-alignments': Identify frames that do not exist in the " - "alignments file." 
+ output_opts + frames_dir + - "\nL|'missing-frames': Identify frames in the alignments file that do not " - "appear within the frames folder/video." + output_opts + frames_dir + - "\nL|'leftover-faces': Identify faces in the faces folder that do not exist " - "in the alignments file." + output_opts + faces_dir + - "\nL|'multi-faces': Identify where multiple faces exist within the alignments " - "file." + output_opts + frames_or_faces_dir + - "\nL|'no-faces': Identify frames that exist within the alignment file but no " - "faces were detected." + output_opts + frames_dir + - "\nL|'remove-faces': Remove deleted faces from an alignments file. The " - "original alignments file will be backed up." + faces_dir + - "\nL|'remove-frames': Remove deleted frames from an alignments file. The " - "original alignments file will be backed up." + frames_dir + - "\nL|'rename' - Rename faces to correspond with their parent frame and " - "position index in the alignments file (i.e. how they are named after running " - "extract)." + faces_dir + - "\nL|'sort': Re-index the alignments from left to right. For alignments " - "with multiple faces this will ensure that the left-most face is at index 0 " - "Optionally pass in a faces folder (-fc) to also rename extracted faces." - "\nL|'spatial': Perform spatial and temporal filtering to smooth alignments " - "(EXPERIMENTAL!)" - "\nL|'update-hashes': Recalculate the face hashes. Only use this if you have " - "altered the extracted faces (e.g. colour adjust). The files MUST be " - "named '_face index' (i.e. how they are named after running " - "extract)." + faces_dir}) - argument_list.append({"opts": ("-a", "--alignments_file"), - "action": FilesFullPaths, - "dest": "alignments_file", - "nargs": "+", - "group": "data", - "required": True, - "filetypes": "alignments", - "help": "Full path to the alignments file to be processed. 
If " - "merging alignments, then multiple files can be selected, " - "space separated"}) - argument_list.append({"opts": ("-fc", "-faces_folder"), - "action": DirFullPaths, - "dest": "faces_dir", - "group": "data", - "help": "Directory containing extracted faces."}) - argument_list.append({"opts": ("-fr", "-frames_folder"), - "action": DirOrFileFullPaths, - "dest": "frames_dir", - "filetypes": "video", - "group": "data", - "help": "Directory containing source frames " - "that faces were extracted from."}) - argument_list.append({ - "opts": ("-o", "--output"), - "action": Radio, - "type": str, - "choices": ("console", "file", "move"), - "group": "processing", - "default": "console", - "help": "R|How to output discovered items ('faces' and 'frames' only):" - "\nL|'console': Print the list of frames to the screen. (DEFAULT)" - "\nL|'file': Output the list of frames to a text file (stored within the " - " source directory)." - "\nL|'move': Move the discovered items to a sub-folder within the source " - "directory."}) - argument_list.append({"opts": ("-een", "--extract-every-n"), - "type": int, - "action": Slider, - "dest": "extract_every_n", - "min_max": (1, 100), - "default": 1, - "rounding": 1, - "group": "extract", - "help": "[Extract only] Extract every 'nth' frame. This option will " - "skip frames when extracting faces. 
For example a value of " - "1 will extract faces from every frame, a value of 10 will " - "extract faces from every 10th frame."}) - argument_list.append({"opts": ("-sz", "--size"), - "type": int, - "action": Slider, - "min_max": (128, 512), - "default": 256, - "group": "extract", - "rounding": 64, - "help": "[Extract only] The output size of extracted faces."}) - argument_list.append({"opts": ("-ae", "--align-eyes"), - "action": "store_true", - "dest": "align_eyes", - "group": "extract", - "default": False, - "help": "[Extract only] Perform extra alignment to ensure " - "left/right eyes are at the same height."}) - argument_list.append({"opts": ("-l", "--large"), - "action": "store_true", - "group": "extract", - "default": False, - "help": "[Extract only] Only extract faces that have not been " - "upscaled to the required size (`-sz`, `--size). Useful " - "for excluding low-res images from a training set."}) - argument_list.append({"opts": ("-dm", "--disable-monitor"), - "action": "store_true", - "group": "manual tool", - "dest": "disable_monitor", - "default": False, - "help": "Enable this option if manual " - "alignments window is closing " - "instantly. (Manual only)"}) + argument_list.append(dict( + opts=("-j", "--job"), + action=Radio, + type=str, + choices=("dfl", "draw", "extract", "merge", "missing-alignments", "missing-frames", + "leftover-faces", "multi-faces", "no-faces", "remove-faces", "remove-frames", + "rename", "sort", "spatial", "update-hashes"), + required=True, + help="R|Choose which action you want to perform. NB: All actions require an " + "alignments file (-a) to be passed in." + "\nL|'dfl': Create an alignments file from faces extracted from DeepFaceLab. " + "Specify 'dfl' as the 'alignments file' entry and the folder containing the dfl " + "faces as the 'faces folder' ('-a dfl -fc ')" + "\nL|'draw': Draw landmarks on frames in the selected folder/video. 
A subfolder " + "will be created within the frames folder to hold the output.{0}" + "\nL|'extract': Re-extract faces from the source frames/video based on alignment " + "data. This is a lot quicker than re-detecting faces. Can pass in the '-een' " + "(--extract-every-n) parameter to only extract every nth frame.{1}{2}" + "\nL|'merge': Merge multiple alignment files into one. Specify a space separated " + "list of alignments files with the -a flag. Optionally specify a faces (-fc) " + "folder to filter the final alignments file to only those faces that appear " + "within the provided folder." + "\nL|'missing-alignments': Identify frames that do not exist in the alignments " + "file.{3}{0}" + "\nL|'missing-frames': Identify frames in the alignments file that do not appear " + "within the frames folder/video.{3}{0}" + "\nL|'leftover-faces': Identify faces in the faces folder that do not exist in " + "the alignments file.{3}{4}" + "\nL|'multi-faces': Identify where multiple faces exist within the alignments " + "file.{3}{5}" + "\nL|'no-faces': Identify frames that exist within the alignment file but no " + "faces were detected.{3}{0}" + "\nL|'remove-faces': Remove deleted faces from an alignments file. The original " + "alignments file will be backed up.{4}" + "\nL|'remove-frames': Remove deleted frames from an alignments file. The " + "original alignments file will be backed up.{0}" + "\nL|'rename' - Rename faces to correspond with their parent frame and position " + "index in the alignments file (i.e. how they are named after running extract).{4}" + "\nL|'sort': Re-index the alignments from left to right. For alignments with " + "multiple faces this will ensure that the left-most face is at index 0 " + "Optionally pass in a faces folder (-fc) to also rename extracted faces." + "\nL|'spatial': Perform spatial and temporal filtering to smooth alignments " + "(EXPERIMENTAL!)" + "\nL|'update-hashes': Recalculate the face hashes. 
Only use this if you have " + "altered the extracted faces (e.g. colour adjust). The files MUST be named " + "'_face index' (i.e. how they are named after running extract)." + "{4}".format(frames_dir, frames_and_faces_dir, align_eyes, output_opts, + faces_dir, frames_or_faces_dir))) + argument_list.append(dict( + opts=("-a", "--alignments_file"), + action=FilesFullPaths, + dest="alignments_file", + nargs="+", + group="data", + required=True, + filetypes="alignments", + help="Full path to the alignments file to be processed. If merging alignments, then " + "multiple files can be selected, space separated")) + argument_list.append(dict( + opts=("-fc", "-faces_folder"), + action=DirFullPaths, + dest="faces_dir", + group="data", + help="Directory containing extracted faces.")) + argument_list.append(dict( + opts=("-fr", "-frames_folder"), + action=DirOrFileFullPaths, + dest="frames_dir", + filetypes="video", + group="data", + help="Directory containing source frames that faces were extracted from.")) + argument_list.append(dict( + opts=("-o", "--output"), + action=Radio, + type=str, + choices=("console", "file", "move"), + group="processing", + default="console", + help="R|How to output discovered items ('faces' and 'frames' only):" + "\nL|'console': Print the list of frames to the screen. (DEFAULT)" + "\nL|'file': Output the list of frames to a text file (stored within the source " + "directory)." + "\nL|'move': Move the discovered items to a sub-folder within the source " + "directory.")) + argument_list.append(dict( + opts=("-een", "--extract-every-n"), + type=int, + action=Slider, + dest="extract_every_n", + min_max=(1, 100), + default=1, + rounding=1, + group="extract", + help="[Extract only] Extract every 'nth' frame. This option will skip frames when " + "extracting faces. 
For example a value of 1 will extract faces from every frame, " + "a value of 10 will extract faces from every 10th frame.")) + argument_list.append(dict( + opts=("-sz", "--size"), + type=int, + action=Slider, + min_max=(128, 512), + default=256, + group="extract", + rounding=64, + help="[Extract only] The output size of extracted faces.")) + argument_list.append(dict( + opts=("-ae", "--align-eyes"), + action="store_true", + dest="align_eyes", + group="extract", + default=False, + help="[Extract only] Perform extra alignment to ensure left/right eyes are at the " + "same height.")) + argument_list.append(dict( + opts=("-l", "--large"), + action="store_true", + group="extract", + default=False, + help="[Extract only] Only extract faces that have not been upscaled to the required " + "size (`-sz`, `--size). Useful for excluding low-res images from a training " + "set.")) return argument_list diff --git a/tools/alignments/jobs.py b/tools/alignments/jobs.py index 647bb3dd38..b253e03e8d 100644 --- a/tools/alignments/jobs.py +++ b/tools/alignments/jobs.py @@ -544,55 +544,6 @@ def _select_valid_faces(self, frame, image): return valid_faces -class Fix(): - """ Fix alignments that were impacted by the 'out by one' bug when extracting from video - - TODO This is a temporary job that should be deleted after a period of time. 
- Implemented 2019/12/07 - """ - def __init__(self, alignments, arguments): - logger.debug("Initializing %s: (arguments: %s)", self.__class__.__name__, arguments) - self.alignments = alignments - logger.debug("Initialized %s", self.__class__.__name__) - - def process(self): - """ Run the fix process """ - if not self._check_file_needs_fixing(): - sys.exit(0) - logger.info("[FIXING FRAMES]") - self._fix() - self.alignments.save() - - def _check_file_needs_fixing(self): - """ Check that these alignments are in video format and that the first frame in the " - "alignments file does not already start with 1 """ - retval = True - min_frame = min(key for key in self.alignments.data.keys()) - logger.debug("First frame: '%s'", min_frame) - fname = os.path.splitext(min_frame)[0] - frame_id = fname.split("_")[-1] - if ("_") not in fname or not frame_id.isdigit(): - logger.info("Alignments file not generated from a video. Nothing to do.") - retval = False - elif int(frame_id) == 1: - logger.info("Alignments file does not require fixing. 
First frame: '%s'", fname) - retval = False - logger.debug(retval) - return retval - - def _fix(self): - """ Renumber frame names, reducing each one by 1 """ - frame_names = sorted(key for key in self.alignments.data.keys()) - for old_name in tqdm(frame_names, desc="Fixing Alignments file"): - fname, ext = os.path.splitext(old_name) - vid_name, new_frame_id = ("_".join(fname.split("_")[:-1]), - int(fname.split("_")[-1]) - 1) - new_name = "{}_{:06d}{}".format(vid_name, new_frame_id, ext) - logger.debug("Re-assigning: '%s' > '%s'", old_name, new_name) - self.alignments.data[new_name] = self.alignments.data[old_name] - del self.alignments.data[old_name] - - class Merge(): """ Merge two alignments files into one """ def __init__(self, alignments, arguments): diff --git a/tools/alignments/jobs_manual.py b/tools/alignments/jobs_manual.py deleted file mode 100644 index 491b7c43d9..0000000000 --- a/tools/alignments/jobs_manual.py +++ /dev/null @@ -1,939 +0,0 @@ -#!/usr/bin/env python3 -""" Manual processing of alignments """ - -import logging -import platform -import sys -import cv2 -import numpy as np - -from lib.faces_detect import DetectedFace -from lib.queue_manager import queue_manager -from plugins.extract.pipeline import Extractor, ExtractMedia -from .annotate import Annotate -from .media import ExtractedFaces, Frames - -logger = logging.getLogger(__name__) # pylint: disable=invalid-name - - -class Interface(): - """ Key controls and interfacing options for OpenCV """ - def __init__(self, alignments, frames): - logger.debug("Initializing %s: (alignments: %s, frames: %s)", - self.__class__.__name__, alignments, frames) - self.alignments = alignments - self.frames = frames - self.controls = self.set_controls() - self.state = self.set_state() - self.skip_mode = {1: "Standard", - 2: "No Faces", - 3: "Multi-Faces", - 4: "Has Faces"} - logger.debug("Initialized %s", self.__class__.__name__) - - def set_controls(self): - """ Set keyboard controls, destination and help text 
""" - controls = {"z": {"action": self.iterate_frame, - "args": ("navigation", - 1), - "help": "Previous Frame"}, - "x": {"action": self.iterate_frame, - "args": ("navigation", 1), - "help": "Next Frame"}, - "[": {"action": self.iterate_frame, - "args": ("navigation", - 100), - "help": "100 Frames Back"}, - "]": {"action": self.iterate_frame, - "args": ("navigation", 100), - "help": "100 Frames Forward"}, - "{": {"action": self.iterate_frame, - "args": ("navigation", "first"), - "help": "Go to First Frame"}, - "}": {"action": self.iterate_frame, - "args": ("navigation", "last"), - "help": "Go to Last Frame"}, - 27: {"action": "quit", - "key_text": "ESC", - "args": ("navigation", None), - "help": "Exit", - "key_type": ord}, - "/": {"action": self.iterate_state, - "args": ("navigation", "frame-size"), - "help": "Cycle Frame Zoom"}, - "s": {"action": self.iterate_state, - "args": ("navigation", "skip-mode"), - "help": ("Skip Mode (All, No Faces, Multi Faces, Has Faces)")}, - " ": {"action": self.save_alignments, - "key_text": "SPACE", - "args": ("edit", None), - "help": "Save Alignments"}, - "r": {"action": self.reload_alignments, - "args": ("edit", None), - "help": "Reload Alignments (Discard all changes)"}, - "d": {"action": self.delete_alignment, - "args": ("edit", None), - "help": "Delete Selected Alignment"}, - "m": {"action": self.toggle_state, - "args": ("edit", "active"), - "help": "Change Mode (View, Edit)"}, - range(10): {"action": self.set_state_value, - "key_text": "0 to 9", - "args": ["edit", "selected"], - "help": "Select/Deselect Face at this Index", - "key_type": range}, - "c": {"action": self.copy_alignments, - "args": ("edit", -1), - "help": "Copy Alignments from Previous Frame with Alignments"}, - "v": {"action": self.copy_alignments, - "args": ("edit", 1), - "help": "Copy Alignments from Next Frame with Alignments"}, - "y": {"action": self.toggle_state, - "args": ("image", "display"), - "help": "Toggle Image"}, - "u": {"action": self.iterate_state, 
- "args": ("bounding_box", "color"), - "help": "Cycle Bounding Box Color"}, - "i": {"action": self.iterate_state, - "args": ("extract_box", "color"), - "help": "Cycle Extract Box Color"}, - "o": {"action": self.iterate_state, - "args": ("landmarks", "color"), - "help": "Cycle Landmarks Color"}, - "p": {"action": self.iterate_state, - "args": ("landmarks_mesh", "color"), - "help": "Cycle Landmarks Mesh Color"}, - "h": {"action": self.iterate_state, - "args": ("bounding_box", "size"), - "help": "Cycle Bounding Box thickness"}, - "j": {"action": self.iterate_state, - "args": ("extract_box", "size"), - "help": "Cycle Extract Box thickness"}, - "k": {"action": self.iterate_state, - "args": ("landmarks", "size"), - "help": "Cycle Landmarks - point size"}, - "l": {"action": self.iterate_state, - "args": ("landmarks_mesh", "size"), - "help": "Cycle Landmarks Mesh - thickness"}} - - logger.debug("Controls: %s", controls) - return controls - - @staticmethod - def set_state(): - """ Set the initial display state """ - state = {"bounding_box": dict(), - "extract_box": dict(), - "landmarks": dict(), - "landmarks_mesh": dict(), - "image": dict(), - "navigation": {"skip-mode": 1, - "frame-size": 1, - "frame_idx": 0, - "max_frame": 0, - "last_request": 0, - "frame_name": None}, - "edit": {"updated": False, - "update_faces": False, - "selected": None, - "active": 0, - "redraw": False}} - - # See lib_alignments/annotate.py for color mapping - color = 0 - for key in sorted(state.keys()): - if key not in ("bounding_box", "extract_box", "landmarks", "landmarks_mesh", "image"): - continue - state[key]["display"] = True - if key == "image": - continue - color += 1 - state[key]["size"] = 1 - state[key]["color"] = color - logger.debug("State: %s", state) - return state - - def save_alignments(self, *args): # pylint: disable=unused-argument - """ Save alignments """ - logger.debug("Saving Alignments") - if not self.state["edit"]["updated"]: - logger.debug("Save received, but state not 
updated. Not saving") - return - self.alignments.save() - self.state["edit"]["updated"] = False - self.set_redraw(True) - - def reload_alignments(self, *args): # pylint: disable=unused-argument - """ Reload alignments """ - logger.debug("Reloading Alignments") - if not self.state["edit"]["updated"]: - logger.debug("Reload received, but state not updated. Not reloading") - return - self.alignments.reload() - self.state["edit"]["updated"] = False - self.state["edit"]["update_faces"] = True - self.set_redraw(True) - - def delete_alignment(self, *args): # pylint: disable=unused-argument - """ Save alignments """ - logger.debug("Deleting Alignments") - selected_face = self.get_selected_face_id() - if self.get_edit_mode() == "View" or selected_face is None: - logger.debug("Delete received, but edit mode is 'View'. Not deleting") - return - frame = self.get_frame_name() - if self.alignments.delete_face_at_index(frame, selected_face): - self.state["edit"]["selected"] = None - self.state["edit"]["updated"] = True - self.state["edit"]["update_faces"] = True - self.set_redraw(True) - - def copy_alignments(self, *args): - """ Copy the alignments from the previous or next frame - to the current frame """ - logger.debug("Copying Alignments") - if self.get_edit_mode() != "Edit": - logger.debug("Copy received, but edit mode is not 'Edit'. Not copying") - return - frame_id = self.get_next_face_idx(args[1]) - if not 0 <= frame_id <= self.state["navigation"]["max_frame"]: - return - current_frame = self.get_frame_name() - get_frame = self.frames.file_list_sorted[frame_id]["frame_fullname"] - alignments = self.alignments.get_faces_in_frame(get_frame) - for alignment in alignments: - self.alignments. 
add_face(current_frame, alignment) - self.state["edit"]["updated"] = True - self.state["edit"]["update_faces"] = True - self.set_redraw(True) - - def toggle_state(self, item, category): - """ Toggle state of requested item """ - logger.debug("Toggling state: (item: %s, category: %s)", item, category) - self.state[item][category] = not self.state[item][category] - logger.debug("State toggled: (item: %s, category: %s, value: %s)", - item, category, self.state[item][category]) - self.set_redraw(True) - - def iterate_state(self, item, category): - """ Cycle through options (6 possible or 3 currently supported) """ - logger.debug("Cycling state: (item: %s, category: %s)", item, category) - if category == "color": - max_val = 7 - elif category == "frame-size": - max_val = 6 - elif category == "skip-mode": - max_val = 4 - else: - max_val = 3 - val = self.state[item][category] - val = val + 1 if val != max_val else 1 - self.state[item][category] = val - logger.debug("Cycled state: (item: %s, category: %s, value: %s)", - item, category, self.state[item][category]) - self.set_redraw(True) - - def set_state_value(self, item, category, value): - """ Set state of requested item or toggle off """ - logger.debug("Setting state value: (item: %s, category: %s, value: %s)", - item, category, value) - state = self.state[item][category] - value = str(value) if value is not None else value - if state == value: - self.state[item][category] = None - else: - self.state[item][category] = value - logger.debug("Setting state value: (item: %s, category: %s, value: %s)", - item, category, self.state[item][category]) - self.set_redraw(True) - - def iterate_frame(self, *args): - """ Iterate frame up or down, stopping at either end """ - logger.debug("Iterating frame: (args: %s)", args) - iteration = args[1] - max_frame = self.state["navigation"]["max_frame"] - if iteration in ("first", "last"): - next_frame = 0 if iteration == "first" else max_frame - self.state["navigation"]["frame_idx"] = 
next_frame - self.state["navigation"]["last_request"] = 0 - self.set_redraw(True) - return - - current_frame = self.state["navigation"]["frame_idx"] - next_frame = current_frame + iteration - end = 0 if iteration < 0 else max_frame - if (max_frame == 0 or - (end > 0 and next_frame >= end) or - (end == 0 and next_frame <= end)): - next_frame = end - self.state["navigation"]["frame_idx"] = next_frame - self.state["navigation"]["last_request"] = iteration - self.set_state_value("edit", "selected", None) - - def get_color(self, item): - """ Return color for selected item """ - return self.state[item]["color"] - - def get_size(self, item): - """ Return size for selected item """ - return self.state[item]["size"] - - def get_frame_scaling(self): - """ Return frame scaling factor for requested item """ - factors = (1, 1.25, 1.5, 2, 0.5, 0.75) - idx = self.state["navigation"]["frame-size"] - 1 - return factors[idx] - - def get_edit_mode(self): - """ Return text version and border color for edit mode """ - if self.state["edit"]["active"]: - return "Edit" - return "View" - - def get_skip_mode(self): - """ Return text version of skip mode """ - return self.skip_mode[self.state["navigation"]["skip-mode"]] - - def get_state_color(self): - """ Return a color based on current state - white - View Mode - yellow - Edit Mode - red - Unsaved alignments """ - color = (255, 255, 255) - if self.state["edit"]["updated"]: - color = (0, 0, 255) - elif self.state["edit"]["active"]: - color = (0, 255, 255) - return color - - def get_frame_name(self): - """ Return the current frame number """ - return self.state["navigation"]["frame_name"] - - def get_selected_face_id(self): - """ Return the index of the currently selected face """ - try: - return int(self.state["edit"]["selected"]) - except TypeError: - return None - - def redraw(self): - """ Return whether a redraw is required """ - return self.state["edit"]["redraw"] - - def set_redraw(self, request): - """ Turn redraw requirement on or 
off """ - self.state["edit"]["redraw"] = request - - def get_next_face_idx(self, increment): - """Get the index of the previous or next frame which has a face""" - navigation = self.state["navigation"] - frame_list = self.frames.file_list_sorted - frame_idx = navigation["frame_idx"] + increment - while True: - if not 0 <= frame_idx <= navigation["max_frame"]: - break - frame = frame_list[frame_idx]["frame_fullname"] - if not self.alignments.frame_has_faces(frame): - frame_idx += increment - else: - break - return frame_idx - - -class Help(): - """ Generate and display help in cli and in window """ - def __init__(self, interface): - logger.debug("Initializing %s: (interface: %s)", self.__class__.__name__, interface) - self.interface = interface - self.helptext = self.generate() - logger.debug("Initialized %s", self.__class__.__name__) - - def generate(self): - """ Generate help output """ - logger.debug("Generating help") - sections = ("navigation", "display", "color", "size", "edit") - helpout = {section: list() for section in sections} - helptext = "" - for key, val in self.interface.controls.items(): - logger.trace("Generating help for:(key: '%s', val: '%s'", key, val) - help_section = val["args"][0] - if help_section not in ("navigation", "edit"): - help_section = val["args"][1] - key_text = val.get("key_text", None) - key_text = key_text if key_text else key - logger.trace("Adding help for:(section: '%s', val: '%s', text: '%s'", - help_section, val["help"], key_text) - helpout[help_section].append((val["help"], key_text)) - - helpout["edit"].append(("Bounding Box - Move", "Left Click")) - helpout["edit"].append(("Bounding Box - Resize", "Middle Click")) - - for section in sections: - spacer = "=" * int((40 - len(section)) / 2) - display = "\n{0} {1} {0}\n".format(spacer, section.upper()) - helpsection = sorted(helpout[section]) - if section == "navigation": - helpsection = sorted(helpout[section], reverse=True) - display += "\n".join(" - '{}': 
{}".format(item[1], item[0]) - for item in helpsection) - - helptext += display - logger.debug("Added helptext: '%s'", helptext) - return helptext - - def render(self): - """ Render help text to image window """ - # pylint: disable=no-member - logger.trace("Rendering help text") - image = self.background() - display_text = self.helptext + self.compile_status() - self.text_to_image(image, display_text) - cv2.namedWindow("Help") - cv2.imshow("Help", image) - logger.trace("Rendered help text") - - def background(self): - """ Create an image to hold help text """ - # pylint: disable=no-member - logger.trace("Creating help text canvas") - height = 880 - width = 480 - image = np.zeros((height, width, 3), np.uint8) - color = self.interface.get_state_color() - cv2.rectangle(image, (0, 0), (width - 1, height - 1), color, 2) - logger.trace("Created help text canvas") - return image - - def compile_status(self): - """ Render the status text """ - logger.trace("Compiling Status text") - status = "\n=== STATUS\n" - navigation = self.interface.state["navigation"] - frame_scale = int(self.interface.get_frame_scaling() * 100) - status += " File: {}\n".format(self.interface.get_frame_name()) - status += " Frame: {} / {}\n".format( - navigation["frame_idx"] + 1, navigation["max_frame"] + 1) - status += " Frame Size: {}%\n".format(frame_scale) - status += " Skip Mode: {}\n".format(self.interface.get_skip_mode()) - status += " View Mode: {}\n".format(self.interface.get_edit_mode()) - if self.interface.get_selected_face_id() is not None: - status += " Selected Face Index: {}\n".format(self.interface.get_selected_face_id()) - if self.interface.state["edit"]["updated"]: - status += " Warning: There are unsaved changes\n" - - logger.trace("Compiled Status text") - return status - - @staticmethod - def text_to_image(image, display_text): - """ Write out and format help text to image """ - # pylint: disable=no-member - logger.trace("Converting help text to image") - pos_y = 0 - for line in 
display_text.split("\n"): - if line.startswith("==="): - pos_y += 10 - line = line.replace("=", "").strip() - line = line.replace("- '", "[ ").replace("':", " ]") - cv2.putText(image, line, (20, pos_y), - cv2.FONT_HERSHEY_SIMPLEX, 0.43, (255, 255, 255), 1) - pos_y += 20 - logger.trace("Converted help text to image") - - -class Manual(): - """ Manually adjust or create landmarks data """ - def __init__(self, alignments, arguments): - logger.debug("Initializing %s: (alignments: %s, arguments: %s)", - self.__class__.__name__, alignments, arguments) - self.arguments = arguments - self.alignments = alignments - self.frames = Frames(arguments.frames_dir) - self.extracted_faces = None - self.interface = None - self.help = None - self.mouse_handler = None - logger.debug("Initialized %s", self.__class__.__name__) - - def process(self): - """ Process manual extraction """ - logger.info("[MANUAL PROCESSING]") # Tidy up cli output - self.extracted_faces = ExtractedFaces(self.frames, self.alignments, size=256) - self.interface = Interface(self.alignments, self.frames) - self.help = Help(self.interface) - self.mouse_handler = MouseHandler(self.interface, self.arguments.loglevel) - - print(self.help.helptext) - max_idx = self.frames.count - 1 - self.interface.state["navigation"]["max_frame"] = max_idx - self.display_frames() - - def display_frames(self): - """ Iterate through frames """ - # pylint: disable=no-member - logger.debug("Display frames") - is_windows = platform.system() == "Windows" - is_conda = "conda" in sys.version.lower() - logger.debug("is_windows: %s, is_conda: %s", is_windows, is_conda) - cv2.namedWindow("Frame") - cv2.namedWindow("Faces") - cv2.setMouseCallback('Frame', self.mouse_handler.on_event) - - frame, faces = self.get_frame() - press = self.get_keys() - self.interface.set_redraw(True) - - while True: - if self.interface.redraw(): - self.help.render() - cv2.imshow("Frame", frame) - cv2.imshow("Faces", faces) - self.interface.set_redraw(False) - key = 
cv2.waitKey(1000) - - if self.window_closed(is_windows, is_conda, key): - queue_manager.terminate_queues() - break - - if key and key != -1: - logger.trace("Keypress received: '%s'", key) - if key in press.keys(): - action = press[key]["action"] - logger.debug("Keypress action: key: ('%s', action: '%s')", key, action) - if action == "quit": - break - - if press[key].get("key_type") == range: - args = press[key]["args"] + [chr(key)] - else: - args = press[key]["args"] - action(*args) - - if not self.interface.redraw(): - continue - - logger.trace("Redraw requested") - frame, faces = self.get_frame() - - cv2.destroyAllWindows() - - def window_closed(self, is_windows, is_conda, key): - """ Check whether the window has been closed - - MS Windows doesn't appear to read the window state property - properly, so we check for a negative key press. - - Conda (tested on Windows) doesn't appear to read the window - state property or negative key press properly, so we arbitrarily - use another property """ - # pylint: disable=no-member - logger.trace("Commencing closed window check") - closed = False - prop_autosize = cv2.getWindowProperty('Frame', cv2.WND_PROP_AUTOSIZE) - prop_visible = cv2.getWindowProperty('Frame', cv2.WND_PROP_VISIBLE) - if self.arguments.disable_monitor: - closed = False - elif is_conda and prop_autosize < 1: - closed = True - elif is_windows and not is_conda and key == -1: - closed = True - elif not is_windows and not is_conda and prop_visible < 1: - closed = True - logger.trace("Completed closed window check. 
Closed is %s", closed) - if closed: - logger.debug("Window closed detected") - return closed - - def get_keys(self): - """ Convert keys dict into something useful - for OpenCV """ - keys = dict() - for key, val in self.interface.controls.items(): - if val.get("key_type", str) == range: - for range_key in key: - keys[ord(str(range_key))] = val - elif val.get("key_type", str) == ord: - keys[key] = val - else: - keys[ord(key)] = val - - return keys - - def get_frame(self): - """ Compile the frame and get faces """ - image = self.frame_selector() - frame_name = self.interface.get_frame_name() - logger.debug("Frame Name: '%s'", frame_name) - alignments = self.alignments.get_faces_in_frame(frame_name) - faces_updated = self.interface.state["edit"]["update_faces"] - logger.debug("Faces Updated: %s", faces_updated) - self.extracted_faces.get_faces(frame_name) - roi = [face.original_roi for face in self.extracted_faces.faces] - - if faces_updated: - self.interface.state["edit"]["update_faces"] = False - - frame = FrameDisplay(image, alignments, roi, self.interface).image - faces = self.set_faces(frame_name).image - return frame, faces - - def frame_selector(self): - """ Return frame at given index """ - navigation = self.interface.state["navigation"] - frame_list = self.frames.file_list_sorted - frame = frame_list[navigation["frame_idx"]]["frame_fullname"] - skip_mode = self.interface.get_skip_mode().lower() - logger.debug("navigation: %s, frame: '%s', skip_mode: '%s'", navigation, frame, skip_mode) - - while True: - if navigation["last_request"] == 0: - break - if navigation["frame_idx"] in (0, navigation["max_frame"]): - break - if skip_mode == "standard": - break - if skip_mode == "no faces" and not self.alignments.frame_has_faces(frame): - break - if skip_mode == "multi-faces" and self.alignments.frame_has_multiple_faces(frame): - break - if skip_mode == "has faces" and self.alignments.frame_has_faces(frame): - break - self.interface.iterate_frame("navigation", 
navigation["last_request"]) - frame = frame_list[navigation["frame_idx"]]["frame_fullname"] - - image = self.frames.load_image(frame) - navigation["last_request"] = 0 - navigation["frame_name"] = frame - return image - - def set_faces(self, frame): - """ Pass the current frame faces to faces window """ - faces = self.extracted_faces.get_faces_in_frame(frame) - landmarks = [{"landmarks_xy": face.aligned_landmarks} - for face in self.extracted_faces.faces] - return FacesDisplay(faces, landmarks, self.extracted_faces.size, self.interface) - - -class FrameDisplay(): - """" Window that holds the frame """ - def __init__(self, image, alignments, roi, interface): - logger.trace("Initializing %s: (alignments: %s, roi: %s, interface: %s)", - self.__class__.__name__, alignments, roi, interface) - self.image = image - self.roi = roi - self.alignments = alignments - self.interface = interface - self.annotate_frame() - logger.trace("Initialized %s", self.__class__.__name__) - - def annotate_frame(self): - """ Annotate the frame """ - state = self.interface.state - logger.trace("State: %s", state) - annotate = Annotate(self.image, self.alignments, self.roi) - if not state["image"]["display"]: - annotate.draw_black_image() - - for item in ("bounding_box", "extract_box", "landmarks", "landmarks_mesh"): - color = self.interface.get_color(item) - size = self.interface.get_size(item) - state[item]["display"] = color != 7 - if not state[item]["display"]: - continue - logger.trace("Annotating: '%s'", item) - annotation = getattr(annotate, "draw_{}".format(item)) - annotation(color, size) - - selected_face = self.interface.get_selected_face_id() - if (selected_face is not None and - int(selected_face) < len(self.alignments)): - annotate.draw_grey_out_faces(selected_face) - - self.image = self.resize_frame(annotate.image) - - def resize_frame(self, image): - """ Set the displayed frame size and add state border""" - # pylint: disable=no-member - logger.trace("Resizing frame") - height, 
width = image.shape[:2] - color = self.interface.get_state_color() - cv2.rectangle(image, (0, 0), (width - 1, height - 1), color, 1) - scaling = self.interface.get_frame_scaling() - image = cv2.resize(image, (0, 0), fx=scaling, fy=scaling) - logger.trace("Resized frame") - return image - - -class FacesDisplay(): - """ Window that holds faces thumbnail """ - def __init__(self, extracted_faces, landmarks, size, interface): - logger.trace("Initializing %s: (extracted_faces: %s, landmarks: %s, size: %s, " - "interface: %s)", self.__class__.__name__, extracted_faces, - landmarks, size, interface) - self.row_length = 4 - self.faces = self.copy_faces(extracted_faces) - self.roi = self.set_full_roi(size) - self.landmarks = landmarks - self.interface = interface - - self.annotate_faces() - - self.image = self.build_faces_image(size) - logger.trace("Initialized %s", self.__class__.__name__) - - @staticmethod - def copy_faces(faces): - """ Copy the extracted faces so as not to save the annotations back """ - return [face.aligned_face.copy() for face in faces] - - @staticmethod - def set_full_roi(size): - """ ROI is the full frame for faces, so set based on size """ - return [np.array([[(0, 0), (0, size - 1), (size - 1, size - 1), (size - 1, 0)]], np.int32)] - - def annotate_faces(self): - """ Annotate each of the faces """ - state = self.interface.state - selected_face = self.interface.get_selected_face_id() - logger.trace("State: %s, Selected Face ID: %s", state, selected_face) - for idx, face in enumerate(self.faces): - annotate = Annotate(face, [self.landmarks[idx]], self.roi) - if not state["image"]["display"]: - annotate.draw_black_image() - - for item in ("landmarks", "landmarks_mesh"): - if not state[item]["display"]: - continue - logger.trace("Annotating: '%s'", item) - color = self.interface.get_color(item) - size = self.interface.get_size(item) - annotation = getattr(annotate, "draw_{}".format(item)) - annotation(color, size) - - if (selected_face is not None - and 
int(selected_face) < len(self.faces) - and int(selected_face) != idx): - annotate.draw_grey_out_faces(1) - - self.faces[idx] = annotate.image - - def build_faces_image(self, size): - """ Display associated faces """ - total_faces = len(self.faces) - logger.trace("Building faces panel. (total_faces: %s)", total_faces) - if not total_faces: - logger.trace("Returning empty row") - image = self.build_faces_row(list(), size) - return image - total_rows = int(total_faces / self.row_length) + 1 - for idx in range(total_rows): - logger.trace("Building row %s", idx) - face_idx = idx * self.row_length - row_faces = self.faces[face_idx:face_idx + self.row_length] - if not row_faces: - break - row = self.build_faces_row(row_faces, size) - image = row if idx == 0 else np.concatenate((image, row), axis=0) - return image - - def build_faces_row(self, faces, size): - """ Build a row of 4 faces """ - # pylint: disable=no-member - logger.trace("Building row for %s faces", len(faces)) - if len(faces) != 4: - remainder = 4 - (len(faces) % self.row_length) - for _ in range(remainder): - faces.append(np.zeros((size, size, 3), np.uint8)) - for idx, face in enumerate(faces): - color = self.interface.get_state_color() - cv2.rectangle(face, (0, 0), (size - 1, size - 1), - color, 1) - if idx == 0: - row = face - else: - row = np.concatenate((row, face), axis=1) - return row - - -class MouseHandler(): - """ Manual Extraction """ - def __init__(self, interface, loglevel): - logger.debug("Initializing %s: (interface: %s, loglevel: %s)", - self.__class__.__name__, interface, loglevel) - self.interface = interface - self.alignments = interface.alignments - self.frames = interface.frames - - self.queues = dict() - self.extractor = self.init_extractor() - - self.mouse_state = None - self.last_move = None - self.center = None - self.dims = None - self.media = {"frame_id": None, - "image": None, - "bounding_box": list(), - "bounding_last": list(), - "bounding_box_orig": list()} - 
logger.debug("Initialized %s", self.__class__.__name__) - - def init_extractor(self): - """ Initialize Aligner """ - logger.debug("Initialize Extractor") - extractor = Extractor(None, "fan", None, multiprocess=True, normalize_method="hist") - self.queues["in"] = extractor.input_queue - # Set the batchsize to 1 - extractor.set_batchsize("align", 1) - extractor.launch() - logger.debug("Initialized Extractor") - return extractor - - def on_event(self, event, x, y, flags, param): # pylint: disable=unused-argument,invalid-name - """ Handle the mouse events """ - # pylint: disable=no-member - if self.interface.get_edit_mode() != "Edit": - return - logger.trace("Mouse event: (event: %s, x: %s, y: %s, flags: %s, param: %s", - event, x, y, flags, param) - if not self.mouse_state and event not in (cv2.EVENT_LBUTTONDOWN, cv2.EVENT_MBUTTONDOWN): - return - - self.initialize() - - if event in (cv2.EVENT_LBUTTONUP, cv2.EVENT_MBUTTONUP): - self.mouse_state = None - self.last_move = None - elif event == cv2.EVENT_LBUTTONDOWN: - self.mouse_state = "left" - self.set_bounding_box(x, y) - elif event == cv2.EVENT_MBUTTONDOWN: - self.mouse_state = "middle" - self.set_bounding_box(x, y) - elif event == cv2.EVENT_MOUSEMOVE: - if self.mouse_state == "left": - self.move_bounding_box(x, y) - elif self.mouse_state == "middle": - self.resize_bounding_box(x, y) - - def initialize(self): - """ Update changed parameters """ - frame = self.interface.get_frame_name() - if frame == self.media["frame_id"]: - return - logger.debug("Initialize frame: '%s'", frame) - self.media["frame_id"] = frame - self.media["image"] = self.frames.load_image(frame) - self.dims = None - self.center = None - self.last_move = None - self.mouse_state = None - self.media["bounding_box"] = DetectedFace() - self.media["bounding_box_orig"] = None - - def set_bounding_box(self, pt_x, pt_y): - """ Select or create bounding box """ - if self.interface.get_selected_face_id() is None: - self.check_click_location(pt_x, pt_y) - - if 
self.interface.get_selected_face_id() is not None: - self.dims_from_alignment() - else: - self.dims_from_image() - - self.move_bounding_box(pt_x, pt_y) - - def check_click_location(self, pt_x, pt_y): - """ Check whether the point clicked is within an existing - bounding box and set face_id """ - frame = self.media["frame_id"] - alignments = self.alignments.get_faces_in_frame(frame) - scale = self.interface.get_frame_scaling() - pt_x = int(pt_x / scale) - pt_y = int(pt_y / scale) - - for idx, alignment in enumerate(alignments): - left = alignment["x"] - right = alignment["x"] + alignment["w"] - top = alignment["y"] - bottom = alignment["y"] + alignment["h"] - - if left <= pt_x <= right and top <= pt_y <= bottom: - self.interface.set_state_value("edit", "selected", idx) - break - - def dims_from_alignment(self): - """ Set the height and width of bounding box from alignment """ - frame = self.media["frame_id"] - face_id = self.interface.get_selected_face_id() - alignment = self.alignments.get_faces_in_frame(frame)[face_id] - self.dims = (alignment["w"], alignment["h"]) - - def dims_from_image(self): - """ Set the height and width of bounding - box at 10% of longest axis """ - size = max(self.media["image"].shape[:2]) - dim = int(size / 10.00) - self.dims = (dim, dim) - - def bounding_from_center(self): - """ Get bounding X Y from center """ - pt_x, pt_y = self.center - width, height = self.dims - scale = self.interface.get_frame_scaling() - self.media["bounding_box"].x = int((pt_x / scale) - width / 2) - self.media["bounding_box"].y = int((pt_y / scale) - height / 2) - self.media["bounding_box"].w = width - self.media["bounding_box"].h = height - - def move_bounding_box(self, pt_x, pt_y): - """ Move the bounding box """ - self.center = (pt_x, pt_y) - self.bounding_from_center() - self.update_landmarks() - - def resize_bounding_box(self, pt_x, pt_y): - """ Resize the bounding box """ - scale = self.interface.get_frame_scaling() - if not self.last_move: - self.last_move 
= (pt_x, pt_y) - self.media["bounding_box_orig"] = self.media["bounding_box"] - - move_x = int(pt_x - self.last_move[0]) - move_y = int(self.last_move[1] - pt_y) - - original = self.media["bounding_box_orig"] - updated = self.media["bounding_box"] - - minsize = int(20 / scale) - center = (int(self.center[0] / scale), int(self.center[1] / scale)) - updated.x = min(center[0] - (minsize // 2), original.x - move_x) - updated.y = min(center[1] - (minsize // 2), original.y - move_y) - updated.w = max(minsize, original.w + move_x) - updated.h = max(minsize, original.h + move_y) - self.update_landmarks() - self.last_move = (pt_x, pt_y) - - def update_landmarks(self): - """ Update the landmarks """ - feed = ExtractMedia(self.media["frame_id"], - self.media["image"], - detected_faces=[self.media["bounding_box"]]) - self.queues["in"].put(feed) - detected_face = next(self.extractor.detected_faces()).detected_faces[0] - alignment = detected_face.to_alignment() - # Mask will now be incorrect for updated landmarks so delete - alignment["mask"] = dict() - - frame = self.media["frame_id"] - - if self.interface.get_selected_face_id() is None: - idx = self.alignments.add_face(frame, alignment) - self.interface.set_state_value("edit", "selected", idx) - else: - self.alignments.update_face(frame, - self.interface.get_selected_face_id(), - alignment) - self.interface.set_redraw(True) - - self.interface.state["edit"]["updated"] = True - self.interface.state["edit"]["update_faces"] = True diff --git a/tools/manual/manual.py b/tools/manual/manual.py index f55a6a7446..f37f04a93d 100644 --- a/tools/manual/manual.py +++ b/tools/manual/manual.py @@ -44,7 +44,7 @@ def __init__(self, arguments): self._initialize_tkinter() self._globals = TkGlobals(arguments.frames) - extractor = Aligner(self._globals) + extractor = Aligner(self._globals, arguments.exclude_gpus) self._detected_faces = DetectedFaces(self._globals, arguments.alignments_path, arguments.frames, @@ -597,12 +597,17 @@ class Aligner(): 
---------- tk_globals: :class:`~tools.manual.manual.TkGlobals` The tkinter variables that apply to the whole of the GUI + exclude_gpus: list or ``None`` + A list of indices correlating to connected GPUs that Tensorflow should not use. Pass + ``None`` to not exclude any GPUs. """ - def __init__(self, tk_globals): - logger.debug("Initializing: %s (tk_globals: %s)", self.__class__.__name__, tk_globals) + def __init__(self, tk_globals, exclude_gpus): + logger.debug("Initializing: %s (tk_globals: %s, exclude_gpus: %s)", + self.__class__.__name__, tk_globals, exclude_gpus) self._globals = tk_globals self._aligners = {"cv2-dnn": None, "FAN": None, "mask": None} self._aligner = "FAN" + self._exclude_gpus = exclude_gpus self._detected_faces = None self._frame_index = None self._face_index = None @@ -656,8 +661,13 @@ def _init_aligner(self): for model in ("mask", "cv2-dnn", "FAN"): logger.debug("Initializing aligner: %s", model) plugin = None if model == "mask" else model - aligner = Extractor(None, plugin, ["components", "extended"], - multiprocess=True, normalize_method="hist") + exclude_gpus = self._exclude_gpus if model == "FAN" else None + aligner = Extractor(None, + plugin, + ["components", "extended"], + exclude_gpus=exclude_gpus, + multiprocess=True, + normalize_method="hist") if plugin: aligner.set_batchsize("align", 1) # Set the batchsize to 1 aligner.launch() diff --git a/tools/mask/mask.py b/tools/mask/mask.py index bd31835cb9..5c1335936d 100644 --- a/tools/mask/mask.py +++ b/tools/mask/mask.py @@ -51,7 +51,7 @@ def __init__(self, arguments): self._alignments = Alignments(os.path.dirname(arguments.alignments), filename=os.path.basename(arguments.alignments)) - self._extractor = self._get_extractor() + self._extractor = self._get_extractor(arguments.exclude_gpus) self._extractor_input_thread = self._feed_extractor() logger.debug("Initialized %s", self.__class__.__name__) @@ -99,9 +99,15 @@ def _set_saver(self, arguments): logger.debug(saver) return saver - def 
_get_extractor(self): + def _get_extractor(self, exclude_gpus): """ Obtain a Mask extractor plugin and launch it + Parameters + ---------- + exclude_gpus: list or ``None`` + A list of indices correlating to connected GPUs that Tensorflow should not use. Pass + ``None`` to not exclude any GPUs. + Returns ------- :class:`plugins.extract.pipeline.Extractor`: @@ -112,6 +118,7 @@ def _get_extractor(self): return None logger.debug("masker: %s", self._mask_type) extractor = Extractor(None, None, self._mask_type, + exclude_gpus=exclude_gpus, image_is_aligned=self._input_is_faces) extractor.launch() logger.debug(extractor) diff --git a/tools/preview/cli.py b/tools/preview/cli.py index 4e767cc149..2394be1533 100644 --- a/tools/preview/cli.py +++ b/tools/preview/cli.py @@ -17,42 +17,34 @@ def get_info(): def get_argument_list(self): argument_list = list() - argument_list.append({"opts": ("-i", "--input-dir"), - "action": DirOrFileFullPaths, - "filetypes": "video", - "dest": "input_dir", - "group": "data", - "required": True, - "help": "Input directory or video. Either a directory containing " - "the image files you wish to process or path to a video " - "file."}) - argument_list.append({"opts": ("-al", "--alignments"), - "action": FileFullPaths, - "filetypes": "alignments", - "type": str, - "group": "data", - "dest": "alignments_path", - "help": "Path to the alignments file for the input, if not at the " - "default location"}) - argument_list.append({"opts": ("-m", "--model-dir"), - "action": DirFullPaths, - "dest": "model_dir", - "group": "data", - "required": True, - "help": "Model directory. A directory containing the trained model " - "you wish to process."}) - argument_list.append({"opts": ("-s", "--swap-model"), - "action": "store_true", - "dest": "swap_model", - "default": False, - "help": "Swap the model. 
Instead of A -> B, " - "swap B -> A"}) - argument_list.append({"opts": ("-ag", "--allow-growth"), - "action": "store_true", - "dest": "allow_growth", - "default": False, - "backend": "nvidia", - "help": "Sets allow_growth option of Tensorflow to spare memory " - "on some configurations."}) - + argument_list.append(dict( + opts=("-i", "--input-dir"), + action=DirOrFileFullPaths, + filetypes="video", + dest="input_dir", + group="data", + required=True, + help="Input directory or video. Either a directory containing the image files you " + "wish to process or path to a video file.")) + argument_list.append(dict( + opts=("-al", "--alignments"), + action=FileFullPaths, + filetypes="alignments", + type=str, + group="data", + dest="alignments_path", + help="Path to the alignments file for the input, if not at the default location")) + argument_list.append(dict( + opts=("-m", "--model-dir"), + action=DirFullPaths, + dest="model_dir", + group="data", + required=True, + help="Model directory. A directory containing the trained model you wish to process.")) + argument_list.append(dict( + opts=("-s", "--swap-model"), + action="store_true", + dest="swap_model", + default=False, + help="Swap the model. Instead of A -> B, swap B -> A")) return argument_list diff --git a/tools/sort/cli.py b/tools/sort/cli.py index c2f27c1761..2e12facdf6 100644 --- a/tools/sort/cli.py +++ b/tools/sort/cli.py @@ -159,15 +159,6 @@ def get_argument_list(): "the last bin." "Default value: 5"}) - argument_list.append({"opts": ("-be", "--backend"), - "action": Radio, - "type": str.upper, - "choices": ("CPU", "GPU"), - "default": "GPU", - "group": "settings", - "help": "Backend to use for VGG Face inference." 
- "Only used for sort by 'face'."}) - argument_list.append({"opts": ('-l', '--log-changes'), "action": 'store_true', "group": "settings", diff --git a/tools/sort/sort.py b/tools/sort/sort.py index a7446ca738..804ba08cd4 100644 --- a/tools/sort/sort.py +++ b/tools/sort/sort.py @@ -17,10 +17,8 @@ from lib.serializer import get_serializer_from_filename from lib.faces_detect import DetectedFace from lib.image import ImagesLoader, read_image -from lib.utils import get_backend -from lib.vgg_face2_keras import VGGFace2 as VGGFace +from plugins.extract.recognition.vgg_face2_keras import VGGFace2 as VGGFace from plugins.extract.pipeline import Extractor, ExtractMedia -from plugins.extract._config import Config logger = logging.getLogger(__name__) # pylint: disable=invalid-name @@ -58,13 +56,8 @@ def process(self): # Load VGG Face if sorting by face if self.args.sort_method.lower() == "face": - conf = Config("global", configfile=self.args.configfile) - allow_growth = (conf.config_dict["allow_growth"] and - self.args.backend.lower() == "gpu" and - get_backend() == "nvidia") - self.vgg_face = VGGFace(backend=self.args.backend, - allow_growth=allow_growth, - loglevel=self.args.loglevel) + self.vgg_face = VGGFace(exclude_gpus=self.args.exclude_gpus) + self.vgg_face.init_model() # If logging is enabled, prepare container if self.args.log_changes: @@ -91,10 +84,10 @@ def process(self): self.sort_process() - @staticmethod - def launch_aligner(): + def launch_aligner(self): """ Load the aligner plugin to retrieve landmarks """ - extractor = Extractor(None, "fan", None, normalize_method="hist") + extractor = Extractor(None, "fan", None, + normalize_method="hist", exclude_gpus=self.args.exclude_gpus) extractor.set_batchsize("align", 1) extractor.launch() return extractor