Phaze-A: Add MobileNetV3 encoder
torzdf committed May 4, 2022
1 parent 332394e commit 0189029
Showing 2 changed files with 61 additions and 63 deletions.
83 changes: 32 additions & 51 deletions plugins/train/model/phaze_a.py
@@ -2,15 +2,14 @@
""" Phaze-A Model by TorzDF with thanks to BirbFakes and the myriad of testers. """

import numpy as np
import tensorflow as tf

from lib.model.nn_blocks import (
Conv2D, Conv2DBlock, Conv2DOutput, ResidualBlock, UpscaleBlock, Upscale2xBlock,
UpscaleResizeImagesBlock)
from lib.model.normalization import (
AdaInstanceNormalization, GroupNormalization, InstanceNormalization, LayerNormalization,
RMSNormalization)
from lib.utils import get_backend, FaceswapError
from lib.utils import get_backend, get_tf_version, FaceswapError

from ._base import KerasModel, ModelBase, logger, _get_all_sub_models

@@ -62,6 +61,10 @@
keras_name="MobileNet", scaling=(-1, 1), default_size=224),
mobilenet_v2=dict(
keras_name="MobileNetV2", scaling=(-1, 1), default_size=224),
mobilenet_v3_large=dict(
keras_name="MobileNetV3Large", no_amd=True, tf_min=2.4, scaling=(-1, 1), default_size=224),
mobilenet_v3_small=dict(
keras_name="MobileNetV3Small", no_amd=True, tf_min=2.4, scaling=(-1, 1), default_size=224),
nasnet_large=dict(
keras_name="NASNetLarge", scaling=(-1, 1), default_size=331, enforce_for_weights=True),
nasnet_mobile=dict(
@@ -208,16 +211,32 @@ def _get_input_shape(self):
Input shape is calculated from the selected Encoder's input size, scaled by the
user-selected Input Scaling, then rounded down to the nearest 16 pixels.
Notes
-----
Some models (NasNet) require the input size to be of a certain dimension when loading
imagenet weights. In these instances the enforced size is used and a warning is raised
Returns
-------
tuple
The shape tuple for the input size to the Phaze-A model
"""
size = _MODEL_MAPPING[self.config["enc_architecture"]]["default_size"]
min_size = _MODEL_MAPPING[self.config["enc_architecture"]].get("min_size", 32)
arch = self.config["enc_architecture"]
enforce_size = _MODEL_MAPPING[arch].get("enforce_for_weights", False)
default_size = _MODEL_MAPPING[arch]["default_size"]
scaling = self.config["enc_scaling"] / 100
size = int(max(min_size, min(size, ((size * scaling) // 16) * 16)))
retval = (size, size, 3)

min_size = _MODEL_MAPPING[arch].get("min_size", 32)
size = int(max(min_size, min(default_size, ((default_size * scaling) // 16) * 16)))

if self.config["enc_load_weights"] and enforce_size and scaling != 1.0:
logger.warning("%s requires input size to be %spx when loading imagenet weights. "
"Adjusting input size from %spx to %spx",
arch, default_size, size, default_size)
retval = (default_size, default_size, 3)
else:
retval = (size, size, 3)

logger.debug("Encoder input set to: %s", retval)
return retval
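
A rough standalone sketch of the sizing rule above (the helper name and example values are hypothetical, not part of the faceswap API):

def scaled_input_size(default_size, scaling, min_size=32):
    """ Scale the encoder's default input size, floor to a multiple of 16
    and clamp between min_size and default_size. """
    return int(max(min_size, min(default_size, ((default_size * scaling) // 16) * 16)))

print(scaled_input_size(224, 0.6))  # 128 (224 * 0.6 = 134.4, floored to a multiple of 16)
print(scaled_input_size(224, 1.0))  # 224
print(scaled_input_size(224, 0.1))  # 32 (clamped up to min_size)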

Expand All @@ -238,7 +257,7 @@ def _validate_encoder_architecture(self):
raise FaceswapError(f"'{arch}' is not compatible with the AMD backend. Choose one of "
f"{valid}.")

tf_ver = float(".".join(tf.__version__.split(".")[:2])) # pylint:disable=no-member
tf_ver = get_tf_version()
tf_min = model.get("tf_min", 2.0)
if get_backend() != "amd" and tf_ver < tf_min:
raise FaceswapError(f"'{arch}' is not compatible with your version of Tensorflow. The "
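
The gate above reduces to a simple float comparison. A minimal sketch, assuming get_tf_version() returns the major.minor Tensorflow version as a float (matching the inline parsing it replaces):

import tensorflow as tf

def get_tf_version():
    # Mirrors the replaced inline parsing: "2.4.1" -> 2.4. Note this float
    # scheme would sort 2.10 before 2.4; a limitation of the approach.
    return float(".".join(tf.__version__.split(".")[:2]))

TF_MIN = 2.4  # e.g. the mobilenet_v3_* entries above
if get_tf_version() < TF_MIN:
    raise RuntimeError(f"This encoder requires Tensorflow >= {TF_MIN}")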
@@ -549,7 +568,10 @@ def _model_kwargs(self):
return dict(mobilenet=dict(alpha=self._config["mobilenet_width"],
depth_multiplier=self._config["mobilenet_depth"],
dropout=self._config["mobilenet_dropout"]),
mobilenet_v2=dict(alpha=self._config["mobilenet_width"]))
mobilenet_v2=dict(alpha=self._config["mobilenet_width"]),
mobilenet_v3=dict(alpha=self._config["mobilenet_width"],
minimalistic=self._config["mobilenet_minimalistic"],
include_preprocessing=False))
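
With hypothetical config values, the new mobilenet_v3 entry resolves to a plain keyword dictionary that is later handed to keras.applications (a sketch; the option names mirror the phaze_a_defaults.py entries below):

config = {"mobilenet_width": 1.0, "mobilenet_minimalistic": False}  # hypothetical values
kwargs = dict(alpha=config["mobilenet_width"],                 # width multiplier
              minimalistic=config["mobilenet_minimalistic"],   # strip SE/hard-swish/5x5 blocks
              include_preprocessing=False)                     # input scaling is done in __call__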

@property
def _selected_model(self):
@@ -559,22 +581,6 @@ def _selected_model(self):
model["kwargs"] = self._model_kwargs.get(arch, {})
return model

@property
def _model_input_shape(self):
""" tuple: The required input shape for the encoder model.
Notes
-----
NasNet does not allow custom input sizes when loading pre-trained weights, so we need to
resize the input for this model
"""
default_size = self._selected_model.get("default_size")
if self._config["enc_load_weights"] and self._selected_model.get("enforce_for_weights"):
retval = (default_size, default_size, 3)
else:
retval = self._input_shape
return retval

def __call__(self):
""" Create the Phaze-A Encoder Model.
@@ -583,12 +589,9 @@ def __call__(self):
:class:`keras.models.Model`
The selected Encoder Model
"""
input_ = Input(shape=self._model_input_shape)
input_ = Input(shape=self._input_shape)
var_x = input_

if self._input_shape != self._model_input_shape:
var_x = self._resize_inputs(var_x)

scaling = self._selected_model.get("scaling")
if scaling:
# Some models expect different scaling.
@@ -611,28 +614,6 @@

return KerasModel(input_, var_x, name="encoder")
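
The scaling branch above remaps inputs before they reach the backbone. A minimal sketch of that remapping, assuming inputs arrive in the 0.0 to 1.0 range (the exact expression is hidden in the collapsed hunk):

def rescale(batch, scaling):
    """ Remap a [0., 1.] batch to the encoder's expected range,
    e.g. scaling=(-1, 1) for the MobileNet family above. """
    low, high = scaling
    return batch * (high - low) + low  # (-1, 1) is equivalent to batch * 2.0 - 1.0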

def _resize_inputs(self, inputs):
""" Some models (specifically NasNet) need a specific input size when loading trained
weights. This is slightly hacky, but arbitrarily resize the input for these instances.
Parameters
----------
inputs: tensor
The input tensor to be resized
Returns
-------
tensor
The resized input tensor
"""
input_size = self._input_shape[0]
new_size = self._model_input_shape[0]
logger.debug("Resizing input for encoder: '%s' from %s to %s due to trained weights usage",
self._config["enc_architecture"], input_size, new_size)
scale = new_size / input_size
interp = "bilinear" if scale > 1 else "nearest"
return K.resize_images(size=scale, interpolation=interp)(inputs)

def _get_encoder_model(self):
""" Return the model defined by the selected architecture.
@@ -648,7 +629,7 @@
"""
if self._selected_model.get("keras_name"):
kwargs = self._selected_model["kwargs"]
kwargs["input_shape"] = self._model_input_shape
kwargs["input_shape"] = self._input_shape
kwargs["include_top"] = False
kwargs["weights"] = "imagenet" if self._config["enc_load_weights"] else None
retval = getattr(kapp, self._selected_model["keras_name"])(**kwargs)
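
For the MobileNetV3 encoders this getattr lookup amounts to something like the call below (a sketch; kapp is keras.applications as imported in this module, and include_preprocessing is only accepted by newer Tensorflow releases):

from tensorflow.keras import applications as kapp

encoder = getattr(kapp, "MobileNetV3Large")(input_shape=(224, 224, 3),
                                            include_top=False,  # headless feature extractor
                                            weights=None,       # or "imagenet" if enc_load_weights
                                            alpha=1.0,
                                            minimalistic=False,
                                            include_preprocessing=False)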
41 changes: 29 additions & 12 deletions plugins/train/model/phaze_a_defaults.py
@@ -52,7 +52,8 @@
if get_backend() != "amd":
_ENCODERS.extend(["efficientnet_b0", "efficientnet_b1", "efficientnet_b2", "efficientnet_b3",
"efficientnet_b4", "efficientnet_b5", "efficientnet_b6", "efficientnet_b7",
"resnet50_v2", "resnet101", "resnet101_v2", "resnet152", "resnet152_v2"])
"mobilenet_v3_large", "mobilenet_v3_small", "resnet50_v2", "resnet101",
"resnet101_v2", "resnet152", "resnet152_v2"])
_ENCODERS = sorted(_ENCODERS)


@@ -157,6 +158,9 @@
"\n\tmobilenet_v2: (32px - 224px). Additional MobileNet parameters can be set with "
"the 'mobilenet' options. Ref: MobileNetV2: Inverted Residuals and Linear "
"Bottlenecks (2018): https://arxiv.org/abs/1801.04381"
"\n\tmobilenet_v3: (32px - 224px). Additional MobileNet parameters can be set with "
"the 'mobilenet' options. Ref: Searching for MobileNetV3 (2019): "
"https://arxiv.org/pdf/1905.02244.pdf"
"\n\tnasnet: (32px - 331px (large) or 224px (mobile)). Ref: Learning Transferable "
"Architectures for Scalable Image Recognition (2017): "
"https://arxiv.org/abs/1707.07012"
@@ -569,20 +573,21 @@
"each layer. Values greater than 1.0 proportionally increase the number of filters "
"within each layer. 1.0 is the default number of layers used within the paper.\n"
"NB: This option is ignored for any non-mobilenet encoders.\n"
"NB: If loading ImageNet weights, then for mobilenet v1 only values of '0.25', "
"'0.5', '0.75' or '1.0 can be selected. For mobilenet v2 only values of '0.35', "
"'0.50', '0.75', '1.0', '1.3' or '1.4' can be selected",
"NB: If loading ImageNet weights, then for MobilenetV1 only values of '0.25', "
"'0.5', '0.75' or '1.0 can be selected. For MobilenetV2 only values of '0.35', "
"'0.50', '0.75', '1.0', '1.3' or '1.4' can be selected. For mobilenet_v3 only values "
"of '0.75' or '1.0' can be selected",
datatype=float,
min_max=(0.1, 2.0),
rounding=2,
group="mobilenet encoder configuration",
fixed=True),
mobilenet_depth=dict(
default=1,
info="The depth multiplier for mobilenet v1 encoder. This is the depth multiplier "
info="The depth multiplier for MobilenetV1 encoder. This is the depth multiplier "
"for depthwise convolution (known as the resolution multiplier within the original "
"paper).\n"
"NB: This option is only used for mobilenet v1 and is ignored for all other "
"NB: This option is only used for MobilenetV1 and is ignored for all other "
"encoders.\n"
"NB: If loading ImageNet weights, this must be set to 1.",
datatype=int,
@@ -592,13 +597,25 @@
fixed=True),
mobilenet_dropout=dict(
default=0.001,
info="The dropout rate for for mobilenet v1 encoder.\n"
"NB: This option is only used for mobilenet v1 and is ignored for all other "
"encoders.\n"
"NB: If loading ImageNet weights, this must be set to 1.0.",
info="The dropout rate for the MobilenetV1 encoder.\n"
"NB: This option is only used for MobilenetV1 and is ignored for all other "
"encoders.",
datatype=float,
min_max=(0.1, 2.0),
rounding=2,
min_max=(0.001, 2.0),
rounding=3,
group="mobilenet encoder configuration",
fixed=True),
mobilenet_minimalistic=dict(
default=False,
info="Use a minimalist version of MobilenetV3.\n"
"In addition to large and small models, MobilenetV3 also contains so-called "
"minimalistic models. These models have the same per-layer dimensions as "
"MobilenetV3; however, they do not utilize any of the advanced blocks "
"(squeeze-and-excite units, hard-swish, and 5x5 convolutions). While these models "
"are less efficient on CPU, they are much more performant on GPU/DSP.\n"
"NB: This option is only used for MobilenetV3 and is ignored for all other "
"encoders.",
datatype=bool,
group="mobilenet encoder configuration",
fixed=True),
)
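
As a sketch of what the new minimalistic option toggles at instantiation (standalone code, assuming Tensorflow >= 2.4):

from tensorflow.keras.applications import MobileNetV3Small

# Standard small model: squeeze-and-excite units, hard-swish and 5x5 convolutions.
standard = MobileNetV3Small(input_shape=(224, 224, 3), include_top=False,
                            weights=None, minimalistic=False)
# Minimalist variant: same per-layer dimensions, but plain ReLU and 3x3 convolutions.
minimal = MobileNetV3Small(input_shape=(224, 224, 3), include_top=False,
                           weights=None, minimalistic=True)
print(standard.count_params(), minimal.count_params())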
