diff --git a/docs/model_zoo/detection.rst b/docs/model_zoo/detection.rst index 59b139e281..166d9e8bc5 100644 --- a/docs/model_zoo/detection.rst +++ b/docs/model_zoo/detection.rst @@ -190,26 +190,30 @@ Checkout SSD demo tutorial here: :ref:`sphx_glr_build_examples_detection_demo_ss Faster-RCNN ----------- -Faster-RCNN models of VOC dataset are evaluated with native resolutions with ``shorter side >= 800`` but ``longer side <= 1300`` without changing aspect ratios. +Faster-RCNN models of VOC dataset are evaluated with native resolutions with ``shorter side >= 800`` but ``longer side <= 1333`` without changing aspect ratios. Checkout Faster-RCNN demo tutorial here: :ref:`sphx_glr_build_examples_detection_demo_faster_rcnn.py` .. table:: :widths: 50 5 25 20 - +-------------------------------------------+-----------------+-----------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ - | Model | Box AP | Training Command | Training Log | - +===========================================+=================+=========================================================================================================================================+=======================================================================================================================================+ - | faster_rcnn_resnet50_v1b_coco [2]_ | 37.0/57.8/39.6 | `shell script `_ | `log `_ | - +-------------------------------------------+-----------------+-----------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ - | faster_rcnn_resnet101_v1d_coco [2]_ | 40.1/60.9/43.3 | `shell script 
`_ | `log `_ | - +-------------------------------------------+-----------------+-----------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ - | faster_rcnn_fpn_resnet50_v1b_coco [4]_ | 38.4/60.2/41.6 | `shell script `_ | `log `_ | - +-------------------------------------------+-----------------+-----------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ - | faster_rcnn_fpn_resnet101_v1d_coco [4]_ | 40.8/62.4/44.7 | `shell script `_ | `log `_ | - +-------------------------------------------+-----------------+-----------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ - | faster_rcnn_fpn_bn_resnet50_v1b_coco [5]_ | 39.3/61.3/42.9 | `shell script `_ | `log `_ | - +-------------------------------------------+-----------------+-----------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ + +---------------------------------------------+-----------------+-------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+ + 
| Model | Box AP | Training Command | Training Log | + +=============================================+=================+===========================================================================================================================================+=========================================================================================================================================+ + | faster_rcnn_resnet50_v1b_coco [2]_ | 37.0/57.8/39.6 | `shell script `_ | `log `_ | + +---------------------------------------------+-----------------+-------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+ + | faster_rcnn_resnet101_v1d_coco [2]_ | 40.1/60.9/43.3 | `shell script `_ | `log `_ | + +---------------------------------------------+-----------------+-------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+ + | faster_rcnn_fpn_resnet50_v1b_coco [4]_ | 38.4/60.2/41.6 | `shell script `_ | `log `_ | + +---------------------------------------------+-----------------+-------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+ + | faster_rcnn_fpn_resnet101_v1d_coco [4]_ | 40.8/62.4/44.7 | `shell script `_ | `log `_ | + 
+---------------------------------------------+-----------------+-------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+ + | faster_rcnn_fpn_bn_resnet50_v1b_coco [5]_ | 39.3/61.3/42.9 | `shell script `_ | `log `_ | + +---------------------------------------------+-----------------+-------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+ + | faster_rcnn_fpn_syncbn_resnest50_coco [7]_ | 42.7/64.1/46.4 | `shell script `_ | `log `_ | + +---------------------------------------------+-----------------+-------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+ + | faster_rcnn_fpn_syncbn_resnest101_coco [7]_ | 44.9/66.4/48.9 | `shell script `_ | `log `_ | + +---------------------------------------------+-----------------+-------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+ YOLO-v3 ------- @@ -284,3 +288,6 @@ Note that ``dcnv2`` indicate that models include Modulated Deformable Convolutio .. [6] Zhou, Xingyi, Dequan Wang, and Philipp Krähenbühl. \ "Objects as Points." \ arXiv preprint arXiv:1904.07850 (2019). +.. 
[7] Hang Zhang, Chongruo Wu, Zhongyue Zhang, Yi Zhu, Zhi Zhang, Haibin Lin, Yue Sun, Tong He, Jonas Muller, R. Manmatha, Mu Li and Alex Smola \ + "ResNeSt: Split-Attention Network" \ + arXiv preprint (2020). diff --git a/gluoncv/data/__init__.py b/gluoncv/data/__init__.py index c9cfd4dd06..eaa120a154 100644 --- a/gluoncv/data/__init__.py +++ b/gluoncv/data/__init__.py @@ -5,7 +5,7 @@ from . import batchify from .imagenet.classification import ImageNet, ImageNet1kAttr from .dataloader import DetectionDataLoader, RandomTransformDataLoader -from .pascal_voc.detection import VOCDetection +from .pascal_voc.detection import VOCDetection, CustomVOCDetection from .mscoco.detection import COCODetection from .mscoco.detection import COCODetectionDALI from .mscoco.instance import COCOInstance diff --git a/gluoncv/data/pascal_voc/detection.py b/gluoncv/data/pascal_voc/detection.py index 8f56612733..88f4a49160 100644 --- a/gluoncv/data/pascal_voc/detection.py +++ b/gluoncv/data/pascal_voc/detection.py @@ -1,10 +1,14 @@ """Pascal VOC object detection dataset.""" from __future__ import absolute_import from __future__ import division -import os + +import glob import logging +import os import warnings + import numpy as np + try: import xml.etree.cElementTree as ET except ImportError: @@ -87,8 +91,9 @@ def __getitem__(self, idx): def _load_items(self, splits): """Load individual image indices from splits.""" ids = [] - for year, name in splits: - root = os.path.join(self._root, 'VOC' + str(year)) + for subfolder, name in splits: + root = os.path.join( + self._root, ('VOC' + str(subfolder)) if isinstance(subfolder, int) else subfolder) lf = os.path.join(root, 'ImageSets', 'Main', name + '.txt') with open(lf, 'r') as f: ids += [(root, line.strip()) for line in f.readlines()] @@ -122,9 +127,9 @@ def _load_label(self, idx): ymax = (float(xml_box.find('ymax').text) - 1) try: self._validate_label(xmin, ymin, xmax, ymax, width, height) + label.append([xmin, ymin, xmax, ymax, cls_id, 
difficult]) except AssertionError as e: - raise RuntimeError("Invalid label at {}, {}".format(anno_path, e)) - label.append([xmin, ymin, xmax, ymax, cls_id, difficult]) + logging.warning("Invalid label at %s, %s", anno_path, e) return np.array(label) def _validate_label(self, xmin, ymin, xmax, ymax, width, height): @@ -145,3 +150,30 @@ def _preload_labels(self): """Preload all labels into memory.""" logging.debug("Preloading %s labels into memory...", str(self)) return [self._load_label(idx) for idx in range(len(self))] + + +class CustomVOCDetection(VOCDetection): + """Custom Pascal VOC detection Dataset. + Classes are generated from dataset + generate_classes : bool, default False + If True, generate class labels based on the annotations instead of the default class labels. + """ + + def __init__(self, generate_classes=False, **kwargs): + super(CustomVOCDetection, self).__init__(**kwargs) + if generate_classes: + self.CLASSES = self._generate_classes() + + def _generate_classes(self): + classes = set() + all_xml = glob.glob(os.path.join(self._root, 'Annotations', '*.xml')) + for each_xml_file in all_xml: + tree = ET.parse(each_xml_file) + root = tree.getroot() + for child in root: + if child.tag == 'object': + for item in child: + if item.tag == 'name': + classes.add(item.text) + classes = sorted(list(classes)) + return classes diff --git a/gluoncv/model_zoo/model_store.py b/gluoncv/model_zoo/model_store.py index c3e2bc46aa..0a822b37cb 100644 --- a/gluoncv/model_zoo/model_store.py +++ b/gluoncv/model_zoo/model_store.py @@ -59,9 +59,11 @@ ('da9756faa5b9b4e34dedcf83ee0733d5895796ad', 'ssd_512_mobilenet1.0_coco'), ('447328d89d70ae1e2ca49226b8d834e5a5456df3', 'faster_rcnn_resnet50_v1b_voc'), ('5b4690fb7c5b62c44fb36c67d0642b633697f1bb', 'faster_rcnn_resnet50_v1b_coco'), + ('6df46961827647d418b11ffaf616a6a60d9dd16e', 'faster_rcnn_fpn_syncbn_resnest50_coco'), ('a465eca35e78aba6ebdf99bf52031a447e501063', 'faster_rcnn_resnet101_v1d_coco'), 
('233572743bc537291590f4edf8a0c17c14b234bb', 'faster_rcnn_fpn_resnet50_v1b_coco'), ('1194ab4ec6e06386aadd55820add312c8ef59c74', 'faster_rcnn_fpn_resnet101_v1d_coco'), + ('baebfa1b7d7f56dd33a7687efea4b014736bd791', 'faster_rcnn_fpn_syncbn_resnest101_coco'), ('e071cf1550bc0331c218a9072b59e9550595d1e7', 'mask_rcnn_resnet18_v1b_coco'), ('a3527fdc2cee5b1f32a61e5fd7cda8fb673e86e5', 'mask_rcnn_resnet50_v1b_coco'), ('4a3249c584f81c2a9b5d852b742637cd692ebdcb', 'mask_rcnn_resnet101_v1d_coco'), diff --git a/gluoncv/model_zoo/model_zoo.py b/gluoncv/model_zoo/model_zoo.py index a64c618350..2c91c2fd26 100644 --- a/gluoncv/model_zoo/model_zoo.py +++ b/gluoncv/model_zoo/model_zoo.py @@ -127,12 +127,15 @@ 'faster_rcnn_resnet50_v1b_coco': faster_rcnn_resnet50_v1b_coco, 'faster_rcnn_fpn_resnet50_v1b_coco': faster_rcnn_fpn_resnet50_v1b_coco, 'faster_rcnn_fpn_syncbn_resnet50_v1b_coco': faster_rcnn_fpn_syncbn_resnet50_v1b_coco, + 'faster_rcnn_fpn_syncbn_resnest50_coco': faster_rcnn_fpn_syncbn_resnest50_coco, 'faster_rcnn_resnet50_v1b_custom': faster_rcnn_resnet50_v1b_custom, 'faster_rcnn_resnet101_v1d_voc': faster_rcnn_resnet101_v1d_voc, 'faster_rcnn_resnet101_v1d_coco': faster_rcnn_resnet101_v1d_coco, 'faster_rcnn_fpn_resnet101_v1d_coco': faster_rcnn_fpn_resnet101_v1d_coco, 'faster_rcnn_fpn_syncbn_resnet101_v1d_coco': faster_rcnn_fpn_syncbn_resnet101_v1d_coco, + 'faster_rcnn_fpn_syncbn_resnest101_coco': faster_rcnn_fpn_syncbn_resnest101_coco, 'faster_rcnn_resnet101_v1d_custom': faster_rcnn_resnet101_v1d_custom, + 'faster_rcnn_fpn_syncbn_resnest269_coco': faster_rcnn_fpn_syncbn_resnest269_coco, 'custom_faster_rcnn_fpn': custom_faster_rcnn_fpn, 'mask_rcnn_resnet50_v1b_coco': mask_rcnn_resnet50_v1b_coco, 'mask_rcnn_fpn_resnet50_v1b_coco': mask_rcnn_fpn_resnet50_v1b_coco, diff --git a/gluoncv/model_zoo/rcnn/faster_rcnn/faster_rcnn.py b/gluoncv/model_zoo/rcnn/faster_rcnn/faster_rcnn.py index d3cc142170..802b400a26 100644 --- a/gluoncv/model_zoo/rcnn/faster_rcnn/faster_rcnn.py +++ 
b/gluoncv/model_zoo/rcnn/faster_rcnn/faster_rcnn.py @@ -585,7 +585,7 @@ def custom_faster_rcnn_fpn(classes, transfer=None, dataset='custom', pretrained_ module_list.append('bn') net = get_model( '_'.join(['faster_rcnn'] + module_list + [base_network_name, str(transfer)]), - pretrained=True) + pretrained=True, per_device_batch_size=kwargs['per_device_batch_size']) reuse_classes = [x for x in classes if x in net.classes] net.reset_class(classes, reuse_weights=reuse_classes) return net diff --git a/gluoncv/model_zoo/rcnn/faster_rcnn/predefined_models.py b/gluoncv/model_zoo/rcnn/faster_rcnn/predefined_models.py index c001326220..180544782c 100644 --- a/gluoncv/model_zoo/rcnn/faster_rcnn/predefined_models.py +++ b/gluoncv/model_zoo/rcnn/faster_rcnn/predefined_models.py @@ -14,12 +14,15 @@ 'faster_rcnn_resnet50_v1b_coco', 'faster_rcnn_fpn_resnet50_v1b_coco', 'faster_rcnn_fpn_syncbn_resnet50_v1b_coco', + 'faster_rcnn_fpn_syncbn_resnest50_coco', 'faster_rcnn_resnet50_v1b_custom', 'faster_rcnn_resnet101_v1d_voc', 'faster_rcnn_resnet101_v1d_coco', 'faster_rcnn_fpn_resnet101_v1d_coco', 'faster_rcnn_fpn_syncbn_resnet101_v1d_coco', - 'faster_rcnn_resnet101_v1d_custom'] + 'faster_rcnn_fpn_syncbn_resnest101_coco', + 'faster_rcnn_resnet101_v1d_custom', + 'faster_rcnn_fpn_syncbn_resnest269_coco'] def faster_rcnn_resnet50_v1b_voc(pretrained=False, pretrained_base=True, **kwargs): @@ -243,6 +246,72 @@ def faster_rcnn_fpn_syncbn_resnet50_v1b_coco(pretrained=False, pretrained_base=T pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100, **kwargs) +def faster_rcnn_fpn_syncbn_resnest50_coco(pretrained=False, pretrained_base=True, num_devices=0, + **kwargs): + r"""Faster R-CNN with ResNeSt + ResNeSt: Split Attention Network" + + Parameters + ---------- + pretrained : bool or str + Boolean value controls whether to load the default pretrained weights for model. + String value represents the hashtag for a certain version of pretrained weights. 
+ pretrained_base : bool or str, optional, default is True + Load pretrained base network, the extra layers are randomized. Note that + if pretrained is `True`, this has no effect. + num_devices : int, default is 0 + Number of devices for sync batch norm layer. If less than 1, use all devices available. + ctx : Context, default CPU + The context in which to load the pretrained weights. + root : str, default '~/.mxnet/models' + Location for keeping the model parameters. + + Examples + -------- + >>> model = faster_rcnn_fpn_syncbn_resnest50_coco(pretrained=True) + >>> print(model) + """ + from ....model_zoo.resnest import resnest50 + from ....data import COCODetection + classes = COCODetection.CLASSES + pretrained_base = False if pretrained else pretrained_base + gluon_norm_kwargs = {'num_devices': num_devices} if num_devices >= 1 else {} + base_network = resnest50(pretrained=pretrained_base, dilated=False, use_global_stats=False, + norm_layer=SyncBatchNorm, norm_kwargs=gluon_norm_kwargs) + from gluoncv.nn.dropblock import set_drop_prob + from functools import partial + apply_drop_prob = partial(set_drop_prob, 0.0) + base_network.apply(apply_drop_prob) + sym_norm_kwargs = {'ndev': num_devices} if num_devices >= 1 else {} + features = FPNFeatureExpander( + network=base_network, + outputs=['layers1_relu11_fwd', 'layers2_relu15_fwd', 'layers3_relu23_fwd', + 'layers4_relu11_fwd'], num_filters=[256, 256, 256, 256], use_1x1=True, + use_upsample=True, use_elewadd=True, use_p6=True, no_bias=True, pretrained=pretrained_base, + norm_layer=mx.sym.contrib.SyncBatchNorm, norm_kwargs=sym_norm_kwargs) + top_features = None + # 4 Conv and 1 FC layers before RCNN cls and reg + box_features = nn.HybridSequential() + for _ in range(4): + box_features.add(nn.Conv2D(256, 3, padding=1, use_bias=False), + SyncBatchNorm(**gluon_norm_kwargs), + nn.Activation('relu')) + box_features.add(nn.Dense(1024, weight_initializer=mx.init.Normal(0.01)), + nn.Activation('relu')) + + train_patterns = 
'(?!.*moving)' # excluding symbol bn moving mean and var + return get_faster_rcnn( + name='fpn_syncbn_resnest50', dataset='coco', pretrained=pretrained, features=features, + top_features=top_features, classes=classes, box_features=box_features, + short=(640, 800), max_size=1333, min_stage=2, max_stage=6, train_patterns=train_patterns, + nms_thresh=0.5, nms_topk=-1, post_nms=-1, roi_mode='align', roi_size=(7, 7), + strides=(4, 8, 16, 32, 64), clip=4.14, rpn_channel=256, base_size=16, + scales=(2, 4, 8, 16, 32), ratios=(0.5, 1, 2), alloc_size=(384, 384), + rpn_nms_thresh=0.7, rpn_train_pre_nms=12000, rpn_train_post_nms=2000, + rpn_test_pre_nms=6000, rpn_test_post_nms=1000, rpn_min_size=1, num_sample=512, + pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100, **kwargs) + + def faster_rcnn_resnet50_v1b_custom(classes, transfer=None, pretrained_base=True, pretrained=False, **kwargs): r"""Faster RCNN model with resnet50_v1b base network on custom dataset. @@ -516,6 +585,72 @@ def faster_rcnn_fpn_syncbn_resnet101_v1d_coco(pretrained=False, pretrained_base= pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100, **kwargs) +def faster_rcnn_fpn_syncbn_resnest101_coco(pretrained=False, pretrained_base=True, num_devices=0, + **kwargs): + r"""Faster R-CNN with ResNeSt + ResNeSt: Split Attention Network" + + Parameters + ---------- + pretrained : bool or str + Boolean value controls whether to load the default pretrained weights for model. + String value represents the hashtag for a certain version of pretrained weights. + pretrained_base : bool or str, optional, default is True + Load pretrained base network, the extra layers are randomized. Note that + if pretrained is `True`, this has no effect. + num_devices : int, default is 0 + Number of devices for sync batch norm layer. If less than 1, use all devices available. + ctx : Context, default CPU + The context in which to load the pretrained weights. 
+ root : str, default '~/.mxnet/models' + Location for keeping the model parameters. + + Examples + -------- + >>> model = faster_rcnn_fpn_syncbn_resnest101_coco(pretrained=True) + >>> print(model) + """ + from ....model_zoo.resnest import resnest101 + from ....data import COCODetection + classes = COCODetection.CLASSES + pretrained_base = False if pretrained else pretrained_base + gluon_norm_kwargs = {'num_devices': num_devices} if num_devices >= 1 else {} + base_network = resnest101(pretrained=pretrained_base, dilated=False, use_global_stats=False, + norm_layer=SyncBatchNorm, norm_kwargs=gluon_norm_kwargs) + from gluoncv.nn.dropblock import set_drop_prob + from functools import partial + apply_drop_prob = partial(set_drop_prob, 0.0) + base_network.apply(apply_drop_prob) + sym_norm_kwargs = {'ndev': num_devices} if num_devices >= 1 else {} + features = FPNFeatureExpander( + network=base_network, + outputs=['layers1_relu11_fwd', 'layers2_relu15_fwd', 'layers3_relu91_fwd', + 'layers4_relu11_fwd'], num_filters=[256, 256, 256, 256], use_1x1=True, + use_upsample=True, use_elewadd=True, use_p6=True, no_bias=True, pretrained=pretrained_base, + norm_layer=mx.sym.contrib.SyncBatchNorm, norm_kwargs=sym_norm_kwargs) + top_features = None + # 4 Conv and 1 FC layers before RCNN cls and reg + box_features = nn.HybridSequential() + for _ in range(4): + box_features.add(nn.Conv2D(256, 3, padding=1, use_bias=False), + SyncBatchNorm(**gluon_norm_kwargs), + nn.Activation('relu')) + box_features.add(nn.Dense(1024, weight_initializer=mx.init.Normal(0.01)), + nn.Activation('relu')) + + train_patterns = '(?!.*moving)' # excluding symbol bn moving mean and var + return get_faster_rcnn( + name='fpn_syncbn_resnest101', dataset='coco', pretrained=pretrained, features=features, + top_features=top_features, classes=classes, box_features=box_features, + short=(640, 800), max_size=1333, min_stage=2, max_stage=6, train_patterns=train_patterns, + nms_thresh=0.5, nms_topk=-1, post_nms=-1, 
roi_mode='align', roi_size=(7, 7), + strides=(4, 8, 16, 32, 64), clip=4.14, rpn_channel=256, base_size=16, + scales=(2, 4, 8, 16, 32), ratios=(0.5, 1, 2), alloc_size=(384, 384), + rpn_nms_thresh=0.7, rpn_train_pre_nms=12000, rpn_train_post_nms=2000, + rpn_test_pre_nms=6000, rpn_test_post_nms=1000, rpn_min_size=1, num_sample=512, + pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100, **kwargs) + + def faster_rcnn_resnet101_v1d_custom(classes, transfer=None, pretrained_base=True, pretrained=False, **kwargs): r"""Faster RCNN model with resnet101_v1d base network on custom dataset. @@ -564,3 +699,69 @@ def faster_rcnn_resnet101_v1d_custom(classes, transfer=None, pretrained_base=Tru reuse_classes = [x for x in classes if x in net.classes] net.reset_class(classes, reuse_weights=reuse_classes) return net + + +def faster_rcnn_fpn_syncbn_resnest269_coco(pretrained=False, pretrained_base=True, num_devices=0, + **kwargs): + r"""Faster R-CNN with ResNeSt + ResNeSt: Split Attention Network" + + Parameters + ---------- + pretrained : bool or str + Boolean value controls whether to load the default pretrained weights for model. + String value represents the hashtag for a certain version of pretrained weights. + pretrained_base : bool or str, optional, default is True + Load pretrained base network, the extra layers are randomized. Note that + if pretrained is `True`, this has no effect. + num_devices : int, default is 0 + Number of devices for sync batch norm layer. If less than 1, use all devices available. + ctx : Context, default CPU + The context in which to load the pretrained weights. + root : str, default '~/.mxnet/models' + Location for keeping the model parameters. 
+ + Examples + -------- + >>> model = faster_rcnn_fpn_syncbn_resnest269_coco(pretrained=True) + >>> print(model) + """ + from ....model_zoo.resnest import resnest269 + from ....data import COCODetection + classes = COCODetection.CLASSES + pretrained_base = False if pretrained else pretrained_base + gluon_norm_kwargs = {'num_devices': num_devices} if num_devices >= 1 else {} + base_network = resnest269(pretrained=pretrained_base, dilated=False, use_global_stats=False, + norm_layer=SyncBatchNorm, norm_kwargs=gluon_norm_kwargs) + from gluoncv.nn.dropblock import set_drop_prob + from functools import partial + apply_drop_prob = partial(set_drop_prob, 0.0) + base_network.apply(apply_drop_prob) + sym_norm_kwargs = {'ndev': num_devices} if num_devices >= 1 else {} + features = FPNFeatureExpander( + network=base_network, + outputs=['layers1_relu11_fwd', 'layers2_relu119_fwd', 'layers3_relu191_fwd', + 'layers4_relu31_fwd'], num_filters=[256, 256, 256, 256], use_1x1=True, + use_upsample=True, use_elewadd=True, use_p6=True, no_bias=True, pretrained=pretrained_base, + norm_layer=mx.sym.contrib.SyncBatchNorm, norm_kwargs=sym_norm_kwargs) + top_features = None + # 4 Conv and 1 FC layers before RCNN cls and reg + box_features = nn.HybridSequential() + for _ in range(4): + box_features.add(nn.Conv2D(256, 3, padding=1, use_bias=False), + SyncBatchNorm(**gluon_norm_kwargs), + nn.Activation('relu')) + box_features.add(nn.Dense(1024, weight_initializer=mx.init.Normal(0.01)), + nn.Activation('relu')) + + train_patterns = '(?!.*moving)' # excluding symbol bn moving mean and var + return get_faster_rcnn( + name='fpn_syncbn_resnest269', dataset='coco', pretrained=pretrained, features=features, + top_features=top_features, classes=classes, box_features=box_features, + short=(640, 864), max_size=1440, min_stage=2, max_stage=6, train_patterns=train_patterns, + nms_thresh=0.5, nms_topk=-1, post_nms=-1, roi_mode='align', roi_size=(7, 7), + strides=(4, 8, 16, 32, 64), clip=4.14, rpn_channel=256, 
base_size=16, + scales=(2, 4, 8, 16, 32), ratios=(0.5, 1, 2), alloc_size=(384, 384), + rpn_nms_thresh=0.7, rpn_train_pre_nms=12000, rpn_train_post_nms=2000, + rpn_test_pre_nms=6000, rpn_test_post_nms=1000, rpn_min_size=1, num_sample=512, + pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100, **kwargs) diff --git a/scripts/detection/faster_rcnn/train_faster_rcnn.py b/scripts/detection/faster_rcnn/train_faster_rcnn.py index c0146b176e..824dd7b7d3 100644 --- a/scripts/detection/faster_rcnn/train_faster_rcnn.py +++ b/scripts/detection/faster_rcnn/train_faster_rcnn.py @@ -32,7 +32,6 @@ from gluoncv.utils.parallel import Parallelizable, Parallel from gluoncv.utils.metrics.rcnn import RPNAccMetric, RPNL1LossMetric, RCNNAccMetric, \ RCNNL1LossMetric -from gluoncv.data import COCODetection, VOCDetection try: import horovod.mxnet as hvd @@ -43,7 +42,8 @@ def parse_args(): parser = argparse.ArgumentParser(description='Train Faster-RCNN networks e2e.') parser.add_argument('--network', type=str, default='resnet50_v1b', - choices=['resnet18_v1b', 'resnet50_v1b', 'resnet101_v1d'], + choices=['resnet18_v1b', 'resnet50_v1b', 'resnet101_v1d', + 'resnest50', 'resnest101', 'resnest269'], help="Base network name which serves as feature extraction base.") parser.add_argument('--dataset', type=str, default='voc', help='Training dataset. 
Now support voc and coco.') @@ -301,6 +301,13 @@ def get_dataset(dataset, args): val_dataset = gdata.VOCDetection( splits=[(2007, 'test')]) val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=val_dataset.classes) + elif dataset.lower() in ['clipart', 'comic', 'watercolor']: + root = os.path.join('~', '.mxnet', 'datasets', dataset.lower()) + train_dataset = gdata.CustomVOCDetection(root=root, splits=[('', 'train')], + generate_classes=True) + val_dataset = gdata.CustomVOCDetection(root=root, splits=[('', 'test')], + generate_classes=True) + val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=val_dataset.classes) elif dataset.lower() == 'coco': train_dataset = gdata.COCODetection(splits='instances_train2017', use_crowd=False) val_dataset = gdata.COCODetection(splits='instances_val2017', skip_empty=False) @@ -426,7 +433,7 @@ def forward_backward(self, x): gt_label = label[:, :, 4:5] gt_box = label[:, :, :4] cls_pred, box_pred, roi, samples, matches, rpn_score, rpn_box, anchors, cls_targets, \ - box_targets, box_masks, _ = self.net(data, gt_box, gt_label) + box_targets, box_masks, _ = self.net(data, gt_box, gt_label) # losses of rpn rpn_score = rpn_score.squeeze(axis=-1) num_rpn_pos = (rpn_cls_targets >= 0).sum() @@ -463,7 +470,7 @@ def forward_backward(self, x): total_loss.backward() return rpn_loss1_metric, rpn_loss2_metric, rcnn_loss1_metric, rcnn_loss2_metric, \ - rpn_acc_metric, rpn_l1_loss_metric, rcnn_acc_metric, rcnn_l1_loss_metric + rpn_acc_metric, rpn_l1_loss_metric, rcnn_acc_metric, rcnn_l1_loss_metric def train(net, train_data, val_data, eval_metric, batch_size, ctx, args): @@ -632,6 +639,9 @@ def train(net, train_data, val_data, eval_metric, batch_size, ctx, args): ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()] ctx = ctx if ctx else [mx.cpu()] + # training data + train_dataset, val_dataset, eval_metric = get_dataset(args.dataset, args) + # network kwargs = {} module_list = [] @@ -662,11 +672,7 @@ def train(net, train_data, 
val_data, eval_metric, batch_size, ctx, args): norm_kwargs = None sym_norm_layer = None sym_norm_kwargs = None - if args.dataset == 'coco': - classes = COCODetection.CLASSES - else: - # default to VOC - classes = VOCDetection.CLASSES + classes = train_dataset.CLASSES net = get_model('custom_faster_rcnn_fpn', classes=classes, transfer=None, dataset=args.dataset, pretrained_base=not args.no_pretrained_base, base_network_name=args.network, norm_layer=norm_layer, @@ -709,8 +715,7 @@ def train(net, train_data, val_data, eval_metric, batch_size, ctx, args): net.collect_params('.*batchnorm.*').setattr('dtype', 'float32') net.collect_params('.*normalizedperclassboxcenterencoder.*').setattr('dtype', 'float32') - # training data - train_dataset, val_dataset, eval_metric = get_dataset(args.dataset, args) + # dataloader batch_size = args.batch_size // num_gpus if args.horovod else args.batch_size train_data, val_data = get_dataloader( net, train_dataset, val_dataset, FasterRCNNDefaultTrainTransform,