diff --git a/docs/model_zoo/detection.rst b/docs/model_zoo/detection.rst
index 59b139e281..166d9e8bc5 100644
--- a/docs/model_zoo/detection.rst
+++ b/docs/model_zoo/detection.rst
@@ -190,26 +190,30 @@ Checkout SSD demo tutorial here: :ref:`sphx_glr_build_examples_detection_demo_ss
 Faster-RCNN
 -----------
 
-Faster-RCNN models of VOC dataset are evaluated with native resolutions with ``shorter side >= 800`` but ``longer side <= 1300`` without changing aspect ratios.
+Faster-RCNN models of VOC dataset are evaluated at native resolutions with ``shorter side >= 800`` but ``longer side <= 1333``, without changing aspect ratios.
 
 Checkout Faster-RCNN demo tutorial here: :ref:`sphx_glr_build_examples_detection_demo_faster_rcnn.py`
 
 .. table::
    :widths: 50 5 25 20
 
-   +-------------------------------------------+-----------------+-----------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
-   | Model                                     | Box AP          | Training Command                                                                                                                        | Training Log                                                                                                                          |
-   +===========================================+=================+=========================================================================================================================================+=======================================================================================================================================+
-   | faster_rcnn_resnet50_v1b_coco [2]_        | 37.0/57.8/39.6  | `shell script <https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/logs/detection/faster_rcnn_resnet50_v1b_coco.sh>`_        | `log <https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/logs/detection/faster_rcnn_resnet50_v1b_coco_train.log>`_        |
-   +-------------------------------------------+-----------------+-----------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
-   | faster_rcnn_resnet101_v1d_coco [2]_       | 40.1/60.9/43.3  | `shell script <https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/logs/detection/faster_rcnn_resnet101_v1d_coco.sh>`_       | `log <https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/logs/detection/faster_rcnn_resnet101_v1d_coco_train.log>`_       |
-   +-------------------------------------------+-----------------+-----------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
-   | faster_rcnn_fpn_resnet50_v1b_coco [4]_    | 38.4/60.2/41.6  | `shell script <https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/logs/detection/faster_rcnn_fpn_resnet50_v1b_coco.sh>`_    | `log <https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/logs/detection/faster_rcnn_fpn_resnet50_v1b_coco_train.log>`_    |
-   +-------------------------------------------+-----------------+-----------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
-   | faster_rcnn_fpn_resnet101_v1d_coco [4]_   | 40.8/62.4/44.7  | `shell script <https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/logs/detection/faster_rcnn_fpn_resnet101_v1d_coco.sh>`_   | `log <https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/logs/detection/faster_rcnn_fpn_resnet101_v1d_coco_train.log>`_   |
-   +-------------------------------------------+-----------------+-----------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
-   | faster_rcnn_fpn_bn_resnet50_v1b_coco [5]_ | 39.3/61.3/42.9  | `shell script <https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/logs/detection/faster_rcnn_fpn_bn_resnet50_v1b_coco.sh>`_ | `log <https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/logs/detection/faster_rcnn_fpn_bn_resnet50_v1b_coco_train.log>`_ |
-   +-------------------------------------------+-----------------+-----------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
+   +---------------------------------------------+-----------------+-------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+
+   | Model                                       | Box AP          | Training Command                                                                                                                          | Training Log                                                                                                                            |
+   +=============================================+=================+===========================================================================================================================================+=========================================================================================================================================+
+   | faster_rcnn_resnet50_v1b_coco [2]_          | 37.0/57.8/39.6  | `shell script <https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/logs/detection/faster_rcnn_resnet50_v1b_coco.sh>`_          | `log <https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/logs/detection/faster_rcnn_resnet50_v1b_coco_train.log>`_          |
+   +---------------------------------------------+-----------------+-------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+
+   | faster_rcnn_resnet101_v1d_coco [2]_         | 40.1/60.9/43.3  | `shell script <https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/logs/detection/faster_rcnn_resnet101_v1d_coco.sh>`_         | `log <https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/logs/detection/faster_rcnn_resnet101_v1d_coco_train.log>`_         |
+   +---------------------------------------------+-----------------+-------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+
+   | faster_rcnn_fpn_resnet50_v1b_coco [4]_      | 38.4/60.2/41.6  | `shell script <https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/logs/detection/faster_rcnn_fpn_resnet50_v1b_coco.sh>`_      | `log <https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/logs/detection/faster_rcnn_fpn_resnet50_v1b_coco_train.log>`_      |
+   +---------------------------------------------+-----------------+-------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+
+   | faster_rcnn_fpn_resnet101_v1d_coco [4]_     | 40.8/62.4/44.7  | `shell script <https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/logs/detection/faster_rcnn_fpn_resnet101_v1d_coco.sh>`_     | `log <https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/logs/detection/faster_rcnn_fpn_resnet101_v1d_coco_train.log>`_     |
+   +---------------------------------------------+-----------------+-------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+
+   | faster_rcnn_fpn_bn_resnet50_v1b_coco [5]_   | 39.3/61.3/42.9  | `shell script <https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/logs/detection/faster_rcnn_fpn_bn_resnet50_v1b_coco.sh>`_   | `log <https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/logs/detection/faster_rcnn_fpn_bn_resnet50_v1b_coco_train.log>`_   |
+   +---------------------------------------------+-----------------+-------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+
+   | faster_rcnn_fpn_syncbn_resnest50_coco [7]_  | 42.7/64.1/46.4  | `shell script <https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/logs/detection/faster_rcnn_fpn_syncbn_resnest50_coco.sh>`_  | `log <https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/logs/detection/faster_rcnn_fpn_syncbn_resnest50_coco_train.log>`_  |
+   +---------------------------------------------+-----------------+-------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+
+   | faster_rcnn_fpn_syncbn_resnest101_coco [7]_ | 44.9/66.4/48.9  | `shell script <https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/logs/detection/faster_rcnn_fpn_syncbn_resnest101_coco.sh>`_ | `log <https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/logs/detection/faster_rcnn_fpn_syncbn_resnest101_coco_train.log>`_ |
+   +---------------------------------------------+-----------------+-------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+
 
 YOLO-v3
 -------
@@ -284,3 +288,6 @@ Note that ``dcnv2`` indicate that models include Modulated Deformable Convolutio
 .. [6] Zhou, Xingyi, Dequan Wang, and Philipp Krähenbühl. \
        "Objects as Points." \
        arXiv preprint arXiv:1904.07850 (2019).
+.. [7] Hang Zhang, Chongruo Wu, Zhongyue Zhang, Yi Zhu, Zhi Zhang, Haibin Lin, Yue Sun, Tong He, Jonas Mueller, R. Manmatha, Mu Li and Alex Smola. \
+       "ResNeSt: Split-Attention Networks." \
+       arXiv preprint arXiv:2004.08955 (2020).
diff --git a/gluoncv/data/__init__.py b/gluoncv/data/__init__.py
index c9cfd4dd06..eaa120a154 100644
--- a/gluoncv/data/__init__.py
+++ b/gluoncv/data/__init__.py
@@ -5,7 +5,7 @@
 from . import batchify
 from .imagenet.classification import ImageNet, ImageNet1kAttr
 from .dataloader import DetectionDataLoader, RandomTransformDataLoader
-from .pascal_voc.detection import VOCDetection
+from .pascal_voc.detection import VOCDetection, CustomVOCDetection
 from .mscoco.detection import COCODetection
 from .mscoco.detection import COCODetectionDALI
 from .mscoco.instance import COCOInstance
diff --git a/gluoncv/data/pascal_voc/detection.py b/gluoncv/data/pascal_voc/detection.py
index 8f56612733..88f4a49160 100644
--- a/gluoncv/data/pascal_voc/detection.py
+++ b/gluoncv/data/pascal_voc/detection.py
@@ -1,10 +1,14 @@
 """Pascal VOC object detection dataset."""
 from __future__ import absolute_import
 from __future__ import division
-import os
+
+import glob
 import logging
+import os
 import warnings
+
 import numpy as np
+
 try:
     import xml.etree.cElementTree as ET
 except ImportError:
@@ -87,8 +91,9 @@ def __getitem__(self, idx):
     def _load_items(self, splits):
         """Load individual image indices from splits."""
         ids = []
-        for year, name in splits:
-            root = os.path.join(self._root, 'VOC' + str(year))
+        for subfolder, name in splits:
+            root = os.path.join(
+                self._root, ('VOC' + str(subfolder)) if isinstance(subfolder, int) else subfolder)
             lf = os.path.join(root, 'ImageSets', 'Main', name + '.txt')
             with open(lf, 'r') as f:
                 ids += [(root, line.strip()) for line in f.readlines()]
@@ -122,9 +127,9 @@ def _load_label(self, idx):
             ymax = (float(xml_box.find('ymax').text) - 1)
             try:
                 self._validate_label(xmin, ymin, xmax, ymax, width, height)
+                label.append([xmin, ymin, xmax, ymax, cls_id, difficult])
             except AssertionError as e:
-                raise RuntimeError("Invalid label at {}, {}".format(anno_path, e))
-            label.append([xmin, ymin, xmax, ymax, cls_id, difficult])
+                logging.warning("Invalid label at %s, %s", anno_path, e)
         return np.array(label)
 
     def _validate_label(self, xmin, ymin, xmax, ymax, width, height):
@@ -145,3 +150,30 @@ def _preload_labels(self):
         """Preload all labels into memory."""
         logging.debug("Preloading %s labels into memory...", str(self))
         return [self._load_label(idx) for idx in range(len(self))]
+
+
+class CustomVOCDetection(VOCDetection):
+    """Pascal VOC style detection dataset whose class names may come from its annotations.
+
+    Parameters
+    ----------
+    generate_classes : bool, default False
+        If True, replace ``CLASSES`` with the sorted object names found in the annotation files.
+    **kwargs
+        Forwarded unchanged to ``VOCDetection.__init__``.
+    """
+
+    def __init__(self, generate_classes=False, **kwargs):
+        super(CustomVOCDetection, self).__init__(**kwargs)
+        if generate_classes:
+            self.CLASSES = self._generate_classes()
+
+    def _generate_classes(self):
+        """Collect the sorted, unique object names from ``Annotations/*.xml`` under the root."""
+        names = set()
+        for xml_path in glob.glob(os.path.join(self._root, 'Annotations', '*.xml')):
+            for obj in ET.parse(xml_path).getroot().findall('object'):
+                # Empty <name/> elements have text None; skip them so sorted() gets only str.
+                names.update(tag.text for tag in obj.findall('name') if tag.text is not None)
+        # NOTE(review): names are kept verbatim (no strip/lower) -- confirm _load_label agrees.
+        return sorted(names)
diff --git a/gluoncv/model_zoo/model_store.py b/gluoncv/model_zoo/model_store.py
index c3e2bc46aa..0a822b37cb 100644
--- a/gluoncv/model_zoo/model_store.py
+++ b/gluoncv/model_zoo/model_store.py
@@ -59,9 +59,11 @@
     ('da9756faa5b9b4e34dedcf83ee0733d5895796ad', 'ssd_512_mobilenet1.0_coco'),
     ('447328d89d70ae1e2ca49226b8d834e5a5456df3', 'faster_rcnn_resnet50_v1b_voc'),
     ('5b4690fb7c5b62c44fb36c67d0642b633697f1bb', 'faster_rcnn_resnet50_v1b_coco'),
+    ('6df46961827647d418b11ffaf616a6a60d9dd16e', 'faster_rcnn_fpn_syncbn_resnest50_coco'),
     ('a465eca35e78aba6ebdf99bf52031a447e501063', 'faster_rcnn_resnet101_v1d_coco'),
     ('233572743bc537291590f4edf8a0c17c14b234bb', 'faster_rcnn_fpn_resnet50_v1b_coco'),
     ('1194ab4ec6e06386aadd55820add312c8ef59c74', 'faster_rcnn_fpn_resnet101_v1d_coco'),
+    ('baebfa1b7d7f56dd33a7687efea4b014736bd791', 'faster_rcnn_fpn_syncbn_resnest101_coco'),
     ('e071cf1550bc0331c218a9072b59e9550595d1e7', 'mask_rcnn_resnet18_v1b_coco'),
     ('a3527fdc2cee5b1f32a61e5fd7cda8fb673e86e5', 'mask_rcnn_resnet50_v1b_coco'),
     ('4a3249c584f81c2a9b5d852b742637cd692ebdcb', 'mask_rcnn_resnet101_v1d_coco'),
diff --git a/gluoncv/model_zoo/model_zoo.py b/gluoncv/model_zoo/model_zoo.py
index a64c618350..2c91c2fd26 100644
--- a/gluoncv/model_zoo/model_zoo.py
+++ b/gluoncv/model_zoo/model_zoo.py
@@ -127,12 +127,15 @@
     'faster_rcnn_resnet50_v1b_coco': faster_rcnn_resnet50_v1b_coco,
     'faster_rcnn_fpn_resnet50_v1b_coco': faster_rcnn_fpn_resnet50_v1b_coco,
     'faster_rcnn_fpn_syncbn_resnet50_v1b_coco': faster_rcnn_fpn_syncbn_resnet50_v1b_coco,
+    'faster_rcnn_fpn_syncbn_resnest50_coco': faster_rcnn_fpn_syncbn_resnest50_coco,
     'faster_rcnn_resnet50_v1b_custom': faster_rcnn_resnet50_v1b_custom,
     'faster_rcnn_resnet101_v1d_voc': faster_rcnn_resnet101_v1d_voc,
     'faster_rcnn_resnet101_v1d_coco': faster_rcnn_resnet101_v1d_coco,
     'faster_rcnn_fpn_resnet101_v1d_coco': faster_rcnn_fpn_resnet101_v1d_coco,
     'faster_rcnn_fpn_syncbn_resnet101_v1d_coco': faster_rcnn_fpn_syncbn_resnet101_v1d_coco,
+    'faster_rcnn_fpn_syncbn_resnest101_coco': faster_rcnn_fpn_syncbn_resnest101_coco,
     'faster_rcnn_resnet101_v1d_custom': faster_rcnn_resnet101_v1d_custom,
+    'faster_rcnn_fpn_syncbn_resnest269_coco': faster_rcnn_fpn_syncbn_resnest269_coco,
     'custom_faster_rcnn_fpn': custom_faster_rcnn_fpn,
     'mask_rcnn_resnet50_v1b_coco': mask_rcnn_resnet50_v1b_coco,
     'mask_rcnn_fpn_resnet50_v1b_coco': mask_rcnn_fpn_resnet50_v1b_coco,
diff --git a/gluoncv/model_zoo/rcnn/faster_rcnn/faster_rcnn.py b/gluoncv/model_zoo/rcnn/faster_rcnn/faster_rcnn.py
index d3cc142170..802b400a26 100644
--- a/gluoncv/model_zoo/rcnn/faster_rcnn/faster_rcnn.py
+++ b/gluoncv/model_zoo/rcnn/faster_rcnn/faster_rcnn.py
@@ -585,7 +585,7 @@ def custom_faster_rcnn_fpn(classes, transfer=None, dataset='custom', pretrained_
             module_list.append('bn')
         net = get_model(
             '_'.join(['faster_rcnn'] + module_list + [base_network_name, str(transfer)]),
-            pretrained=True)
+            pretrained=True, per_device_batch_size=kwargs['per_device_batch_size'])
         reuse_classes = [x for x in classes if x in net.classes]
         net.reset_class(classes, reuse_weights=reuse_classes)
     return net
diff --git a/gluoncv/model_zoo/rcnn/faster_rcnn/predefined_models.py b/gluoncv/model_zoo/rcnn/faster_rcnn/predefined_models.py
index c001326220..180544782c 100644
--- a/gluoncv/model_zoo/rcnn/faster_rcnn/predefined_models.py
+++ b/gluoncv/model_zoo/rcnn/faster_rcnn/predefined_models.py
@@ -14,12 +14,15 @@
            'faster_rcnn_resnet50_v1b_coco',
            'faster_rcnn_fpn_resnet50_v1b_coco',
            'faster_rcnn_fpn_syncbn_resnet50_v1b_coco',
+           'faster_rcnn_fpn_syncbn_resnest50_coco',
            'faster_rcnn_resnet50_v1b_custom',
            'faster_rcnn_resnet101_v1d_voc',
            'faster_rcnn_resnet101_v1d_coco',
            'faster_rcnn_fpn_resnet101_v1d_coco',
            'faster_rcnn_fpn_syncbn_resnet101_v1d_coco',
-           'faster_rcnn_resnet101_v1d_custom']
+           'faster_rcnn_fpn_syncbn_resnest101_coco',
+           'faster_rcnn_resnet101_v1d_custom',
+           'faster_rcnn_fpn_syncbn_resnest269_coco']
 
 
 def faster_rcnn_resnet50_v1b_voc(pretrained=False, pretrained_base=True, **kwargs):
@@ -243,6 +246,72 @@ def faster_rcnn_fpn_syncbn_resnet50_v1b_coco(pretrained=False, pretrained_base=T
         pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100, **kwargs)
 
 
+def faster_rcnn_fpn_syncbn_resnest50_coco(pretrained=False, pretrained_base=True, num_devices=0,
+                                          **kwargs):
+    r"""Faster R-CNN with FPN and SyncBatchNorm on a ResNeSt-50 backbone, for COCO.
+    Backbone reference: "ResNeSt: Split-Attention Networks".
+
+    Parameters
+    ----------
+    pretrained : bool or str
+        Boolean value controls whether to load the default pretrained weights for model.
+        String value represents the hashtag for a certain version of pretrained weights.
+    pretrained_base : bool or str, optional, default is True
+        Load pretrained base network, the extra layers are randomized. Note that
+        if pretrained is `True`, this has no effect.
+    num_devices : int, default is 0
+        Number of devices for sync batch norm layer. If less than 1, use all devices available.
+    ctx : Context, default CPU
+        The context in which to load the pretrained weights.
+    root : str, default '~/.mxnet/models'
+        Location for keeping the model parameters.
+
+    Examples
+    --------
+    >>> model = faster_rcnn_fpn_syncbn_resnest50_coco(pretrained=True)
+    >>> print(model)
+    """
+    from ....model_zoo.resnest import resnest50
+    from ....data import COCODetection
+    classes = COCODetection.CLASSES
+    pretrained_base = False if pretrained else pretrained_base  # full weights supersede base
+    gluon_norm_kwargs = {'num_devices': num_devices} if num_devices >= 1 else {}
+    base_network = resnest50(pretrained=pretrained_base, dilated=False, use_global_stats=False,
+                             norm_layer=SyncBatchNorm, norm_kwargs=gluon_norm_kwargs)
+    from gluoncv.nn.dropblock import set_drop_prob
+    from functools import partial
+    apply_drop_prob = partial(set_drop_prob, 0.0)
+    base_network.apply(apply_drop_prob)  # set_drop_prob(0.0, blk) on each block apply visits
+    sym_norm_kwargs = {'ndev': num_devices} if num_devices >= 1 else {}
+    features = FPNFeatureExpander(
+        network=base_network,
+        outputs=['layers1_relu11_fwd', 'layers2_relu15_fwd', 'layers3_relu23_fwd',
+                 'layers4_relu11_fwd'], num_filters=[256, 256, 256, 256], use_1x1=True,
+        use_upsample=True, use_elewadd=True, use_p6=True, no_bias=True, pretrained=pretrained_base,
+        norm_layer=mx.sym.contrib.SyncBatchNorm, norm_kwargs=sym_norm_kwargs)
+    top_features = None
+    # 4 Conv-SyncBN-ReLU stacks and 1 FC layer before RCNN cls and reg
+    box_features = nn.HybridSequential()
+    for _ in range(4):
+        box_features.add(nn.Conv2D(256, 3, padding=1, use_bias=False),
+                         SyncBatchNorm(**gluon_norm_kwargs),
+                         nn.Activation('relu'))
+    box_features.add(nn.Dense(1024, weight_initializer=mx.init.Normal(0.01)),
+                     nn.Activation('relu'))
+
+    train_patterns = '(?!.*moving)'  # excluding symbol bn moving mean and var
+    return get_faster_rcnn(
+        name='fpn_syncbn_resnest50', dataset='coco', pretrained=pretrained, features=features,
+        top_features=top_features, classes=classes, box_features=box_features,
+        short=(640, 800), max_size=1333, min_stage=2, max_stage=6, train_patterns=train_patterns,
+        nms_thresh=0.5, nms_topk=-1, post_nms=-1, roi_mode='align', roi_size=(7, 7),
+        strides=(4, 8, 16, 32, 64), clip=4.14, rpn_channel=256, base_size=16,
+        scales=(2, 4, 8, 16, 32), ratios=(0.5, 1, 2), alloc_size=(384, 384),
+        rpn_nms_thresh=0.7, rpn_train_pre_nms=12000, rpn_train_post_nms=2000,
+        rpn_test_pre_nms=6000, rpn_test_post_nms=1000, rpn_min_size=1, num_sample=512,
+        pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100, **kwargs)
+
+
 def faster_rcnn_resnet50_v1b_custom(classes, transfer=None, pretrained_base=True,
                                     pretrained=False, **kwargs):
     r"""Faster RCNN model with resnet50_v1b base network on custom dataset.
@@ -516,6 +585,72 @@ def faster_rcnn_fpn_syncbn_resnet101_v1d_coco(pretrained=False, pretrained_base=
         pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100, **kwargs)
 
 
+def faster_rcnn_fpn_syncbn_resnest101_coco(pretrained=False, pretrained_base=True, num_devices=0,
+                                           **kwargs):
+    r"""Faster R-CNN with FPN and SyncBatchNorm on a ResNeSt-101 backbone, for COCO.
+    Backbone reference: "ResNeSt: Split-Attention Networks".
+
+    Parameters
+    ----------
+    pretrained : bool or str
+        Boolean value controls whether to load the default pretrained weights for model.
+        String value represents the hashtag for a certain version of pretrained weights.
+    pretrained_base : bool or str, optional, default is True
+        Load pretrained base network, the extra layers are randomized. Note that
+        if pretrained is `True`, this has no effect.
+    num_devices : int, default is 0
+        Number of devices for sync batch norm layer. If less than 1, use all devices available.
+    ctx : Context, default CPU
+        The context in which to load the pretrained weights.
+    root : str, default '~/.mxnet/models'
+        Location for keeping the model parameters.
+
+    Examples
+    --------
+    >>> model = faster_rcnn_fpn_syncbn_resnest101_coco(pretrained=True)
+    >>> print(model)
+    """
+    from ....model_zoo.resnest import resnest101
+    from ....data import COCODetection
+    classes = COCODetection.CLASSES
+    pretrained_base = False if pretrained else pretrained_base  # full weights supersede base
+    gluon_norm_kwargs = {'num_devices': num_devices} if num_devices >= 1 else {}
+    base_network = resnest101(pretrained=pretrained_base, dilated=False, use_global_stats=False,
+                              norm_layer=SyncBatchNorm, norm_kwargs=gluon_norm_kwargs)
+    from gluoncv.nn.dropblock import set_drop_prob
+    from functools import partial
+    apply_drop_prob = partial(set_drop_prob, 0.0)
+    base_network.apply(apply_drop_prob)  # set_drop_prob(0.0, blk) on each block apply visits
+    sym_norm_kwargs = {'ndev': num_devices} if num_devices >= 1 else {}
+    features = FPNFeatureExpander(
+        network=base_network,
+        outputs=['layers1_relu11_fwd', 'layers2_relu15_fwd', 'layers3_relu91_fwd',
+                 'layers4_relu11_fwd'], num_filters=[256, 256, 256, 256], use_1x1=True,
+        use_upsample=True, use_elewadd=True, use_p6=True, no_bias=True, pretrained=pretrained_base,
+        norm_layer=mx.sym.contrib.SyncBatchNorm, norm_kwargs=sym_norm_kwargs)
+    top_features = None
+    # 4 Conv-SyncBN-ReLU stacks and 1 FC layer before RCNN cls and reg
+    box_features = nn.HybridSequential()
+    for _ in range(4):
+        box_features.add(nn.Conv2D(256, 3, padding=1, use_bias=False),
+                         SyncBatchNorm(**gluon_norm_kwargs),
+                         nn.Activation('relu'))
+    box_features.add(nn.Dense(1024, weight_initializer=mx.init.Normal(0.01)),
+                     nn.Activation('relu'))
+
+    train_patterns = '(?!.*moving)'  # excluding symbol bn moving mean and var
+    return get_faster_rcnn(
+        name='fpn_syncbn_resnest101', dataset='coco', pretrained=pretrained, features=features,
+        top_features=top_features, classes=classes, box_features=box_features,
+        short=(640, 800), max_size=1333, min_stage=2, max_stage=6, train_patterns=train_patterns,
+        nms_thresh=0.5, nms_topk=-1, post_nms=-1, roi_mode='align', roi_size=(7, 7),
+        strides=(4, 8, 16, 32, 64), clip=4.14, rpn_channel=256, base_size=16,
+        scales=(2, 4, 8, 16, 32), ratios=(0.5, 1, 2), alloc_size=(384, 384),
+        rpn_nms_thresh=0.7, rpn_train_pre_nms=12000, rpn_train_post_nms=2000,
+        rpn_test_pre_nms=6000, rpn_test_post_nms=1000, rpn_min_size=1, num_sample=512,
+        pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100, **kwargs)
+
+
 def faster_rcnn_resnet101_v1d_custom(classes, transfer=None, pretrained_base=True,
                                      pretrained=False, **kwargs):
     r"""Faster RCNN model with resnet101_v1d base network on custom dataset.
@@ -564,3 +699,69 @@ def faster_rcnn_resnet101_v1d_custom(classes, transfer=None, pretrained_base=Tru
         reuse_classes = [x for x in classes if x in net.classes]
         net.reset_class(classes, reuse_weights=reuse_classes)
     return net
+
+
+def faster_rcnn_fpn_syncbn_resnest269_coco(pretrained=False, pretrained_base=True, num_devices=0,
+                                           **kwargs):
+    r"""Faster R-CNN with FPN and SyncBatchNorm on a ResNeSt-269 backbone, for COCO.
+    Backbone reference: "ResNeSt: Split-Attention Networks".
+
+    Parameters
+    ----------
+    pretrained : bool or str
+        Boolean value controls whether to load the default pretrained weights for model.
+        String value represents the hashtag for a certain version of pretrained weights.
+    pretrained_base : bool or str, optional, default is True
+        Load pretrained base network, the extra layers are randomized. Note that
+        if pretrained is `True`, this has no effect.
+    num_devices : int, default is 0
+        Number of devices for sync batch norm layer. If less than 1, use all devices available.
+    ctx : Context, default CPU
+        The context in which to load the pretrained weights.
+    root : str, default '~/.mxnet/models'
+        Location for keeping the model parameters.
+
+    Examples
+    --------
+    >>> model = faster_rcnn_fpn_syncbn_resnest269_coco(pretrained=True)
+    >>> print(model)
+    """
+    from ....model_zoo.resnest import resnest269
+    from ....data import COCODetection
+    classes = COCODetection.CLASSES
+    pretrained_base = False if pretrained else pretrained_base  # full weights supersede base
+    gluon_norm_kwargs = {'num_devices': num_devices} if num_devices >= 1 else {}
+    base_network = resnest269(pretrained=pretrained_base, dilated=False, use_global_stats=False,
+                              norm_layer=SyncBatchNorm, norm_kwargs=gluon_norm_kwargs)
+    from gluoncv.nn.dropblock import set_drop_prob
+    from functools import partial
+    apply_drop_prob = partial(set_drop_prob, 0.0)
+    base_network.apply(apply_drop_prob)  # set_drop_prob(0.0, blk) on each block apply visits
+    sym_norm_kwargs = {'ndev': num_devices} if num_devices >= 1 else {}
+    features = FPNFeatureExpander(
+        network=base_network,
+        outputs=['layers1_relu11_fwd', 'layers2_relu119_fwd', 'layers3_relu191_fwd',
+                 'layers4_relu31_fwd'], num_filters=[256, 256, 256, 256], use_1x1=True,
+        use_upsample=True, use_elewadd=True, use_p6=True, no_bias=True, pretrained=pretrained_base,
+        norm_layer=mx.sym.contrib.SyncBatchNorm, norm_kwargs=sym_norm_kwargs)
+    top_features = None
+    # 4 Conv-SyncBN-ReLU stacks and 1 FC layer before RCNN cls and reg
+    box_features = nn.HybridSequential()
+    for _ in range(4):
+        box_features.add(nn.Conv2D(256, 3, padding=1, use_bias=False),
+                         SyncBatchNorm(**gluon_norm_kwargs),
+                         nn.Activation('relu'))
+    box_features.add(nn.Dense(1024, weight_initializer=mx.init.Normal(0.01)),
+                     nn.Activation('relu'))
+
+    train_patterns = '(?!.*moving)'  # excluding symbol bn moving mean and var
+    return get_faster_rcnn(
+        name='fpn_syncbn_resnest269', dataset='coco', pretrained=pretrained, features=features,
+        top_features=top_features, classes=classes, box_features=box_features,
+        short=(640, 864), max_size=1440, min_stage=2, max_stage=6, train_patterns=train_patterns,
+        nms_thresh=0.5, nms_topk=-1, post_nms=-1, roi_mode='align', roi_size=(7, 7),
+        strides=(4, 8, 16, 32, 64), clip=4.14, rpn_channel=256, base_size=16,
+        scales=(2, 4, 8, 16, 32), ratios=(0.5, 1, 2), alloc_size=(384, 384),
+        rpn_nms_thresh=0.7, rpn_train_pre_nms=12000, rpn_train_post_nms=2000,
+        rpn_test_pre_nms=6000, rpn_test_post_nms=1000, rpn_min_size=1, num_sample=512,
+        pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100, **kwargs)
diff --git a/scripts/detection/faster_rcnn/train_faster_rcnn.py b/scripts/detection/faster_rcnn/train_faster_rcnn.py
index c0146b176e..824dd7b7d3 100644
--- a/scripts/detection/faster_rcnn/train_faster_rcnn.py
+++ b/scripts/detection/faster_rcnn/train_faster_rcnn.py
@@ -32,7 +32,6 @@
 from gluoncv.utils.parallel import Parallelizable, Parallel
 from gluoncv.utils.metrics.rcnn import RPNAccMetric, RPNL1LossMetric, RCNNAccMetric, \
     RCNNL1LossMetric
-from gluoncv.data import COCODetection, VOCDetection
 
 try:
     import horovod.mxnet as hvd
@@ -43,7 +42,8 @@
 def parse_args():
     parser = argparse.ArgumentParser(description='Train Faster-RCNN networks e2e.')
     parser.add_argument('--network', type=str, default='resnet50_v1b',
-                        choices=['resnet18_v1b', 'resnet50_v1b', 'resnet101_v1d'],
+                        choices=['resnet18_v1b', 'resnet50_v1b', 'resnet101_v1d',
+                                 'resnest50', 'resnest101', 'resnest269'],
                         help="Base network name which serves as feature extraction base.")
     parser.add_argument('--dataset', type=str, default='voc',
                         help='Training dataset. Now support voc and coco.')
@@ -301,6 +301,13 @@ def get_dataset(dataset, args):
         val_dataset = gdata.VOCDetection(
             splits=[(2007, 'test')])
         val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=val_dataset.classes)
+    elif dataset.lower() in ['clipart', 'comic', 'watercolor']:
+        root = os.path.join('~', '.mxnet', 'datasets', dataset.lower())
+        train_dataset = gdata.CustomVOCDetection(root=root, splits=[('', 'train')],
+                                                 generate_classes=True)
+        val_dataset = gdata.CustomVOCDetection(root=root, splits=[('', 'test')],
+                                               generate_classes=True)
+        val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=val_dataset.classes)
     elif dataset.lower() == 'coco':
         train_dataset = gdata.COCODetection(splits='instances_train2017', use_crowd=False)
         val_dataset = gdata.COCODetection(splits='instances_val2017', skip_empty=False)
@@ -426,7 +433,7 @@ def forward_backward(self, x):
             gt_label = label[:, :, 4:5]
             gt_box = label[:, :, :4]
             cls_pred, box_pred, roi, samples, matches, rpn_score, rpn_box, anchors, cls_targets, \
-                box_targets, box_masks, _ = self.net(data, gt_box, gt_label)
+            box_targets, box_masks, _ = self.net(data, gt_box, gt_label)
             # losses of rpn
             rpn_score = rpn_score.squeeze(axis=-1)
             num_rpn_pos = (rpn_cls_targets >= 0).sum()
@@ -463,7 +470,7 @@ def forward_backward(self, x):
                 total_loss.backward()
 
         return rpn_loss1_metric, rpn_loss2_metric, rcnn_loss1_metric, rcnn_loss2_metric, \
-            rpn_acc_metric, rpn_l1_loss_metric, rcnn_acc_metric, rcnn_l1_loss_metric
+               rpn_acc_metric, rpn_l1_loss_metric, rcnn_acc_metric, rcnn_l1_loss_metric
 
 
 def train(net, train_data, val_data, eval_metric, batch_size, ctx, args):
@@ -632,6 +639,9 @@ def train(net, train_data, val_data, eval_metric, batch_size, ctx, args):
         ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()]
         ctx = ctx if ctx else [mx.cpu()]
 
+    # training data
+    train_dataset, val_dataset, eval_metric = get_dataset(args.dataset, args)
+
     # network
     kwargs = {}
     module_list = []
@@ -662,11 +672,7 @@ def train(net, train_data, val_data, eval_metric, batch_size, ctx, args):
             norm_kwargs = None
             sym_norm_layer = None
             sym_norm_kwargs = None
-        if args.dataset == 'coco':
-            classes = COCODetection.CLASSES
-        else:
-            # default to VOC
-            classes = VOCDetection.CLASSES
+        classes = train_dataset.CLASSES
         net = get_model('custom_faster_rcnn_fpn', classes=classes, transfer=None,
                         dataset=args.dataset, pretrained_base=not args.no_pretrained_base,
                         base_network_name=args.network, norm_layer=norm_layer,
@@ -709,8 +715,7 @@ def train(net, train_data, val_data, eval_metric, batch_size, ctx, args):
         net.collect_params('.*batchnorm.*').setattr('dtype', 'float32')
         net.collect_params('.*normalizedperclassboxcenterencoder.*').setattr('dtype', 'float32')
 
-    # training data
-    train_dataset, val_dataset, eval_metric = get_dataset(args.dataset, args)
+    # dataloader
     batch_size = args.batch_size // num_gpus if args.horovod else args.batch_size
     train_data, val_data = get_dataloader(
         net, train_dataset, val_dataset, FasterRCNNDefaultTrainTransform,