From a55ea3b01dee63cf366fffc0d10385ec57d01807 Mon Sep 17 00:00:00 2001 From: chhluo Date: Fri, 15 Jul 2022 14:53:58 +0000 Subject: [PATCH] Update configs: CARAFE, ConvNext, Swin, timm, GN, WS/GN and PVT --- .../faster_rcnn_r50_fpn_carafe_1x_coco.py | 32 +---- .../mask_rcnn_r50_fpn_carafe_1x_coco.py | 32 +---- ...7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py | 17 +-- ...7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py | 108 +++++++++-------- ...nvnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco.py | 109 ++++++++++-------- ..._rcnn_r101_fpn_gn_ws-all_20_23_24e_coco.py | 17 ++- ...k_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py | 17 ++- .../mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py | 17 ++- ...x101_32x4d_fpn_gn_ws-all_20_23_24e_coco.py | 17 ++- ..._x50_32x4d_fpn_gn_ws-all_20_23_24e_coco.py | 17 ++- .../gn/mask_rcnn_r101_fpn_gn-all_3x_coco.py | 17 ++- .../gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py | 51 +++----- .../gn/mask_rcnn_r50_fpn_gn-all_3x_coco.py | 17 ++- ...ask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py | 18 ++- ...ask_rcnn_r50_fpn_gn-all_contrib_3x_coco.py | 17 ++- configs/pvt/retinanet_pvt-l_fpn_1x_coco.py | 3 +- configs/pvt/retinanet_pvt-t_fpn_1x_coco.py | 4 +- configs/pvt/retinanet_pvtv2-b0_fpn_1x_coco.py | 4 +- configs/pvt/retinanet_pvtv2-b4_fpn_1x_coco.py | 8 +- configs/pvt/retinanet_pvtv2-b5_fpn_1x_coco.py | 8 +- ...rcnn_r50_fpn_mocov2-pretrain_ms-2x_coco.py | 19 ++- ...k_rcnn_r50_fpn_swav-pretrain_ms-2x_coco.py | 19 ++- .../mask_rcnn_swin-t-p4-w7_fpn_1x_coco.py | 36 ++++-- ...n_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco.py | 4 +- ...k_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco.py | 104 +++++++++-------- .../retinanet_swin-t-p4-w7_fpn_1x_coco.py | 3 +- ...inanet_timm_efficientnet_b1_fpn_1x_coco.py | 3 +- .../retinanet_timm_tv_resnet50_fpn_1x_coco.py | 3 +- mmdet/engine/optimizers/__init__.py | 6 +- .../layer_decay_optimizer_constructor.py | 14 ++- .../roi_extractors/base_roi_extractor.py | 1 - .../single_level_roi_extractor.py | 2 + .../test_layer_decay_optimizer_constructor.py | 20 ++-- 33 files changed, 425 insertions(+), 339 deletions(-) diff --git a/configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py b/configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py index dedac3f46b4..98c2ae2ce3f 100644 --- a/configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py +++ b/configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py @@ -1,5 +1,6 @@ _base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' model = dict( + data_preprocessor=dict(pad_size_divisor=64), neck=dict( type='FPN_CARAFE', in_channels=[256, 512, 1024, 2048], @@ -17,34 +18,3 @@ encoder_kernel=3, encoder_dilation=1, compressed_channels=64))) -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=64), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=64), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - train=dict(pipeline=train_pipeline), - val=dict(pipeline=test_pipeline), - test=dict(pipeline=test_pipeline)) diff --git a/configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py b/configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py index 668c023981b..fe5ed693c71 100644 --- a/configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py +++ b/configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py @@ -1,5 +1,6 @@ _base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' model = dict( + data_preprocessor=dict(pad_size_divisor=64), neck=dict( type='FPN_CARAFE', in_channels=[256, 512, 1024, 2048], @@ -27,34 +28,3 @@ encoder_kernel=3, encoder_dilation=1, compressed_channels=64)))) -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=64), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=64), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) -] -data = dict( - train=dict(pipeline=train_pipeline), - val=dict(pipeline=test_pipeline), - test=dict(pipeline=test_pipeline)) diff --git a/configs/convnext/cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py b/configs/convnext/cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py index 0ccc31d2488..3dbbb4bb4ea 100644 --- a/configs/convnext/cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py +++ b/configs/convnext/cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py @@ -18,15 +18,8 @@ type='Pretrained', checkpoint=checkpoint_file, prefix='backbone.'))) -optimizer = dict( - _delete_=True, - constructor='LearningRateDecayOptimizerConstructor', - type='AdamW', - lr=0.0002, - betas=(0.9, 0.999), - weight_decay=0.05, - paramwise_cfg={ - 'decay_rate': 0.7, - 'decay_type': 'layer_wise', - 'num_layers': 12 - }) +optim_wrapper = dict(paramwise_cfg={ + 'decay_rate': 0.7, + 'decay_type': 'layer_wise', + 'num_layers': 12 +}) diff --git a/configs/convnext/cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py b/configs/convnext/cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py index 93304c001da..aeef99a4652 100644 --- a/configs/convnext/cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py +++ b/configs/convnext/cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py @@ -82,68 +82,74 @@ loss_bbox=dict(type='GIoULoss', loss_weight=10.0)) ])) -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) - # augmentation strategy originates from DETR / Sparse RCNN train_pipeline = [ - dict(type='LoadImageFromFile'), + dict( + type='LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='RandomFlip', flip_ratio=0.5), + dict(type='RandomFlip', prob=0.5), dict( - type='AutoAugment', - policies=[[ + type='RandomChoice', + transforms=[[ dict( - type='Resize', - img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), - (608, 1333), (640, 1333), (672, 1333), (704, 1333), - (736, 1333), (768, 1333), (800, 1333)], - multiscale_mode='value', + type='RandomChoiceResize', + scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), + (608, 1333), (640, 1333), (672, 1333), (704, 1333), + (736, 1333), (768, 1333), (800, 1333)], keep_ratio=True) ], - [ - dict( - type='Resize', - img_scale=[(400, 1333), (500, 1333), (600, 1333)], - multiscale_mode='value', - keep_ratio=True), - dict( - type='RandomCrop', - crop_type='absolute_range', - crop_size=(384, 600), - allow_negative_crop=True), - dict( - type='Resize', - img_scale=[(480, 1333), (512, 1333), (544, 1333), - (576, 1333), (608, 1333), (640, 1333), - (672, 1333), (704, 1333), (736, 1333), - (768, 1333), (800, 1333)], - multiscale_mode='value', - override=True, - keep_ratio=True) - ]]), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), + [ + dict( + type='RandomChoiceResize', + scales=[(400, 1333), (500, 1333), (600, 1333)], + keep_ratio=True), + dict( + type='RandomCrop', + crop_type='absolute_range', + crop_size=(384, 600), + allow_negative_crop=True), + dict( + type='RandomChoiceResize', + scales=[(480, 1333), (512, 1333), (544, 1333), + (576, 1333), (608, 1333), (640, 1333), + (672, 1333), (704, 1333), (736, 1333), + (768, 1333), (800, 1333)], + keep_ratio=True) + ]]), + dict(type='PackDetInputs') +] +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) + +max_epochs = 36 +train_cfg = dict(max_epochs=max_epochs) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, + end=1000), + dict( + type='MultiStepLR', + begin=0, + end=max_epochs, + by_epoch=True, + milestones=[27, 33], + gamma=0.1) ] -data = dict(train=dict(pipeline=train_pipeline), persistent_workers=True) -optimizer = dict( - _delete_=True, +# Enable automatic-mixed-precision training with AmpOptimWrapper. +optim_wrapper = dict( + type='AmpOptimWrapper', constructor='LearningRateDecayOptimizerConstructor', - type='AdamW', - lr=0.0002, - betas=(0.9, 0.999), - weight_decay=0.05, paramwise_cfg={ 'decay_rate': 0.7, 'decay_type': 'layer_wise', 'num_layers': 6 - }) - -lr_config = dict(warmup_iters=1000, step=[27, 33]) -runner = dict(max_epochs=36) - -# you need to set mode='dynamic' if you are using pytorch<=1.5.0 -fp16 = dict(loss_scale=dict(init_scale=512)) + }, + optimizer=dict( + _delete_=True, + type='AdamW', + lr=0.0002, + betas=(0.9, 0.999), + weight_decay=0.05)) diff --git a/configs/convnext/mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco.py b/configs/convnext/mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco.py index e8a283f5483..5763524e20c 100644 --- a/configs/convnext/mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco.py +++ b/configs/convnext/mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco.py @@ -23,68 +23,75 @@ prefix='backbone.')), neck=dict(in_channels=[96, 192, 384, 768])) -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) - # augmentation strategy originates from DETR / Sparse RCNN train_pipeline = [ - dict(type='LoadImageFromFile'), + dict( + type='LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='RandomFlip', flip_ratio=0.5), + dict(type='RandomFlip', prob=0.5), dict( - type='AutoAugment', - policies=[[ + type='RandomChoice', + transforms=[[ dict( - type='Resize', - img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), - (608, 1333), (640, 1333), (672, 1333), (704, 1333), - (736, 1333), (768, 1333), (800, 1333)], - multiscale_mode='value', + type='RandomChoiceResize', + scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), + (608, 1333), (640, 1333), (672, 1333), (704, 1333), + (736, 1333), (768, 1333), (800, 1333)], keep_ratio=True) ], - [ - dict( - type='Resize', - img_scale=[(400, 1333), (500, 1333), (600, 1333)], - multiscale_mode='value', - keep_ratio=True), - dict( - type='RandomCrop', - crop_type='absolute_range', - crop_size=(384, 600), - allow_negative_crop=True), - dict( - type='Resize', - img_scale=[(480, 1333), (512, 1333), (544, 1333), - (576, 1333), (608, 1333), (640, 1333), - (672, 1333), (704, 1333), (736, 1333), - (768, 1333), (800, 1333)], - multiscale_mode='value', - override=True, - keep_ratio=True) - ]]), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), + [ + dict( + type='RandomChoiceResize', + scales=[(400, 1333), (500, 1333), (600, 1333)], + keep_ratio=True), + dict( + type='RandomCrop', + crop_type='absolute_range', + crop_size=(384, 600), + allow_negative_crop=True), + dict( + type='RandomChoiceResize', + scales=[(480, 1333), (512, 1333), (544, 1333), + (576, 1333), (608, 1333), (640, 1333), + (672, 1333), (704, 1333), (736, 1333), + (768, 1333), (800, 1333)], + keep_ratio=True) + ]]), + dict(type='PackDetInputs') +] +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) + +max_epochs = 36 +train_cfg = dict(max_epochs=max_epochs) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, + end=1000), + dict( + type='MultiStepLR', + begin=0, + end=max_epochs, + by_epoch=True, + milestones=[27, 33], + gamma=0.1) ] -data = dict(train=dict(pipeline=train_pipeline), persistent_workers=True) -optimizer = dict( - _delete_=True, +# Enable automatic-mixed-precision training with AmpOptimWrapper. +optim_wrapper = dict( + type='AmpOptimWrapper', constructor='LearningRateDecayOptimizerConstructor', - type='AdamW', - lr=0.0001, - betas=(0.9, 0.999), - weight_decay=0.05, paramwise_cfg={ 'decay_rate': 0.95, 'decay_type': 'layer_wise', 'num_layers': 6 - }) - -lr_config = dict(warmup_iters=1000, step=[27, 33]) -runner = dict(max_epochs=36) - -# you need to set mode='dynamic' if you are using pytorch<=1.5.0 -fp16 = dict(loss_scale=dict(init_scale=512)) + }, + optimizer=dict( + _delete_=True, + type='AdamW', + lr=0.0001, + betas=(0.9, 0.999), + weight_decay=0.05, + )) diff --git a/configs/gn+ws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco.py b/configs/gn+ws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco.py index a790d932152..fe4c9f6c088 100644 --- a/configs/gn+ws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco.py +++ b/configs/gn+ws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco.py @@ -1,4 +1,17 @@ _base_ = './mask_rcnn_r101_fpn_gn_ws-all_2x_coco.py' # learning policy -lr_config = dict(step=[20, 23]) -runner = dict(type='EpochBasedRunner', max_epochs=24) +max_epochs = 24 +train_cfg = dict(max_epochs=max_epochs) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), + dict( + type='MultiStepLR', + begin=0, + end=max_epochs, + by_epoch=True, + milestones=[20, 23], + gamma=0.1) +] diff --git a/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py index 55168085cd0..dfaaa9d5be0 100644 --- a/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py +++ b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py @@ -1,4 +1,17 @@ _base_ = './mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py' # learning policy -lr_config = dict(step=[20, 23]) -runner = dict(type='EpochBasedRunner', max_epochs=24) +max_epochs = 24 +train_cfg = dict(max_epochs=max_epochs) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), + dict( + type='MultiStepLR', + begin=0, + end=max_epochs, + by_epoch=True, + milestones=[20, 23], + gamma=0.1) +] diff --git a/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py index 63be60ff8c1..8e7e68632e9 100644 --- a/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py +++ b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py @@ -16,5 +16,18 @@ norm_cfg=norm_cfg), mask_head=dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg))) # learning policy -lr_config = dict(step=[16, 22]) -runner = dict(type='EpochBasedRunner', max_epochs=24) +max_epochs = 24 +train_cfg = dict(max_epochs=max_epochs) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), + dict( + type='MultiStepLR', + begin=0, + end=max_epochs, + by_epoch=True, + milestones=[16, 22], + gamma=0.1) +] diff --git a/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco.py b/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco.py index cfa14c99543..4d3c8647e21 100644 --- a/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco.py +++ b/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco.py @@ -1,4 +1,17 @@ _base_ = './mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco.py' # learning policy -lr_config = dict(step=[20, 23]) -runner = dict(type='EpochBasedRunner', max_epochs=24) +max_epochs = 24 +train_cfg = dict(max_epochs=max_epochs) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), + dict( + type='MultiStepLR', + begin=0, + end=max_epochs, + by_epoch=True, + milestones=[20, 23], + gamma=0.1) +] diff --git a/configs/gn+ws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco.py b/configs/gn+ws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco.py index 79ce0adf1bf..2ebbac31afe 100644 --- a/configs/gn+ws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco.py +++ b/configs/gn+ws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco.py @@ -1,4 +1,17 @@ _base_ = './mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco.py' # learning policy -lr_config = dict(step=[20, 23]) -runner = dict(type='EpochBasedRunner', max_epochs=24) +max_epochs = 24 +train_cfg = dict(max_epochs=max_epochs) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), + dict( + type='MultiStepLR', + begin=0, + end=max_epochs, + by_epoch=True, + milestones=[20, 23], + gamma=0.1) +] diff --git a/configs/gn/mask_rcnn_r101_fpn_gn-all_3x_coco.py b/configs/gn/mask_rcnn_r101_fpn_gn-all_3x_coco.py index 12a9d17e559..a8d1a8265fa 100644 --- a/configs/gn/mask_rcnn_r101_fpn_gn-all_3x_coco.py +++ b/configs/gn/mask_rcnn_r101_fpn_gn-all_3x_coco.py @@ -1,5 +1,18 @@ _base_ = './mask_rcnn_r101_fpn_gn-all_2x_coco.py' # learning policy -lr_config = dict(step=[28, 34]) -runner = dict(type='EpochBasedRunner', max_epochs=36) +max_epochs = 36 +train_cfg = dict(max_epochs=max_epochs) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), + dict( + type='MultiStepLR', + begin=0, + end=max_epochs, + by_epoch=True, + milestones=[28, 34], + gamma=0.1) +] diff --git a/configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py b/configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py index 1de7d98e103..c6982eb4af8 100644 --- a/configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py +++ b/configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py @@ -1,6 +1,10 @@ _base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) model = dict( + data_preprocessor=dict( + mean=[103.530, 116.280, 123.675], + std=[1.0, 1.0, 1.0], + bgr_to_rgb=False), backbone=dict( norm_cfg=norm_cfg, init_cfg=dict( @@ -13,37 +17,20 @@ conv_out_channels=256, norm_cfg=norm_cfg), mask_head=dict(norm_cfg=norm_cfg))) -img_norm_cfg = dict( - mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), -] -test_pipeline = [ - dict(type='LoadImageFromFile'), + +# learning policy +max_epochs = 24 +train_cfg = dict(max_epochs=max_epochs) + +# learning rate +param_scheduler = [ dict( - type='MultiScaleFlipAug', - img_scale=(1333, 800), - flip=False, - transforms=[ - dict(type='Resize', keep_ratio=True), - dict(type='RandomFlip'), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='ImageToTensor', keys=['img']), - dict(type='Collect', keys=['img']), - ]) + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), + dict( + type='MultiStepLR', + begin=0, + end=max_epochs, + by_epoch=True, + milestones=[16, 22], + gamma=0.1) ] -data = dict( - train=dict(pipeline=train_pipeline), - val=dict(pipeline=test_pipeline), - test=dict(pipeline=test_pipeline)) -# learning policy -lr_config = dict(step=[16, 22]) -runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/gn/mask_rcnn_r50_fpn_gn-all_3x_coco.py b/configs/gn/mask_rcnn_r50_fpn_gn-all_3x_coco.py index f9177196cb9..52a7645eb7b 100644 --- a/configs/gn/mask_rcnn_r50_fpn_gn-all_3x_coco.py +++ b/configs/gn/mask_rcnn_r50_fpn_gn-all_3x_coco.py @@ -1,5 +1,18 @@ _base_ = './mask_rcnn_r50_fpn_gn-all_2x_coco.py' # learning policy -lr_config = dict(step=[28, 34]) -runner = dict(type='EpochBasedRunner', max_epochs=36) +max_epochs = 36 +train_cfg = dict(max_epochs=max_epochs) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), + dict( + type='MultiStepLR', + begin=0, + end=max_epochs, + by_epoch=True, + milestones=[28, 34], + gamma=0.1) +] diff --git a/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py b/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py index 2f430fdab1a..e9c66760a3d 100644 --- a/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py +++ b/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py @@ -12,6 +12,20 @@ conv_out_channels=256, norm_cfg=norm_cfg), mask_head=dict(norm_cfg=norm_cfg))) + # learning policy -lr_config = dict(step=[16, 22]) -runner = dict(type='EpochBasedRunner', max_epochs=24) +max_epochs = 24 +train_cfg = dict(max_epochs=max_epochs) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), + dict( + type='MultiStepLR', + begin=0, + end=max_epochs, + by_epoch=True, + milestones=[16, 22], + gamma=0.1) +] diff --git a/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco.py b/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco.py index 66834f08ba3..c840a4f5236 100644 --- a/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco.py +++ b/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco.py @@ -1,5 +1,18 @@ _base_ = './mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py' # learning policy -lr_config = dict(step=[28, 34]) -runner = dict(type='EpochBasedRunner', max_epochs=36) +max_epochs = 36 +train_cfg = dict(max_epochs=max_epochs) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), + dict( + type='MultiStepLR', + begin=0, + end=max_epochs, + by_epoch=True, + milestones=[28, 34], + gamma=0.1) +] diff --git a/configs/pvt/retinanet_pvt-l_fpn_1x_coco.py b/configs/pvt/retinanet_pvt-l_fpn_1x_coco.py index e299f2a098e..1a6f604bdb3 100644 --- a/configs/pvt/retinanet_pvt-l_fpn_1x_coco.py +++ b/configs/pvt/retinanet_pvt-l_fpn_1x_coco.py @@ -4,4 +4,5 @@ num_layers=[3, 8, 27, 3], init_cfg=dict(checkpoint='https://github.com/whai362/PVT/' 'releases/download/v2/pvt_large.pth'))) -fp16 = dict(loss_scale=dict(init_scale=512)) +# Enable automatic-mixed-precision training with AmpOptimWrapper. +optim_wrapper = dict(type='AmpOptimWrapper') diff --git a/configs/pvt/retinanet_pvt-t_fpn_1x_coco.py b/configs/pvt/retinanet_pvt-t_fpn_1x_coco.py index a6cff7d0335..5f67c444f26 100644 --- a/configs/pvt/retinanet_pvt-t_fpn_1x_coco.py +++ b/configs/pvt/retinanet_pvt-t_fpn_1x_coco.py @@ -13,4 +13,6 @@ 'releases/download/v2/pvt_tiny.pth')), neck=dict(in_channels=[64, 128, 320, 512])) # optimizer -optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001) +optim_wrapper = dict( + optimizer=dict( + _delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001)) diff --git a/configs/pvt/retinanet_pvtv2-b0_fpn_1x_coco.py b/configs/pvt/retinanet_pvtv2-b0_fpn_1x_coco.py index cbe2295d8f6..cbebf90fb89 100644 --- a/configs/pvt/retinanet_pvtv2-b0_fpn_1x_coco.py +++ b/configs/pvt/retinanet_pvtv2-b0_fpn_1x_coco.py @@ -14,4 +14,6 @@ 'releases/download/v2/pvt_v2_b0.pth')), neck=dict(in_channels=[32, 64, 160, 256])) # optimizer -optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001) +optim_wrapper = dict( + optimizer=dict( + _delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001)) diff --git a/configs/pvt/retinanet_pvtv2-b4_fpn_1x_coco.py b/configs/pvt/retinanet_pvtv2-b4_fpn_1x_coco.py index 9891d7bd76a..5faf4c507ba 100644 --- a/configs/pvt/retinanet_pvtv2-b4_fpn_1x_coco.py +++ b/configs/pvt/retinanet_pvtv2-b4_fpn_1x_coco.py @@ -7,10 +7,12 @@ 'releases/download/v2/pvt_v2_b4.pth')), neck=dict(in_channels=[64, 128, 320, 512])) # optimizer -optimizer = dict( - _delete_=True, type='AdamW', lr=0.0001 / 1.4, weight_decay=0.0001) +optim_wrapper = dict( + optimizer=dict( + _delete_=True, type='AdamW', lr=0.0001 / 1.4, weight_decay=0.0001)) + # dataset settings -data = dict(samples_per_gpu=1, workers_per_gpu=1) +train_dataloader = dict(batch_size=1, num_workers=1) # NOTE: `auto_scale_lr` is for automatically scaling LR, # USER SHOULD NOT CHANGE ITS VALUES. diff --git a/configs/pvt/retinanet_pvtv2-b5_fpn_1x_coco.py b/configs/pvt/retinanet_pvtv2-b5_fpn_1x_coco.py index a9fea2ebe47..afff8719ece 100644 --- a/configs/pvt/retinanet_pvtv2-b5_fpn_1x_coco.py +++ b/configs/pvt/retinanet_pvtv2-b5_fpn_1x_coco.py @@ -8,10 +8,12 @@ 'releases/download/v2/pvt_v2_b5.pth')), neck=dict(in_channels=[64, 128, 320, 512])) # optimizer -optimizer = dict( - _delete_=True, type='AdamW', lr=0.0001 / 1.4, weight_decay=0.0001) +optim_wrapper = dict( + optimizer=dict( + _delete_=True, type='AdamW', lr=0.0001 / 1.4, weight_decay=0.0001)) + # dataset settings -data = dict(samples_per_gpu=1, workers_per_gpu=1) +train_dataloader = dict(batch_size=1, num_workers=1) # NOTE: `auto_scale_lr` is for automatically scaling LR, # USER SHOULD NOT CHANGE ITS VALUES. diff --git a/configs/selfsup_pretrain/mask_rcnn_r50_fpn_mocov2-pretrain_ms-2x_coco.py b/configs/selfsup_pretrain/mask_rcnn_r50_fpn_mocov2-pretrain_ms-2x_coco.py index 09aa15608de..804f280dd7d 100644 --- a/configs/selfsup_pretrain/mask_rcnn_r50_fpn_mocov2-pretrain_ms-2x_coco.py +++ b/configs/selfsup_pretrain/mask_rcnn_r50_fpn_mocov2-pretrain_ms-2x_coco.py @@ -12,21 +12,16 @@ init_cfg=dict( type='Pretrained', checkpoint='./mocov2_r50_800ep_pretrain.pth'))) -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) train_pipeline = [ - dict(type='LoadImageFromFile'), + dict( + type='LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( - type='Resize', - img_scale=[(1333, 640), (1333, 800)], - multiscale_mode='range', + type='RandomResize', scale=[(1333, 640), (1333, 800)], keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']) + dict(type='RandomFlip', prob=0.5), + dict(type='PackDetInputs') ] -data = dict(train=dict(pipeline=train_pipeline)) +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/selfsup_pretrain/mask_rcnn_r50_fpn_swav-pretrain_ms-2x_coco.py b/configs/selfsup_pretrain/mask_rcnn_r50_fpn_swav-pretrain_ms-2x_coco.py index fe473613492..25ae1ea95f9 100644 --- a/configs/selfsup_pretrain/mask_rcnn_r50_fpn_swav-pretrain_ms-2x_coco.py +++ b/configs/selfsup_pretrain/mask_rcnn_r50_fpn_swav-pretrain_ms-2x_coco.py @@ -12,21 +12,16 @@ init_cfg=dict( type='Pretrained', checkpoint='./swav_800ep_pretrain.pth.tar'))) -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) train_pipeline = [ - dict(type='LoadImageFromFile'), + dict( + type='LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict( - type='Resize', - img_scale=[(1333, 640), (1333, 800)], - multiscale_mode='range', + type='RandomResize', scale=[(1333, 640), (1333, 800)], keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.5), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']) + dict(type='RandomFlip', prob=0.5), + dict(type='PackDetInputs') ] -data = dict(train=dict(pipeline=train_pipeline)) +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/swin/mask_rcnn_swin-t-p4-w7_fpn_1x_coco.py b/configs/swin/mask_rcnn_swin-t-p4-w7_fpn_1x_coco.py index 337e85818c5..91b3c8e6bdc 100644 --- a/configs/swin/mask_rcnn_swin-t-p4-w7_fpn_1x_coco.py +++ b/configs/swin/mask_rcnn_swin-t-p4-w7_fpn_1x_coco.py @@ -26,17 +26,35 @@ init_cfg=dict(type='Pretrained', checkpoint=pretrained)), neck=dict(in_channels=[96, 192, 384, 768])) -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.0001, - betas=(0.9, 0.999), - weight_decay=0.05, +max_epochs = 12 +train_cfg = dict(max_epochs=max_epochs) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, + end=1000), + dict( + type='MultiStepLR', + begin=0, + end=max_epochs, + by_epoch=True, + milestones=[8, 11], + gamma=0.1) +] + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', paramwise_cfg=dict( custom_keys={ 'absolute_pos_embed': dict(decay_mult=0.), 'relative_position_bias_table': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) - })) -lr_config = dict(warmup_iters=1000, step=[8, 11]) -runner = dict(max_epochs=12) + }), + optimizer=dict( + _delete_=True, + type='AdamW', + lr=0.0001, + betas=(0.9, 0.999), + weight_decay=0.05)) diff --git a/configs/swin/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco.py b/configs/swin/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco.py index 2be31143df5..a1d409bb923 100644 --- a/configs/swin/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco.py +++ b/configs/swin/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco.py @@ -1,3 +1,3 @@ _base_ = './mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco.py' -# you need to set mode='dynamic' if you are using pytorch<=1.5.0 -fp16 = dict(loss_scale=dict(init_scale=512)) +# Enable automatic-mixed-precision training with AmpOptimWrapper. +optim_wrapper = dict(type='AmpOptimWrapper') diff --git a/configs/swin/mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco.py b/configs/swin/mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco.py index 2612f6e331e..eb4c293d7bd 100644 --- a/configs/swin/mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco.py +++ b/configs/swin/mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco.py @@ -28,64 +28,74 @@ init_cfg=dict(type='Pretrained', checkpoint=pretrained)), neck=dict(in_channels=[96, 192, 384, 768])) -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) - # augmentation strategy originates from DETR / Sparse RCNN train_pipeline = [ - dict(type='LoadImageFromFile'), + dict( + type='LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), - dict(type='RandomFlip', flip_ratio=0.5), + dict(type='RandomFlip', prob=0.5), dict( - type='AutoAugment', - policies=[[ + type='RandomChoice', + transforms=[[ dict( - type='Resize', - img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), - (608, 1333), (640, 1333), (672, 1333), (704, 1333), - (736, 1333), (768, 1333), (800, 1333)], - multiscale_mode='value', + type='RandomChoiceResize', + scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), + (608, 1333), (640, 1333), (672, 1333), (704, 1333), + (736, 1333), (768, 1333), (800, 1333)], keep_ratio=True) ], - [ - dict( - type='Resize', - img_scale=[(400, 1333), (500, 1333), (600, 1333)], - multiscale_mode='value', - keep_ratio=True), - dict( - type='RandomCrop', - crop_type='absolute_range', - crop_size=(384, 600), - allow_negative_crop=True), - dict( - type='Resize', - img_scale=[(480, 1333), (512, 1333), (544, 1333), - (576, 1333), (608, 1333), (640, 1333), - (672, 1333), (704, 1333), (736, 1333), - (768, 1333), (800, 1333)], - multiscale_mode='value', - override=True, - keep_ratio=True) - ]]), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='DefaultFormatBundle'), - dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), + [ + dict( + type='RandomChoiceResize', + scales=[(400, 1333), (500, 1333), (600, 1333)], + keep_ratio=True), + dict( + type='RandomCrop', + crop_type='absolute_range', + crop_size=(384, 600), + allow_negative_crop=True), + dict( + type='RandomChoiceResize', + scales=[(480, 1333), (512, 1333), (544, 1333), + (576, 1333), (608, 1333), (640, 1333), + (672, 1333), (704, 1333), (736, 1333), + (768, 1333), (800, 1333)], + keep_ratio=True) + ]]), + dict(type='PackDetInputs') +] +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) + +max_epochs = 36 +train_cfg = dict(max_epochs=max_epochs) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, + end=1000), + dict( + type='MultiStepLR', + begin=0, + end=max_epochs, + by_epoch=True, + milestones=[27, 33], + gamma=0.1) ] -data = dict(train=dict(pipeline=train_pipeline)) -optimizer = dict( - _delete_=True, - type='AdamW', - lr=0.0001, - betas=(0.9, 0.999), - weight_decay=0.05, +# optimizer +optim_wrapper = dict( + type='OptimWrapper', paramwise_cfg=dict( custom_keys={ 'absolute_pos_embed': dict(decay_mult=0.), 'relative_position_bias_table': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) - })) -lr_config = dict(warmup_iters=1000, step=[27, 33]) -runner = dict(max_epochs=36) + }), + optimizer=dict( + _delete_=True, + type='AdamW', + lr=0.0001, + betas=(0.9, 0.999), + weight_decay=0.05)) diff --git a/configs/swin/retinanet_swin-t-p4-w7_fpn_1x_coco.py b/configs/swin/retinanet_swin-t-p4-w7_fpn_1x_coco.py index 331509323d4..2f40a87e8cf 100644 --- a/configs/swin/retinanet_swin-t-p4-w7_fpn_1x_coco.py +++ b/configs/swin/retinanet_swin-t-p4-w7_fpn_1x_coco.py @@ -27,4 +27,5 @@ init_cfg=dict(type='Pretrained', checkpoint=pretrained)), neck=dict(in_channels=[192, 384, 768], start_level=0, num_outs=5)) -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) +# optimizer +optim_wrapper = dict(optimizer=dict(lr=0.01)) diff --git a/configs/timm_example/retinanet_timm_efficientnet_b1_fpn_1x_coco.py b/configs/timm_example/retinanet_timm_efficientnet_b1_fpn_1x_coco.py index 65001167cbf..e868f1ce3ad 100644 --- a/configs/timm_example/retinanet_timm_efficientnet_b1_fpn_1x_coco.py +++ b/configs/timm_example/retinanet_timm_efficientnet_b1_fpn_1x_coco.py @@ -17,4 +17,5 @@ out_indices=(1, 2, 3, 4)), neck=dict(in_channels=[24, 40, 112, 320])) -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) +# optimizer +optim_wrapper = dict(optimizer=dict(lr=0.01)) diff --git a/configs/timm_example/retinanet_timm_tv_resnet50_fpn_1x_coco.py b/configs/timm_example/retinanet_timm_tv_resnet50_fpn_1x_coco.py index 0c5b7a89f65..843635a377e 100644 --- a/configs/timm_example/retinanet_timm_tv_resnet50_fpn_1x_coco.py +++ b/configs/timm_example/retinanet_timm_tv_resnet50_fpn_1x_coco.py @@ -16,4 +16,5 @@ pretrained=True, out_indices=(1, 2, 3, 4))) -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) +# optimizer +optim_wrapper = dict(optimizer=dict(lr=0.01)) diff --git a/mmdet/engine/optimizers/__init__.py b/mmdet/engine/optimizers/__init__.py index e867d0761cb..83db069ee34 100644 --- a/mmdet/engine/optimizers/__init__.py +++ b/mmdet/engine/optimizers/__init__.py @@ -1,9 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. -from .builder import OPTIMIZER_BUILDERS, build_optimizer from .layer_decay_optimizer_constructor import \ LearningRateDecayOptimizerConstructor -__all__ = [ - 'LearningRateDecayOptimizerConstructor', 'OPTIMIZER_BUILDERS', - 'build_optimizer' -] +__all__ = ['LearningRateDecayOptimizerConstructor'] diff --git a/mmdet/engine/optimizers/layer_decay_optimizer_constructor.py b/mmdet/engine/optimizers/layer_decay_optimizer_constructor.py index 1bc3469e888..a056c706046 100644 --- a/mmdet/engine/optimizers/layer_decay_optimizer_constructor.py +++ b/mmdet/engine/optimizers/layer_decay_optimizer_constructor.py @@ -1,10 +1,13 @@ # Copyright (c) OpenMMLab. All rights reserved. import json +from typing import List -from mmcv.runner import DefaultOptimizerConstructor, get_dist_info +import torch.nn as nn +from mmengine.dist import get_dist_info +from mmengine.optim import DefaultOptimWrapperConstructor +from mmdet.registry import OPTIM_WRAPPER_CONSTRUCTORS from mmdet.utils import get_root_logger -from .builder import OPTIMIZER_BUILDERS def get_layer_id_for_convnext(var_name, max_layer_id): @@ -75,12 +78,13 @@ def get_stage_id_for_convnext(var_name, max_stage_id): return max_stage_id - 1 -@OPTIMIZER_BUILDERS.register_module() -class LearningRateDecayOptimizerConstructor(DefaultOptimizerConstructor): +@OPTIM_WRAPPER_CONSTRUCTORS.register_module() +class LearningRateDecayOptimizerConstructor(DefaultOptimWrapperConstructor): # Different learning rates are set for different layers of backbone. # Note: Currently, this optimizer constructor is built for ConvNeXt. - def add_params(self, params, module, **kwargs): + def add_params(self, params: List[dict], module: nn.Module, + **kwargs) -> None: """Add all parameters of module to the params list. The parameters of the given module will be added to the list of param diff --git a/mmdet/models/roi_heads/roi_extractors/base_roi_extractor.py b/mmdet/models/roi_heads/roi_extractors/base_roi_extractor.py index ae6218948ce..9b2bde31073 100644 --- a/mmdet/models/roi_heads/roi_extractors/base_roi_extractor.py +++ b/mmdet/models/roi_heads/roi_extractors/base_roi_extractor.py @@ -32,7 +32,6 @@ def __init__(self, self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides) self.out_channels = out_channels self.featmap_strides = featmap_strides - self.fp16_enabled = False @property def num_inputs(self) -> int: diff --git a/mmdet/models/roi_heads/roi_extractors/single_level_roi_extractor.py b/mmdet/models/roi_heads/roi_extractors/single_level_roi_extractor.py index 13640b43693..59229e0b0b0 100644 --- a/mmdet/models/roi_heads/roi_extractors/single_level_roi_extractor.py +++ b/mmdet/models/roi_heads/roi_extractors/single_level_roi_extractor.py @@ -78,6 +78,8 @@ def forward(self, Returns: Tensor: RoI feature. """ + # convert fp32 to fp16 when amp is on + rois = rois.type_as(feats[0]) out_size = self.roi_layers[0].output_size num_levels = len(feats) roi_feats = feats[0].new_zeros( diff --git a/tests/test_engine/test_optimizers/test_layer_decay_optimizer_constructor.py b/tests/test_engine/test_optimizers/test_layer_decay_optimizer_constructor.py index e914ad42f64..bac43e224d6 100644 --- a/tests/test_engine/test_optimizers/test_layer_decay_optimizer_constructor.py +++ b/tests/test_engine/test_optimizers/test_layer_decay_optimizer_constructor.py @@ -146,19 +146,23 @@ def test_learning_rate_decay_optimizer_constructor(): # Test lr wd for ConvNeXT backbone = ToyConvNeXt() model = PseudoDataParallel(ToyDetector(backbone)) - optimizer_cfg = dict( - type='AdamW', lr=base_lr, betas=(0.9, 0.999), weight_decay=0.05) + optim_wrapper_cfg = dict( + type='OptimWrapper', + optimizer=dict( + type='AdamW', lr=base_lr, betas=(0.9, 0.999), weight_decay=0.05)) # stagewise decay stagewise_paramwise_cfg = dict( decay_rate=decay_rate, decay_type='stage_wise', num_layers=6) optim_constructor = LearningRateDecayOptimizerConstructor( - optimizer_cfg, stagewise_paramwise_cfg) - optimizer = optim_constructor(model) - check_optimizer_lr_wd(optimizer, expected_stage_wise_lr_wd_convnext) + optim_wrapper_cfg, stagewise_paramwise_cfg) + optim_wrapper = optim_constructor(model) + check_optimizer_lr_wd(optim_wrapper.optimizer, + expected_stage_wise_lr_wd_convnext) # layerwise decay layerwise_paramwise_cfg = dict( decay_rate=decay_rate, decay_type='layer_wise', num_layers=6) optim_constructor = LearningRateDecayOptimizerConstructor( - optimizer_cfg, layerwise_paramwise_cfg) - optimizer = optim_constructor(model) - check_optimizer_lr_wd(optimizer, expected_layer_wise_lr_wd_convnext) + optim_wrapper_cfg, layerwise_paramwise_cfg) + optim_wrapper = optim_constructor(model) + check_optimizer_lr_wd(optim_wrapper.optimizer, + expected_layer_wise_lr_wd_convnext)