Update configs: CARAFE, ConvNext, Swin, timm, GN, WS/GN and PVT

gist-ailab · Jul 19, 2022 · a55ea3b
1 parent a0568b9
commit a55ea3b
Show file tree

Hide file tree

Showing 33 changed files with 425 additions and 339 deletions.
diff --git a/configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py b/configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py
@@ -1,5 +1,6 @@
 _base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'
 model = dict(
+    data_preprocessor=dict(pad_size_divisor=64),
     neck=dict(
         type='FPN_CARAFE',
         in_channels=[256, 512, 1024, 2048],
@@ -17,34 +18,3 @@
             encoder_kernel=3,
             encoder_dilation=1,
             compressed_channels=64)))
-img_norm_cfg = dict(
-    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
-train_pipeline = [
-    dict(type='LoadImageFromFile'),
-    dict(type='LoadAnnotations', with_bbox=True),
-    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
-    dict(type='RandomFlip', flip_ratio=0.5),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=64),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
-]
-test_pipeline = [
-    dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(1333, 800),
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='Pad', size_divisor=64),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
-]
-data = dict(
-    train=dict(pipeline=train_pipeline),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))
diff --git a/configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py b/configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py
@@ -1,5 +1,6 @@
 _base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'
 model = dict(
+    data_preprocessor=dict(pad_size_divisor=64),
     neck=dict(
         type='FPN_CARAFE',
         in_channels=[256, 512, 1024, 2048],
@@ -27,34 +28,3 @@
                 encoder_kernel=3,
                 encoder_dilation=1,
                 compressed_channels=64))))
-img_norm_cfg = dict(
-    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
-train_pipeline = [
-    dict(type='LoadImageFromFile'),
-    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
-    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
-    dict(type='RandomFlip', flip_ratio=0.5),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=64),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
-]
-test_pipeline = [
-    dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(1333, 800),
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='Pad', size_divisor=64),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
-]
-data = dict(
-    train=dict(pipeline=train_pipeline),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))
diff --git a/configs/convnext/cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py b/configs/convnext/cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py
@@ -18,15 +18,8 @@
             type='Pretrained', checkpoint=checkpoint_file,
             prefix='backbone.')))
 
-optimizer = dict(
-    _delete_=True,
-    constructor='LearningRateDecayOptimizerConstructor',
-    type='AdamW',
-    lr=0.0002,
-    betas=(0.9, 0.999),
-    weight_decay=0.05,
-    paramwise_cfg={
-        'decay_rate': 0.7,
-        'decay_type': 'layer_wise',
-        'num_layers': 12
-    })
+optim_wrapper = dict(paramwise_cfg={
+    'decay_rate': 0.7,
+    'decay_type': 'layer_wise',
+    'num_layers': 12
+})
diff --git a/configs/convnext/cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py b/configs/convnext/cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py
@@ -82,68 +82,74 @@
             loss_bbox=dict(type='GIoULoss', loss_weight=10.0))
     ]))
 
-img_norm_cfg = dict(
-    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
-
 # augmentation strategy originates from DETR / Sparse RCNN
 train_pipeline = [
-    dict(type='LoadImageFromFile'),
+    dict(
+        type='LoadImageFromFile',
+        file_client_args={{_base_.file_client_args}}),
     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
-    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='RandomFlip', prob=0.5),
     dict(
-        type='AutoAugment',
-        policies=[[
+        type='RandomChoice',
+        transforms=[[
             dict(
-                type='Resize',
-                img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
-                           (608, 1333), (640, 1333), (672, 1333), (704, 1333),
-                           (736, 1333), (768, 1333), (800, 1333)],
-                multiscale_mode='value',
+                type='RandomChoiceResize',
+                scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
+                        (608, 1333), (640, 1333), (672, 1333), (704, 1333),
+                        (736, 1333), (768, 1333), (800, 1333)],
                 keep_ratio=True)
         ],
-                  [
-                      dict(
-                          type='Resize',
-                          img_scale=[(400, 1333), (500, 1333), (600, 1333)],
-                          multiscale_mode='value',
-                          keep_ratio=True),
-                      dict(
-                          type='RandomCrop',
-                          crop_type='absolute_range',
-                          crop_size=(384, 600),
-                          allow_negative_crop=True),
-                      dict(
-                          type='Resize',
-                          img_scale=[(480, 1333), (512, 1333), (544, 1333),
-                                     (576, 1333), (608, 1333), (640, 1333),
-                                     (672, 1333), (704, 1333), (736, 1333),
-                                     (768, 1333), (800, 1333)],
-                          multiscale_mode='value',
-                          override=True,
-                          keep_ratio=True)
-                  ]]),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=32),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
+                    [
+                        dict(
+                            type='RandomChoiceResize',
+                            scales=[(400, 1333), (500, 1333), (600, 1333)],
+                            keep_ratio=True),
+                        dict(
+                            type='RandomCrop',
+                            crop_type='absolute_range',
+                            crop_size=(384, 600),
+                            allow_negative_crop=True),
+                        dict(
+                            type='RandomChoiceResize',
+                            scales=[(480, 1333), (512, 1333), (544, 1333),
+                                    (576, 1333), (608, 1333), (640, 1333),
+                                    (672, 1333), (704, 1333), (736, 1333),
+                                    (768, 1333), (800, 1333)],
+                            keep_ratio=True)
+                    ]]),
+    dict(type='PackDetInputs')
+]
+train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
+
+max_epochs = 36
+train_cfg = dict(max_epochs=max_epochs)
+
+# learning rate schedule: linear warmup over the first 1000 iterations, then 10x step decay at epochs 27 and 33
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
+        end=1000),
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=max_epochs,
+        by_epoch=True,
+        milestones=[27, 33],
+        gamma=0.1)
 ]
-data = dict(train=dict(pipeline=train_pipeline), persistent_workers=True)
 
-optimizer = dict(
-    _delete_=True,
+# Enable automatic-mixed-precision training with AmpOptimWrapper.
+optim_wrapper = dict(
+    type='AmpOptimWrapper',
     constructor='LearningRateDecayOptimizerConstructor',
-    type='AdamW',
-    lr=0.0002,
-    betas=(0.9, 0.999),
-    weight_decay=0.05,
     paramwise_cfg={
         'decay_rate': 0.7,
         'decay_type': 'layer_wise',
         'num_layers': 6
-    })
-
-lr_config = dict(warmup_iters=1000, step=[27, 33])
-runner = dict(max_epochs=36)
-
-# you need to set mode='dynamic' if you are using pytorch<=1.5.0
-fp16 = dict(loss_scale=dict(init_scale=512))
+    },
+    optimizer=dict(
+        _delete_=True,
+        type='AdamW',
+        lr=0.0002,
+        betas=(0.9, 0.999),
+        weight_decay=0.05))
diff --git a/configs/convnext/mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco.py b/configs/convnext/mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco.py
@@ -23,68 +23,75 @@
             prefix='backbone.')),
     neck=dict(in_channels=[96, 192, 384, 768]))
 
-img_norm_cfg = dict(
-    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
-
 # augmentation strategy originates from DETR / Sparse RCNN
 train_pipeline = [
-    dict(type='LoadImageFromFile'),
+    dict(
+        type='LoadImageFromFile',
+        file_client_args={{_base_.file_client_args}}),
     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
-    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='RandomFlip', prob=0.5),
     dict(
-        type='AutoAugment',
-        policies=[[
+        type='RandomChoice',
+        transforms=[[
             dict(
-                type='Resize',
-                img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
-                           (608, 1333), (640, 1333), (672, 1333), (704, 1333),
-                           (736, 1333), (768, 1333), (800, 1333)],
-                multiscale_mode='value',
+                type='RandomChoiceResize',
+                scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
+                        (608, 1333), (640, 1333), (672, 1333), (704, 1333),
+                        (736, 1333), (768, 1333), (800, 1333)],
                 keep_ratio=True)
         ],
-                  [
-                      dict(
-                          type='Resize',
-                          img_scale=[(400, 1333), (500, 1333), (600, 1333)],
-                          multiscale_mode='value',
-                          keep_ratio=True),
-                      dict(
-                          type='RandomCrop',
-                          crop_type='absolute_range',
-                          crop_size=(384, 600),
-                          allow_negative_crop=True),
-                      dict(
-                          type='Resize',
-                          img_scale=[(480, 1333), (512, 1333), (544, 1333),
-                                     (576, 1333), (608, 1333), (640, 1333),
-                                     (672, 1333), (704, 1333), (736, 1333),
-                                     (768, 1333), (800, 1333)],
-                          multiscale_mode='value',
-                          override=True,
-                          keep_ratio=True)
-                  ]]),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=32),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
+                    [
+                        dict(
+                            type='RandomChoiceResize',
+                            scales=[(400, 1333), (500, 1333), (600, 1333)],
+                            keep_ratio=True),
+                        dict(
+                            type='RandomCrop',
+                            crop_type='absolute_range',
+                            crop_size=(384, 600),
+                            allow_negative_crop=True),
+                        dict(
+                            type='RandomChoiceResize',
+                            scales=[(480, 1333), (512, 1333), (544, 1333),
+                                    (576, 1333), (608, 1333), (640, 1333),
+                                    (672, 1333), (704, 1333), (736, 1333),
+                                    (768, 1333), (800, 1333)],
+                            keep_ratio=True)
+                    ]]),
+    dict(type='PackDetInputs')
+]
+train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
+
+max_epochs = 36
+train_cfg = dict(max_epochs=max_epochs)
+
+# learning rate schedule: linear warmup over the first 1000 iterations, then 10x step decay at epochs 27 and 33
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
+        end=1000),
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=max_epochs,
+        by_epoch=True,
+        milestones=[27, 33],
+        gamma=0.1)
 ]
-data = dict(train=dict(pipeline=train_pipeline), persistent_workers=True)
 
-optimizer = dict(
-    _delete_=True,
+# Enable automatic-mixed-precision training with AmpOptimWrapper.
+optim_wrapper = dict(
+    type='AmpOptimWrapper',
     constructor='LearningRateDecayOptimizerConstructor',
-    type='AdamW',
-    lr=0.0001,
-    betas=(0.9, 0.999),
-    weight_decay=0.05,
     paramwise_cfg={
         'decay_rate': 0.95,
         'decay_type': 'layer_wise',
         'num_layers': 6
-    })
-
-lr_config = dict(warmup_iters=1000, step=[27, 33])
-runner = dict(max_epochs=36)
-
-# you need to set mode='dynamic' if you are using pytorch<=1.5.0
-fp16 = dict(loss_scale=dict(init_scale=512))
+    },
+    optimizer=dict(
+        _delete_=True,
+        type='AdamW',
+        lr=0.0001,
+        betas=(0.9, 0.999),
+        weight_decay=0.05,
+    ))
diff --git a/configs/gn+ws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco.py b/configs/gn+ws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco.py
@@ -1,4 +1,17 @@
 _base_ = './mask_rcnn_r101_fpn_gn_ws-all_2x_coco.py'
 # learning policy
-lr_config = dict(step=[20, 23])
-runner = dict(type='EpochBasedRunner', max_epochs=24)
+max_epochs = 24
+train_cfg = dict(max_epochs=max_epochs)
+
+# learning rate schedule: linear warmup over the first 500 iterations, then 10x step decay at epochs 20 and 23
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=max_epochs,
+        by_epoch=True,
+        milestones=[20, 23],
+        gamma=0.1)
+]
diff --git a/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py
@@ -1,4 +1,17 @@
 _base_ = './mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py'
 # learning policy
-lr_config = dict(step=[20, 23])
-runner = dict(type='EpochBasedRunner', max_epochs=24)
+max_epochs = 24
+train_cfg = dict(max_epochs=max_epochs)
+
+# learning rate schedule: linear warmup over the first 500 iterations, then 10x step decay at epochs 20 and 23
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=max_epochs,
+        by_epoch=True,
+        milestones=[20, 23],
+        gamma=0.1)
+]