diff --git a/mmseg/configs/_base_/datasets/cityscapes.py b/mmseg/configs/_base_/datasets/cityscapes.py
new file mode 100644
index 0000000000..03ddc229a0
--- /dev/null
+++ b/mmseg/configs/_base_/datasets/cityscapes.py
@@ -0,0 +1,79 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmcv.transforms.loading import LoadImageFromFile
+from mmcv.transforms.processing import (RandomFlip, RandomResize, Resize,
+                                        TestTimeAug)
+from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler
+
+from mmseg.datasets.cityscapes import CityscapesDataset
+from mmseg.datasets.transforms.formatting import PackSegInputs
+from mmseg.datasets.transforms.loading import LoadAnnotations
+from mmseg.datasets.transforms.transforms import (PhotoMetricDistortion,
+                                                  RandomCrop)
+from mmseg.evaluation import IoUMetric
+
+# dataset settings
+dataset_type = CityscapesDataset
+data_root = 'data/cityscapes/'
+crop_size = (512, 1024)
+train_pipeline = [
+    dict(type=LoadImageFromFile),
+    dict(type=LoadAnnotations),
+    dict(
+        type=RandomResize,
+        scale=(2048, 1024),
+        ratio_range=(0.5, 2.0),
+        keep_ratio=True),
+    dict(type=RandomCrop, crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type=RandomFlip, prob=0.5),
+    dict(type=PhotoMetricDistortion),
+    dict(type=PackSegInputs)
+]
+test_pipeline = [
+    dict(type=LoadImageFromFile),
+    dict(type=Resize, scale=(2048, 1024), keep_ratio=True),
+    # load annotations after ``Resize`` because the ground truth
+    # does not need to go through the resize transform
+    dict(type=LoadAnnotations),
+    dict(type=PackSegInputs)
+]
+img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
+tta_pipeline = [
+    dict(type=LoadImageFromFile, backend_args=None),
+    dict(
+        type=TestTimeAug,
+        transforms=[[
+            dict(type=Resize, scale_factor=r, keep_ratio=True)
+            for r in img_ratios
+        ],
+                    [
+                        dict(type=RandomFlip, prob=0., direction='horizontal'),
+                        dict(type=RandomFlip, prob=1., direction='horizontal')
+                    ], [dict(type=LoadAnnotations)],
+                    [dict(type=PackSegInputs)]])
+]
+train_dataloader = dict(
+    batch_size=2,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type=InfiniteSampler, shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='leftImg8bit/train', seg_map_path='gtFine/train'),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type=DefaultSampler, shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            img_path='leftImg8bit/val', seg_map_path='gtFine/val'),
+        pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(type=IoUMetric, iou_metrics=['mIoU'])
+test_evaluator = val_evaluator
diff --git a/mmseg/configs/_base_/models/lraspp_m_v3_d8.py b/mmseg/configs/_base_/models/lraspp_m_v3_d8.py
new file mode 100644
index 0000000000..22feb75a25
--- /dev/null
+++ b/mmseg/configs/_base_/models/lraspp_m_v3_d8.py
@@ -0,0 +1,43 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from torch.nn.modules.activation import ReLU
+from torch.nn.modules.batchnorm import SyncBatchNorm as SyncBN
+
+from mmseg.models.backbones import MobileNetV3
+from mmseg.models.data_preprocessor import SegDataPreProcessor
+from mmseg.models.decode_heads import LRASPPHead
+from mmseg.models.losses import CrossEntropyLoss
+from mmseg.models.segmentors import EncoderDecoder
+
+# model settings
+norm_cfg = dict(type=SyncBN, eps=0.001, requires_grad=True)
+data_preprocessor = dict(
+    type=SegDataPreProcessor,
+    mean=[123.675, 116.28, 103.53],
+    std=[58.395, 57.12, 57.375],
+    bgr_to_rgb=True,
+    pad_val=0,
+    seg_pad_val=255)
+model = dict(
+    type=EncoderDecoder,
+    data_preprocessor=data_preprocessor,
+    backbone=dict(
+        type=MobileNetV3,
+        arch='large',
+        out_indices=(1, 3, 16),
+        norm_cfg=norm_cfg),
+    decode_head=dict(
+        type=LRASPPHead,
+        in_channels=(16, 24, 960),
+        in_index=(0, 1, 2),
+        channels=128,
+        input_transform='multiple_select',
+        dropout_ratio=0.1,
+        num_classes=19,
+        norm_cfg=norm_cfg,
+        act_cfg=dict(type=ReLU),
+        align_corners=False,
+        loss_decode=dict(
+            type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0)),
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
diff --git a/mmseg/configs/mobilenet_v3/mobilenet_v3_d8_lraspp_4xb4_320k_cityscapes_512x1024.py b/mmseg/configs/mobilenet_v3/mobilenet_v3_d8_lraspp_4xb4_320k_cityscapes_512x1024.py
new file mode 100644
index 0000000000..ca888a5d3c
--- /dev/null
+++ b/mmseg/configs/mobilenet_v3/mobilenet_v3_d8_lraspp_4xb4_320k_cityscapes_512x1024.py
@@ -0,0 +1,22 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmengine.config import read_base
+from mmengine.model.weight_init import PretrainedInit
+
+with read_base():
+    from .._base_.datasets.cityscapes import *
+    from .._base_.default_runtime import *
+    from .._base_.models.lraspp_m_v3_d8 import *
+    from .._base_.schedules.schedule_320k import *
+
+checkpoint = 'open-mmlab://contrib/mobilenet_v3_large'
+crop_size = (512, 1024)
+data_preprocessor.update(dict(size=crop_size))
+model.update(
+    dict(
+        data_preprocessor=data_preprocessor,
+        backbone=dict(
+            init_cfg=dict(type=PretrainedInit, checkpoint=checkpoint))))
+# Re-config the dataloaders (batch size and number of workers).
+train_dataloader.update(dict(batch_size=4, num_workers=4))
+val_dataloader.update(dict(batch_size=1, num_workers=4))
+test_dataloader = val_dataloader
diff --git a/mmseg/configs/mobilenet_v3/mobilenet_v3_d8_s_lraspp_4xb4_320k_cityscapes_512x1024.py b/mmseg/configs/mobilenet_v3/mobilenet_v3_d8_s_lraspp_4xb4_320k_cityscapes_512x1024.py
new file mode 100644
index 0000000000..c7054f9e4a
--- /dev/null
+++ b/mmseg/configs/mobilenet_v3/mobilenet_v3_d8_s_lraspp_4xb4_320k_cityscapes_512x1024.py
@@ -0,0 +1,30 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmengine.config import read_base
+
+with read_base():
+    from .mobilenet_v3_d8_lraspp_4xb4_320k_cityscapes_512x1024 import *
+
+checkpoint = 'open-mmlab://contrib/mobilenet_v3_small'
+norm_cfg.update(dict(type=SyncBN, eps=0.001, requires_grad=True))
+model.update(
+    dict(
+        type=EncoderDecoder,
+        backbone=dict(
+            type=MobileNetV3,
+            init_cfg=dict(type=PretrainedInit, checkpoint=checkpoint),
+            arch='small',
+            out_indices=(0, 1, 12),
+            norm_cfg=norm_cfg),
+        decode_head=dict(
+            type=LRASPPHead,
+            in_channels=(16, 16, 576),
+            in_index=(0, 1, 2),
+            channels=128,
+            input_transform='multiple_select',
+            dropout_ratio=0.1,
+            num_classes=19,
+            norm_cfg=norm_cfg,
+            act_cfg=dict(type=ReLU),
+            align_corners=False,
+            loss_decode=dict(
+                type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0))))
diff --git a/mmseg/configs/mobilenet_v3/mobilenet_v3_d8_scratch_lraspp_4xb4_320k_cityscapes_512x1024.py b/mmseg/configs/mobilenet_v3/mobilenet_v3_d8_scratch_lraspp_4xb4_320k_cityscapes_512x1024.py
new file mode 100644
index 0000000000..8e8c1a7cfc
--- /dev/null
+++ b/mmseg/configs/mobilenet_v3/mobilenet_v3_d8_scratch_lraspp_4xb4_320k_cityscapes_512x1024.py
@@ -0,0 +1,16 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmengine.config import read_base
+
+with read_base():
+    from .._base_.datasets.cityscapes import *
+    from .._base_.default_runtime import *
+    from .._base_.models.lraspp_m_v3_d8 import *
+    from .._base_.schedules.schedule_320k import *
+
+crop_size = (512, 1024)
+data_preprocessor.update(dict(size=crop_size))
+# Re-config the data preprocessor and the dataloaders.
+model.update(dict(data_preprocessor=data_preprocessor))
+train_dataloader.update(dict(batch_size=4, num_workers=4))
+val_dataloader.update(dict(batch_size=1, num_workers=4))
+test_dataloader = val_dataloader
diff --git a/mmseg/configs/mobilenet_v3/mobilenet_v3_d8_scratch_s_lraspp_4xb4_320k_cityscapes_512x1024.py b/mmseg/configs/mobilenet_v3/mobilenet_v3_d8_scratch_s_lraspp_4xb4_320k_cityscapes_512x1024.py
new file mode 100644
index 0000000000..080400480b
--- /dev/null
+++ b/mmseg/configs/mobilenet_v3/mobilenet_v3_d8_scratch_s_lraspp_4xb4_320k_cityscapes_512x1024.py
@@ -0,0 +1,28 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from mmengine.config import read_base
+
+with read_base():
+    from .mobilenet_v3_d8_scratch_lraspp_4xb4_320k_cityscapes_512x1024 import *
+
+norm_cfg.update(dict(type=SyncBN, eps=0.001, requires_grad=True))
+model.update(
+    dict(
+        type=EncoderDecoder,
+        backbone=dict(
+            type=MobileNetV3,
+            arch='small',
+            out_indices=(0, 1, 12),
+            norm_cfg=norm_cfg),
+        decode_head=dict(
+            type=LRASPPHead,
+            in_channels=(16, 16, 576),
+            in_index=(0, 1, 2),
+            channels=128,
+            input_transform='multiple_select',
+            dropout_ratio=0.1,
+            num_classes=19,
+            norm_cfg=norm_cfg,
+            act_cfg=dict(type=ReLU),
+            align_corners=False,
+            loss_decode=dict(
+                type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0))))