diff --git a/README.md b/README.md
index 4e80938..16721c7 100644
--- a/README.md
+++ b/README.md
@@ -163,15 +163,14 @@ data
 Modify some configuration accordingly in the config file like `configs/voc_unet.py`
 * for multi-label training use config file `configs/coco_multilabel_unet.py` and modify some configuration, the difference between single-label and multi-label training are mainly in following parameter in config file: `nclasses`, `multi_label`, `metrics` and `criterion`. Currently multi-label training is only supported in coco data format.
 
-2. Non-distributed training
-
+2. Distributed training
 ```shell
-python tools/train.py configs/voc_unet.py
+./tools/dist_train.sh configs/voc_unet.py gpu_num
 ```
-3. Ditributed training
+3. Non-distributed training
 ```shell
-./tools/dist_train.sh configs/voc_unet.py gpu_num
+python tools/train.py configs/voc_unet.py
 ```
 
 Snapshots and logs will be generated at `${vedaseg_root}/workdir`.
@@ -182,15 +181,14 @@ Snapshots and logs will be generated at `${vedaseg_root}/workdir`.
 
 Modify some configuration accordingly in the config file like `configs/voc_unet.py`
 
-2. Non-distributed testing
-
+2. Distributed testing
 ```shell
-python tools/test.py configs/voc_unet.py checkpoint_path
+./tools/dist_test.sh configs/voc_unet.py checkpoint_path gpu_num
 ```
-3. Ditributed testing
+3. Non-distributed testing
 ```shell
-./tools/dist_test.sh configs/voc_unet.py checkpoint_path gpu_num
+python tools/test.py configs/voc_unet.py checkpoint_path
 ```
 
 ## Inference
diff --git a/configs/coco_multilabel_unet.py b/configs/coco_multilabel_unet.py
index 66debe7..73db3da 100644
--- a/configs/coco_multilabel_unet.py
+++ b/configs/coco_multilabel_unet.py
@@ -225,7 +225,7 @@
         ),
         transforms=inference['transforms'],
         sampler=dict(
-            type='DistributedSampler',
+            type='DefaultSampler',
         ),
         dataloader=dict(
             type='DataLoader',
@@ -272,7 +272,7 @@
             dict(type='ToTensor'),
         ],
         sampler=dict(
-            type='DistributedSampler',
+            type='DefaultSampler',
         ),
         dataloader=dict(
             type='DataLoader',
@@ -293,7 +293,7 @@
         ),
         transforms=inference['transforms'],
         sampler=dict(
-            type='DistributedSampler',
+            type='DefaultSampler',
         ),
         dataloader=dict(
             type='DataLoader',
diff --git a/configs/coco_unet.py b/configs/coco_unet.py
index 612eb5b..a5b51db 100644
--- a/configs/coco_unet.py
+++ b/configs/coco_unet.py
@@ -226,7 +226,7 @@
         ),
         transforms=inference['transforms'],
         sampler=dict(
-            type='DistributedSampler',
+            type='DefaultSampler',
         ),
         dataloader=dict(
             type='DataLoader',
@@ -273,7 +273,7 @@
             dict(type='ToTensor'),
         ],
         sampler=dict(
-            type='DistributedSampler',
+            type='DefaultSampler',
         ),
         dataloader=dict(
             type='DataLoader',
@@ -288,13 +288,13 @@
         dataset=dict(
             type=dataset_type,
             root=dataset_root,
-            ann_file='instances_val2014.json',
-            img_prefix='val2014',
+            ann_file='instances_val2017.json',
+            img_prefix='val2017',
             multi_label=multi_label,
         ),
         transforms=inference['transforms'],
         sampler=dict(
-            type='DistributedSampler',
+            type='DefaultSampler',
         ),
         dataloader=dict(
             type='DataLoader',
diff --git a/configs/voc_deeplabv3.py b/configs/voc_deeplabv3.py
index 5725d9e..ed910b3 100644
--- a/configs/voc_deeplabv3.py
+++ b/configs/voc_deeplabv3.py
@@ -96,7 +96,7 @@
         ),
         transforms=inference['transforms'],
         sampler=dict(
-            type='DistributedSampler',
+            type='DefaultSampler',
         ),
         dataloader=dict(
             type='DataLoader',
@@ -137,7 +137,7 @@
             dict(type='ToTensor'),
         ],
         sampler=dict(
-            type='DistributedSampler',
+            type='DefaultSampler',
         ),
         dataloader=dict(
             type='DataLoader',
@@ -157,7 +157,7 @@
         ),
         transforms=inference['transforms'],
         sampler=dict(
-            type='DistributedSampler',
+            type='DefaultSampler',
         ),
         dataloader=dict(
             type='DataLoader',
diff --git a/configs/voc_deeplabv3plus.py b/configs/voc_deeplabv3plus.py
index 7793222..fb45236 100644
--- a/configs/voc_deeplabv3plus.py
+++ b/configs/voc_deeplabv3plus.py
@@ -128,7 +128,7 @@
         ),
         transforms=inference['transforms'],
         sampler=dict(
-            type='DistributedSampler',
+            type='DefaultSampler',
         ),
         dataloader=dict(
             type='DataLoader',
@@ -169,7 +169,7 @@
             dict(type='ToTensor'),
         ],
         sampler=dict(
-            type='DistributedSampler',
+            type='DefaultSampler',
         ),
         dataloader=dict(
             type='DataLoader',
@@ -189,7 +189,7 @@
         ),
         transforms=inference['transforms'],
         sampler=dict(
-            type='DistributedSampler',
+            type='DefaultSampler',
         ),
         dataloader=dict(
             type='DataLoader',
diff --git a/configs/voc_fpn.py b/configs/voc_fpn.py
index 769c713..2b5fae6 100644
--- a/configs/voc_fpn.py
+++ b/configs/voc_fpn.py
@@ -228,7 +228,7 @@
         ),
         transforms=inference['transforms'],
         sampler=dict(
-            type='DistributedSampler',
+            type='DefaultSampler',
         ),
         dataloader=dict(
             type='DataLoader',
@@ -274,7 +274,7 @@
             dict(type='ToTensor'),
         ],
         sampler=dict(
-            type='DistributedSampler',
+            type='DefaultSampler',
         ),
         dataloader=dict(
             type='DataLoader',
@@ -294,7 +294,7 @@
         ),
         transforms=inference['transforms'],
         sampler=dict(
-            type='DistributedSampler',
+            type='DefaultSampler',
         ),
         dataloader=dict(
             type='DataLoader',
diff --git a/configs/voc_pspnet.py b/configs/voc_pspnet.py
index 39d67cc..7e36d48 100644
--- a/configs/voc_pspnet.py
+++ b/configs/voc_pspnet.py
@@ -94,7 +94,7 @@
         ),
         transforms=inference['transforms'],
         sampler=dict(
-            type='DistributedSampler',
+            type='DefaultSampler',
         ),
         dataloader=dict(
             type='DataLoader',
@@ -140,7 +140,7 @@
             dict(type='ToTensor'),
         ],
         sampler=dict(
-            type='DistributedSampler',
+            type='DefaultSampler',
         ),
         dataloader=dict(
             type='DataLoader',
@@ -160,7 +160,7 @@
         ),
         transforms=inference['transforms'],
         sampler=dict(
-            type='DistributedSampler',
+            type='DefaultSampler',
         ),
         dataloader=dict(
             type='DataLoader',
diff --git a/configs/voc_unet.py b/configs/voc_unet.py
index f645a88..2084451 100644
--- a/configs/voc_unet.py
+++ b/configs/voc_unet.py
@@ -223,7 +223,7 @@
         ),
         transforms=inference['transforms'],
         sampler=dict(
-            type='DistributedSampler',
+            type='DefaultSampler',
         ),
         dataloader=dict(
             type='DataLoader',
@@ -269,7 +269,7 @@
             dict(type='ToTensor'),
         ],
         sampler=dict(
-            type='DistributedSampler',
+            type='DefaultSampler',
         ),
         dataloader=dict(
             type='DataLoader',
@@ -289,7 +289,7 @@
         ),
         transforms=inference['transforms'],
         sampler=dict(
-            type='DistributedSampler',
+            type='DefaultSampler',
         ),
         dataloader=dict(
             type='DataLoader',
diff --git a/vedaseg/dataloaders/builder.py b/vedaseg/dataloaders/builder.py
index a320e34..273debd 100644
--- a/vedaseg/dataloaders/builder.py
+++ b/vedaseg/dataloaders/builder.py
@@ -5,20 +5,17 @@
 
 def build_dataloader(distributed, num_gpus, cfg, default_args=None):
     cfg_ = cfg.copy()
-    shuffle = cfg_.pop('shuffle')
     samples_per_gpu = cfg_.pop('samples_per_gpu')
     workers_per_gpu = cfg_.pop('workers_per_gpu')
 
     if distributed:
-        shuffle = False
         batch_size = samples_per_gpu
         num_workers = workers_per_gpu
     else:
         batch_size = num_gpus * samples_per_gpu
         num_workers = num_gpus * workers_per_gpu
 
-    cfg_.update({'shuffle': shuffle,
-                 'batch_size': batch_size,
+    cfg_.update({'batch_size': batch_size,
                  'num_workers': num_workers})
 
     dataloader = build_from_cfg(cfg_, DATALOADERS, default_args)
diff --git a/vedaseg/dataloaders/samplers/__init__.py b/vedaseg/dataloaders/samplers/__init__.py
index 51b01ea..3f2bea5 100644
--- a/vedaseg/dataloaders/samplers/__init__.py
+++ b/vedaseg/dataloaders/samplers/__init__.py
@@ -1 +1,3 @@
 from .builder import build_sampler
+from .distributed import DefaultSampler
+from .non_distributed import DefaultSampler
diff --git a/vedaseg/dataloaders/samplers/builder.py b/vedaseg/dataloaders/samplers/builder.py
index 8b8f204..5073bdc 100644
--- a/vedaseg/dataloaders/samplers/builder.py
+++ b/vedaseg/dataloaders/samplers/builder.py
@@ -1,8 +1,11 @@
 from ...utils import build_from_cfg
-from .registry import SAMPLERS
+from .registry import NON_DISTRIBUTED_SAMPLERS, DISTRIBUTED_SAMPLERS
 
 
-def build_sampler(cfg, default_args=None):
-    sampler = build_from_cfg(cfg, SAMPLERS, default_args)
+def build_sampler(distributed, cfg, default_args=None):
+    if distributed:
+        sampler = build_from_cfg(cfg, DISTRIBUTED_SAMPLERS, default_args)
+    else:
+        sampler = build_from_cfg(cfg, NON_DISTRIBUTED_SAMPLERS, default_args)
 
     return sampler
diff --git a/vedaseg/dataloaders/samplers/distributed.py b/vedaseg/dataloaders/samplers/distributed.py
new file mode 100644
index 0000000..29cf108
--- /dev/null
+++ b/vedaseg/dataloaders/samplers/distributed.py
@@ -0,0 +1,13 @@
+from torch.utils.data import DistributedSampler
+
+from ...utils import get_dist_info
+from .registry import DISTRIBUTED_SAMPLERS
+
+
+@DISTRIBUTED_SAMPLERS.register_module
+class DefaultSampler(DistributedSampler):
+    """Default distributed sampler."""
+
+    def __init__(self, dataset, shuffle=True):
+        rank, num_replicas = get_dist_info()
+        super().__init__(dataset, num_replicas, rank, shuffle)
diff --git a/vedaseg/dataloaders/samplers/non_distributed.py b/vedaseg/dataloaders/samplers/non_distributed.py
new file mode 100644
index 0000000..1233868
--- /dev/null
+++ b/vedaseg/dataloaders/samplers/non_distributed.py
@@ -0,0 +1,22 @@
+import torch
+from torch.utils.data import Sampler
+
+from .registry import NON_DISTRIBUTED_SAMPLERS
+
+
+@NON_DISTRIBUTED_SAMPLERS.register_module
+class DefaultSampler(Sampler):
+    """Default non-distributed sampler."""
+
+    def __init__(self, dataset, shuffle=True):
+        self.dataset = dataset
+        self.shuffle = shuffle
+
+    def __iter__(self):
+        if self.shuffle:
+            return iter(torch.randperm(len(self.dataset)).tolist())
+        else:
+            return iter(range(len(self.dataset)))
+
+    def __len__(self):
+        return len(self.dataset)
diff --git a/vedaseg/dataloaders/samplers/registry.py b/vedaseg/dataloaders/samplers/registry.py
index 65178f6..eed9d19 100644
--- a/vedaseg/dataloaders/samplers/registry.py
+++ b/vedaseg/dataloaders/samplers/registry.py
@@ -1,7 +1,4 @@
-from torch.utils.data import DistributedSampler
-
 from ...utils import Registry
 
-SAMPLERS = Registry('sampler')
-
-SAMPLERS.register_module(DistributedSampler)
+DISTRIBUTED_SAMPLERS = Registry('distributed_sampler')
+NON_DISTRIBUTED_SAMPLERS = Registry('non_distributed_sampler')
diff --git a/vedaseg/runner/base.py b/vedaseg/runner/base.py
index 9280086..d69d3a8 100644
--- a/vedaseg/runner/base.py
+++ b/vedaseg/runner/base.py
@@ -85,14 +85,16 @@
     def _build_dataloader(self, cfg):
         transform = build_transform(cfg['transforms'])
         dataset = build_dataset(cfg['dataset'], dict(transform=transform))
 
-        shuffle = cfg['dataloader'].get('shuffle', False)
-        sampler = build_sampler(cfg['sampler'], dict(dataset=dataset,
-                                                     shuffle=shuffle)) if cfg.get(
-            'sampler') is not None else None
+        shuffle = cfg['dataloader'].pop('shuffle', False)
+        sampler = build_sampler(self.distribute,
+                                cfg['sampler'],
+                                dict(dataset=dataset,
+                                     shuffle=shuffle))
 
         dataloader = build_dataloader(self.distribute, self.gpu_num,
-                                      cfg['dataloader'], dict(dataset=dataset,
-                                                              sampler=sampler))
+                                      cfg['dataloader'],
+                                      dict(dataset=dataset,
+                                           sampler=sampler))
 
         return dataloader
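
For reference, a minimal usage sketch of the reworked builders (not part of the diff). It assumes `build_dataloader` and `build_sampler` are importable from the package paths shown above, that `DataLoader` is registered in the dataloader registry, and it uses a stand-in `TensorDataset` where the runner would call `build_dataset`:

```python
import torch
from torch.utils.data import TensorDataset

from vedaseg.dataloaders import build_dataloader
from vedaseg.dataloaders.samplers import build_sampler

# Stand-in dataset; in the runner this comes from build_dataset(cfg['dataset']).
dataset = TensorDataset(torch.zeros(8, 3, 32, 32), torch.zeros(8, 32, 32).long())

distributed = False  # True when launched via tools/dist_train.sh
gpu_num = 1

sampler_cfg = dict(type='DefaultSampler')
dataloader_cfg = dict(
    type='DataLoader',
    samples_per_gpu=4,
    workers_per_gpu=2,
    shuffle=True,
)

# Mirrors _build_dataloader in vedaseg/runner/base.py after this change:
# 'shuffle' is popped from the dataloader cfg and forwarded to the sampler,
# and the distributed flag decides which sampler registry is consulted.
shuffle = dataloader_cfg.pop('shuffle', False)
sampler = build_sampler(distributed, sampler_cfg,
                        dict(dataset=dataset, shuffle=shuffle))
dataloader = build_dataloader(distributed, gpu_num, dataloader_cfg,
                              dict(dataset=dataset, sampler=sampler))
```

With the two registries keyed on the `distributed` flag, the same `type='DefaultSampler'` entry in a config resolves to the `DistributedSampler` subclass under `dist_train.sh` and to the plain shuffling sampler otherwise, so configs no longer need to change between the two launch modes.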