From 6974d3044c930ff9932ea227643b43f9d5298e55 Mon Sep 17 00:00:00 2001
From: mjq2020 <74635395+mjq2020@users.noreply.github.com>
Date: Tue, 11 Jul 2023 18:34:20 +0800
Subject: [PATCH] optimize code format (#106)

* fix bug where metric's dataset metainfo is None
* fix bug where the image size type is List
* fix code that fails the format check
* optimize code format
---
 edgelab/datasets/__init__.py                  |  40 ++---
 edgelab/datasets/builder.py                   | 137 ------------------
 .../SensorDataPreprocessor.py                 |   7 +-
 .../datasets/data_preprocessors/audio_augs.py |   1 -
 edgelab/datasets/fomodataset.py               |  41 +-----
 edgelab/datasets/meter.py                     |   2 +-
 edgelab/datasets/pipelines/__init__.py        |   4 +-
 edgelab/datasets/pipelines/audio_augs.py      |   1 -
 edgelab/datasets/sensordataset.py             |   4 +-
 edgelab/datasets/speechcommand.py             |   2 +-
 edgelab/datasets/transforms/formatting.py     |   3 -
 edgelab/datasets/transforms/loading.py        |   1 -
 edgelab/datasets/utils/audio_augs.py          |   1 -
 edgelab/datasets/utils/download.py            |   4 +-
 edgelab/datasets/utils/functions.py           |   6 +-
 15 files changed, 23 insertions(+), 231 deletions(-)
 delete mode 100644 edgelab/datasets/builder.py

diff --git a/edgelab/datasets/__init__.py b/edgelab/datasets/__init__.py
index f9e97cab..027aa407 100644
--- a/edgelab/datasets/__init__.py
+++ b/edgelab/datasets/__init__.py
@@ -1,29 +1,11 @@
-from .cocodataset import CustomCocoDataset
-from .data_preprocessors import *
-from .fomodataset import FomoDatasets
-from .meter import MeterData
-from .pipelines import *
-from .sensordataset import SensorDataset
-from .speechcommand import Speechcommand
-from .transforms import *
-from .utils.functions import fomo_collate
-from .vocdataset import CustomVocdataset
-from .yolodataset import CustomYOLOv5CocoDataset
-
-__all__ = [
-    'Speechcommand',
-    'MeterData',
-    'AudioAugs',
-    'CustomCocoDataset',
-    'CustomVocdataset',
-    'FomoDatasets',
-    'SensorDataset',
-    'RandomResizedCrop',
-    'fomo_collate',
-    'ETADataPreprocessor',
-    'CustomYOLOv5CocoDataset',
-    'SensorDataPreprocessor',
-    'PackSensorInputs',
-    'LoadSensorFromFile',
-    'Bbox2FomoMask',
-]
+from .cocodataset import *  # noqa
+from .data_preprocessors import *  # noqa
+from .fomodataset import *  # noqa
+from .meter import *  # noqa
+from .pipelines import *  # noqa
+from .sensordataset import *  # noqa
+from .speechcommand import *  # noqa
+from .transforms import *  # noqa
+from .utils.functions import *  # noqa
+from .vocdataset import *  # noqa
+from .yolodataset import *  # noqa
diff --git a/edgelab/datasets/builder.py b/edgelab/datasets/builder.py
deleted file mode 100644
index 290b0ae4..00000000
--- a/edgelab/datasets/builder.py
+++ /dev/null
@@ -1,137 +0,0 @@
-# Copyright (c) OpenMMLab. All rights reserved.
-import warnings
-from functools import partial
-
-import torch
-from mmcv.parallel import collate
-from mmcv.runner import get_dist_info
-from mmcv.utils import TORCH_VERSION, Registry, build_from_cfg, digit_version
-from mmdet.datasets.builder import worker_init_fn
-from mmdet.datasets.samplers import (
-    ClassAwareSampler,
-    DistributedGroupSampler,
-    DistributedSampler,
-    GroupSampler,
-    InfiniteBatchSampler,
-    InfiniteGroupBatchSampler,
-)
-from torch.utils.data import DataLoader
-
-
-def collate_fn(batch):
-    img, label = [x['img'] for x in batch], [y['target'] for y in batch]
-    for i, l in enumerate(label):
-        if l.shape[0] > 0:
-            l[:, 0] = i
-    return dict(img=torch.stack(img), target=torch.cat(label, 0))
-
-
-def build_dataloader(
-    dataset,
-    samples_per_gpu,
-    workers_per_gpu,
-    num_gpus=1,
-    dist=True,
-    shuffle=True,
-    seed=None,
-    runner_type='EpochBasedRunner',
-    persistent_workers=False,
-    class_aware_sampler=None,
-    **kwargs,
-):
-    """Build PyTorch DataLoader.
-
-    In distributed training, each GPU/process has a dataloader.
-    In non-distributed training, there is only one dataloader for all GPUs.
-
-    Args:
-        dataset (Dataset): A PyTorch dataset.
-        samples_per_gpu (int): Number of training samples on each GPU, i.e.,
-            batch size of each GPU.
-        workers_per_gpu (int): How many subprocesses to use for data loading
-            for each GPU.
-        num_gpus (int): Number of GPUs. Only used in non-distributed training.
-        dist (bool): Distributed training/test or not. Default: True.
-        shuffle (bool): Whether to shuffle the data at every epoch.
-            Default: True.
-        seed (int, Optional): Seed to be used. Default: None.
-        runner_type (str): Type of runner. Default: `EpochBasedRunner`
-        persistent_workers (bool): If True, the data loader will not shutdown
-            the worker processes after a dataset has been consumed once.
-            This allows to maintain the workers `Dataset` instances alive.
-            This argument is only valid when PyTorch>=1.7.0. Default: False.
-        class_aware_sampler (dict): Whether to use `ClassAwareSampler`
-            during training. Default: None.
-        kwargs: any keyword argument to be used to initialize DataLoader
-
-    Returns:
-        DataLoader: A PyTorch dataloader.
-    """
-    rank, world_size = get_dist_info()
-
-    if dist:
-        # When model is :obj:`DistributedDataParallel`,
-        # `batch_size` of :obj:`dataloader` is the
-        # number of training samples on each GPU.
-        batch_size = samples_per_gpu
-        num_workers = workers_per_gpu
-    else:
-        # When model is obj:`DataParallel`
-        # the batch size is samples on all the GPUS
-        batch_size = num_gpus * samples_per_gpu
-        num_workers = num_gpus * workers_per_gpu
-
-    if runner_type == 'IterBasedRunner':
-        # this is a batch sampler, which can yield
-        # a mini-batch indices each time.
-        # it can be used in both `DataParallel` and
-        # `DistributedDataParallel`
-        if shuffle:
-            batch_sampler = InfiniteGroupBatchSampler(dataset, batch_size, world_size, rank, seed=seed)
-        else:
-            batch_sampler = InfiniteBatchSampler(dataset, batch_size, world_size, rank, seed=seed, shuffle=False)
-        batch_size = 1
-        sampler = None
-    else:
-        if class_aware_sampler is not None:
-            # ClassAwareSampler can be used in both distributed and
-            # non-distributed training.
-            num_sample_class = class_aware_sampler.get('num_sample_class', 1)
-            sampler = ClassAwareSampler(
-                dataset, samples_per_gpu, world_size, rank, seed=seed, num_sample_class=num_sample_class
-            )
-        elif dist:
-            # DistributedGroupSampler will definitely shuffle the data to
-            # satisfy that images on each GPU are in the same group
-            if shuffle:
-                sampler = DistributedGroupSampler(dataset, samples_per_gpu, world_size, rank, seed=seed)
-            else:
-                sampler = DistributedSampler(dataset, world_size, rank, shuffle=False, seed=seed)
-        else:
-            sampler = GroupSampler(dataset, samples_per_gpu) if shuffle else None
-        batch_sampler = None
-
-    init_fn = partial(worker_init_fn, num_workers=num_workers, rank=rank, seed=seed) if seed is not None else None
-
-    if TORCH_VERSION != 'parrots' and digit_version(TORCH_VERSION) >= digit_version('1.7.0'):
-        kwargs['persistent_workers'] = persistent_workers
-    elif persistent_workers is True:
-        warnings.warn('persistent_workers is invalid because your pytorch ' 'version is lower than 1.7.0')
-
-    collate_ = collate_fn if 'collate' in kwargs else partial(collate, samples_per_gpu=samples_per_gpu)
-    kwargs.pop('collate') if 'collate' in kwargs else None
-    data_loader = DataLoader(
-        dataset,
-        batch_size=batch_size,
-        sampler=sampler,
-        num_workers=num_workers,
-        batch_sampler=batch_sampler,
-        collate_fn=collate_,
-        pin_memory=kwargs.pop('pin_memory', False),
-        worker_init_fn=init_fn,
-        **kwargs,
-    )
-
-    return data_loader
-
-    return data_loader
diff --git a/edgelab/datasets/data_preprocessors/SensorDataPreprocessor.py b/edgelab/datasets/data_preprocessors/SensorDataPreprocessor.py
index d585c964..d8a1da64 100644
--- a/edgelab/datasets/data_preprocessors/SensorDataPreprocessor.py
+++ b/edgelab/datasets/data_preprocessors/SensorDataPreprocessor.py
@@ -1,8 +1,5 @@
-import json
-from typing import Optional, Union
+from typing import Optional
 
-import numpy as np
-import torch
 from mmcls.models.utils.batch_augments import RandomBatchAugment
 from mmcls.structures import (
     ClsDataSample,
@@ -12,10 +9,8 @@
     stack_batch_scores,
     tensor_split,
 )
-from mmengine.logging import MessageHub
 from mmengine.model.base_model.data_preprocessor import BaseDataPreprocessor
 
-from edgelab.engine.utils.batch_augs import BatchAugs
 from edgelab.registry import MODELS
 
 
diff --git a/edgelab/datasets/data_preprocessors/audio_augs.py b/edgelab/datasets/data_preprocessors/audio_augs.py
index f792d301..71e29521 100644
--- a/edgelab/datasets/data_preprocessors/audio_augs.py
+++ b/edgelab/datasets/data_preprocessors/audio_augs.py
@@ -100,7 +100,6 @@ def __init__(self, fs, p=0.5, fc_lp=None, fc_hp=None):
 
     def __call__(self, sample):
         if random.random() < self.p:
-            a = 0.25
             if random.random() < 0.5:
                 fc = 0.5 + random.random() * 0.25
                 filt = scipy.signal.firwin(self.num_taps, fc, window='hamming')
diff --git a/edgelab/datasets/fomodataset.py b/edgelab/datasets/fomodataset.py
index 3eef83f0..f318b83f 100644
--- a/edgelab/datasets/fomodataset.py
+++ b/edgelab/datasets/fomodataset.py
@@ -6,11 +6,8 @@
 import torch
 import torchvision
 from mmdet.datasets.coco import CocoDataset
-from mmengine.dataset.base_dataset import BaseDataset
 from mmengine.registry import DATASETS
 from sklearn.metrics import confusion_matrix
-from torch.utils.data import Dataset
-from torchvision.transforms import ToTensor
 
 from .pipelines.composition import AlbCompose
 
@@ -74,42 +71,8 @@ def __len__(self):
         return len(self.data)
 
     def __getitem__(self, index):
-        image, ann = self.data[index]
-
-        self.prepare_data(idx=index)
-        image = np.asarray(image)
-        return self.pipeline()
-
-        bboxes = []
-        labels = []
-        min_hw_pixels = 2
-        for annotation in ann:
-            # coco annotation specific https://cocodataset.org/#format-data
-            x, y, width, height = annotation['bbox'][:4]
-            if width == 0:
-                width += min_hw_pixels
-            if height == 0:
-                height += min_hw_pixels
-            annotation['bbox'][:4] = [x, y, width, height]
-            bboxes.append(annotation['bbox'])
-            labels.append(annotation['category_id'])
-
-        bboxes = np.array(bboxes)
-        labels = np.array(labels)
-
-        trans_param = {'image': image, 'bboxes': bboxes, self.bbox_params['label_fields'][0]: labels}
-
-        result = self.transform(**trans_param)
-        image = result['image']
-        bboxes = result['bboxes']
-        labels = result[self.bbox_params['label_fields'][0]]
-
-        H, W, C = image.shape
-        bbl = []
-        for bbox, l in zip(bboxes, labels):
-            bbl.append([0, l, (bbox[0] + (bbox[2] / 2)) / W, (bbox[1] + (bbox[3] / 2)) / H, bbox[2] / W, bbox[3] / H])
-
-        return {'inputs': ToTensor()(image), 'data_samples': torch.from_numpy(np.asarray(bbl))}
+        result = self.prepare_data(idx=index)
+        return result
 
     def get_ann_info(self, idx):
         ann = self.__getitem__(idx)['target']
diff --git a/edgelab/datasets/meter.py b/edgelab/datasets/meter.py
index f199eae3..45b9774e 100644
--- a/edgelab/datasets/meter.py
+++ b/edgelab/datasets/meter.py
@@ -23,7 +23,7 @@ def calc_angle(x1, y1, x2, y2):
     z = math.sqrt(x * x + y * y)
     try:
         angle = math.acos((z**2 + 1 - (x - 1) ** 2 - y**2) / (2 * z * 1)) / math.pi * 180
-    except:
+    except Exception:
         angle = 0
 
     if y < 0:
diff --git a/edgelab/datasets/pipelines/__init__.py b/edgelab/datasets/pipelines/__init__.py
index d170537a..f65950b2 100644
--- a/edgelab/datasets/pipelines/__init__.py
+++ b/edgelab/datasets/pipelines/__init__.py
@@ -1,5 +1,5 @@
-from .albu import *
+from .albu import *  # noqa
 from .audio_augs import AudioAugs
 from .transforms import Bbox2FomoMask
 
-__all__ = ['AudioAugs', 'RandomResizedCrop', 'Bbox2FomoMask']
+__all__ = ['AudioAugs', 'Bbox2FomoMask']
diff --git a/edgelab/datasets/pipelines/audio_augs.py b/edgelab/datasets/pipelines/audio_augs.py
index f792d301..71e29521 100644
--- a/edgelab/datasets/pipelines/audio_augs.py
+++ b/edgelab/datasets/pipelines/audio_augs.py
@@ -100,7 +100,6 @@ def __init__(self, fs, p=0.5, fc_lp=None, fc_hp=None):
 
     def __call__(self, sample):
         if random.random() < self.p:
-            a = 0.25
             if random.random() < 0.5:
                 fc = 0.5 + random.random() * 0.25
                 filt = scipy.signal.firwin(self.num_taps, fc, window='hamming')
diff --git a/edgelab/datasets/sensordataset.py b/edgelab/datasets/sensordataset.py
index fb537921..eb5c9a0f 100644
--- a/edgelab/datasets/sensordataset.py
+++ b/edgelab/datasets/sensordataset.py
@@ -1,9 +1,7 @@
-import glob
 import json
 import os
-from typing import List, Optional, Sequence, Tuple, Union
+from typing import Optional, Union
 
-import numpy as np
 from mmcls.datasets import CustomDataset
 
 from edgelab.registry import DATASETS
diff --git a/edgelab/datasets/speechcommand.py b/edgelab/datasets/speechcommand.py
index b4213abc..5e3b3c48 100644
--- a/edgelab/datasets/speechcommand.py
+++ b/edgelab/datasets/speechcommand.py
@@ -106,7 +106,7 @@ def load_meta_file(self, root, f_meta):
     def _get_labels(self, root):
         f_names = glob.glob(root + f'{sep}**{sep}*.wav')
         self.labels = sorted(list(set([f.split(f'{os.path.sep}')[-2] for f in f_names])))
-        self.labels = sorted([l for l in self.labels if l in self.words])
+        self.labels = sorted([label for label in self.labels if label in self.words])
 
     def __getitem__(self, index):
         fname = self.audio_files[index]
diff --git a/edgelab/datasets/transforms/formatting.py b/edgelab/datasets/transforms/formatting.py
index 69c26ff5..e1af36b1 100644
--- a/edgelab/datasets/transforms/formatting.py
+++ b/edgelab/datasets/transforms/formatting.py
@@ -1,13 +1,10 @@
-from collections import defaultdict
 from collections.abc import Sequence
-from functools import partial
 
 import numpy as np
 import torch
 from mmcls.structures import ClsDataSample
 from mmcv.transforms import BaseTransform
 from mmengine.utils import is_str
-from PIL import Image
 
 from edgelab.registry import TRANSFORMS
 
diff --git a/edgelab/datasets/transforms/loading.py b/edgelab/datasets/transforms/loading.py
index 50f703c8..15323277 100644
--- a/edgelab/datasets/transforms/loading.py
+++ b/edgelab/datasets/transforms/loading.py
@@ -2,7 +2,6 @@
 import warnings
 from typing import Optional
 
-import mmcv
 import mmengine.fileio as fileio
 import numpy as np
 from mmcv.transforms.base import BaseTransform
diff --git a/edgelab/datasets/utils/audio_augs.py b/edgelab/datasets/utils/audio_augs.py
index c95e0293..5db32158 100644
--- a/edgelab/datasets/utils/audio_augs.py
+++ b/edgelab/datasets/utils/audio_augs.py
@@ -98,7 +98,6 @@ def __init__(self, fs, p=0.5, fc_lp=None, fc_hp=None):
 
     def __call__(self, sample):
         if random.random() < self.p:
-            a = 0.25
             if random.random() < 0.5:
                 fc = 0.5 + random.random() * 0.25
                 filt = scipy.signal.firwin(self.num_taps, fc, window='hamming')
diff --git a/edgelab/datasets/utils/download.py b/edgelab/datasets/utils/download.py
index 18427fc1..0ebc6564 100644
--- a/edgelab/datasets/utils/download.py
+++ b/edgelab/datasets/utils/download.py
@@ -28,7 +28,7 @@ def defile(files, store_dir):
         res.append(cmd)
 
 
-def download(links: List or AnyStr, store_path: AnyStr or __path__, unzip_dir=None):
+def download(links: List or AnyStr, store_path: AnyStr, unzip_dir=None):
     if isinstance(links, str):
         links = [links]
     os.chdir(store_path)
@@ -36,8 +36,6 @@ def download(links: List or AnyStr, store_path: AnyStr or __path__, unzip_dir=No
         os.mkdir('download')
         os.chdir('download')
 
-    print(links)
-    print(store_path)
     for link in links:
         file_name = link.split('/')[-1]
         unzip = check_compress(file_name)
diff --git a/edgelab/datasets/utils/functions.py b/edgelab/datasets/utils/functions.py
index f47a28af..26561ce3 100644
--- a/edgelab/datasets/utils/functions.py
+++ b/edgelab/datasets/utils/functions.py
@@ -6,7 +6,7 @@
 @FUNCTIONS.register_module()
 def fomo_collate(batch):
     img, label = [x['inputs'] for x in batch], [y['data_samples'] for y in batch]
-    for i, l in enumerate(label):
-        if l.shape[0] > 0:
-            l[:, 0] = i
+    for i, lbl in enumerate(label):
+        if lbl.shape[0] > 0:
+            lbl[:, 0] = i
     return dict(inputs=torch.stack(img), data_samples=[DetDataSample(labels=torch.cat(label, 0))])
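
Note on the final hunk: fomo_collate relies on column 0 of each label tensor
carrying the index of the image it came from, which is what lets all boxes be
concatenated into one tensor without losing the per-image association. Below
is a minimal, self-contained sketch of that behavior. It is illustrative
only: the name fomo_collate_sketch is hypothetical, the 96x96 input size is
arbitrary, the (N, 6) row layout [batch_idx, class_id, cx, cy, w, h] is taken
from the FOMO __getitem__ code removed earlier in this patch, and the
DetDataSample wrapping done by the real fomo_collate is omitted.

    import torch

    def fomo_collate_sketch(batch):
        # Hypothetical stand-in for the fixed fomo_collate above.
        imgs = [item['inputs'] for item in batch]
        labels = [item['data_samples'] for item in batch]
        # Write each sample's batch position into column 0 so every box can
        # be traced back to its image after concatenation.
        for i, lbl in enumerate(labels):
            if lbl.shape[0] > 0:
                lbl[:, 0] = i
        return torch.stack(imgs), torch.cat(labels, 0)

    batch = [
        {'inputs': torch.zeros(3, 96, 96),
         'data_samples': torch.tensor([[0.0, 1.0, 0.50, 0.50, 0.10, 0.10]])},
        {'inputs': torch.zeros(3, 96, 96),
         'data_samples': torch.tensor([[0.0, 2.0, 0.25, 0.25, 0.20, 0.20]])},
    ]
    imgs, labels = fomo_collate_sketch(batch)
    assert imgs.shape == (2, 3, 96, 96)
    assert labels[1, 0].item() == 1.0  # boxes from image 1 carry batch index 1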