From 9c05821aea883a1e592dea14d8eb4af067ee9748 Mon Sep 17 00:00:00 2001
From: nullptr
Date: Fri, 7 Jul 2023 21:06:37 +0800
Subject: [PATCH 1/3] style: format code using black

Author: nullptr
---
 configs/_base_/default_runtime_cls.py         |   7 -
 configs/_base_/default_runtime_det.py         |  18 +-
 configs/_base_/default_runtime_pose.py        |   8 +-
 configs/_base_/schedules/schedule_1x.py       |  15 +-
 configs/_base_/schedules/schedule_20e.py      |  15 +-
 configs/_base_/schedules/schedule_2x.py       |  15 +-
 .../3axes_accelerometer_62.5Hz_1s_classify.py |  50 ++--
 .../ali_classiyf_small_8k_8192.py             | 166 ++++++-----
 .../fastestdet_shuffv2_spp_fomo_voc.py        |  20 +-
 .../fastestdet/fastestdet_shuffv2_spp_voc.py  | 100 ++++---
 .../fomo/fomo_efficientnet_b0_x8_abl_coco.py  |  10 +-
 .../fomo/fomo_mobnetv2_0.35_x8_abl_coco.py    |  66 ++---
 .../fomo/fomo_mobnetv2_0.35_x8_abl_coco_x.py  |  66 ++---
 configs/fomo/fomo_mobnetv2_fpn_0.35_x8_abl.py |  17 +-
 configs/fomo/fomo_mobnetv2_x8_coco.py         |  88 +++---
 configs/fomo/fomo_mobnetv2_x8_voc.py          | 100 ++++---
 .../fomo/fomo_mobnetv3_0.35_x8_abl_coco.py    |   8 +-
 configs/fomo/fomo_rep_0.35_abl_coco.py        |  71 ++---
 .../fomo/fomo_shufflenetv2_0.1_x8_abl_coco.py |  10 +-
 .../fomo/fomo_squeezenet_0.1_x8_abl_coco.py   |  10 +-
 configs/pfld/pfld_dan_fpn_x8_192.py           |  87 +++---
 configs/pfld/pfld_mbv2n_112.py                |  65 +++--
 configs/pfld/pfld_mbv3l_192.py                |  55 ++--
 configs/yolov3/yolov3_mbv2_416_voc.py         | 115 +++----
 configs/yolov5/base_arch.py                   | 258 +++++++++---------
 configs/yolov5/yolov5_l_1xb16_300e_coco.py    | 160 +++++------
 configs/yolov5/yolov5_m_1xb16_300e_coco.py    | 160 +++++------
 configs/yolov5/yolov5_n_1xb16_300e_coco.py    | 160 +++++------
 configs/yolov5/yolov5_s_1xb16_300e_coco.py    | 160 +++++------
 configs/yolov5/yolov5_tiny_1xb16_300e_coco.py | 160 +++++------
 configs/yolox/base_arch.py                    |  11 +-
 configs/yolox/yolox_tiny_1xb16_300e_coco.py   | 163 ++++++-----
 edgelab/__init__.py                           |   2 +-
 edgelab/datasets/__init__.py                  |  18 +-
 edgelab/datasets/builder.py                   |  89 +++---
 edgelab/datasets/cocodataset.py               | 227 +++++++++------
 .../SensorDataPreprocessor.py                 |  57 ++--
 .../datasets/data_preprocessors/__init__.py   |   3 +-
 .../datasets/data_preprocessors/audio_augs.py |  98 ++++---
 .../data_preprocessors/pointpreprocessor.py   |  33 ++-
 edgelab/datasets/fomo.py                      |   5 +-
 edgelab/datasets/fomodataset.py               |  65 ++---
 edgelab/datasets/meter.py                     |  54 ++--
 edgelab/datasets/pipelines/albu.py            | 258 ++++++++----------
 edgelab/datasets/pipelines/audio_augs.py      | 100 ++++---
 edgelab/datasets/pipelines/composition.py     |  42 +--
 edgelab/datasets/pipelines/transforms.py      |  10 +-
 edgelab/datasets/sensordataset.py             |  52 ++--
 edgelab/datasets/speechcommand.py             | 122 +++++----
 edgelab/datasets/transforms/__init__.py       |   6 +-
 edgelab/datasets/transforms/formatting.py     |  18 +-
 edgelab/datasets/transforms/loading.py        |  41 ++-
 edgelab/datasets/utils/audio_augs.py          | 160 +++++------
 edgelab/datasets/utils/download.py            |  49 ++--
 edgelab/datasets/utils/functions.py           |   2 +-
 edgelab/datasets/vocdataset.py                |  53 ++--
 edgelab/datasets/yolodataset.py               | 140 +++++++---
 edgelab/engine/__init__.py                    |  14 +-
 edgelab/engine/apis/mmdet/__init__.py         |  14 +-
 edgelab/engine/apis/mmdet/test.py             |  32 +--
 edgelab/engine/apis/mmdet/train.py            |  99 +++----
 edgelab/engine/hooks/__init__.py              |  10 +-
 edgelab/engine/hooks/evalhook.py              |  72 +++--
 edgelab/engine/hooks/logger/__init__.py       |   5 +-
 edgelab/engine/hooks/logger/clearml.py        |  51 ++--
 edgelab/engine/hooks/logger/pavi.py           | 120 ++++----
 edgelab/engine/hooks/logger/tensorboard.py    |  48 ++--
 edgelab/engine/hooks/logger/text.py           | 105 +++----
 edgelab/engine/hooks/logger/wandb.py          |  60 ++--
 edgelab/engine/hooks/visualization_hook.py    |  55 ++--
 edgelab/engine/runner/loops.py                |  44 ++-
 edgelab/engine/utils/batch_augs.py            | 114 ++++----
 edgelab/engine/utils/helper_funcs.py          |  34 +--
 edgelab/engine/utils/resample.py              |  80 ++----
 edgelab/evaluation/__init__.py                |   2 +-
 edgelab/evaluation/fomo_metric.py             |  26 +-
 edgelab/evaluation/point_metric.py            |  24 +-
 edgelab/models/backbones/AxesNet.py           |  25 +-
 edgelab/models/backbones/EfficientNet.py      | 127 +++++----
 edgelab/models/backbones/MobileNetv2.py       |  63 ++---
 edgelab/models/backbones/MobileNetv3.py       | 132 +++++----
 edgelab/models/backbones/ShuffleNetV2.py      | 117 +++-----
 edgelab/models/backbones/SoundNet.py          |  73 +++--
 edgelab/models/backbones/SqueezeNet.py        |  66 ++---
 edgelab/models/backbones/__init__.py          |  11 +-
 edgelab/models/backbones/pfld_mobilenet_v2.py |  23 +-
 edgelab/models/backbones/shufflenetv2.py      |  64 ++---
 edgelab/models/base/general.py                |  70 ++---
 edgelab/models/classifiers/Audio_speech.py    |  39 ++-
 edgelab/models/classifiers/__init__.py        |   2 +-
 edgelab/models/classifiers/accelerometer.py   |  59 ++--
 edgelab/models/classifiers/image.py           |  33 +--
 edgelab/models/detectors/fastestdet.py        |  24 +-
 edgelab/models/detectors/pfld.py              |  16 +-
 edgelab/models/heads/__init__.py              |   9 +-
 edgelab/models/heads/axes_head.py             |  23 +-
 edgelab/models/heads/cls_head.py              |   3 -
 edgelab/models/heads/fastestdet_head.py       |  86 +++---
 edgelab/models/heads/pfld_head.py             |  31 +--
 edgelab/models/heads/taggregate_head.py       |   5 +-
 edgelab/models/heads/yolo_head.py             | 106 +++----
 edgelab/models/layers/attention.py            | 102 ++-----
 edgelab/models/layers/rep.py                  |  41 +--
 edgelab/models/layers/test.py                 |  28 +-
 edgelab/models/losses/__init__.py             |   4 +-
 edgelab/models/losses/bce_withlogits_loss.py  |  15 +-
 edgelab/models/losses/classfication_loss.py   |  14 +-
 edgelab/models/losses/nll_loss.py             |  11 +-
 edgelab/models/losses/pfld_loss.py            |   3 +-
 edgelab/models/mot/bytetrack.py               |  16 +-
 edgelab/models/necks/__init__.py              |   2 +-
 edgelab/models/necks/fpn.py                   |  47 ++--
 edgelab/models/necks/spp.py                   |  39 +--
 edgelab/models/tf/tf_common.py                |  45 +--
 edgelab/models/utils/computer_acc.py          |  13 +-
 edgelab/models/utils/metrics.py               |  44 ++-
 edgelab/registry.py                           |  67 ++---
 edgelab/utils/__init__.py                     |   2 +-
 edgelab/utils/cv.py                           |  27 +-
 edgelab/version.py                            |   2 +-
 edgelab/visualization/__init__.py             |   2 +-
 edgelab/visualization/visualizer.py           |  86 +++---
 tools/analysis/get_featmap.py                 |  71 ++---
 tools/analysis/get_flops.py                   |  60 ++--
 tools/dataset_converters/ei2coco.py           |  69 +++--
 tools/export_quantize.py                      |  78 ++----
 tools/quan_test.py                            |  12 +-
 tools/utils/config.py                         |  58 ++--
 tools/utils/inference.py                      | 258 +++++++----------
 tools/utils/iot_camera.py                     |  68 ++---
 tools/utils/quant_read.py                     |  29 +-
 131 files changed, 3559 insertions(+), 4088 deletions(-)

diff --git a/configs/_base_/default_runtime_cls.py b/configs/_base_/default_runtime_cls.py
index 43dac7da..9a677f58 100644
--- a/configs/_base_/default_runtime_cls.py
+++ b/configs/_base_/default_runtime_cls.py
@@ -5,19 +5,14 @@
 default_hooks = dict(
     # record the time of every iteration.
     timer=dict(type='IterTimerHook'),
-
     # print log every 100 iterations.
     logger=dict(type='TextLoggerHook', interval=100),
-
     # enable the parameter scheduler.
     param_scheduler=dict(type='ParamSchedulerHook'),
-
     # save checkpoint per epoch.
     checkpoint=dict(type='CheckpointHook', save_best='auto', interval=1),
-
     # set sampler seed in distributed evrionment.
     sampler_seed=dict(type='DistSamplerSeedHook'),
-
     # validation results visualization, set True to enable it.
     visualization=dict(type='mmcls.VisualizationHook', enable=False),
 )
@@ -26,10 +21,8 @@
 env_cfg = dict(
     # whether to enable cudnn benchmark
     cudnn_benchmark=False,
-
     # set multi process parameters
     mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
-
     # set distributed parameters
     dist_cfg=dict(backend='nccl'),
 )
diff --git a/configs/_base_/default_runtime_det.py b/configs/_base_/default_runtime_det.py
index afbe9be1..d1e9ca56 100644
--- a/configs/_base_/default_runtime_det.py
+++ b/configs/_base_/default_runtime_det.py
@@ -6,8 +6,8 @@
     param_scheduler=dict(type='ParamSchedulerHook'),
     checkpoint=dict(type='CheckpointHook', interval=1),
     sampler_seed=dict(type='DistSamplerSeedHook'),
-    visualization=dict(type='mmdet.DetVisualizationHook')
-    )
+    visualization=dict(type='mmdet.DetVisualizationHook'),
+)
 
 env_cfg = dict(
     cudnn_benchmark=False,
@@ -16,11 +16,12 @@
 )
 
-vis_backends = [dict(type='LocalVisBackend'),
-                # dict(type='WandbVisBackend'),
-                dict(type='TensorboardVisBackend')]
-visualizer = dict(
-    type='edgelab.FomoLocalVisualizer', vis_backends=vis_backends, name='visualizer')
+vis_backends = [
+    dict(type='LocalVisBackend'),
+    # dict(type='WandbVisBackend'),
+    dict(type='TensorboardVisBackend'),
+]
+visualizer = dict(type='edgelab.FomoLocalVisualizer', vis_backends=vis_backends, name='visualizer')
 
 log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
 
@@ -29,7 +30,6 @@
 load_from = None
 resume = False
 
-train_cfg = dict(by_epoch=True,max_epochs=300)
+train_cfg = dict(by_epoch=True, max_epochs=300)
 val_cfg = dict()
 test_cfg = dict()
-
diff --git a/configs/_base_/default_runtime_pose.py b/configs/_base_/default_runtime_pose.py
index 39c6b4c2..6632edbd 100644
--- a/configs/_base_/default_runtime_pose.py
+++ b/configs/_base_/default_runtime_pose.py
@@ -30,12 +30,10 @@
     # dict(type='TensorboardVisBackend'),
     # dict(type='WandbVisBackend'),
 ]
-visualizer = dict(
-    type='mmpose.PoseLocalVisualizer',radius=1, vis_backends=vis_backends, name='visualizer')
+visualizer = dict(type='mmpose.PoseLocalVisualizer', radius=1, vis_backends=vis_backends, name='visualizer')
 
 # logger
-log_processor = dict(
-    type='LogProcessor', window_size=50, by_epoch=True, num_digits=6)
+log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True, num_digits=6)
 log_level = 'INFO'
 load_from = None
 resume = False
@@ -44,6 +42,6 @@
 backend_args = dict(backend='local')
 
 # training/validation/testing progress
-train_cfg = dict(by_epoch=True,max_epochs=210,val_interval=5)
+train_cfg = dict(by_epoch=True, max_epochs=210, val_interval=5)
 val_cfg = dict()
 test_cfg = dict()
diff --git a/configs/_base_/schedules/schedule_1x.py b/configs/_base_/schedules/schedule_1x.py
index 8b14fa8b..f91ebd34 100644
--- a/configs/_base_/schedules/schedule_1x.py
+++ b/configs/_base_/schedules/schedule_1x.py
@@ -5,21 +5,12 @@
 
 # learning rate
 param_scheduler = [
-    dict(
-        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
-    dict(
-        type='MultiStepLR',
-        begin=0,
-        end=12,
-        by_epoch=True,
-        milestones=[8, 11],
-        gamma=0.1)
+    dict(type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
+    dict(type='MultiStepLR', begin=0, end=12, by_epoch=True, milestones=[8, 11], gamma=0.1),
 ]
 
 # optimizer
-optim_wrapper = dict(
-    type='OptimWrapper',
-    optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
+optim_wrapper = dict(type='OptimWrapper', optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
 
 # Default setting for scaling LR automatically
 #   - `enable` means enable scaling LR automatically
diff --git a/configs/_base_/schedules/schedule_20e.py b/configs/_base_/schedules/schedule_20e.py
index dae5a221..15c20b97 100644
--- a/configs/_base_/schedules/schedule_20e.py
+++ b/configs/_base_/schedules/schedule_20e.py
@@ -5,21 +5,12 @@
 
 # learning rate
 param_scheduler = [
-    dict(
-        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
-    dict(
-        type='MultiStepLR',
-        begin=0,
-        end=20,
-        by_epoch=True,
-        milestones=[16, 19],
-        gamma=0.1)
+    dict(type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
+    dict(type='MultiStepLR', begin=0, end=20, by_epoch=True, milestones=[16, 19], gamma=0.1),
 ]
 
 # optimizer
-optim_wrapper = dict(
-    type='OptimWrapper',
-    optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
+optim_wrapper = dict(type='OptimWrapper', optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
 
 # Default setting for scaling LR automatically
 #   - `enable` means enable scaling LR automatically
diff --git a/configs/_base_/schedules/schedule_2x.py b/configs/_base_/schedules/schedule_2x.py
index 3a0e4574..76c95be0 100644
--- a/configs/_base_/schedules/schedule_2x.py
+++ b/configs/_base_/schedules/schedule_2x.py
@@ -5,21 +5,12 @@
 
 # learning rate
 param_scheduler = [
-    dict(
-        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
-    dict(
-        type='MultiStepLR',
-        begin=0,
-        end=24,
-        by_epoch=True,
-        milestones=[16, 22],
-        gamma=0.1)
+    dict(type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
+    dict(type='MultiStepLR', begin=0, end=24, by_epoch=True, milestones=[16, 22], gamma=0.1),
 ]
 
 # optimizer
-optim_wrapper = dict(
-    type='OptimWrapper',
-    optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
+optim_wrapper = dict(type='OptimWrapper', optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
 
 # Default setting for scaling LR automatically
 #   - `enable` means enable scaling LR automatically
diff --git a/configs/accelerometer/3axes_accelerometer_62.5Hz_1s_classify.py b/configs/accelerometer/3axes_accelerometer_62.5Hz_1s_classify.py
index 0264732c..77f8209b 100644
--- a/configs/accelerometer/3axes_accelerometer_62.5Hz_1s_classify.py
+++ b/configs/accelerometer/3axes_accelerometer_62.5Hz_1s_classify.py
@@ -5,19 +5,23 @@
 num_classes = 3
 num_axes = 3
 frequency = 62.5
-window=1000
-
-model = dict(type='AccelerometerClassifier',
-             backbone=dict(type='AxesNet',
-                           num_axes=num_axes,
-                           frequency=frequency,
-                           window=window,
-                           num_classes=num_classes,
-                           ),
-             head=dict(type='edgelab.ClsHead',
-                       loss=dict(type='mmcls.CrossEntropyLoss', loss_weight=1.0),
-                       topk=(1, 5),
-                       ))
+window = 1000
+
+model = dict(
+    type='AccelerometerClassifier',
+    backbone=dict(
+        type='AxesNet',
+        num_axes=num_axes,
+        frequency=frequency,
+        window=window,
+        num_classes=num_classes,
+    ),
+    head=dict(
+        type='edgelab.ClsHead',
+        loss=dict(type='mmcls.CrossEntropyLoss', loss_weight=1.0),
+        topk=(1, 5),
+    ),
+)
 
 # dataset settings
 dataset_type = 'edgelab.SensorDataset'
@@ -25,16 +29,18 @@
 batch_size = 1
 workers = 1
 
-shape = (num_classes * int(62.5 * 1000 / 1000))
+shape = num_classes * int(62.5 * 1000 / 1000)
 
-train_pipeline = [ dict(type='edgelab.LoadSensorFromFile'),
-                   dict(type='edgelab.PackSensorInputs'),
+train_pipeline = [
+    dict(type='edgelab.LoadSensorFromFile'),
+    dict(type='edgelab.PackSensorInputs'),
 ]
 
-test_pipeline = [ dict(type='edgelab.LoadSensorFromFile'),
-                  dict(type='edgelab.PackSensorInputs'),
+test_pipeline = [
+    dict(type='edgelab.LoadSensorFromFile'),
+    dict(type='edgelab.PackSensorInputs'),
 ]
-    
+
 train_dataloader = dict(
     batch_size=batch_size,
     num_workers=workers,
@@ -74,9 +80,7 @@
 lr = 0.0005
 epochs = 10
 
-optim_wrapper = dict(
-    type='OptimWrapper',
-    optimizer=dict(type='Adam', lr=lr, betas=[0.9, 0.99], weight_decay=0))
+optim_wrapper = dict(type='OptimWrapper', optimizer=dict(type='Adam', lr=lr, betas=[0.9, 0.99], weight_decay=0))
 
 train_cfg = dict(by_epoch=True, max_epochs=epochs)
 
@@ -86,4 +90,4 @@
 
 # set visualizer
 vis_backends = [dict(type='LocalVisBackend')]
-visualizer = dict(type='edgelab.SensorClsVisualizer', vis_backends=vis_backends, name='visualizer')
\ No newline at end of file
+visualizer = dict(type='edgelab.SensorClsVisualizer', vis_backends=vis_backends, name='visualizer')
diff --git a/configs/audio_classify/ali_classiyf_small_8k_8192.py b/configs/audio_classify/ali_classiyf_small_8k_8192.py
index c3845b5e..aa8a2cb4 100644
--- a/configs/audio_classify/ali_classiyf_small_8k_8192.py
+++ b/configs/audio_classify/ali_classiyf_small_8k_8192.py
@@ -7,38 +7,72 @@
     "yes",
 ]
 words = [
-    'backward', 'bed', 'bird', 'cat', 'dog', 'down', 'eight', 'five', 'follow',
-    'forward', 'four', 'go', 'happy', 'house', 'learn', 'left', 'marvin',
-    'nine', 'no', 'off', 'on', 'one', 'right', 'seven', 'sheila', 'six',
-    'stop', 'three', 'tree', 'two', 'up', 'visual', 'wow', 'yes', 'zero'
+    'backward',
+    'bed',
+    'bird',
+    'cat',
+    'dog',
+    'down',
+    'eight',
+    'five',
+    'follow',
+    'forward',
+    'four',
+    'go',
+    'happy',
+    'house',
+    'learn',
+    'left',
+    'marvin',
+    'nine',
+    'no',
+    'off',
+    'on',
+    'one',
+    'right',
+    'seven',
+    'sheila',
+    'six',
+    'stop',
+    'three',
+    'tree',
+    'two',
+    'up',
+    'visual',
+    'wow',
+    'yes',
+    'zero',
 ]
 
 # model settings
 num_classes = 35
-model = dict(type='Audio_classify',
-             n_cls=len(words),
-             multilabel=False,
-             loss=dict(type='LabelSmoothCrossEntropyLoss',
-                       reduction='sum',
-                       smoothing=0.1),
-             backbone=dict(type='SoundNetRaw',
-                           nf=2,
-                           clip_length=64,
-                           factors=[4, 4, 4],
-                           out_channel=36),
-             head=dict(type='Audio_head',
-                       in_channels=36,
-                       n_classes=num_classes,
-                       drop=0.2),
-             loss_cls=dict(type='LabelSmoothCrossEntropyLoss',
-                           reduction='sum',
-                           smoothing=0.1))
+model = dict(
+    type='Audio_classify',
+    n_cls=len(words),
+    multilabel=False,
+    loss=dict(type='LabelSmoothCrossEntropyLoss', reduction='sum', smoothing=0.1),
+    backbone=dict(type='SoundNetRaw', nf=2, clip_length=64, factors=[4, 4, 4], out_channel=36),
+    head=dict(type='Audio_head', in_channels=36, n_classes=num_classes, drop=0.2),
+    loss_cls=dict(type='LabelSmoothCrossEntropyLoss', reduction='sum', smoothing=0.1),
+)
 
 # dataset settings
 dataset_type = 'Speechcommand'
 
 transforms = [
-    'amp', 'neg', 'tshift', 'tmask', 'ampsegment', 'cycshift', 'awgn', 'abgn',
-    'apgn', 'argn', 'avgn', 'aun', 'phn', 'sine'
+    'amp',
+    'neg',
+    'tshift',
+    'tmask',
+    'ampsegment',
+    'cycshift',
+    'awgn',
+    'abgn',
+    'apgn',
+    'argn',
+    'avgn',
+    'aun',
+    'phn',
+    'sine',
 ]
 
 data_root = 'http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz'
@@ -55,15 +89,17 @@
     persistent_workers=True,
     drop_last=False,
     sampler=dict(type='DefaultSampler', shuffle=True),
-    dataset=dict(type=dataset_type,
-                 root=data_root,
-                 sampling_rate=8000,
-                 segment_length=width,
-                 pipeline=train_pipeline,
-                 mode='train',
-                 use_background=True,
-                 lower_volume=True,
-                 words=words),
+    dataset=dict(
+        type=dataset_type,
+        root=data_root,
+        sampling_rate=8000,
+        segment_length=width,
+        pipeline=train_pipeline,
+        mode='train',
+        use_background=True,
+        lower_volume=True,
+        words=words,
+    ),
 )
 
 val_dataloader = dict(
     batch_size=32,
@@ -71,41 +107,49 @@
     persistent_workers=True,
     drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=True),
-    dataset=dict(type=dataset_type,
-                 root=data_root,
-                 sampling_rate=8000,
-                 segment_length=width,
-                 mode='val',
-                 use_background=False,
-                 lower_volume=True,
-                 words=words),
+    dataset=dict(
+        type=dataset_type,
+        root=data_root,
+        sampling_rate=8000,
+        segment_length=width,
+        mode='val',
+        use_background=False,
+        lower_volume=True,
+        words=words,
+    ),
 )
 test_dataloader = val_dataloader
 
-data_preprocessor = dict(type='ETADataPreprocessor',
-                         n_cls=len(words),
-                         multilabel=False,
-                         seq_len=width,
-                         sampling_rate=8000,
-
-                         augs_mix=['mixup', 'timemix', 'freqmix', 'phmix'],
-                         mix_ratio=1,
-                         local_rank=0,
-                         epoch_mix=12,
-                         mix_loss='bce')
+data_preprocessor = dict(
+    type='ETADataPreprocessor',
+    n_cls=len(words),
+    multilabel=False,
+    seq_len=width,
+    sampling_rate=8000,
+    augs_mix=['mixup', 'timemix', 'freqmix', 'phmix'],
+    mix_ratio=1,
+    local_rank=0,
+    epoch_mix=12,
+    mix_loss='bce',
+)
 
 # optimizer
 lr = 0.0003
 epochs = 1500
 find_unused_parameters = True
-optim_wrapper = dict(optimizer=dict(
-    type='AdamW', lr=lr, betas=(0.9, 0.99), weight_decay=5e-4, eps=1e-7))
+optim_wrapper = dict(optimizer=dict(type='AdamW', lr=lr, betas=(0.9, 0.99), weight_decay=5e-4, eps=1e-7))
 optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
 
-#evaluator
-val_evaluator = dict(type='mmcls.Accuracy',topk=(1, 5,))
+# evaluator
+val_evaluator = dict(
+    type='mmcls.Accuracy',
+    topk=(
+        1,
+        5,
+    ),
+)
 test_evaluator = val_evaluator
 
 train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=1000)
@@ -113,12 +157,6 @@
 test_cfg = dict()
 # learning policy
 param_scheduler = [
-    dict(type='LinearLR', begin=0, end=30, start_factor=0.001,
-         by_epoch=False),  # warm-up
-    dict(type='MultiStepLR',
-         begin=1,
-         end=500,
-         milestones=[100, 200, 250],
-         gamma=0.1,
-         by_epoch=True)
+    dict(type='LinearLR', begin=0, end=30, start_factor=0.001, by_epoch=False),  # warm-up
+    dict(type='MultiStepLR', begin=1, end=500, milestones=[100, 200, 250], gamma=0.1, by_epoch=True),
 ]
diff --git a/configs/fastestdet/fastestdet_shuffv2_spp_fomo_voc.py b/configs/fastestdet/fastestdet_shuffv2_spp_fomo_voc.py
index 34614f45..f29f1af5 100644
--- a/configs/fastestdet/fastestdet_shuffv2_spp_fomo_voc.py
+++ b/configs/fastestdet/fastestdet_shuffv2_spp_fomo_voc.py
@@ -1,6 +1,6 @@
 _base_ = './fastestdet_shuffv2_spp_voc.py'
 
-num_classes=20,
+num_classes = (20,)
 model = dict(
     type='FastestDet',
     backbone=dict(
@@ -9,16 +9,14 @@
         widen_factor=0.25,
         act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
     ),
-    neck=dict(type='SPP',
-              input_channels=336,
-              output_channels=96,
-              layers=[1, 2, 3]),
-    bbox_head=dict(type='Fomo_Head',
-                   input_channels=96,
-                   num_classes=num_classes,
-                   loss_bg=dict(type='BCEWithLogitsLoss',
-                                reduction='mean'),
-                   loss_cls=dict(type='BCEWithLogitsLoss', reduction='mean')),
+    neck=dict(type='SPP', input_channels=336, output_channels=96, layers=[1, 2, 3]),
+    bbox_head=dict(
+        type='Fomo_Head',
+        input_channels=96,
+        num_classes=num_classes,
+        loss_bg=dict(type='BCEWithLogitsLoss', reduction='mean'),
+        loss_cls=dict(type='BCEWithLogitsLoss', reduction='mean'),
+    ),
 )
 
 evaluation = dict(interval=1, metric=['mAP'], fomo=True)
diff --git a/configs/fastestdet/fastestdet_shuffv2_spp_voc.py b/configs/fastestdet/fastestdet_shuffv2_spp_voc.py
index 6153d870..81052bef 100644
--- a/configs/fastestdet/fastestdet_shuffv2_spp_voc.py
+++ b/configs/fastestdet/fastestdet_shuffv2_spp_voc.py
@@ -1,6 +1,6 @@
 _base_ = '../_base_/default_runtime.py'
 
-num_classes=20
+num_classes = 20
 model = dict(
     type='FastestDet',
     backbone=dict(
@@ -9,71 +9,62 @@
         widen_factor=0.25,
         act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
     ),
-    neck=dict(type='SPP',
-              input_channels=336,
-              output_channels=96,
-              layers=[1, 2, 3]),
+    neck=dict(type='SPP', input_channels=336, output_channels=96, layers=[1, 2, 3]),
     bbox_head=dict(
         type='Fastest_Head',
         input_channels=96,
         num_classes=num_classes,
     ),
     # training and testing settings
-    train_cfg=dict(assigner=dict(
-        type='GridAssigner', pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=0)),
-    test_cfg=dict(nms_pre=1000,
-                  min_bbox_size=0,
-                  score_thr=0.05,
-                  conf_thr=0.005,
-                  nms=dict(type='nms', iou_threshold=0.45),
-                  max_per_img=100))
+    train_cfg=dict(assigner=dict(type='GridAssigner', pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=0)),
+    test_cfg=dict(
+        nms_pre=1000,
+        min_bbox_size=0,
+        score_thr=0.05,
+        conf_thr=0.005,
+        nms=dict(type='nms', iou_threshold=0.45),
+        max_per_img=100,
+    ),
+)
 
 # dataset settings
 dataset_type = 'CustomVocdataset'
 data_root = 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar'
-height=352
-width=352
-batch_size=32
-workers=4
+height = 352
+width = 352
+batch_size = 32
+workers = 4
 
-img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
-                    std=[58.395, 57.12, 57.375],
-                    to_rgb=True)
+img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations', with_bbox=True),
-    dict(type='Expand',
-         mean=img_norm_cfg['mean'],
-         to_rgb=img_norm_cfg['to_rgb'],
-         ratio_range=(1, 2)),
-    dict(type='MinIoURandomCrop',
-         min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
-         min_crop_size=0.3),
-    dict(type='Resize',
-         img_scale=[(height, width)],
-         multiscale_mode='range',
-         keep_ratio=True),
+    dict(type='Expand', mean=img_norm_cfg['mean'], to_rgb=img_norm_cfg['to_rgb'], ratio_range=(1, 2)),
+    dict(type='MinIoURandomCrop', min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9), min_crop_size=0.3),
+    dict(type='Resize', img_scale=[(height, width)], multiscale_mode='range', keep_ratio=True),
     dict(type='RandomFlip', flip_ratio=0.5),
     dict(type='PhotoMetricDistortion'),
     dict(type='Normalize', **img_norm_cfg),
     dict(type='Pad', size_divisor=32),
     dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(type='MultiScaleFlipAug',
-         img_scale=(height, height),
-         flip=False,
-         transforms=[
-             dict(type='Resize', keep_ratio=True),
-             dict(type='RandomFlip'),
-             dict(type='PhotoMetricDistortion'),
-             dict(type='Normalize', **img_norm_cfg),
-             dict(type='Pad', size_divisor=32),
-             dict(type='DefaultFormatBundle'),
-             dict(type='Collect', keys=['img'])
-         ])
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(height, height),
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='PhotoMetricDistortion'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='Pad', size_divisor=32),
+            dict(type='DefaultFormatBundle'),
+            dict(type='Collect', keys=['img']),
+        ],
+    ),
 ]
 data = dict(
     samples_per_gpu=batch_size,
@@ -86,32 +77,33 @@
             data_root=data_root,
             ann_file='ImageSets/Main/train.txt',
             # img_prefix=None,
-            pipeline=train_pipeline)),
+            pipeline=train_pipeline,
+        ),
+    ),
     val=dict(
         type=dataset_type,
         data_root=data_root,
         ann_file='ImageSets/Main/val.txt',
         # img_prefix=None,
-        pipeline=test_pipeline),
+        pipeline=test_pipeline,
+    ),
     test=dict(
         type=dataset_type,
         data_root=data_root,
        ann_file='ImageSets/Main/val.txt',
         # img_prefix=None,
-        pipeline=test_pipeline))
+        pipeline=test_pipeline,
+    ),
+)
 
 # optimizer
-lr=0.001
-epochs=300
+lr = 0.001
+epochs = 300
 optimizer = dict(type='SGD', lr=lr, momentum=0.949, weight_decay=0.0005)
 optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
 # learning policy
-lr_config = dict(policy='step',
-                 warmup='linear',
-                 warmup_iters=8000,
-                 warmup_ratio=0.000001,
-                 step=[100, 200, 250])
+lr_config = dict(policy='step', warmup='linear', warmup_iters=8000, warmup_ratio=0.000001, step=[100, 200, 250])
 # runtime settings
 evaluation = dict(interval=1, metric=['mAP'])
 find_unused_parameters = True
diff --git a/configs/fomo/fomo_efficientnet_b0_x8_abl_coco.py b/configs/fomo/fomo_efficientnet_b0_x8_abl_coco.py
index e8f16f99..8a0db1b4 100644
--- a/configs/fomo/fomo_efficientnet_b0_x8_abl_coco.py
+++ b/configs/fomo/fomo_efficientnet_b0_x8_abl_coco.py
@@ -1,9 +1,9 @@
 _base_ = './fomo_mobnetv2_0.35_x8_abl_coco.py'
 
-num_classes=2
+num_classes = 2
 model = dict(
     type='Fomo',
-    backbone=dict(type='EfficientNet',arch='b0', out_indices=(3, )),
+    backbone=dict(type='EfficientNet', arch='b0', out_indices=(3,)),
     # backbone=dict(type='edgeEfficienNet',arch='b0', out_indices=(2, )),
     head=dict(
         type='FomoHead',
@@ -11,9 +11,7 @@
         num_classes=num_classes,
         middle_channels=96,
         act_cfg='ReLU6',
-        loss_cls=dict(type='BCEWithLogitsLoss',
-                      reduction='none',
-                      pos_weight=100),
+        loss_cls=dict(type='BCEWithLogitsLoss', reduction='none', pos_weight=100),
         loss_bg=dict(type='BCEWithLogitsLoss', reduction='none'),
     ),
-)
\ No newline at end of file
+)
diff --git a/configs/fomo/fomo_mobnetv2_0.35_x8_abl_coco.py b/configs/fomo/fomo_mobnetv2_0.35_x8_abl_coco.py
index d48e5266..d319cdaf 100644
--- a/configs/fomo/fomo_mobnetv2_0.35_x8_abl_coco.py
+++ b/configs/fomo/fomo_mobnetv2_0.35_x8_abl_coco.py
@@ -1,33 +1,25 @@
 _base_ = "../_base_/default_runtime_det.py"
 default_scope = "edgelab"
 custom_imports = dict(imports=["edgelab"], allow_failed_imports=False)
-default_hooks = dict(
-    visualization=dict(type='mmdet.DetVisualizationHook', score_thr=0.8))
+default_hooks = dict(visualization=dict(type='mmdet.DetVisualizationHook', score_thr=0.8))
 
 visualizer = dict(type='FomoLocalVisualizer', fomo=True)
 
 num_classes = 2
-data_preprocessor = dict(type='mmdet.DetDataPreprocessor',
-                         mean=[0, 0, 0],
-                         std=[255., 255., 255.],
-                         bgr_to_rgb=True,
-                         pad_size_divisor=32)
+data_preprocessor = dict(
+    type='mmdet.DetDataPreprocessor', mean=[0, 0, 0], std=[255.0, 255.0, 255.0], bgr_to_rgb=True, pad_size_divisor=32
+)
 model = dict(
     type="Fomo",
     data_preprocessor=data_preprocessor,
-    backbone=dict(type="MobileNetv2",
-                  widen_factor=0.35,
-                  out_indices=(2, ),
-                  rep=True),
+    backbone=dict(type="MobileNetv2", widen_factor=0.35, out_indices=(2,), rep=True),
     head=dict(
         type="FomoHead",
         input_channels=[16],
         num_classes=num_classes,
         middle_channel=48,
         act_cfg="ReLU6",
-        loss_cls=dict(type="BCEWithLogitsLoss",
-                      reduction="none",
-                      pos_weight=40),
+        loss_cls=dict(type="BCEWithLogitsLoss", reduction="none", pos_weight=40),
         loss_bg=dict(type="BCEWithLogitsLoss", reduction="none"),
     ),
 )
@@ -41,16 +33,9 @@
 workers = 1
 
 albu_train_transforms = [
-    dict(type="RandomResizedCrop",
-         height=height,
-         width=width,
-         scale=(0.80, 1.2),
-         p=1),
+    dict(type="RandomResizedCrop", height=height, width=width, scale=(0.80, 1.2), p=1),
     dict(type="Rotate", limit=30),
-    dict(type="RandomBrightnessContrast",
-         brightness_limit=0.3,
-         contrast_limit=0.3,
-         p=0.5),
+    dict(type="RandomBrightnessContrast", brightness_limit=0.3, contrast_limit=0.3, p=0.5),
     dict(type='Blur', p=0.01),
     dict(type='MedianBlur', p=0.01),
     dict(type='ToGray', p=0.01),
@@ -59,31 +44,26 @@
 ]
 pre_transform = [
     dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
-    dict(type='mmdet.LoadAnnotations', with_bbox=True)
+    dict(type='mmdet.LoadAnnotations', with_bbox=True),
 ]
 
 train_pipeline = [
     *pre_transform,
-    dict(type='mmdet.Albu',
-         transforms=albu_train_transforms,
-         bbox_params=dict(type='BboxParams',
-                          format='pascal_voc',
-                          label_fields=['gt_bboxes_labels',
-                                        'gt_ignore_flags']),
-         keymap={
-             'img': 'image',
-             'gt_bboxes': 'bboxes'
-         }),
-    dict(type='mmdet.PackDetInputs',
-         meta_keys=('img_path', 'img_id', 'instances', 'img_shape',
-                    'ori_shape', 'gt_bboxes', 'gt_bboxes_labels'))
+    dict(
+        type='mmdet.Albu',
+        transforms=albu_train_transforms,
+        bbox_params=dict(type='BboxParams', format='pascal_voc', label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
+        keymap={'img': 'image', 'gt_bboxes': 'bboxes'},
+    ),
+    dict(
+        type='mmdet.PackDetInputs',
+        meta_keys=('img_path', 'img_id', 'instances', 'img_shape', 'ori_shape', 'gt_bboxes', 'gt_bboxes_labels'),
+    ),
 ]
 
 test_pipeline = [
     *pre_transform,
     dict(type="mmdet.Resize", scale=(height, width)),
-    dict(type='mmdet.PackDetInputs',
-         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
-                    'scale_factor'))
+    dict(type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor')),
 ]
 
 train_dataloader = dict(
@@ -123,8 +103,7 @@
 
 find_unused_parameters = True
 
-optim_wrapper = dict(
-    optimizer=dict(type="Adam", lr=lr, weight_decay=5e-4, eps=1e-7))
+optim_wrapper = dict(optimizer=dict(type="Adam", lr=lr, weight_decay=5e-4, eps=1e-7))
 
 # evaluator
 val_evaluator = dict(type="FomoMetric")
@@ -134,8 +113,7 @@
 
 # learning policy
 param_scheduler = [
-    dict(type="LinearLR", begin=0, end=30, start_factor=0.001,
-         by_epoch=False),  # warm-up
+    dict(type="LinearLR", begin=0, end=30, start_factor=0.001, by_epoch=False),  # warm-up
     dict(
         type="MultiStepLR",
         begin=1,
diff --git a/configs/fomo/fomo_mobnetv2_0.35_x8_abl_coco_x.py b/configs/fomo/fomo_mobnetv2_0.35_x8_abl_coco_x.py
index 7e51f284..7c8b2cbc 100644
--- a/configs/fomo/fomo_mobnetv2_0.35_x8_abl_coco_x.py
+++ b/configs/fomo/fomo_mobnetv2_0.35_x8_abl_coco_x.py
@@ -1,33 +1,25 @@
 _base_ = "../_base_/default_runtime_det.py"
 default_scope = "edgelab"
 custom_imports = dict(imports=["edgelab"], allow_failed_imports=False)
-default_hooks = dict(
-    visualization=dict(type='mmdet.DetVisualizationHook', score_thr=0.8))
+default_hooks = dict(visualization=dict(type='mmdet.DetVisualizationHook', score_thr=0.8))
 
 visualizer = dict(type='FomoLocalVisualizer', fomo=True)
 
 num_classes = 2
-data_preprocessor = dict(type='mmdet.DetDataPreprocessor',
-                         mean=[0, 0, 0],
-                         std=[255., 255., 255.],
-                         bgr_to_rgb=True,
-                         pad_size_divisor=32)
+data_preprocessor = dict(
+    type='mmdet.DetDataPreprocessor', mean=[0, 0, 0], std=[255.0, 255.0, 255.0], bgr_to_rgb=True, pad_size_divisor=32
+)
 model = dict(
     type="Fomo",
     data_preprocessor=data_preprocessor,
-    backbone=dict(type="MobileNetv2",
-                  widen_factor=0.35,
-                  out_indices=(2, ),
-                  rep=True),
+    backbone=dict(type="MobileNetv2", widen_factor=0.35, out_indices=(2,), rep=True),
     head=dict(
         type="FomoHead",
         input_channels=[16],
         num_classes=num_classes,
         middle_channel=48,
         act_cfg="ReLU6",
-        loss_cls=dict(type="BCEWithLogitsLoss",
-                      reduction="none",
-                      pos_weight=40),
+        loss_cls=dict(type="BCEWithLogitsLoss", reduction="none", pos_weight=40),
         loss_bg=dict(type="BCEWithLogitsLoss", reduction="none"),
     ),
 )
@@ -41,16 +33,9 @@
 workers = 4
 
 albu_train_transforms = [
-    dict(type="RandomResizedCrop",
-         height=height,
-         width=width,
-         scale=(0.80, 1.2),
-         p=1),
+    dict(type="RandomResizedCrop", height=height, width=width, scale=(0.80, 1.2), p=1),
     dict(type="Rotate", limit=30),
-    dict(type="RandomBrightnessContrast",
-         brightness_limit=0.3,
-         contrast_limit=0.3,
-         p=0.5),
+    dict(type="RandomBrightnessContrast", brightness_limit=0.3, contrast_limit=0.3, p=0.5),
     dict(type='Blur', p=0.01),
     dict(type='MedianBlur', p=0.01),
     dict(type='ToGray', p=0.01),
@@ -59,31 +44,26 @@
 ]
 pre_transform = [
     dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
-    dict(type='mmdet.LoadAnnotations', with_bbox=True)
+    dict(type='mmdet.LoadAnnotations', with_bbox=True),
 ]
 
 train_pipeline = [
     *pre_transform,
-    dict(type='mmdet.Albu',
-         transforms=albu_train_transforms,
-         bbox_params=dict(type='BboxParams',
-                          format='pascal_voc',
-                          label_fields=['gt_bboxes_labels',
-                                        'gt_ignore_flags']),
-         keymap={
-             'img': 'image',
-             'gt_bboxes': 'bboxes'
-         }),
-    dict(type='mmdet.PackDetInputs',
-         meta_keys=('img_path', 'img_id', 'instances', 'img_shape',
-                    'ori_shape', 'gt_bboxes', 'gt_bboxes_labels'))
+    dict(
+        type='mmdet.Albu',
+        transforms=albu_train_transforms,
+        bbox_params=dict(type='BboxParams', format='pascal_voc', label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
+        keymap={'img': 'image', 'gt_bboxes': 'bboxes'},
+    ),
+    dict(
+        type='mmdet.PackDetInputs',
+        meta_keys=('img_path', 'img_id', 'instances', 'img_shape', 'ori_shape', 'gt_bboxes', 'gt_bboxes_labels'),
+    ),
 ]
 
 test_pipeline = [
     *pre_transform,
     dict(type="mmdet.Resize", scale=(height, width)),
-    dict(type='mmdet.PackDetInputs',
-         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
-                    'scale_factor'))
+    dict(type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor')),
 ]
 
 train_dataloader = dict(
@@ -123,8 +103,7 @@
 
 find_unused_parameters = True
 
-optim_wrapper = dict(
-    optimizer=dict(type="Adam", lr=lr, weight_decay=5e-4, eps=1e-7))
+optim_wrapper = dict(optimizer=dict(type="Adam", lr=lr, weight_decay=5e-4, eps=1e-7))
 
 # evaluator
 val_evaluator = dict(type="FomoMetric")
@@ -134,8 +113,7 @@
 
 # learning policy
 param_scheduler = [
-    dict(type="LinearLR", begin=0, end=30, start_factor=0.001,
-         by_epoch=False),  # warm-up
+    dict(type="LinearLR", begin=0, end=30, start_factor=0.001, by_epoch=False),  # warm-up
     dict(
         type="MultiStepLR",
         begin=1,
diff --git a/configs/fomo/fomo_mobnetv2_fpn_0.35_x8_abl.py b/configs/fomo/fomo_mobnetv2_fpn_0.35_x8_abl.py
index f01299cd..bd731b19 100644
--- a/configs/fomo/fomo_mobnetv2_fpn_0.35_x8_abl.py
+++ b/configs/fomo/fomo_mobnetv2_fpn_0.35_x8_abl.py
@@ -2,20 +2,15 @@
 
 num_classes = 2
 model = dict(
-    backbone=dict(type='mmdet.MobileNetV2',
-                  widen_factor=0.35,
-                  out_indices=(2, 3, 5)),
+    backbone=dict(type='mmdet.MobileNetV2', widen_factor=0.35, out_indices=(2, 3, 5)),
     neck=dict(
         type='FPN',
         in_channels=[16, 24, 56],
         num_outs=3,
-        out_idx=[0,],
+        out_idx=[
+            0,
+        ],
         out_channels=24,
     ),
-    head=dict(
-        type='FomoHead',
-        input_channels=[24],
-        num_classes=num_classes,
-        act_cfg='ReLU'
-    ),
-)
\ No newline at end of file
+    head=dict(type='FomoHead', input_channels=[24], num_classes=num_classes, act_cfg='ReLU'),
+)
diff --git a/configs/fomo/fomo_mobnetv2_x8_coco.py b/configs/fomo/fomo_mobnetv2_x8_coco.py
index cbfe4b3b..300e8d18 100644
--- a/configs/fomo/fomo_mobnetv2_x8_coco.py
+++ b/configs/fomo/fomo_mobnetv2_x8_coco.py
@@ -1,19 +1,19 @@
 _base_ = '../_base_/default_runtime_det.py'
-custom_imports = dict(imports=['models', 'datasets', 'core'],
-                      allow_failed_imports=False)
+custom_imports = dict(imports=['models', 'datasets', 'core'], allow_failed_imports=False)
 
 model = dict(
     type='Fomo',
-    backbone=dict(type='MobileNetV2', widen_factor=0.35, out_indices=(2, )),
-    head=dict(type='FomoHead',
-              input_channels=16,
-              num_classes=80,
-              middle_channels=48,
-              act_cfg='ReLU6',
-              loss_cls=dict(type='BCEWithLogitsLoss', reduction='mean'),
-              loss_bg=dict(type='BCEWithLogitsLoss', reduction='mean'),
-              ),
+    backbone=dict(type='MobileNetV2', widen_factor=0.35, out_indices=(2,)),
+    head=dict(
+        type='FomoHead',
+        input_channels=16,
+        num_classes=80,
+        middle_channels=48,
+        act_cfg='ReLU6',
+        loss_cls=dict(type='BCEWithLogitsLoss', reduction='mean'),
+        loss_bg=dict(type='BCEWithLogitsLoss', reduction='mean'),
+    ),
 )
 
 # dataset settings
@@ -22,21 +22,19 @@
     "http://images.cocodataset.org/zips/train2017.zip",
     "http://images.cocodataset.org/zips/val2017.zip",
     "http://images.cocodataset.org/zips/test2017.zip",
-    "http://images.cocodataset.org/annotations/annotations_trainval2017.zip")
+    "http://images.cocodataset.org/annotations/annotations_trainval2017.zip",
+)
 
 img_norm_cfg = dict(mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations', with_bbox=True),
-    dict(type='Resize',
-         img_scale=(96, 96),
-         multiscale_mode='range',
-         keep_ratio=True),
+    dict(type='Resize', img_scale=(96, 96), multiscale_mode='range', keep_ratio=True),
     dict(type='RandomFlip', flip_ratio=0.5),
     # dict(type='PhotoMetricDistortion'),
     dict(type='Normalize', **img_norm_cfg),
     dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
@@ -50,8 +48,9 @@
             # dict(type='PhotoMetricDistortion'),
             dict(type='Normalize', **img_norm_cfg),
             dict(type='DefaultFormatBundle'),
-            dict(type='Collect', keys=['img'])
-        ])
+            dict(type='Collect', keys=['img']),
+        ],
+    ),
 ]
 batch_size = 16
 workers = 2
@@ -62,12 +61,14 @@
     drop_last=False,
     collate_fn=dict(type='fomo_collate'),
     sampler=dict(type='DefaultSampler', shuffle=True, round_up=False),
-    dataset=dict(type=dataset_type,
-                 data_root=data_root,
-                 filter_empty_gt=False,
-                 ann_file='annotations/instances_train2017.json',
-                 img_prefix='train2017/',
-                 pipeline=train_pipeline),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        filter_empty_gt=False,
+        ann_file='annotations/instances_train2017.json',
+        img_prefix='train2017/',
+        pipeline=train_pipeline,
+    ),
 )
 val_dataloader = dict(
     batch_size=batch_size,
@@ -76,12 +77,14 @@
     drop_last=False,
     collate_fn=dict(type='fomo_collate'),
     sampler=dict(type='DefaultSampler', shuffle=True, round_up=False),
-    dataset=dict(type=dataset_type,
-                 data_root=data_root,
-                 filter_empty_gt=False,
-                 ann_file='annotations/instances_val2017.json',
-                 img_prefix='test2017/',
-                 pipeline=test_pipeline),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        filter_empty_gt=False,
+        ann_file='annotations/instances_val2017.json',
+        img_prefix='test2017/',
+        pipeline=test_pipeline,
+    ),
 )
 test_dataloader = val_dataloader
 
@@ -91,25 +94,16 @@
 
 find_unused_parameters = True
 
-optim_wrapper=dict(optimizer = dict(type='Adam', lr=lr, weight_decay=5e-4,eps=1e-7))
+optim_wrapper = dict(optimizer=dict(type='Adam', lr=lr, weight_decay=5e-4, eps=1e-7))
 
-#evaluator
-val_evaluator=dict(
-    type='FomoMetric')
+# evaluator
+val_evaluator = dict(type='FomoMetric')
 test_evaluator = val_evaluator
 
-train_cfg = dict(by_epoch=True,max_epochs=70)
+train_cfg = dict(by_epoch=True, max_epochs=70)
 
 # learning policy
 param_scheduler = [
-    dict(
-        type='LinearLR', begin=0, end=30, start_factor=0.001,
-        by_epoch=False),  # warm-up
-    dict(
-        type='MultiStepLR',
-        begin=1,
-        end=500,
-        milestones=[100, 200,250],
-        gamma=0.1,
-        by_epoch=True)
-]
\ No newline at end of file
+    dict(type='LinearLR', begin=0, end=30, start_factor=0.001, by_epoch=False),  # warm-up
+    dict(type='MultiStepLR', begin=1, end=500, milestones=[100, 200, 250], gamma=0.1, by_epoch=True),
+]
diff --git a/configs/fomo/fomo_mobnetv2_x8_voc.py b/configs/fomo/fomo_mobnetv2_x8_voc.py
index 7d036a2f..3be8525f 100644
--- a/configs/fomo/fomo_mobnetv2_x8_voc.py
+++ b/configs/fomo/fomo_mobnetv2_x8_voc.py
@@ -1,19 +1,19 @@
 _base_ = '../_base_/default_runtime_det.py'
-custom_imports = dict(imports=['models', 'datasets', 'core'],
-                      allow_failed_imports=False)
+custom_imports = dict(imports=['models', 'datasets', 'core'], allow_failed_imports=False)
 
 model = dict(
     type='Fomo',
-    backbone=dict(type='MobileNetV2', widen_factor=0.35, out_indices=(2, )),
-    head=dict(type='FomoHead',
-              input_channels=16,
-              num_classes=20,
-              middle_channels=[96, 32],
-              act_cfg='ReLU6',
-              loss_cls=dict(type='BCEWithLogitsLoss', reduction='mean'),
-              loss_bg=dict(type='BCEWithLogitsLoss', reduction='mean'),
-              ),
+    backbone=dict(type='MobileNetV2', widen_factor=0.35, out_indices=(2,)),
+    head=dict(
+        type='FomoHead',
+        input_channels=16,
+        num_classes=20,
+        middle_channels=[96, 32],
+        act_cfg='ReLU6',
+        loss_cls=dict(type='BCEWithLogitsLoss', reduction='mean'),
+        loss_bg=dict(type='BCEWithLogitsLoss', reduction='mean'),
+    ),
 )
 
 # dataset settings
@@ -24,29 +24,28 @@
 train_pipeline = [
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations', with_bbox=True),
-    dict(type='Resize',
-         img_scale=[(96, 96)],
-         multiscale_mode='range',
-         keep_ratio=True),
+    dict(type='Resize', img_scale=[(96, 96)], multiscale_mode='range', keep_ratio=True),
     dict(type='RandomFlip', flip_ratio=0.5),
     # dict(type='PhotoMetricDistortion'),
     dict(type='Normalize', **img_norm_cfg),
     dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
+    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile'),
-    dict(type='MultiScaleFlipAug',
-         img_scale=(96, 96),
-         flip=False,
-         transforms=[
-             dict(type='Resize', keep_ratio=True),
-             dict(type='RandomFlip'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(96, 96),
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
             # dict(type='PhotoMetricDistortion'),
-             dict(type='Normalize', **img_norm_cfg),
-             dict(type='DefaultFormatBundle'),
-             dict(type='Collect', keys=['img'])
-         ])
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='DefaultFormatBundle'),
+            dict(type='Collect', keys=['img']),
+        ],
+    ),
 ]
 batch_size = 16
 workers = 2
@@ -57,12 +56,14 @@
     drop_last=False,
     collate_fn=dict(type='fomo_collate'),
     sampler=dict(type='DefaultSampler', shuffle=True, round_up=False),
-    dataset=dict(type=dataset_type,
-                 data_root=data_root,
-                 filter_empty_gt=False,
-                 ann_file='annotations/instances_train2017.json',
-                 img_prefix='train2017/',
-                 pipeline=train_pipeline),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        filter_empty_gt=False,
+        ann_file='annotations/instances_train2017.json',
+        img_prefix='train2017/',
+        pipeline=train_pipeline,
+    ),
 )
 val_dataloader = dict(
     batch_size=batch_size,
@@ -71,12 +72,14 @@
     drop_last=False,
     collate_fn=dict(type='fomo_collate'),
     sampler=dict(type='DefaultSampler', shuffle=True, round_up=False),
-    dataset=dict(type=dataset_type,
-                 data_root=data_root,
-                 filter_empty_gt=False,
-                 ann_file='annotations/instances_val2017.json',
-                 img_prefix='test2017/',
-                 pipeline=test_pipeline),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        filter_empty_gt=False,
+        ann_file='annotations/instances_val2017.json',
+        img_prefix='test2017/',
+        pipeline=test_pipeline,
+    ),
 )
 test_dataloader = val_dataloader
 
@@ -86,25 +89,16 @@
 
 find_unused_parameters = True
 
-optim_wrapper=dict(optimizer = dict(type='Adam', lr=lr, weight_decay=5e-4,eps=1e-7))
+optim_wrapper = dict(optimizer=dict(type='Adam', lr=lr, weight_decay=5e-4, eps=1e-7))
 
-#evaluator
-val_evaluator=dict(
-    type='FomoMetric')
+# evaluator
+val_evaluator = dict(type='FomoMetric')
 test_evaluator = val_evaluator
 
-train_cfg = dict(by_epoch=True,max_epochs=70)
+train_cfg = dict(by_epoch=True, max_epochs=70)
 
 # learning policy
 param_scheduler = [
-    dict(
-        type='LinearLR', begin=0, end=30, start_factor=0.001,
-        by_epoch=False),  # warm-up
-    dict(
-        type='MultiStepLR',
-        begin=1,
-        end=500,
-        milestones=[100, 200,250],
-        gamma=0.1,
-        by_epoch=True)
+    dict(type='LinearLR', begin=0, end=30, start_factor=0.001, by_epoch=False),  # warm-up
+    dict(type='MultiStepLR', begin=1, end=500, milestones=[100, 200, 250], gamma=0.1, by_epoch=True),
 ]
diff --git a/configs/fomo/fomo_mobnetv3_0.35_x8_abl_coco.py b/configs/fomo/fomo_mobnetv3_0.35_x8_abl_coco.py
index a5b3ddc4..63666c04 100644
--- a/configs/fomo/fomo_mobnetv3_0.35_x8_abl_coco.py
+++ b/configs/fomo/fomo_mobnetv3_0.35_x8_abl_coco.py
@@ -1,18 +1,16 @@
 _base_ = './fomo_mobnetv2_0.35_x8_abl_coco.py'
 
-num_classes=2
+num_classes = 2
 model = dict(
     type='Fomo',
-    backbone=dict(type='MobileNetV3', arch='large',widen_factor=0.35, out_indices=(3, )),
+    backbone=dict(type='MobileNetV3', arch='large', widen_factor=0.35, out_indices=(3,)),
     head=dict(
         type='FomoHead',
         input_channels=16,
         num_classes=num_classes,
         middle_channels=[96, 32],
         act_cfg='ReLU6',
-        loss_cls=dict(type='BCEWithLogitsLoss',
-                      reduction='none',
-                      pos_weight=100),
+        loss_cls=dict(type='BCEWithLogitsLoss', reduction='none', pos_weight=100),
         loss_bg=dict(type='BCEWithLogitsLoss', reduction='none'),
     ),
 )
diff --git a/configs/fomo/fomo_rep_0.35_abl_coco.py b/configs/fomo/fomo_rep_0.35_abl_coco.py
index d79ca97a..91b87222 100644
--- a/configs/fomo/fomo_rep_0.35_abl_coco.py
+++ b/configs/fomo/fomo_rep_0.35_abl_coco.py
@@ -9,51 +9,54 @@
 loss_cls_weight = 0.5
 loss_bbox_weight = 0.05
 loss_obj_weight = 1.0
-prior_match_thr = 4.  # Priori box matching threshold
+prior_match_thr = 4.0  # Priori box matching threshold
 strides = [8, 16, 32]
 anchors = [
     [(10, 13), (16, 30), (33, 23)],  # P3/8
     [(30, 61), (62, 45), (59, 119)],  # P4/16
-    [(116, 90), (156, 198), (373, 326)]  # P5/32
+    [(116, 90), (156, 198), (373, 326)],  # P5/32
 ]
 num_det_layers = 3  # The number of model output scales
 norm_cfg = dict(type='BN', momentum=0.03, eps=0.001)  # Normalization config
-obj_level_weights = [4., 1., 0.4]
+obj_level_weights = [4.0, 1.0, 0.4]
 
 model = dict(
     type='Fomo',
-    backbone=dict(type="MobileNetv2",
-                  widen_factor=0.35,
-                  out_indices=(2, 3, 4),
-                  rep=True),
+    backbone=dict(type="MobileNetv2", widen_factor=0.35, out_indices=(2, 3, 4), rep=True),
     head=dict(
         type='edgelab.YOLOV5Head',
-        head_module=dict(type='edgelab.DetHead',
-                         num_classes=num_classes,
-                         in_channels=[16, 32, 64],
-                         widen_factor=1,
-                         featmap_strides=strides,
-                         num_base_priors=3),
-        prior_generator=dict(type='mmdet.YOLOAnchorGenerator',
-                             base_sizes=anchors,
-                             strides=strides),
+        head_module=dict(
+            type='edgelab.DetHead',
+            num_classes=num_classes,
+            in_channels=[16, 32, 64],
+            widen_factor=1,
+            featmap_strides=strides,
+            num_base_priors=3,
+        ),
+        prior_generator=dict(type='mmdet.YOLOAnchorGenerator', base_sizes=anchors, strides=strides),
         # scaled based on number of detection layers
-        loss_cls=dict(type='mmdet.CrossEntropyLoss',
-                      use_sigmoid=True,
-                      reduction='mean',
-                      loss_weight=loss_cls_weight *
-                      (num_classes / 80 * 3 / num_det_layers)),
-        loss_bbox=dict(type='IoULoss',
-                       iou_mode='ciou',
-                       bbox_format='xywh',
-                       eps=1e-7,
-                       reduction='mean',
-                       loss_weight=loss_bbox_weight * (3 / num_det_layers),
-                       return_iou=True),
-        loss_obj=dict(type='mmdet.CrossEntropyLoss',
-                      use_sigmoid=True,
-                      reduction='mean',
-                      loss_weight=loss_obj_weight *
-                      ((img_scale[0] / 640)**2 * 3 / num_det_layers)),
+        loss_cls=dict(
+            type='mmdet.CrossEntropyLoss',
+            use_sigmoid=True,
+            reduction='mean',
+            loss_weight=loss_cls_weight * (num_classes / 80 * 3 / num_det_layers),
+        ),
+        loss_bbox=dict(
+            type='IoULoss',
+            iou_mode='ciou',
+            bbox_format='xywh',
+            eps=1e-7,
+            reduction='mean',
+            loss_weight=loss_bbox_weight * (3 / num_det_layers),
+            return_iou=True,
+        ),
+        loss_obj=dict(
+            type='mmdet.CrossEntropyLoss',
+            use_sigmoid=True,
+            reduction='mean',
+            loss_weight=loss_obj_weight * ((img_scale[0] / 640) ** 2 * 3 / num_det_layers),
+        ),
         prior_match_thr=prior_match_thr,
-        obj_level_weights=obj_level_weights))
+        obj_level_weights=obj_level_weights,
+    ),
+)
diff --git a/configs/fomo/fomo_shufflenetv2_0.1_x8_abl_coco.py b/configs/fomo/fomo_shufflenetv2_0.1_x8_abl_coco.py
index 087fddd9..19d1b067 100644
--- a/configs/fomo/fomo_shufflenetv2_0.1_x8_abl_coco.py
+++ b/configs/fomo/fomo_shufflenetv2_0.1_x8_abl_coco.py
@@ -1,18 +1,16 @@
 _base_ = './fomo_mobnetv2_0.35_x8_abl_coco.py'
 
-num_classes=2
+num_classes = 2
 model = dict(
     type='Fomo',
-    backbone=dict(type='ShuffleNetV2', widen_factor=0.1, out_indices=(1, )),
+    backbone=dict(type='ShuffleNetV2', widen_factor=0.1, out_indices=(1,)),
     head=dict(
         type='FomoHead',
         input_channels=24,
         num_classes=num_classes,
         middle_channels=[96, 32],
         act_cfg='ReLU6',
-        loss_cls=dict(type='BCEWithLogitsLoss',
-                      reduction='none',
-                      pos_weight=100),
+        loss_cls=dict(type='BCEWithLogitsLoss', reduction='none', pos_weight=100),
         loss_bg=dict(type='BCEWithLogitsLoss', reduction='none'),
     ),
-)
\ No newline at end of file
+)
diff --git a/configs/fomo/fomo_squeezenet_0.1_x8_abl_coco.py b/configs/fomo/fomo_squeezenet_0.1_x8_abl_coco.py
index bef88c54..3d7e8493 100644
--- a/configs/fomo/fomo_squeezenet_0.1_x8_abl_coco.py
+++ b/configs/fomo/fomo_squeezenet_0.1_x8_abl_coco.py
@@ -1,18 +1,16 @@
 _base_ = './fomo_mobnetv2_0.35_x8_abl_coco.py'
 
-num_classes=2
+num_classes = 2
 model = dict(
     type='Fomo',
-    backbone=dict(type='SqueezeNet', widen_factor=0.1, out_indices=(2, )),
+    backbone=dict(type='SqueezeNet', widen_factor=0.1, out_indices=(2,)),
     head=dict(
         type='FomoHead',
         input_channels=24,
         num_classes=num_classes,
         middle_channels=[96, 32],
         act_cfg='ReLU6',
-        loss_cls=dict(type='BCEWithLogitsLoss',
-                      reduction='none',
-                      pos_weight=100),
+        loss_cls=dict(type='BCEWithLogitsLoss', reduction='none', pos_weight=100),
         loss_bg=dict(type='BCEWithLogitsLoss', reduction='none'),
     ),
-)
\ No newline at end of file
+)
diff --git a/configs/pfld/pfld_dan_fpn_x8_192.py b/configs/pfld/pfld_dan_fpn_x8_192.py
index 9861e1f2..8af9b6c6 100644
--- a/configs/pfld/pfld_dan_fpn_x8_192.py
+++ b/configs/pfld/pfld_dan_fpn_x8_192.py
@@ -4,7 +4,7 @@
 anchors = [
     [(10, 13), (16, 30), (33, 23)],  # P3/8
     [(30, 61), (62, 45), (59, 119)],  # P4/16
-    [(116, 90), (156, 198), (373, 326)]  # P5/32
+    [(116, 90), (156, 198), (373, 326)],  # P5/32
 ]
 num_classes = 4
 deepen_factor = 0.33
@@ -15,26 +15,24 @@
 
 model = dict(
     type='PFLD',
-
-    backbone=dict(type='mmyolo.YOLOv5CSPDarknet',
-                  deepen_factor=deepen_factor,
-                  widen_factor=widen_factor,
-                  norm_cfg=norm_cfg,
-                  act_cfg=dict(type='SiLU', inplace=True)),
-    neck=dict(type='mmyolo.YOLOv5PAFPN',
-              deepen_factor=deepen_factor,
-              widen_factor=widen_factor,
-              in_channels=[256, 512, 1024],
-              out_channels=[256, 512, 1024],
-              num_csp_blocks=3,
-              norm_cfg=norm_cfg,
-              act_cfg=dict(type='SiLU', inplace=True)),
-    head=dict(type='PFLDhead',
-              num_point=num_classes,
-              input_channel=40,
-              act_cfg="ReLU",
-              loss_cfg=dict(type='PFLDLoss'))
-
+    backbone=dict(
+        type='mmyolo.YOLOv5CSPDarknet',
+        deepen_factor=deepen_factor,
+        widen_factor=widen_factor,
+        norm_cfg=norm_cfg,
+        act_cfg=dict(type='SiLU', inplace=True),
+    ),
+    neck=dict(
+        type='mmyolo.YOLOv5PAFPN',
+        deepen_factor=deepen_factor,
+        widen_factor=widen_factor,
+        in_channels=[256, 512, 1024],
+        out_channels=[256, 512, 1024],
+        num_csp_blocks=3,
+        norm_cfg=norm_cfg,
+        act_cfg=dict(type='SiLU', inplace=True),
+    ),
+    head=dict(type='PFLDhead', num_point=num_classes, input_channel=40, act_cfg="ReLU", loss_cfg=dict(type='PFLDLoss')),
 )
 # dataset settings
 dataset_type = 'MeterData'
@@ -48,14 +46,14 @@
 train_pipeline = [
     dict(type="Resize", height=height, width=width, interpolation=0),
     # dict(type="PixelDropout"),
-    dict(type='ColorJitter', brightness=0.3,contrast=0.3,saturation=0.3, p=0.5),
+    dict(type='ColorJitter', brightness=0.3, contrast=0.3, saturation=0.3, p=0.5),
     # dict(type="CoarseDropout",max_height=12,max_width=12),
     # dict(type='GaussNoise'),
     dict(type='MedianBlur', blur_limit=3, p=0.5),
     dict(type='HorizontalFlip'),
     dict(type='VerticalFlip'),
-    dict(type='Rotate',limit=45, p=0.7),
-    dict(type='Affine', translate_percent=[0.05, 0.30], p=0.6)
+    dict(type='Rotate', limit=45, p=0.7),
+    dict(type='Affine', translate_percent=[0.05, 0.30], p=0.6),
 ]
 
 val_pipeline = [dict(type="Resize", height=height, width=width)]
@@ -67,11 +65,13 @@
     drop_last=False,
     collate_fn=dict(type='default_collate'),
     sampler=dict(type='DefaultSampler', shuffle=True, round_up=False),
-    dataset=dict(type=dataset_type,
-                 data_root=data_root,
-                 img_dir="train/images",
-                 index_file=r'train/annotations.txt',
-                 pipeline=train_pipeline),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir="train/images",
+        index_file=r'train/annotations.txt',
+        pipeline=train_pipeline,
+    ),
 )
 
 val_dataloader = dict(
@@ -81,19 +81,20 @@
     drop_last=False,
     collate_fn=dict(type='default_collate'),
     sampler=dict(type='DefaultSampler', shuffle=True, round_up=False),
-    dataset=dict(type=dataset_type,
-                 data_root=data_root,
-                 img_dir="val/images",
-                 index_file=r'val/annotations.txt',
-                 pipeline=val_pipeline),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir="val/images",
+        index_file=r'val/annotations.txt',
+        pipeline=val_pipeline,
+    ),
 )
 test_dataloader = val_dataloader
 
 lr = 0.0001
 epochs = 1000
 evaluation = dict(save_best='loss')
-optim_wrapper = dict(
-    optimizer=dict(type='Adam', lr=lr, betas=(0.9, 0.99), weight_decay=1e-6))
+optim_wrapper = dict(optimizer=dict(type='Adam', lr=lr, betas=(0.9, 0.99), weight_decay=1e-6))
 optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
 val_evaluator = dict(type='PointMetric')
 test_evaluator = val_evaluator
@@ -103,12 +104,8 @@
 train_cfg = dict(by_epoch=True, max_epochs=epochs)
 # learning policy
 param_scheduler = [
-    dict(type='LinearLR', begin=0, end=500, start_factor=0.001,
-         by_epoch=False),  # warm-up
-    dict(type='MultiStepLR',
-         begin=1,
-         end=500,
-         milestones=[300,400,500,600,700,800,900],
-         gamma=0.1,
-         by_epoch=True)
-]
\ No newline at end of file
+    dict(type='LinearLR', begin=0, end=500, start_factor=0.001, by_epoch=False),  # warm-up
+    dict(
+        type='MultiStepLR', begin=1, end=500, milestones=[300, 400, 500, 600, 700, 800, 900], gamma=0.1, by_epoch=True
+    ),
+]
diff --git a/configs/pfld/pfld_mbv2n_112.py b/configs/pfld/pfld_mbv2n_112.py
index 612fa8e7..0995c0f3 100644
--- a/configs/pfld/pfld_mbv2n_112.py
+++ b/configs/pfld/pfld_mbv2n_112.py
@@ -1,17 +1,17 @@
 _base_ = '../_base_/default_runtime_pose.py'
 
 num_classes = 4
-model = dict(type='PFLD',
-             backbone=dict(type='PfldMobileNetV2',
-                           inchannel=3,
-                           layer1=[16, 16, 16, 16, 16],
-                           layer2=[32, 32, 32, 32, 32, 32],
-                           out_channel=32),
-             head=dict(type='PFLDhead',
-                       num_point=num_classes,
-                       input_channel=32,
-                       act_cfg="ReLU",
-                       loss_cfg=dict(type='PFLDLoss')))
+model = dict(
+    type='PFLD',
+    backbone=dict(
+        type='PfldMobileNetV2',
+        inchannel=3,
+        layer1=[16, 16, 16, 16, 16],
+        layer2=[32, 32, 32, 32, 32, 32],
+        out_channel=32,
+    ),
+    head=dict(type='PFLDhead', num_point=num_classes, input_channel=32, act_cfg="ReLU", loss_cfg=dict(type='PFLDLoss')),
+)
 
 # dataset settings
 dataset_type = 'MeterData'
@@ -25,14 +25,14 @@
 train_pipeline = [
     dict(type="Resize", height=height, width=width, interpolation=0),
     # dict(type="PixelDropout"),
-    dict(type='ColorJitter', brightness=0.3,contrast=0.3,saturation=0.3, p=0.5),
+    dict(type='ColorJitter', brightness=0.3, contrast=0.3, saturation=0.3, p=0.5),
     # dict(type="CoarseDropout",max_height=12,max_width=12),
     # dict(type='GaussNoise'),
     dict(type='MedianBlur', blur_limit=3, p=0.5),
     dict(type='HorizontalFlip'),
     dict(type='VerticalFlip'),
-    dict(type='Rotate',limit=45, p=0.7),
-    dict(type='Affine', translate_percent=[0.05, 0.30], p=0.6)
+    dict(type='Rotate', limit=45, p=0.7),
+    dict(type='Affine', translate_percent=[0.05, 0.30], p=0.6),
 ]
 
 val_pipeline = [dict(type="Resize", height=height, width=width)]
@@ -44,11 +44,13 @@
     drop_last=False,
     collate_fn=dict(type='default_collate'),
     sampler=dict(type='DefaultSampler', shuffle=True, round_up=False),
-    dataset=dict(type=dataset_type,
-                 data_root=data_root,
-                 img_dir="train/images",
-                 index_file=r'train/annotations.txt',
-                 pipeline=train_pipeline),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir="train/images",
+        index_file=r'train/annotations.txt',
+        pipeline=train_pipeline,
+    ),
 )
 
 val_dataloader = dict(
@@ -58,19 +60,20 @@
     drop_last=False,
     collate_fn=dict(type='default_collate'),
     sampler=dict(type='DefaultSampler', shuffle=True, round_up=False),
-    dataset=dict(type=dataset_type,
-                 data_root=data_root,
-                 img_dir="val/images",
-                 index_file=r'val/annotations.txt',
-                 pipeline=val_pipeline),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir="val/images",
+        index_file=r'val/annotations.txt',
+        pipeline=val_pipeline,
+    ),
 )
 test_dataloader = val_dataloader
 
 lr = 0.0001
 epochs = 1000
 evaluation = dict(save_best='loss')
-optim_wrapper = dict(
-    optimizer=dict(type='Adam', lr=lr, betas=(0.9, 0.99), weight_decay=1e-6))
+optim_wrapper = dict(optimizer=dict(type='Adam', lr=lr, betas=(0.9, 0.99), weight_decay=1e-6))
 optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
 val_evaluator = dict(type='PointMetric')
 test_evaluator = val_evaluator
@@ -80,12 +83,6 @@
 train_cfg = dict(by_epoch=True, max_epochs=epochs)
 # learning policy
 param_scheduler = [
-    dict(type='LinearLR', begin=0, end=500, start_factor=0.001,
-         by_epoch=False),  # warm-up
-    dict(type='MultiStepLR',
-         begin=1,
-         end=500,
-         milestones=[350,500,600,700,800,900],
-         gamma=0.1,
-         by_epoch=True)
+    dict(type='LinearLR', begin=0, end=500, start_factor=0.001, by_epoch=False),  # warm-up
+    dict(type='MultiStepLR', begin=1, end=500, milestones=[350, 500, 600, 700, 800, 900], gamma=0.1, by_epoch=True),
 ]
diff --git a/configs/pfld/pfld_mbv3l_192.py b/configs/pfld/pfld_mbv3l_192.py
index d9253284..8d82af58 100644
--- a/configs/pfld/pfld_mbv3l_192.py
+++ b/configs/pfld/pfld_mbv3l_192.py
@@ -1,18 +1,16 @@
 _base_ = './pfld_mbv2n_112.py'
 
 num_classes = 4
-model = dict(type='PFLD',
-             backbone=dict(
-                 type='MobileNetV3',
-                 inchannel=3,
-                 arch='large',
-                 out_indices=(3, ),
-             ),
-             head=dict(type='PFLDhead',
-                       num_point=num_classes,
-                       input_channel=40,
-                       act_cfg="ReLU",
-                       loss_cfg=dict(type='PFLDLoss')))
+model = dict(
+    type='PFLD',
+    backbone=dict(
+        type='MobileNetV3',
+        inchannel=3,
+        arch='large',
+        out_indices=(3,),
+    ),
+    head=dict(type='PFLDhead', num_point=num_classes, input_channel=40, act_cfg="ReLU", loss_cfg=dict(type='PFLDLoss')),
+)
 
 # dataset settings
 dataset_type = 'MeterData'
@@ -26,14 +24,14 @@
 train_pipeline = [
     dict(type="Resize", height=height, width=width, interpolation=0),
     # dict(type="PixelDropout"),
-    dict(type='ColorJitter', brightness=0.3,contrast=0.3,saturation=0.3, p=0.5),
+    dict(type='ColorJitter', brightness=0.3, contrast=0.3, saturation=0.3, p=0.5),
     # dict(type='GaussNoise'),
     # dict(type="CoarseDropout",max_height=12,max_width=12),
     dict(type='MedianBlur', blur_limit=3, p=0.5),
     dict(type='HorizontalFlip'),
     dict(type='VerticalFlip'),
-    dict(type='Rotate',limit=45, p=0.7),
-    dict(type='Affine', translate_percent=[0.05, 0.3], p=0.6)
+    dict(type='Rotate', limit=45, p=0.7),
+    dict(type='Affine', translate_percent=[0.05, 0.3], p=0.6),
 ]
 
 val_pipeline = [dict(type="Resize", height=height, width=width)]
@@ -45,11 +43,13 @@
     drop_last=False,
     collate_fn=dict(type='default_collate'),
     sampler=dict(type='DefaultSampler', shuffle=True, round_up=False),
-    dataset=dict(type=dataset_type,
-                 data_root=data_root,
-                 img_dir="train/images",
-                 index_file=r'train/annotations.txt',
-                 pipeline=train_pipeline),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir="train/images",
+        index_file=r'train/annotations.txt',
+        pipeline=train_pipeline,
+    ),
 )
 
 val_dataloader = dict(
@@ -59,9 +59,12 @@
     drop_last=False,
     collate_fn=dict(type='default_collate'),
     sampler=dict(type='DefaultSampler', shuffle=True, round_up=False),
-    dataset=dict(type=dataset_type,
-                 data_root=data_root,
-                 img_dir="val/images",
-                 index_file=r'val/annotations.txt',
-                 pipeline=val_pipeline),)
-test_dataloader = val_dataloader
\ No newline at end of file
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir="val/images",
+        index_file=r'val/annotations.txt',
+        pipeline=val_pipeline,
+    ),
+)
+test_dataloader = val_dataloader
diff --git a/configs/yolov3/yolov3_mbv2_416_voc.py b/configs/yolov3/yolov3_mbv2_416_voc.py
index f89ece8b..05e188ff 100644
--- a/configs/yolov3/yolov3_mbv2_416_voc.py
+++ b/configs/yolov3/yolov3_mbv2_416_voc.py
@@ -4,25 +4,18 @@
 # model settings
 num_classes = 20
 data_preprocessor = dict(
-    type='DetDataPreprocessor',
-    mean=[0, 0, 0],
-    std=[255., 255., 255.],
-    bgr_to_rgb=True,
-    pad_size_divisor=32)
+    type='DetDataPreprocessor', mean=[0, 0, 0], std=[255.0, 255.0, 255.0], bgr_to_rgb=True, pad_size_divisor=32
+)
 model = dict(
-type='YOLOV3',
+    type='YOLOV3',
     data_preprocessor=data_preprocessor,
     backbone=dict(
         type='Darknet',
         depth=53,
         out_indices=(3, 4, 5),
         # init_cfg=dict(type='Pretrained', checkpoint='open-mmlab://darknet53')
-        ),
-    neck=dict(
-        type='YOLOV3Neck',
-        num_scales=3,
-        in_channels=[1024, 512, 256],
-        out_channels=[512, 256, 128]),
+    ),
+    neck=dict(type='YOLOV3Neck', num_scales=3, in_channels=[1024, 512, 256], out_channels=[512, 256, 128]),
     bbox_head=dict(
         type='YOLOV3Head',
         num_classes=1,
@@ -30,42 +23,31 @@
         out_channels=[1024, 512, 256],
         anchor_generator=dict(
             type='YOLOAnchorGenerator',
-            base_sizes=[[(116, 90), (156, 198), (373, 326)],
-                        [(30, 61), (62, 45), (59, 119)],
-                        [(10, 13), (16, 30), (33, 23)]],
-            strides=[32, 16, 8]),
+            base_sizes=[
+                [(116, 90), (156, 198), (373, 326)],
+                [(30, 61), (62, 45), (59, 119)],
+                [(10, 13), (16, 30), (33, 23)],
+            ],
+            strides=[32, 16, 8],
+        ),
         bbox_coder=dict(type='YOLOBBoxCoder'),
         featmap_strides=[32, 16, 8],
-        loss_cls=dict(
-            type='CrossEntropyLoss',
-            use_sigmoid=True,
-            loss_weight=1.0,
-            reduction='sum'),
-        loss_conf=dict(
-            type='CrossEntropyLoss',
-            use_sigmoid=True,
-            loss_weight=1.0,
-            reduction='sum'),
-        loss_xy=dict(
-            type='CrossEntropyLoss',
-            use_sigmoid=True,
-            loss_weight=2.0,
-            reduction='sum'),
-        loss_wh=dict(type='MSELoss', loss_weight=2.0, reduction='sum')),
+        loss_cls=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0, reduction='sum'),
+        loss_conf=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0, reduction='sum'),
+        loss_xy=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=2.0, reduction='sum'),
+        loss_wh=dict(type='MSELoss', loss_weight=2.0, reduction='sum'),
+    ),
     # training and testing settings
-    train_cfg=dict(
-        assigner=dict(
-            type='GridAssigner',
-            pos_iou_thr=0.5,
-            neg_iou_thr=0.5,
-            min_pos_iou=0)),
+    train_cfg=dict(assigner=dict(type='GridAssigner', pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=0)),
     test_cfg=dict(
         nms_pre=1000,
         min_bbox_size=0,
         score_thr=0.05,
         conf_thr=0.005,
         nms=dict(type='nms', iou_threshold=0.45),
-        max_per_img=100))
+        max_per_img=100,
+    ),
+)
 # dataset settings
 dataset_type = 'edgelab.CustomCocoDataset'
 # dataset_type = 'CustomVocdataset'
@@ -76,35 +58,23 @@
 batch_size = 16
 workers = 2
 
-backend_args=None
-img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
-                    std=[58.395, 57.12, 57.375],
-                    to_rgb=True)
+backend_args = None
+img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 
 train_pipeline = [
     dict(type='LoadImageFromFile', backend_args=backend_args),
     dict(type='LoadAnnotations', with_bbox=True),
-    dict(
-        type='Expand',
-        mean=data_preprocessor['mean'],
-        to_rgb=data_preprocessor['bgr_to_rgb'],
-        ratio_range=(1, 2)),
-    dict(
-        type='MinIoURandomCrop',
-        min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
-        min_crop_size=0.3),
+    dict(type='Expand', mean=data_preprocessor['mean'], to_rgb=data_preprocessor['bgr_to_rgb'], ratio_range=(1, 2)),
+    dict(type='MinIoURandomCrop', min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9), min_crop_size=0.3),
     dict(type='RandomResize', scale=[(320, 320), (height, width)], keep_ratio=True),
     dict(type='RandomFlip', prob=0.5),
     dict(type='PhotoMetricDistortion'),
-    dict(type='PackDetInputs')
+    dict(type='PackDetInputs'),
 ]
 test_pipeline = [
     dict(type='LoadImageFromFile', backend_args=backend_args),
     dict(type='Resize', scale=(height, width), keep_ratio=True),
     dict(type='LoadAnnotations', with_bbox=True),
-    dict(
-        type='PackDetInputs',
-        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
-                   'scale_factor'))
+    dict(type='PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor')),
 ]
 
@@ -122,7 +92,9 @@
         data_prefix=dict(img='train/images'),
         filter_cfg=dict(filter_empty_gt=True, min_size=32),
         pipeline=train_pipeline,
-        backend_args=backend_args))
+        backend_args=backend_args,
+    ),
+)
 
 val_dataloader = dict(
     batch_size=1,
@@ -138,15 +110,15 @@
         data_prefix=dict(img='val/images'),
         test_mode=True,
         pipeline=test_pipeline,
-        backend_args=backend_args))
+        backend_args=backend_args,
+    ),
+)
 test_dataloader = val_dataloader
 
-#evaluator
+# evaluator
 val_evaluator = dict(
-    type='CocoMetric',
-    ann_file=data_root + 'val/annotations/valid.json',
-    metric='bbox',
-    backend_args=backend_args)
+    type='CocoMetric', ann_file=data_root + 'val/annotations/valid.json', metric='bbox', backend_args=backend_args
+)
 test_evaluator = val_evaluator
 
 train_cfg = dict(max_epochs=200, val_interval=1)
@@ -159,12 +131,13 @@
 optim_wrapper = dict(
     type='OptimWrapper',
     optimizer=dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0005),
-    clip_grad=dict(max_norm=35, norm_type=2))
+    clip_grad=dict(max_norm=35, norm_type=2),
+)
 
 # learning policy
 param_scheduler = [
     dict(type='LinearLR', start_factor=0.1, by_epoch=False, begin=0, end=2000),
-    dict(type='MultiStepLR', by_epoch=True, milestones=[218, 246], gamma=0.1)
+    dict(type='MultiStepLR', by_epoch=True, milestones=[218, 246], gamma=0.1),
 ]
 
 train_cfg = dict(by_epoch=True, max_epochs=70)
@@ -172,12 +145,6 @@
 test_cfg = dict()
 # learning policy
 param_scheduler = [
-    dict(type='LinearLR', begin=0, end=30, start_factor=0.001,
-         by_epoch=False),  # warm-up
-    dict(type='MultiStepLR',
-         begin=1,
-         end=500,
-         milestones=[100, 200, 250],
-         gamma=0.1,
-         by_epoch=True)
+    dict(type='LinearLR', begin=0, end=30, start_factor=0.001, by_epoch=False),  # warm-up
+    dict(type='MultiStepLR', begin=1, end=500, milestones=[100, 200, 250], gamma=0.1, by_epoch=True),
 ]
diff --git a/configs/yolov5/base_arch.py b/configs/yolov5/base_arch.py
index 1c7daea5..ffa53167 100644
--- a/configs/yolov5/base_arch.py
+++ b/configs/yolov5/base_arch.py
@@ -23,7 +23,7 @@
 anchors = [
     [(10, 13), (16, 30), (33, 23)],  # P3/8
     [(30, 61), (62, 45), (59, 119)],  # P4/16
-    [(116, 90), (156, 198), (373, 326)]  # P5/32
+    [(116, 90), (156, 198), (373, 326)],  # P5/32
 ]
 
 # -----train val related-----
@@ -38,7 +38,8 @@
     nms_pre=30000,
     score_thr=0.001,  # Threshold to filter out boxes.
nms=dict(type='nms', iou_threshold=0.65),  # NMS type and threshold
-    max_per_img=300)  # Max number of detections of each image
+    max_per_img=300,
+)  # Max number of detections of each image
 
 # -----data related-----
 height = 192
@@ -60,7 +61,8 @@
     # The image scale of padding should be divided by pad_size_divisor
     size_divisor=32,
     # Additional paddings for pixel scale
-    extra_pad_ratio=0.5)
+    extra_pad_ratio=0.5,
+)
 
 # -----model related-----
 # The scaling factor that controls the depth of the network structure
@@ -77,9 +79,9 @@
 loss_cls_weight = 0.5
 loss_bbox_weight = 0.05
 loss_obj_weight = 1.0
-prior_match_thr = 4.  # Priori box matching threshold
+prior_match_thr = 4.0  # Prior box matching threshold
 # The obj loss weights of the three output layers
-obj_level_weights = [4., 1., 0.4]
+obj_level_weights = [4.0, 1.0, 0.4]
 lr_factor = 0.01  # Learning rate scaling factor
 weight_decay = 0.0005
 # Save model checkpoint and validation intervals
@@ -93,72 +95,70 @@
 # model arch
 model = dict(
     type='mmyolo.YOLODetector',
-    data_preprocessor=dict(type='mmdet.DetDataPreprocessor',
-                           mean=[0., 0., 0.],
-                           std=[255., 255., 255.],
-                           bgr_to_rgb=True),
-    backbone=dict(type='YOLOv5CSPDarknet',
-                  deepen_factor=deepen_factor,
-                  widen_factor=widen_factor,
-                  norm_cfg=norm_cfg,
-                  act_cfg=dict(type='ReLU', inplace=True)),
-    neck=dict(type='YOLOv5PAFPN',
-              deepen_factor=deepen_factor,
-              widen_factor=widen_factor,
-              in_channels=[256, 512, 1024],
-              out_channels=[256, 512, 1024],
-              num_csp_blocks=3,
-              norm_cfg=norm_cfg,
-              act_cfg=dict(type='ReLU', inplace=True)),
+    data_preprocessor=dict(
+        type='mmdet.DetDataPreprocessor', mean=[0.0, 0.0, 0.0], std=[255.0, 255.0, 255.0], bgr_to_rgb=True
+    ),
+    backbone=dict(
+        type='YOLOv5CSPDarknet',
+        deepen_factor=deepen_factor,
+        widen_factor=widen_factor,
+        norm_cfg=norm_cfg,
+        act_cfg=dict(type='ReLU', inplace=True),
+    ),
+    neck=dict(
+        type='YOLOv5PAFPN',
+        deepen_factor=deepen_factor,
+        widen_factor=widen_factor,
+        in_channels=[256, 512, 1024],
+        out_channels=[256, 512, 1024],
+        num_csp_blocks=3,
+        norm_cfg=norm_cfg,
+        act_cfg=dict(type='ReLU', inplace=True),
+    ),
    bbox_head=dict(
        type='edgelab.YOLOV5Head',
-        head_module=dict(type='edgelab.DetHead',
-                         num_classes=num_classes,
-                         in_channels=[256, 512, 1024],
-                         widen_factor=widen_factor,
-                         featmap_strides=strides,
-                         num_base_priors=3),
-        prior_generator=dict(type='mmdet.YOLOAnchorGenerator',
-                             base_sizes=anchors,
-                             strides=strides),
+        head_module=dict(
+            type='edgelab.DetHead',
+            num_classes=num_classes,
+            in_channels=[256, 512, 1024],
+            widen_factor=widen_factor,
+            featmap_strides=strides,
+            num_base_priors=3,
+        ),
+        prior_generator=dict(type='mmdet.YOLOAnchorGenerator', base_sizes=anchors, strides=strides),
        # scaled based on number of detection layers
-        loss_cls=dict(type='mmdet.CrossEntropyLoss',
-                      use_sigmoid=True,
-                      reduction='mean',
-                      loss_weight=loss_cls_weight),
-        loss_bbox=dict(type='IoULoss',
-                       iou_mode='ciou',
-                       bbox_format='xywh',
-                       eps=1e-7,
-                       reduction='mean',
-                       loss_weight=loss_bbox_weight,
-                       return_iou=True),
-        loss_obj=dict(type='mmdet.CrossEntropyLoss',
-                      use_sigmoid=True,
-                      reduction='mean',
-                      loss_weight=loss_obj_weight),
+        loss_cls=dict(type='mmdet.CrossEntropyLoss', use_sigmoid=True, reduction='mean', loss_weight=loss_cls_weight),
+        loss_bbox=dict(
+            type='IoULoss',
+            iou_mode='ciou',
+            bbox_format='xywh',
+            eps=1e-7,
+            reduction='mean',
+            loss_weight=loss_bbox_weight,
+            return_iou=True,
+        ),
+        loss_obj=dict(type='mmdet.CrossEntropyLoss', use_sigmoid=True, reduction='mean', loss_weight=loss_obj_weight),
         
prior_match_thr=prior_match_thr, - obj_level_weights=obj_level_weights), - test_cfg=model_test_cfg) + obj_level_weights=obj_level_weights, + ), + test_cfg=model_test_cfg, +) albu_train_transforms = [ dict(type='Blur', p=0.01), dict(type='MedianBlur', p=0.01), dict(type='ToGray', p=0.01), - dict(type='CLAHE', p=0.01) + dict(type='CLAHE', p=0.01), ] pre_transform = [ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), - dict(type='LoadAnnotations', with_bbox=True) + dict(type='LoadAnnotations', with_bbox=True), ] train_pipeline = [ *pre_transform, - dict(type='Mosaic', - img_scale=img_scale, - pad_val=114.0, - pre_transform=pre_transform), + dict(type='Mosaic', img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform), dict( type='YOLOv5RandomAffine', max_rotate_degree=0.0, @@ -166,103 +166,99 @@ scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), - border_val=(114, 114, 114)), - dict(type='mmdet.Albu', - transforms=albu_train_transforms, - bbox_params=dict(type='BboxParams', - format='pascal_voc', - label_fields=['gt_bboxes_labels', - 'gt_ignore_flags']), - keymap={ - 'img': 'image', - 'gt_bboxes': 'bboxes' - }), + border_val=(114, 114, 114), + ), + dict( + type='mmdet.Albu', + transforms=albu_train_transforms, + bbox_params=dict(type='BboxParams', format='pascal_voc', label_fields=['gt_bboxes_labels', 'gt_ignore_flags']), + keymap={'img': 'image', 'gt_bboxes': 'bboxes'}, + ), dict(type='YOLOv5HSVRandomAug'), dict(type='mmdet.RandomFlip', prob=0.5), - dict(type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', - 'flip_direction')) + dict( + type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction') + ), ] -train_dataloader = dict(batch_size=batch_size, - num_workers=workers, - persistent_workers=persistent_workers, - pin_memory=True, - sampler=dict(type='DefaultSampler', shuffle=True), - dataset=dict(type=dataset_type, - data_root=data_root, - ann_file=train_ann_file, - data_prefix=dict(img=train_data_prefix), - filter_cfg=dict(filter_empty_gt=False, - min_size=32), - pipeline=train_pipeline)) +train_dataloader = dict( + batch_size=batch_size, + num_workers=workers, + persistent_workers=persistent_workers, + pin_memory=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file=train_ann_file, + data_prefix=dict(img=train_data_prefix), + filter_cfg=dict(filter_empty_gt=False, min_size=32), + pipeline=train_pipeline, + ), +) test_pipeline = [ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), dict(type='YOLOv5KeepRatioResize', scale=img_scale), - dict(type='LetterResize', - scale=img_scale, - allow_scale_up=False, - pad_val=dict(img=114)), + dict(type='LetterResize', scale=img_scale, allow_scale_up=False, pad_val=dict(img=114)), dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), - dict(type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor', 'pad_param')) + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', 'pad_param'), + ), ] -val_dataloader = dict(batch_size=val_batch_size_per_gpu, - num_workers=val_num_workers, - persistent_workers=persistent_workers, - pin_memory=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - 
dataset=dict(type=dataset_type, - data_root=data_root, - test_mode=True, - data_prefix=dict(img=val_data_prefix), - ann_file=val_ann_file, - pipeline=test_pipeline, - batch_shapes_cfg=batch_shapes_cfg)) +val_dataloader = dict( + batch_size=val_batch_size_per_gpu, + num_workers=val_num_workers, + persistent_workers=persistent_workers, + pin_memory=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + test_mode=True, + data_prefix=dict(img=val_data_prefix), + ann_file=val_ann_file, + pipeline=test_pipeline, + batch_shapes_cfg=batch_shapes_cfg, + ), +) test_dataloader = val_dataloader param_scheduler = None -optim_wrapper = dict(type='OptimWrapper', - optimizer=dict(type='SGD', - lr=base_lr, - momentum=0.937, - weight_decay=weight_decay, - nesterov=True, - batch_size_per_gpu=16), - constructor='YOLOv5OptimizerConstructor') +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict( + type='SGD', lr=base_lr, momentum=0.937, weight_decay=weight_decay, nesterov=True, batch_size_per_gpu=16 + ), + constructor='YOLOv5OptimizerConstructor', +) -default_hooks = dict(param_scheduler=dict(type='YOLOv5ParamSchedulerHook', - scheduler_type='linear', - lr_factor=lr_factor, - max_epochs=max_epochs), - checkpoint=dict(type='CheckpointHook', - interval=save_checkpoint_intervals, - save_best='auto', - max_keep_ckpts=max_keep_ckpts)) +default_hooks = dict( + param_scheduler=dict( + type='YOLOv5ParamSchedulerHook', scheduler_type='linear', lr_factor=lr_factor, max_epochs=max_epochs + ), + checkpoint=dict( + type='CheckpointHook', interval=save_checkpoint_intervals, save_best='auto', max_keep_ckpts=max_keep_ckpts + ), +) custom_hooks = [ - dict(type='EMAHook', - ema_type='ExpMomentumEMA', - momentum=0.0001, - update_buffers=True, - strict_load=False, - priority=49) + dict( + type='EMAHook', ema_type='ExpMomentumEMA', momentum=0.0001, update_buffers=True, strict_load=False, priority=49 + ) ] -val_evaluator = dict(type='mmdet.CocoMetric', - proposal_nums=(100, 1, 10), - ann_file=data_root + val_ann_file, - metric='bbox') +val_evaluator = dict( + type='mmdet.CocoMetric', proposal_nums=(100, 1, 10), ann_file=data_root + val_ann_file, metric='bbox' +) test_evaluator = val_evaluator -train_cfg = dict(type='EpochBasedTrainLoop', - max_epochs=max_epochs, - val_interval=save_checkpoint_intervals, - _delete_=True) +train_cfg = dict( + type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=save_checkpoint_intervals, _delete_=True +) val_cfg = dict(type='ValLoop') -test_cfg = dict(type='TestLoop') \ No newline at end of file +test_cfg = dict(type='TestLoop') diff --git a/configs/yolov5/yolov5_l_1xb16_300e_coco.py b/configs/yolov5/yolov5_l_1xb16_300e_coco.py index 14e7d538..02ab4ef2 100644 --- a/configs/yolov5/yolov5_l_1xb16_300e_coco.py +++ b/configs/yolov5/yolov5_l_1xb16_300e_coco.py @@ -3,31 +3,31 @@ anchors = [ [(10, 13), (16, 30), (33, 23)], # P3/8 [(30, 61), (62, 45), (59, 119)], # P4/16 - [(116, 90), (156, 198), (373, 326)] # P5/32 + [(116, 90), (156, 198), (373, 326)], # P5/32 ] -#======================modify start====================== +# ======================modify start====================== -#model +# model strides = [8, 16, 32] num_classes = 80 deepen_factor = 1.0 widen_factor = 1.0 -#datasets +# datasets data_root = '' height = 192 width = 192 batch_size = 16 workers = 2 -#training +# training lr = 0.01 epochs = 300 # Maximum training epochs -#======================modify end====================== +# 
======================modify end====================== -#======================model================== +# ======================model================== model = dict( type='mmyolo.YOLODetector', backbone=dict( @@ -40,14 +40,16 @@ deepen_factor=deepen_factor, widen_factor=widen_factor, ), - bbox_head=dict(head_module=dict( - num_classes=num_classes, - in_channels=[256, 512, 1024], - widen_factor=widen_factor, - ), ), + bbox_head=dict( + head_module=dict( + num_classes=num_classes, + in_channels=[256, 512, 1024], + widen_factor=widen_factor, + ), + ), ) -#======================datasets================== +# ======================datasets================== img_scale = (width, height) affine_scale = 0.5 persistent_workers = True @@ -64,26 +66,24 @@ # The image scale of padding should be divided by pad_size_divisor size_divisor=32, # Additional paddings for pixel scale - extra_pad_ratio=0.5) + extra_pad_ratio=0.5, +) albu_train_transforms = [ dict(type='Blur', p=0.01), dict(type='MedianBlur', p=0.01), dict(type='ToGray', p=0.01), - dict(type='CLAHE', p=0.01) + dict(type='CLAHE', p=0.01), ] pre_transform = [ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), - dict(type='LoadAnnotations', with_bbox=True) + dict(type='LoadAnnotations', with_bbox=True), ] train_pipeline = [ *pre_transform, - dict(type='Mosaic', - img_scale=img_scale, - pad_val=114.0, - pre_transform=pre_transform), + dict(type='Mosaic', img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform), dict( type='YOLOv5RandomAffine', max_rotate_degree=0.0, @@ -91,77 +91,77 @@ scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), - border_val=(114, 114, 114)), - dict(type='mmdet.Albu', - transforms=albu_train_transforms, - bbox_params=dict(type='BboxParams', - format='pascal_voc', - label_fields=['gt_bboxes_labels', - 'gt_ignore_flags']), - keymap={ - 'img': 'image', - 'gt_bboxes': 'bboxes' - }), + border_val=(114, 114, 114), + ), + dict( + type='mmdet.Albu', + transforms=albu_train_transforms, + bbox_params=dict(type='BboxParams', format='pascal_voc', label_fields=['gt_bboxes_labels', 'gt_ignore_flags']), + keymap={'img': 'image', 'gt_bboxes': 'bboxes'}, + ), dict(type='YOLOv5HSVRandomAug'), dict(type='mmdet.RandomFlip', prob=0.5), - dict(type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', - 'flip_direction')) + dict( + type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction') + ), ] -train_dataloader = dict(batch_size=batch_size, - num_workers=workers, - persistent_workers=persistent_workers, - pin_memory=True, - sampler=dict(type='DefaultSampler', shuffle=True), - dataset=dict(type=dataset_type, - data_root=data_root, - ann_file=train_ann_file, - data_prefix=dict(img=train_data_prefix), - filter_cfg=dict(filter_empty_gt=False, - min_size=32), - pipeline=train_pipeline)) +train_dataloader = dict( + batch_size=batch_size, + num_workers=workers, + persistent_workers=persistent_workers, + pin_memory=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file=train_ann_file, + data_prefix=dict(img=train_data_prefix), + filter_cfg=dict(filter_empty_gt=False, min_size=32), + pipeline=train_pipeline, + ), +) test_pipeline = [ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), dict(type='YOLOv5KeepRatioResize', scale=img_scale), - 
dict(type='LetterResize', - scale=img_scale, - allow_scale_up=False, - pad_val=dict(img=114)), + dict(type='LetterResize', scale=img_scale, allow_scale_up=False, pad_val=dict(img=114)), dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), - dict(type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor', 'pad_param')) + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', 'pad_param'), + ), ] -val_dataloader = dict(batch_size=batch_size, - num_workers=workers, - persistent_workers=persistent_workers, - pin_memory=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict(type=dataset_type, - data_root=data_root, - test_mode=True, - data_prefix=dict(img=val_data_prefix), - ann_file=val_ann_file, - pipeline=test_pipeline, - batch_shapes_cfg=batch_shapes_cfg)) +val_dataloader = dict( + batch_size=batch_size, + num_workers=workers, + persistent_workers=persistent_workers, + pin_memory=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + test_mode=True, + data_prefix=dict(img=val_data_prefix), + ann_file=val_ann_file, + pipeline=test_pipeline, + batch_shapes_cfg=batch_shapes_cfg, + ), +) test_dataloader = val_dataloader -#======================training================== -optim_wrapper = dict(type='OptimWrapper', - optimizer=dict(type='SGD', - lr=lr, - momentum=0.937, - weight_decay=0.0005, - nesterov=True, - batch_size_per_gpu=batch_size), - constructor='YOLOv5OptimizerConstructor') -val_evaluator = dict(type='mmdet.CocoMetric', - proposal_nums=(100, 1, 10), - ann_file=data_root + val_ann_file, - metric='bbox') -test_evaluator = val_evaluator \ No newline at end of file +# ======================training================== +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict( + type='SGD', lr=lr, momentum=0.937, weight_decay=0.0005, nesterov=True, batch_size_per_gpu=batch_size + ), + constructor='YOLOv5OptimizerConstructor', +) +val_evaluator = dict( + type='mmdet.CocoMetric', proposal_nums=(100, 1, 10), ann_file=data_root + val_ann_file, metric='bbox' +) +test_evaluator = val_evaluator diff --git a/configs/yolov5/yolov5_m_1xb16_300e_coco.py b/configs/yolov5/yolov5_m_1xb16_300e_coco.py index dc8dc890..053f76fe 100644 --- a/configs/yolov5/yolov5_m_1xb16_300e_coco.py +++ b/configs/yolov5/yolov5_m_1xb16_300e_coco.py @@ -3,31 +3,31 @@ anchors = [ [(10, 13), (16, 30), (33, 23)], # P3/8 [(30, 61), (62, 45), (59, 119)], # P4/16 - [(116, 90), (156, 198), (373, 326)] # P5/32 + [(116, 90), (156, 198), (373, 326)], # P5/32 ] -#======================modify start====================== +# ======================modify start====================== -#model +# model strides = [8, 16, 32] num_classes = 80 deepen_factor = 0.67 widen_factor = 0.75 -#datasets +# datasets data_root = '' height = 192 width = 192 batch_size = 16 workers = 2 -#training +# training lr = 0.01 epochs = 300 # Maximum training epochs -#======================modify end====================== +# ======================modify end====================== -#======================model================== +# ======================model================== model = dict( type='mmyolo.YOLODetector', backbone=dict( @@ -40,14 +40,16 @@ deepen_factor=deepen_factor, widen_factor=widen_factor, ), - bbox_head=dict(head_module=dict( - num_classes=num_classes, - in_channels=[256, 512, 1024], - widen_factor=widen_factor, - 
), ), + bbox_head=dict( + head_module=dict( + num_classes=num_classes, + in_channels=[256, 512, 1024], + widen_factor=widen_factor, + ), + ), ) -#======================datasets================== +# ======================datasets================== img_scale = (width, height) affine_scale = 0.5 persistent_workers = True @@ -64,26 +66,24 @@ # The image scale of padding should be divided by pad_size_divisor size_divisor=32, # Additional paddings for pixel scale - extra_pad_ratio=0.5) + extra_pad_ratio=0.5, +) albu_train_transforms = [ dict(type='Blur', p=0.01), dict(type='MedianBlur', p=0.01), dict(type='ToGray', p=0.01), - dict(type='CLAHE', p=0.01) + dict(type='CLAHE', p=0.01), ] pre_transform = [ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), - dict(type='LoadAnnotations', with_bbox=True) + dict(type='LoadAnnotations', with_bbox=True), ] train_pipeline = [ *pre_transform, - dict(type='Mosaic', - img_scale=img_scale, - pad_val=114.0, - pre_transform=pre_transform), + dict(type='Mosaic', img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform), dict( type='YOLOv5RandomAffine', max_rotate_degree=0.0, @@ -91,77 +91,77 @@ scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), - border_val=(114, 114, 114)), - dict(type='mmdet.Albu', - transforms=albu_train_transforms, - bbox_params=dict(type='BboxParams', - format='pascal_voc', - label_fields=['gt_bboxes_labels', - 'gt_ignore_flags']), - keymap={ - 'img': 'image', - 'gt_bboxes': 'bboxes' - }), + border_val=(114, 114, 114), + ), + dict( + type='mmdet.Albu', + transforms=albu_train_transforms, + bbox_params=dict(type='BboxParams', format='pascal_voc', label_fields=['gt_bboxes_labels', 'gt_ignore_flags']), + keymap={'img': 'image', 'gt_bboxes': 'bboxes'}, + ), dict(type='YOLOv5HSVRandomAug'), dict(type='mmdet.RandomFlip', prob=0.5), - dict(type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', - 'flip_direction')) + dict( + type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction') + ), ] -train_dataloader = dict(batch_size=batch_size, - num_workers=workers, - persistent_workers=persistent_workers, - pin_memory=True, - sampler=dict(type='DefaultSampler', shuffle=True), - dataset=dict(type=dataset_type, - data_root=data_root, - ann_file=train_ann_file, - data_prefix=dict(img=train_data_prefix), - filter_cfg=dict(filter_empty_gt=False, - min_size=32), - pipeline=train_pipeline)) +train_dataloader = dict( + batch_size=batch_size, + num_workers=workers, + persistent_workers=persistent_workers, + pin_memory=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file=train_ann_file, + data_prefix=dict(img=train_data_prefix), + filter_cfg=dict(filter_empty_gt=False, min_size=32), + pipeline=train_pipeline, + ), +) test_pipeline = [ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), dict(type='YOLOv5KeepRatioResize', scale=img_scale), - dict(type='LetterResize', - scale=img_scale, - allow_scale_up=False, - pad_val=dict(img=114)), + dict(type='LetterResize', scale=img_scale, allow_scale_up=False, pad_val=dict(img=114)), dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), - dict(type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor', 'pad_param')) + dict( + type='mmdet.PackDetInputs', + 
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', 'pad_param'), + ), ] -val_dataloader = dict(batch_size=batch_size, - num_workers=workers, - persistent_workers=persistent_workers, - pin_memory=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict(type=dataset_type, - data_root=data_root, - test_mode=True, - data_prefix=dict(img=val_data_prefix), - ann_file=val_ann_file, - pipeline=test_pipeline, - batch_shapes_cfg=batch_shapes_cfg)) +val_dataloader = dict( + batch_size=batch_size, + num_workers=workers, + persistent_workers=persistent_workers, + pin_memory=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + test_mode=True, + data_prefix=dict(img=val_data_prefix), + ann_file=val_ann_file, + pipeline=test_pipeline, + batch_shapes_cfg=batch_shapes_cfg, + ), +) test_dataloader = val_dataloader -#======================training================== -optim_wrapper = dict(type='OptimWrapper', - optimizer=dict(type='SGD', - lr=lr, - momentum=0.937, - weight_decay=0.0005, - nesterov=True, - batch_size_per_gpu=batch_size), - constructor='YOLOv5OptimizerConstructor') -val_evaluator = dict(type='mmdet.CocoMetric', - proposal_nums=(100, 1, 10), - ann_file=data_root + val_ann_file, - metric='bbox') -test_evaluator = val_evaluator \ No newline at end of file +# ======================training================== +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict( + type='SGD', lr=lr, momentum=0.937, weight_decay=0.0005, nesterov=True, batch_size_per_gpu=batch_size + ), + constructor='YOLOv5OptimizerConstructor', +) +val_evaluator = dict( + type='mmdet.CocoMetric', proposal_nums=(100, 1, 10), ann_file=data_root + val_ann_file, metric='bbox' +) +test_evaluator = val_evaluator diff --git a/configs/yolov5/yolov5_n_1xb16_300e_coco.py b/configs/yolov5/yolov5_n_1xb16_300e_coco.py index 8d561d4e..02f48097 100644 --- a/configs/yolov5/yolov5_n_1xb16_300e_coco.py +++ b/configs/yolov5/yolov5_n_1xb16_300e_coco.py @@ -3,31 +3,31 @@ anchors = [ [(10, 13), (16, 30), (33, 23)], # P3/8 [(30, 61), (62, 45), (59, 119)], # P4/16 - [(116, 90), (156, 198), (373, 326)] # P5/32 + [(116, 90), (156, 198), (373, 326)], # P5/32 ] -#======================modify start====================== +# ======================modify start====================== -#model +# model strides = [8, 16, 32] num_classes = 80 deepen_factor = 0.33 widen_factor = 0.25 -#datasets +# datasets data_root = '' height = 192 width = 192 batch_size = 16 workers = 2 -#training +# training lr = 0.01 epochs = 300 # Maximum training epochs -#======================modify end====================== +# ======================modify end====================== -#======================model================== +# ======================model================== model = dict( type='mmyolo.YOLODetector', backbone=dict( @@ -40,14 +40,16 @@ deepen_factor=deepen_factor, widen_factor=widen_factor, ), - bbox_head=dict(head_module=dict( - num_classes=num_classes, - in_channels=[256, 512, 1024], - widen_factor=widen_factor, - ), ), + bbox_head=dict( + head_module=dict( + num_classes=num_classes, + in_channels=[256, 512, 1024], + widen_factor=widen_factor, + ), + ), ) -#======================datasets================== +# ======================datasets================== img_scale = (width, height) affine_scale = 0.5 persistent_workers = True @@ -64,26 +66,24 @@ # The image scale of padding should be divided by pad_size_divisor 
size_divisor=32, # Additional paddings for pixel scale - extra_pad_ratio=0.5) + extra_pad_ratio=0.5, +) albu_train_transforms = [ dict(type='Blur', p=0.01), dict(type='MedianBlur', p=0.01), dict(type='ToGray', p=0.01), - dict(type='CLAHE', p=0.01) + dict(type='CLAHE', p=0.01), ] pre_transform = [ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), - dict(type='LoadAnnotations', with_bbox=True) + dict(type='LoadAnnotations', with_bbox=True), ] train_pipeline = [ *pre_transform, - dict(type='Mosaic', - img_scale=img_scale, - pad_val=114.0, - pre_transform=pre_transform), + dict(type='Mosaic', img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform), dict( type='YOLOv5RandomAffine', max_rotate_degree=0.0, @@ -91,77 +91,77 @@ scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), - border_val=(114, 114, 114)), - dict(type='mmdet.Albu', - transforms=albu_train_transforms, - bbox_params=dict(type='BboxParams', - format='pascal_voc', - label_fields=['gt_bboxes_labels', - 'gt_ignore_flags']), - keymap={ - 'img': 'image', - 'gt_bboxes': 'bboxes' - }), + border_val=(114, 114, 114), + ), + dict( + type='mmdet.Albu', + transforms=albu_train_transforms, + bbox_params=dict(type='BboxParams', format='pascal_voc', label_fields=['gt_bboxes_labels', 'gt_ignore_flags']), + keymap={'img': 'image', 'gt_bboxes': 'bboxes'}, + ), dict(type='YOLOv5HSVRandomAug'), dict(type='mmdet.RandomFlip', prob=0.5), - dict(type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', - 'flip_direction')) + dict( + type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction') + ), ] -train_dataloader = dict(batch_size=batch_size, - num_workers=workers, - persistent_workers=persistent_workers, - pin_memory=True, - sampler=dict(type='DefaultSampler', shuffle=True), - dataset=dict(type=dataset_type, - data_root=data_root, - ann_file=train_ann_file, - data_prefix=dict(img=train_data_prefix), - filter_cfg=dict(filter_empty_gt=False, - min_size=32), - pipeline=train_pipeline)) +train_dataloader = dict( + batch_size=batch_size, + num_workers=workers, + persistent_workers=persistent_workers, + pin_memory=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file=train_ann_file, + data_prefix=dict(img=train_data_prefix), + filter_cfg=dict(filter_empty_gt=False, min_size=32), + pipeline=train_pipeline, + ), +) test_pipeline = [ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), dict(type='YOLOv5KeepRatioResize', scale=img_scale), - dict(type='LetterResize', - scale=img_scale, - allow_scale_up=False, - pad_val=dict(img=114)), + dict(type='LetterResize', scale=img_scale, allow_scale_up=False, pad_val=dict(img=114)), dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), - dict(type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor', 'pad_param')) + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', 'pad_param'), + ), ] -val_dataloader = dict(batch_size=batch_size, - num_workers=workers, - persistent_workers=persistent_workers, - pin_memory=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict(type=dataset_type, - data_root=data_root, - test_mode=True, - data_prefix=dict(img=val_data_prefix), - 
ann_file=val_ann_file, - pipeline=test_pipeline, - batch_shapes_cfg=batch_shapes_cfg)) +val_dataloader = dict( + batch_size=batch_size, + num_workers=workers, + persistent_workers=persistent_workers, + pin_memory=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + test_mode=True, + data_prefix=dict(img=val_data_prefix), + ann_file=val_ann_file, + pipeline=test_pipeline, + batch_shapes_cfg=batch_shapes_cfg, + ), +) test_dataloader = val_dataloader -#======================training================== -optim_wrapper = dict(type='OptimWrapper', - optimizer=dict(type='SGD', - lr=lr, - momentum=0.937, - weight_decay=0.0005, - nesterov=True, - batch_size_per_gpu=batch_size), - constructor='YOLOv5OptimizerConstructor') -val_evaluator = dict(type='mmdet.CocoMetric', - proposal_nums=(100, 1, 10), - ann_file=data_root + val_ann_file, - metric='bbox') -test_evaluator = val_evaluator \ No newline at end of file +# ======================training================== +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict( + type='SGD', lr=lr, momentum=0.937, weight_decay=0.0005, nesterov=True, batch_size_per_gpu=batch_size + ), + constructor='YOLOv5OptimizerConstructor', +) +val_evaluator = dict( + type='mmdet.CocoMetric', proposal_nums=(100, 1, 10), ann_file=data_root + val_ann_file, metric='bbox' +) +test_evaluator = val_evaluator diff --git a/configs/yolov5/yolov5_s_1xb16_300e_coco.py b/configs/yolov5/yolov5_s_1xb16_300e_coco.py index 049d8853..4cf5652f 100644 --- a/configs/yolov5/yolov5_s_1xb16_300e_coco.py +++ b/configs/yolov5/yolov5_s_1xb16_300e_coco.py @@ -3,31 +3,31 @@ anchors = [ [(10, 13), (16, 30), (33, 23)], # P3/8 [(30, 61), (62, 45), (59, 119)], # P4/16 - [(116, 90), (156, 198), (373, 326)] # P5/32 + [(116, 90), (156, 198), (373, 326)], # P5/32 ] -#======================modify start====================== +# ======================modify start====================== -#model +# model strides = [8, 16, 32] num_classes = 80 deepen_factor = 0.33 widen_factor = 0.5 -#datasets +# datasets data_root = '' height = 192 width = 192 batch_size = 16 workers = 2 -#training +# training lr = 0.01 epochs = 300 # Maximum training epochs -#======================modify end====================== +# ======================modify end====================== -#======================model================== +# ======================model================== model = dict( type='mmyolo.YOLODetector', backbone=dict( @@ -40,14 +40,16 @@ deepen_factor=deepen_factor, widen_factor=widen_factor, ), - bbox_head=dict(head_module=dict( - num_classes=num_classes, - in_channels=[256, 512, 1024], - widen_factor=widen_factor, - ), ), + bbox_head=dict( + head_module=dict( + num_classes=num_classes, + in_channels=[256, 512, 1024], + widen_factor=widen_factor, + ), + ), ) -#======================datasets================== +# ======================datasets================== img_scale = (width, height) affine_scale = 0.5 persistent_workers = True @@ -64,26 +66,24 @@ # The image scale of padding should be divided by pad_size_divisor size_divisor=32, # Additional paddings for pixel scale - extra_pad_ratio=0.5) + extra_pad_ratio=0.5, +) albu_train_transforms = [ dict(type='Blur', p=0.01), dict(type='MedianBlur', p=0.01), dict(type='ToGray', p=0.01), - dict(type='CLAHE', p=0.01) + dict(type='CLAHE', p=0.01), ] pre_transform = [ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), - dict(type='LoadAnnotations', with_bbox=True) + 
dict(type='LoadAnnotations', with_bbox=True), ] train_pipeline = [ *pre_transform, - dict(type='Mosaic', - img_scale=img_scale, - pad_val=114.0, - pre_transform=pre_transform), + dict(type='Mosaic', img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform), dict( type='YOLOv5RandomAffine', max_rotate_degree=0.0, @@ -91,77 +91,77 @@ scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), - border_val=(114, 114, 114)), - dict(type='mmdet.Albu', - transforms=albu_train_transforms, - bbox_params=dict(type='BboxParams', - format='pascal_voc', - label_fields=['gt_bboxes_labels', - 'gt_ignore_flags']), - keymap={ - 'img': 'image', - 'gt_bboxes': 'bboxes' - }), + border_val=(114, 114, 114), + ), + dict( + type='mmdet.Albu', + transforms=albu_train_transforms, + bbox_params=dict(type='BboxParams', format='pascal_voc', label_fields=['gt_bboxes_labels', 'gt_ignore_flags']), + keymap={'img': 'image', 'gt_bboxes': 'bboxes'}, + ), dict(type='YOLOv5HSVRandomAug'), dict(type='mmdet.RandomFlip', prob=0.5), - dict(type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', - 'flip_direction')) + dict( + type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction') + ), ] -train_dataloader = dict(batch_size=batch_size, - num_workers=workers, - persistent_workers=persistent_workers, - pin_memory=True, - sampler=dict(type='DefaultSampler', shuffle=True), - dataset=dict(type=dataset_type, - data_root=data_root, - ann_file=train_ann_file, - data_prefix=dict(img=train_data_prefix), - filter_cfg=dict(filter_empty_gt=False, - min_size=32), - pipeline=train_pipeline)) +train_dataloader = dict( + batch_size=batch_size, + num_workers=workers, + persistent_workers=persistent_workers, + pin_memory=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file=train_ann_file, + data_prefix=dict(img=train_data_prefix), + filter_cfg=dict(filter_empty_gt=False, min_size=32), + pipeline=train_pipeline, + ), +) test_pipeline = [ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), dict(type='YOLOv5KeepRatioResize', scale=img_scale), - dict(type='LetterResize', - scale=img_scale, - allow_scale_up=False, - pad_val=dict(img=114)), + dict(type='LetterResize', scale=img_scale, allow_scale_up=False, pad_val=dict(img=114)), dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), - dict(type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor', 'pad_param')) + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', 'pad_param'), + ), ] -val_dataloader = dict(batch_size=batch_size, - num_workers=workers, - persistent_workers=persistent_workers, - pin_memory=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict(type=dataset_type, - data_root=data_root, - test_mode=True, - data_prefix=dict(img=val_data_prefix), - ann_file=val_ann_file, - pipeline=test_pipeline, - batch_shapes_cfg=batch_shapes_cfg)) +val_dataloader = dict( + batch_size=batch_size, + num_workers=workers, + persistent_workers=persistent_workers, + pin_memory=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + test_mode=True, + data_prefix=dict(img=val_data_prefix), + 
ann_file=val_ann_file, + pipeline=test_pipeline, + batch_shapes_cfg=batch_shapes_cfg, + ), +) test_dataloader = val_dataloader -#======================training================== -optim_wrapper = dict(type='OptimWrapper', - optimizer=dict(type='SGD', - lr=lr, - momentum=0.937, - weight_decay=0.0005, - nesterov=True, - batch_size_per_gpu=batch_size), - constructor='YOLOv5OptimizerConstructor') -val_evaluator = dict(type='mmdet.CocoMetric', - proposal_nums=(100, 1, 10), - ann_file=data_root + val_ann_file, - metric='bbox') -test_evaluator = val_evaluator \ No newline at end of file +# ======================training================== +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict( + type='SGD', lr=lr, momentum=0.937, weight_decay=0.0005, nesterov=True, batch_size_per_gpu=batch_size + ), + constructor='YOLOv5OptimizerConstructor', +) +val_evaluator = dict( + type='mmdet.CocoMetric', proposal_nums=(100, 1, 10), ann_file=data_root + val_ann_file, metric='bbox' +) +test_evaluator = val_evaluator diff --git a/configs/yolov5/yolov5_tiny_1xb16_300e_coco.py b/configs/yolov5/yolov5_tiny_1xb16_300e_coco.py index 96bce187..ae04e464 100644 --- a/configs/yolov5/yolov5_tiny_1xb16_300e_coco.py +++ b/configs/yolov5/yolov5_tiny_1xb16_300e_coco.py @@ -3,31 +3,31 @@ anchors = [ [(10, 13), (16, 30), (33, 23)], # P3/8 [(30, 61), (62, 45), (59, 119)], # P4/16 - [(116, 90), (156, 198), (373, 326)] # P5/32 + [(116, 90), (156, 198), (373, 326)], # P5/32 ] -#======================modify start====================== +# ======================modify start====================== -#model +# model strides = [8, 16, 32] num_classes = 80 deepen_factor = 0.33 widen_factor = 0.15 -#datasets +# datasets data_root = '' height = 192 width = 192 batch_size = 16 workers = 2 -#training +# training lr = 0.01 epochs = 300 # Maximum training epochs -#======================modify end====================== +# ======================modify end====================== -#======================model================== +# ======================model================== model = dict( type='mmyolo.YOLODetector', backbone=dict( @@ -40,14 +40,16 @@ deepen_factor=deepen_factor, widen_factor=widen_factor, ), - bbox_head=dict(head_module=dict( - num_classes=num_classes, - in_channels=[256, 512, 1024], - widen_factor=widen_factor, - ), ), + bbox_head=dict( + head_module=dict( + num_classes=num_classes, + in_channels=[256, 512, 1024], + widen_factor=widen_factor, + ), + ), ) -#======================datasets================== +# ======================datasets================== img_scale = (width, height) affine_scale = 0.5 persistent_workers = True @@ -64,26 +66,24 @@ # The image scale of padding should be divided by pad_size_divisor size_divisor=32, # Additional paddings for pixel scale - extra_pad_ratio=0.5) + extra_pad_ratio=0.5, +) albu_train_transforms = [ dict(type='Blur', p=0.01), dict(type='MedianBlur', p=0.01), dict(type='ToGray', p=0.01), - dict(type='CLAHE', p=0.01) + dict(type='CLAHE', p=0.01), ] pre_transform = [ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), - dict(type='LoadAnnotations', with_bbox=True) + dict(type='LoadAnnotations', with_bbox=True), ] train_pipeline = [ *pre_transform, - dict(type='Mosaic', - img_scale=img_scale, - pad_val=114.0, - pre_transform=pre_transform), + dict(type='Mosaic', img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform), dict( type='YOLOv5RandomAffine', max_rotate_degree=0.0, @@ -91,78 +91,78 @@ scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), # img_scale 
is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), - border_val=(114, 114, 114)), - dict(type='mmdet.Albu', - transforms=albu_train_transforms, - bbox_params=dict(type='BboxParams', - format='pascal_voc', - label_fields=['gt_bboxes_labels', - 'gt_ignore_flags']), - keymap={ - 'img': 'image', - 'gt_bboxes': 'bboxes' - }), + border_val=(114, 114, 114), + ), + dict( + type='mmdet.Albu', + transforms=albu_train_transforms, + bbox_params=dict(type='BboxParams', format='pascal_voc', label_fields=['gt_bboxes_labels', 'gt_ignore_flags']), + keymap={'img': 'image', 'gt_bboxes': 'bboxes'}, + ), dict(type='YOLOv5HSVRandomAug'), dict(type='mmdet.RandomFlip', prob=0.5), - dict(type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', - 'flip_direction')) + dict( + type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction') + ), ] -train_dataloader = dict(batch_size=batch_size, - num_workers=workers, - persistent_workers=persistent_workers, - pin_memory=True, - sampler=dict(type='DefaultSampler', shuffle=True), - dataset=dict(type=dataset_type, - data_root=data_root, - ann_file=train_ann_file, - data_prefix=dict(img=train_data_prefix), - filter_cfg=dict(filter_empty_gt=False, - min_size=32), - pipeline=train_pipeline)) +train_dataloader = dict( + batch_size=batch_size, + num_workers=workers, + persistent_workers=persistent_workers, + pin_memory=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file=train_ann_file, + data_prefix=dict(img=train_data_prefix), + filter_cfg=dict(filter_empty_gt=False, min_size=32), + pipeline=train_pipeline, + ), +) test_pipeline = [ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), dict(type='YOLOv5KeepRatioResize', scale=img_scale), - dict(type='LetterResize', - scale=img_scale, - allow_scale_up=False, - pad_val=dict(img=114)), + dict(type='LetterResize', scale=img_scale, allow_scale_up=False, pad_val=dict(img=114)), dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), - dict(type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor', 'pad_param')) + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', 'pad_param'), + ), ] -val_dataloader = dict(batch_size=batch_size, - num_workers=workers, - persistent_workers=persistent_workers, - pin_memory=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict(type=dataset_type, - data_root=data_root, - test_mode=True, - data_prefix=dict(img=val_data_prefix), - ann_file=val_ann_file, - pipeline=test_pipeline, - batch_shapes_cfg=batch_shapes_cfg)) +val_dataloader = dict( + batch_size=batch_size, + num_workers=workers, + persistent_workers=persistent_workers, + pin_memory=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + test_mode=True, + data_prefix=dict(img=val_data_prefix), + ann_file=val_ann_file, + pipeline=test_pipeline, + batch_shapes_cfg=batch_shapes_cfg, + ), +) test_dataloader = val_dataloader -#======================training================== -optim_wrapper = dict(type='OptimWrapper', - optimizer=dict(type='SGD', - lr=lr, - momentum=0.937, - weight_decay=0.0005, - nesterov=True, - batch_size_per_gpu=batch_size), - constructor='YOLOv5OptimizerConstructor') - -val_evaluator = 
dict(type='mmdet.CocoMetric', - proposal_nums=(100, 1, 10), - ann_file=data_root + val_ann_file, - metric='bbox') +# ======================training================== +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict( + type='SGD', lr=lr, momentum=0.937, weight_decay=0.0005, nesterov=True, batch_size_per_gpu=batch_size + ), + constructor='YOLOv5OptimizerConstructor', +) + +val_evaluator = dict( + type='mmdet.CocoMetric', proposal_nums=(100, 1, 10), ann_file=data_root + val_ann_file, metric='bbox' +) test_evaluator = val_evaluator diff --git a/configs/yolox/base_arch.py b/configs/yolox/base_arch.py index 35fbc236..7a39436d 100644 --- a/configs/yolox/base_arch.py +++ b/configs/yolox/base_arch.py @@ -103,7 +103,8 @@ a=2.23606797749979, # math.sqrt(5) distribution='uniform', mode='fan_in', - nonlinearity='leaky_relu'), + nonlinearity='leaky_relu', + ), data_preprocessor=dict( type="mmdet.DetDataPreprocessor", mean=[0.0, 0.0, 0.0], @@ -161,9 +162,7 @@ reduction="sum", loss_weight=loss_obj_weight, ), - loss_bbox_aux=dict( - type="mmdet.L1Loss", reduction="sum", loss_weight=loss_bbox_aux_weight - ), + loss_bbox_aux=dict(type="mmdet.L1Loss", reduction="sum", loss_weight=loss_bbox_aux_weight), ), train_cfg=dict( assigner=dict( @@ -189,9 +188,7 @@ train_pipeline = [ *pre_transform, - dict( - type="Mosaic", img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform - ), + dict(type="Mosaic", img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform), dict( type="YOLOv5RandomAffine", max_rotate_degree=0.0, diff --git a/configs/yolox/yolox_tiny_1xb16_300e_coco.py b/configs/yolox/yolox_tiny_1xb16_300e_coco.py index 9bd9b731..f3336ddd 100644 --- a/configs/yolox/yolox_tiny_1xb16_300e_coco.py +++ b/configs/yolox/yolox_tiny_1xb16_300e_coco.py @@ -1,42 +1,35 @@ _base_ = ["./base_arch.py"] -#======================modify start====================== +# ======================modify start====================== -#model +# model num_classes = 11 deepen_factor = 0.33 widen_factor = 0.125 use_depthwise = True -#datasets +# datasets data_root = 'datasets/digital_meter/' height = 640 width = 640 batch_size = 16 workers = 2 -#training +# training lr = 0.01 epochs = 300 # Maximum training epochs -#======================modify end====================== +# ======================modify end====================== -#======================model================== +# ======================model================== # =======================Unmodified in most cases================== # model settings model = dict( - backbone=dict( - deepen_factor=deepen_factor, - widen_factor=widen_factor, - use_depthwise=use_depthwise), - neck=dict( - deepen_factor=deepen_factor, - widen_factor=widen_factor, - use_depthwise=use_depthwise), - bbox_head=dict( - head_module=dict( - widen_factor=widen_factor, use_depthwise=use_depthwise, num_classes=num_classes))) -#======================datasets================== + backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor, use_depthwise=use_depthwise), + neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor, use_depthwise=use_depthwise), + bbox_head=dict(head_module=dict(widen_factor=widen_factor, use_depthwise=use_depthwise, num_classes=num_classes)), +) +# ======================datasets================== img_scale = (width, height) affine_scale = 0.5 persistent_workers = True @@ -53,26 +46,24 @@ # The image scale of padding should be divided by pad_size_divisor size_divisor=32, # Additional paddings for pixel scale - extra_pad_ratio=0.5) + 
extra_pad_ratio=0.5, +) albu_train_transforms = [ dict(type='Blur', p=0.01), dict(type='MedianBlur', p=0.01), dict(type='ToGray', p=0.01), - dict(type='CLAHE', p=0.01) + dict(type='CLAHE', p=0.01), ] pre_transform = [ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), - dict(type='LoadAnnotations', with_bbox=True) + dict(type='LoadAnnotations', with_bbox=True), ] train_pipeline = [ *pre_transform, - dict(type='Mosaic', - img_scale=img_scale, - pad_val=114.0, - pre_transform=pre_transform), + dict(type='Mosaic', img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform), dict( type='YOLOv5RandomAffine', max_rotate_degree=0.0, @@ -80,40 +71,42 @@ scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), - border_val=(114, 114, 114)), - dict(type='mmdet.Albu', - transforms=albu_train_transforms, - bbox_params=dict(type='BboxParams', - format='pascal_voc', - label_fields=['gt_bboxes_labels', - 'gt_ignore_flags']), - keymap={ - 'img': 'image', - 'gt_bboxes': 'bboxes' - }), + border_val=(114, 114, 114), + ), + dict( + type='mmdet.Albu', + transforms=albu_train_transforms, + bbox_params=dict(type='BboxParams', format='pascal_voc', label_fields=['gt_bboxes_labels', 'gt_ignore_flags']), + keymap={'img': 'image', 'gt_bboxes': 'bboxes'}, + ), dict(type='YOLOv5HSVRandomAug'), # dict(type='mmdet.RandomFlip', prob=0.5), - dict(type='mmdet.PackDetInputs', - meta_keys=( - 'img_id', - 'img_path', - 'ori_shape', - 'img_shape', - )) + dict( + type='mmdet.PackDetInputs', + meta_keys=( + 'img_id', + 'img_path', + 'ori_shape', + 'img_shape', + ), + ), ] -train_dataloader = dict(batch_size=batch_size, - num_workers=workers, - persistent_workers=persistent_workers, - pin_memory=True, - sampler=dict(type='DefaultSampler', shuffle=True), - dataset=dict(type=dataset_type, - data_root=data_root, - ann_file=train_ann_file, - data_prefix=dict(img=train_data_prefix), - filter_cfg=dict(filter_empty_gt=False, - min_size=32), - pipeline=train_pipeline)) +train_dataloader = dict( + batch_size=batch_size, + num_workers=workers, + persistent_workers=persistent_workers, + pin_memory=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file=train_ann_file, + data_prefix=dict(img=train_data_prefix), + filter_cfg=dict(filter_empty_gt=False, min_size=32), + pipeline=train_pipeline, + ), +) test_pipeline = [ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), @@ -124,40 +117,40 @@ # # pad_val=dict(img=114) # ), dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), - dict(type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor')) + dict(type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor')), ] -val_dataloader = dict(batch_size=batch_size, - num_workers=workers, - persistent_workers=persistent_workers, - pin_memory=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict(type=dataset_type, - data_root=data_root, - test_mode=True, - data_prefix=dict(img=val_data_prefix), - ann_file=val_ann_file, - pipeline=test_pipeline, - batch_shapes_cfg=batch_shapes_cfg)) +val_dataloader = dict( + batch_size=batch_size, + num_workers=workers, + persistent_workers=persistent_workers, + pin_memory=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + 
data_root=data_root, + test_mode=True, + data_prefix=dict(img=val_data_prefix), + ann_file=val_ann_file, + pipeline=test_pipeline, + batch_shapes_cfg=batch_shapes_cfg, + ), +) test_dataloader = val_dataloader -#======================training================== -optim_wrapper = dict(type='OptimWrapper', - optimizer=dict(type='SGD', - lr=lr, - momentum=0.937, - weight_decay=0.0005, - nesterov=True, - batch_size_per_gpu=batch_size), - constructor='YOLOv5OptimizerConstructor') - -val_evaluator = dict(type='mmdet.CocoMetric', - proposal_nums=(100, 1, 10), - ann_file=data_root + val_ann_file, - metric='bbox') +# ======================training================== +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict( + type='SGD', lr=lr, momentum=0.937, weight_decay=0.0005, nesterov=True, batch_size_per_gpu=batch_size + ), + constructor='YOLOv5OptimizerConstructor', +) + +val_evaluator = dict( + type='mmdet.CocoMetric', proposal_nums=(100, 1, 10), ann_file=data_root + val_ann_file, metric='bbox' +) test_evaluator = val_evaluator diff --git a/edgelab/__init__.py b/edgelab/__init__.py index 81f20e85..1c4f7e8f 100644 --- a/edgelab/__init__.py +++ b/edgelab/__init__.py @@ -1,3 +1,3 @@ from .version import __version__, short_version -__all__ = ['__version__', 'short_version'] \ No newline at end of file +__all__ = ['__version__', 'short_version'] diff --git a/edgelab/datasets/__init__.py b/edgelab/datasets/__init__.py index a42c1d36..45e6d66d 100644 --- a/edgelab/datasets/__init__.py +++ b/edgelab/datasets/__init__.py @@ -11,7 +11,19 @@ from .yolodataset import CustomYOLOv5CocoDataset __all__ = [ - 'Speechcommand', 'MeterData', 'AudioAugs', 'CustomCocoDataset', - 'CustomVocdataset', 'FomoDatasets', 'SensorDataset', 'RandomResizedCrop', - 'fomo_collate', 'ETADataPreprocessor', 'CustomYOLOv5CocoDataset', 'SensorDataPreprocessor', 'PackSensorInputs', "LoadSensorFromFile", 'Bbox2FomoMask' + 'Speechcommand', + 'MeterData', + 'AudioAugs', + 'CustomCocoDataset', + 'CustomVocdataset', + 'FomoDatasets', + 'SensorDataset', + 'RandomResizedCrop', + 'fomo_collate', + 'ETADataPreprocessor', + 'CustomYOLOv5CocoDataset', + 'SensorDataPreprocessor', + 'PackSensorInputs', + "LoadSensorFromFile", + 'Bbox2FomoMask', ] diff --git a/edgelab/datasets/builder.py b/edgelab/datasets/builder.py index 301efc41..1ac1f74b 100644 --- a/edgelab/datasets/builder.py +++ b/edgelab/datasets/builder.py @@ -8,11 +8,14 @@ from mmcv.utils import TORCH_VERSION, Registry, build_from_cfg, digit_version from torch.utils.data import DataLoader from mmdet.datasets.builder import worker_init_fn -from mmdet.datasets.samplers import (ClassAwareSampler, - DistributedGroupSampler, - DistributedSampler, GroupSampler, - InfiniteBatchSampler, - InfiniteGroupBatchSampler) +from mmdet.datasets.samplers import ( + ClassAwareSampler, + DistributedGroupSampler, + DistributedSampler, + GroupSampler, + InfiniteBatchSampler, + InfiniteGroupBatchSampler, +) def collate_fn(batch): @@ -23,17 +26,19 @@ def collate_fn(batch): return dict(img=torch.stack(img), target=torch.cat(label, 0)) -def build_dataloader(dataset, - samples_per_gpu, - workers_per_gpu, - num_gpus=1, - dist=True, - shuffle=True, - seed=None, - runner_type='EpochBasedRunner', - persistent_workers=False, - class_aware_sampler=None, - **kwargs): +def build_dataloader( + dataset, + samples_per_gpu, + workers_per_gpu, + num_gpus=1, + dist=True, + shuffle=True, + seed=None, + runner_type='EpochBasedRunner', + persistent_workers=False, + class_aware_sampler=None, + **kwargs, +): """Build PyTorch 
DataLoader. In distributed training, each GPU/process has a dataloader. @@ -82,18 +87,9 @@ def build_dataloader(dataset, # it can be used in both `DataParallel` and # `DistributedDataParallel` if shuffle: - batch_sampler = InfiniteGroupBatchSampler(dataset, - batch_size, - world_size, - rank, - seed=seed) + batch_sampler = InfiniteGroupBatchSampler(dataset, batch_size, world_size, rank, seed=seed) else: - batch_sampler = InfiniteBatchSampler(dataset, - batch_size, - world_size, - rank, - seed=seed, - shuffle=False) + batch_sampler = InfiniteBatchSampler(dataset, batch_size, world_size, rank, seed=seed, shuffle=False) batch_size = 1 sampler = None else: @@ -101,44 +97,28 @@ def build_dataloader(dataset, # ClassAwareSampler can be used in both distributed and # non-distributed training. num_sample_class = class_aware_sampler.get('num_sample_class', 1) - sampler = ClassAwareSampler(dataset, - samples_per_gpu, - world_size, - rank, - seed=seed, - num_sample_class=num_sample_class) + sampler = ClassAwareSampler( + dataset, samples_per_gpu, world_size, rank, seed=seed, num_sample_class=num_sample_class + ) elif dist: # DistributedGroupSampler will definitely shuffle the data to # satisfy that images on each GPU are in the same group if shuffle: - sampler = DistributedGroupSampler(dataset, - samples_per_gpu, - world_size, - rank, - seed=seed) + sampler = DistributedGroupSampler(dataset, samples_per_gpu, world_size, rank, seed=seed) else: - sampler = DistributedSampler(dataset, - world_size, - rank, - shuffle=False, - seed=seed) + sampler = DistributedSampler(dataset, world_size, rank, shuffle=False, seed=seed) else: - sampler = GroupSampler(dataset, - samples_per_gpu) if shuffle else None + sampler = GroupSampler(dataset, samples_per_gpu) if shuffle else None batch_sampler = None - init_fn = partial( - worker_init_fn, num_workers=num_workers, rank=rank, - seed=seed) if seed is not None else None + init_fn = partial(worker_init_fn, num_workers=num_workers, rank=rank, seed=seed) if seed is not None else None - if (TORCH_VERSION != 'parrots' - and digit_version(TORCH_VERSION) >= digit_version('1.7.0')): + if TORCH_VERSION != 'parrots' and digit_version(TORCH_VERSION) >= digit_version('1.7.0'): kwargs['persistent_workers'] = persistent_workers elif persistent_workers is True: - warnings.warn('persistent_workers is invalid because your pytorch ' - 'version is lower than 1.7.0') + warnings.warn('persistent_workers is invalid because your pytorch ' 'version is lower than 1.7.0') - collate_=collate_fn if 'collate' in kwargs else partial(collate, samples_per_gpu=samples_per_gpu) + collate_ = collate_fn if 'collate' in kwargs else partial(collate, samples_per_gpu=samples_per_gpu) kwargs.pop('collate') if 'collate' in kwargs else None data_loader = DataLoader( dataset, @@ -149,7 +129,8 @@ def build_dataloader(dataset, collate_fn=collate_, pin_memory=kwargs.pop('pin_memory', False), worker_init_fn=init_fn, - **kwargs) + **kwargs, + ) return data_loader diff --git a/edgelab/datasets/cocodataset.py b/edgelab/datasets/cocodataset.py index b7df60af..a1d51cec 100644 --- a/edgelab/datasets/cocodataset.py +++ b/edgelab/datasets/cocodataset.py @@ -18,66 +18,138 @@ class CustomCocoDataset(CocoDataset): METAINFO = { 'classes': (), # palette is a list of color tuples, which is used for visualization. 
- 'palette': - [(220, 20, 60), (119, 11, 32), (0, 0, 142), (0, 0, 230), (106, 0, 228), - (0, 60, 100), (0, 80, 100), (0, 0, 70), (0, 0, 192), (250, 170, 30), - (100, 170, 30), (220, 220, 0), (175, 116, 175), (250, 0, 30), - (165, 42, 42), (255, 77, 255), (0, 226, 252), (182, 182, 255), - (0, 82, 0), (120, 166, 157), (110, 76, 0), (174, 57, 255), - (199, 100, 0), (72, 0, 118), (255, 179, 240), (0, 125, 92), - (209, 0, 151), (188, 208, 182), (0, 220, 176), (255, 99, 164), - (92, 0, 73), (133, 129, 255), (78, 180, 255), (0, 228, 0), - (174, 255, 243), (45, 89, 255), (134, 134, 103), (145, 148, 174), - (255, 208, 186), (197, 226, 255), (171, 134, 1), (109, 63, 54), - (207, 138, 255), (151, 0, 95), (9, 80, 61), (84, 105, 51), - (74, 65, 105), (166, 196, 102), (208, 195, 210), (255, 109, 65), - (0, 143, 149), (179, 0, 194), (209, 99, 106), (5, 121, 0), - (227, 255, 205), (147, 186, 208), (153, 69, 1), (3, 95, 161), - (163, 255, 0), (119, 0, 170), (0, 182, 199), (0, 165, 120), - (183, 130, 88), (95, 32, 0), (130, 114, 135), (110, 129, 133), - (166, 74, 118), (219, 142, 185), (79, 210, 114), (178, 90, 62), - (65, 70, 15), (127, 167, 115), (59, 105, 106), (142, 108, 45), - (196, 172, 0), (95, 54, 80), (128, 76, 255), (201, 57, 1), - (246, 0, 122), (191, 162, 208)] + 'palette': [ + (220, 20, 60), + (119, 11, 32), + (0, 0, 142), + (0, 0, 230), + (106, 0, 228), + (0, 60, 100), + (0, 80, 100), + (0, 0, 70), + (0, 0, 192), + (250, 170, 30), + (100, 170, 30), + (220, 220, 0), + (175, 116, 175), + (250, 0, 30), + (165, 42, 42), + (255, 77, 255), + (0, 226, 252), + (182, 182, 255), + (0, 82, 0), + (120, 166, 157), + (110, 76, 0), + (174, 57, 255), + (199, 100, 0), + (72, 0, 118), + (255, 179, 240), + (0, 125, 92), + (209, 0, 151), + (188, 208, 182), + (0, 220, 176), + (255, 99, 164), + (92, 0, 73), + (133, 129, 255), + (78, 180, 255), + (0, 228, 0), + (174, 255, 243), + (45, 89, 255), + (134, 134, 103), + (145, 148, 174), + (255, 208, 186), + (197, 226, 255), + (171, 134, 1), + (109, 63, 54), + (207, 138, 255), + (151, 0, 95), + (9, 80, 61), + (84, 105, 51), + (74, 65, 105), + (166, 196, 102), + (208, 195, 210), + (255, 109, 65), + (0, 143, 149), + (179, 0, 194), + (209, 99, 106), + (5, 121, 0), + (227, 255, 205), + (147, 186, 208), + (153, 69, 1), + (3, 95, 161), + (163, 255, 0), + (119, 0, 170), + (0, 182, 199), + (0, 165, 120), + (183, 130, 88), + (95, 32, 0), + (130, 114, 135), + (110, 129, 133), + (166, 74, 118), + (219, 142, 185), + (79, 210, 114), + (178, 90, 62), + (65, 70, 15), + (127, 167, 115), + (59, 105, 106), + (142, 108, 45), + (196, 172, 0), + (95, 54, 80), + (128, 76, 255), + (201, 57, 1), + (246, 0, 122), + (191, 162, 208), + ], } - def __init__(self, - ann_file: str = '', - metainfo: Optional[dict] = None, - data_root=None, - data_prefix: dict = dict(img_path=''), - filter_cfg: Optional[dict] = None, - indices: Optional[Union[int, Sequence[int]]] = None, - serialize_data: bool = True, - pipeline: List[Union[dict, Callable]] = [], - test_mode: bool = False, - lazy_init: bool = False, - max_refetch: int = 1000, - filter_supercat: bool = True, - file_client_args: Optional[dict] = dict(backend='disk'), - classes=None, - **kwargs): + def __init__( + self, + ann_file: str = '', + metainfo: Optional[dict] = None, + data_root=None, + data_prefix: dict = dict(img_path=''), + filter_cfg: Optional[dict] = None, + indices: Optional[Union[int, Sequence[int]]] = None, + serialize_data: bool = True, + pipeline: List[Union[dict, Callable]] = [], + test_mode: bool = False, + lazy_init: bool = False, + 
max_refetch: int = 1000, + filter_supercat: bool = True, + file_client_args: Optional[dict] = dict(backend='disk'), + classes=None, + **kwargs, + ): if data_root: if not (osp.isabs(ann_file) and (osp.isabs(data_prefix['img']))): - data_root = check_file( - data_root, data_name="coco") if data_root else data_root + data_root = check_file(data_root, data_name="coco") if data_root else data_root if metainfo is None and not self.METAINFO['classes'] and not classes: if not osp.isabs(ann_file) and ann_file: self.ann_file = osp.join(data_root, ann_file) with open(self.ann_file, 'r') as f: data = json.load(f) if filter_supercat: - catgories = tuple(cat['name'] for cat in data['categories'] - if cat['supercategory'] != 'none') + catgories = tuple(cat['name'] for cat in data['categories'] if cat['supercategory'] != 'none') else: catgories = tuple(cat['name'] for cat in data['categories']) self.METAINFO['classes'] = catgories elif classes: self.METAINFO['classes'] = classes - super().__init__(ann_file, metainfo, data_root, data_prefix, - filter_cfg, indices, serialize_data, pipeline, - test_mode, lazy_init, max_refetch, **kwargs) + super().__init__( + ann_file, + metainfo, + data_root, + data_prefix, + filter_cfg, + indices, + serialize_data, + pipeline, + test_mode, + lazy_init, + max_refetch, + **kwargs, + ) def bboxe2cell(self, bboxe, img_h, img_w, H, W): w = (bboxe[0] + bboxe[2]) / 2 @@ -95,19 +167,17 @@ def build_target(self, preds, targets, img_h, img_w): bboxes = targets['bboxes'] labels = targets['labels'] - bboxes = [ - self.bboxe2cell(bboxe, img_h, img_w, H, W) for bboxe in bboxes - ] + bboxes = [self.bboxe2cell(bboxe, img_h, img_w, H, W) for bboxe in bboxes] for bboxe, label in zip(bboxes, labels): - target_data[0, bboxe[1], bboxe[0]] = label + 1 #label + target_data[0, bboxe[1], bboxe[0]] = label + 1 # label return target_data def compute_FTP(self, pred, target): - confusion = confusion_matrix(target.flatten().cpu().numpy(), - pred.flatten().cpu().numpy(), - labels=range(len(self.CLASSES) + 1)) + confusion = confusion_matrix( + target.flatten().cpu().numpy(), pred.flatten().cpu().numpy(), labels=range(len(self.CLASSES) + 1) + ) tn = confusion[0, 0] tp = np.diagonal(confusion).sum() - tn fn = np.tril(confusion, k=-1).sum() @@ -116,7 +186,6 @@ def compute_FTP(self, pred, target): return tp, fp, fn def computer_prf(self, tp, fp, fn): - if tp == 0 and fn == 0 and fp == 0: return 1.0, 1.0, 1.0 @@ -125,17 +194,19 @@ def computer_prf(self, tp, fp, fn): f1 = 0.0 if (p + r == 0) else 2 * (p * r) / (p + r) return p, r, f1 - def evaluate(self, - results, - metric='bbox', - logger=None, - jsonfile_prefix=None, - classwise=False, - proposal_nums=..., - iou_thrs=None, - fomo=False, - metric_items=None): - if fomo: #just with here evaluate for fomo data + def evaluate( + self, + results, + metric='bbox', + logger=None, + jsonfile_prefix=None, + classwise=False, + proposal_nums=..., + iou_thrs=None, + fomo=False, + metric_items=None, + ): + if fomo: # FOMO-format results are evaluated directly here annotations = [self.get_ann_info(i) for i in range(len(self))] eval_results = OrderedDict() tmp = [] @@ -162,9 +233,9 @@ def evaluate(self, eval_results['F1'] = F1 return eval_results - return super().evaluate(results, metric, logger, jsonfile_prefix, - classwise, proposal_nums, iou_thrs, - metric_items) + return super().evaluate( + results, metric, logger, jsonfile_prefix, classwise, proposal_nums, iou_thrs, metric_items + ) def show_result(result,
img_path, classes): for i in mask: b, h, w = i label = classes[pred[0, h, w] - 1] - cv2.circle(img, - (int(W / result[0].shape[1] * - (w + 0.5)), int(H / result[0].shape[0] * (h + 0.5))), - 5, (0, 0, 255), 1) - cv2.putText(img, - str(label), - org=(int(W / result[0].shape[1] * w), - int(H / result[0].shape[0] * h)), - color=(255, 0, 0), - fontScale=1, - fontFace=cv2.FONT_HERSHEY_SIMPLEX) + cv2.circle( + img, (int(W / result[0].shape[1] * (w + 0.5)), int(H / result[0].shape[0] * (h + 0.5))), 5, (0, 0, 255), 1 + ) + cv2.putText( + img, + str(label), + org=(int(W / result[0].shape[1] * w), int(H / result[0].shape[0] * h)), + color=(255, 0, 0), + fontScale=1, + fontFace=cv2.FONT_HERSHEY_SIMPLEX, + ) cv2.imshow('img', img) - cv2.waitKey(0) \ No newline at end of file + cv2.waitKey(0) diff --git a/edgelab/datasets/data_preprocessors/SensorDataPreprocessor.py b/edgelab/datasets/data_preprocessors/SensorDataPreprocessor.py index 3ed7faaa..9a2f3023 100644 --- a/edgelab/datasets/data_preprocessors/SensorDataPreprocessor.py +++ b/edgelab/datasets/data_preprocessors/SensorDataPreprocessor.py @@ -5,37 +5,42 @@ from mmengine.logging import MessageHub from edgelab.engine.utils.batch_augs import BatchAugs -from mmcls.structures import (ClsDataSample, MultiTaskDataSample, - batch_label_to_onehot, cat_batch_labels, - stack_batch_scores, tensor_split) +from mmcls.structures import ( + ClsDataSample, + MultiTaskDataSample, + batch_label_to_onehot, + cat_batch_labels, + stack_batch_scores, + tensor_split, +) from mmcls.models.utils.batch_augments import RandomBatchAugment from edgelab.registry import MODELS import numpy as np import json + @MODELS.register_module() class SensorDataPreprocessor(BaseDataPreprocessor): - def __init__(self, - to_onehot: bool = False, - num_classes: Optional[int] = None, - batch_augments: Optional[dict] = None - ): + def __init__( + self, to_onehot: bool = False, num_classes: Optional[int] = None, batch_augments: Optional[dict] = None + ): super().__init__() self.to_onehot = to_onehot self.num_classes = num_classes - + if batch_augments is not None: self.batch_augments = RandomBatchAugment(**batch_augments) if not self.to_onehot: from mmengine.logging import MMLogger + MMLogger.get_current_instance().info( 'Because batch augmentations are enabled, the data ' 'preprocessor automatically enables the `to_onehot` ' - 'option to generate one-hot format labels.') + 'option to generate one-hot format labels.' + ) self.to_onehot = True else: self.batch_augments = None - def forward(self, data: dict, training: bool = False) -> dict: """Perform normalization, padding, bgr2rgb conversion and batch @@ -49,28 +54,22 @@ def forward(self, data: dict, training: bool = False) -> dict: dict: Data in the same format as the model input. """ inputs = self.cast_data(data['inputs']) - data_samples = data.get('data_samples', None) - + sample_item = data_samples[0] if data_samples is not None else None - if isinstance(sample_item, - ClsDataSample) and 'gt_label' in sample_item: + if isinstance(sample_item, ClsDataSample) and 'gt_label' in sample_item: gt_labels = [sample.gt_label for sample in data_samples] - batch_label, label_indices = cat_batch_labels( - gt_labels, device=self.device) + batch_label, label_indices = cat_batch_labels(gt_labels, device=self.device) batch_score = stack_batch_scores(gt_labels, device=self.device) if batch_score is None and self.to_onehot: - assert batch_label is not None, \ - 'Cannot generate onehot format labels because no labels.' 
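For intuition about the `to_onehot` path reflowed in this hunk: the mmcls helpers turn a batch of integer labels into one-hot score vectors. A minimal stand-alone sketch of the same idea, using plain `torch.nn.functional.one_hot` rather than the mmcls helpers (the values here are made up for illustration):

    import torch
    import torch.nn.functional as F

    # Three samples, four classes in total.
    batch_label = torch.tensor([2, 0, 1])
    batch_score = F.one_hot(batch_label, num_classes=4).float()
    # -> [[0., 0., 1., 0.],
    #     [1., 0., 0., 0.],
    #     [0., 1., 0., 0.]]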
- num_classes = self.num_classes or data_samples[0].get( - 'num_classes') - assert num_classes is not None, \ - 'Cannot generate one-hot format labels because not set ' \ - '`num_classes` in `data_preprocessor`.' - batch_score = batch_label_to_onehot(batch_label, label_indices, - num_classes) + assert batch_label is not None, 'Cannot generate one-hot format labels because no labels were provided.' + num_classes = self.num_classes or data_samples[0].get('num_classes') + assert num_classes is not None, ( + 'Cannot generate one-hot format labels because `num_classes` is not set in `data_preprocessor`.' + ) + batch_score = batch_label_to_onehot(batch_label, label_indices, num_classes) # ----- Batch Augmentations ---- if training and self.batch_augments is not None: @@ -78,14 +77,12 @@ def forward(self, data: dict, training: bool = False) -> dict: # ----- scatter labels and scores to data samples --- if batch_label is not None: - for sample, label in zip( - data_samples, tensor_split(batch_label, - label_indices)): + for sample, label in zip(data_samples, tensor_split(batch_label, label_indices)): sample.set_gt_label(label) if batch_score is not None: for sample, score in zip(data_samples, batch_score): sample.set_gt_score(score) elif isinstance(sample_item, MultiTaskDataSample): data_samples = self.cast_data(data_samples) - + return {'inputs': inputs, 'data_samples': data_samples} diff --git a/edgelab/datasets/data_preprocessors/__init__.py b/edgelab/datasets/data_preprocessors/__init__.py index 004c7275..e025d6d4 100644 --- a/edgelab/datasets/data_preprocessors/__init__.py +++ b/edgelab/datasets/data_preprocessors/__init__.py @@ -1,3 +1,4 @@ from .pointpreprocessor import ETADataPreprocessor from .SensorDataPreprocessor import SensorDataPreprocessor -__all__=['ETADataPreprocessor', 'SensorDataPreprocessor'] \ No newline at end of file + +__all__ = ['ETADataPreprocessor', 'SensorDataPreprocessor'] diff --git a/edgelab/datasets/data_preprocessors/audio_augs.py b/edgelab/datasets/data_preprocessors/audio_augs.py index d2d39e84..c60c6989 100644 --- a/edgelab/datasets/data_preprocessors/audio_augs.py +++ b/edgelab/datasets/data_preprocessors/audio_augs.py @@ -42,8 +42,8 @@ def rir(self, mic, n, r, rm, src): yj = srcs * src[1] + rms * rm[1] - mic[1] zk = srcs * src[2] + rms * rm[2] - mic[2] [i, j, k] = np.meshgrid(xi, yj, zk) - d = np.sqrt(i ** 2 + j ** 2 + k ** 2) - t = np.round(self.fs * d / 343.)
+ 1 + d = np.sqrt(i**2 + j**2 + k**2) + t = np.round(self.fs * d / 343.0) + 1 [e, f, g] = np.meshgrid(nn, nn, nn) c = np.power(r, np.abs(e) + np.abs(f) + np.abs(g)) e = c / d @@ -82,8 +82,9 @@ def __call__(self, sample): h = torch.from_numpy(h).float() sample = sample[None, None, :] sample = F.pad(sample, (h.shape[-1] // 2, h.shape[-1] // 2), "reflect") - sample = F.conv1d(sample, h[None, None, :], bias=None, stride=1, padding=0, dilation=1, - groups=sample.shape[1]) + sample = F.conv1d( + sample, h[None, None, :], bias=None, stride=1, padding=0, dilation=1, groups=sample.shape[1] + ) return sample, h @@ -160,7 +161,7 @@ def __call__(self, sample): if random.random() < self.p: max_start = sample.size(-1) - self.n_mask idx_rand = random.randint(0, max_start) - sample[idx_rand:idx_rand + self.n_mask] = torch.randn(self.n_mask) * 1e-6 + sample[idx_rand : idx_rand + self.n_mask] = torch.randn(self.n_mask) * 1e-6 return sample @@ -195,7 +196,12 @@ def __init__(self, p=0.5): def __call__(self, sample): if random.random() < self.p: - sample.data = torch.flip(sample.data, dims=[-1, ]) + sample.data = torch.flip( + sample.data, + dims=[ + -1, + ], + ) return sample @@ -217,9 +223,9 @@ def __init__(self, p=0.5, snr_db=30): def __call__(self, sample): if random.random() < self.p: - s = torch.sqrt(torch.mean(sample ** 2)) + s = torch.sqrt(torch.mean(sample**2)) snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - self.min_snr_db) - sgm = s * 10 ** (-snr_db / 20.) + sgm = s * 10 ** (-snr_db / 20.0) w = torch.randn_like(sample).mul_(sgm) sample.add_(w) return sample @@ -233,9 +239,9 @@ def __init__(self, snr_db=35, p=0.5): def __call__(self, sample): if random.random() < self.p: - s = torch.sqrt(torch.mean(sample ** 2)) + s = torch.sqrt(torch.mean(sample**2)) snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - self.min_snr_db) - sgm = s * 10 ** (-snr_db / 20.) * np.sqrt(3) + sgm = s * 10 ** (-snr_db / 20.0) * np.sqrt(3) w = torch.rand_like(sample).mul_(2 * sgm).add_(-sgm) sample.add_(w) return sample @@ -249,7 +255,7 @@ def __init__(self, snr_db=35, p=0.5): def __call__(self, sample): if random.random() < self.p: - s = torch.sqrt(torch.mean(sample ** 2)) + s = torch.sqrt(torch.mean(sample**2)) n = sample.shape[-1] w = torch.randn(n) nn = n // 2 + 1 @@ -260,7 +266,7 @@ def __call__(self, sample): w = torch.fft.ifft(W).real w.add_(w.mean()).div_(w.std()) snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - self.min_snr_db) - sgm = s * 10 ** (-snr_db / 20.) + sgm = s * 10 ** (-snr_db / 20.0) sample.add_(w.mul_(sgm)) return sample @@ -273,7 +279,7 @@ def __init__(self, p=0.5, snr_db=35): def __call__(self, sample): if random.random() < self.p: - s = torch.sqrt(torch.mean(sample ** 2)) + s = torch.sqrt(torch.mean(sample**2)) n = sample.shape[-1] w = torch.randn(n) nn = n // 2 + 1 @@ -284,7 +290,7 @@ def __call__(self, sample): w = torch.fft.ifft(W).real w.add_(w.mean()).div_(w.std()) snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - self.min_snr_db) - sgm = s * 10 ** (-snr_db / 20.) 
+ sgm = s * 10 ** (-snr_db / 20.0) sample.add_(w.mul_(sgm)) return sample @@ -297,7 +303,7 @@ def __init__(self, p=0.5, snr_db=35): def __call__(self, sample): if random.random() < self.p: - s = torch.sqrt(torch.mean(sample ** 2)) + s = torch.sqrt(torch.mean(sample**2)) n = sample.shape[-1] w = torch.randn(n) nn = n // 2 + 1 @@ -308,7 +314,7 @@ def __call__(self, sample): w = torch.fft.ifft(W).real w.add_(w.mean()).div_(w.std()) snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - self.min_snr_db) - sgm = s * 10 ** (-snr_db / 20.) + sgm = s * 10 ** (-snr_db / 20.0) sample.add_(w.mul_(sgm)) return sample @@ -321,7 +327,7 @@ def __init__(self, p=0.5, snr_db=35): def __call__(self, sample): if random.random() < self.p: - s = torch.sqrt(torch.mean(sample ** 2)) + s = torch.sqrt(torch.mean(sample**2)) n = sample.shape[-1] w = torch.randn(n) nn = n // 2 + 1 @@ -332,7 +338,7 @@ def __call__(self, sample): w = torch.fft.ifft(W).real w.add_(w.mean()).div_(w.std()) snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - self.min_snr_db) - sgm = s * 10 ** (-snr_db / 20.) + sgm = s * 10 ** (-snr_db / 20.0) sample.add_(w.mul_(sgm)) return sample @@ -350,16 +356,20 @@ def __call__(self, sample): f_shift = torch.randn(1).mul_(self.sgm * df) t = torch.arange(0, self.fft_params['win_len'][win_idx], 1).float() w = torch.real(torch.exp(-1j * 2 * np.pi * t * f_shift)) - X = torch.stft(sample, - win_length=self.fft_params['win_len'][win_idx], - hop_length=self.fft_params['hop_len'][win_idx], - n_fft=self.fft_params['n_fft'][win_idx], - window=w, - return_complex=True) - sample = torch.istft(X, - win_length=self.fft_params['win_len'][win_idx], - hop_length=self.fft_params['hop_len'][win_idx], - n_fft=self.fft_params['n_fft'][win_idx]) + X = torch.stft( + sample, + win_length=self.fft_params['win_len'][win_idx], + hop_length=self.fft_params['hop_len'][win_idx], + n_fft=self.fft_params['n_fft'][win_idx], + window=w, + return_complex=True, + ) + sample = torch.istft( + X, + win_length=self.fft_params['win_len'][win_idx], + hop_length=self.fft_params['hop_len'][win_idx], + n_fft=self.fft_params['n_fft'][win_idx], + ) return sample @@ -377,9 +387,9 @@ def __call__(self, sample): f = self.max_freq * torch.rand(1) + 3 * torch.randn(1) if random.random() < self.p: snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - self.min_snr_db) - t = n * 1. / self.fs - s = (sample ** 2).mean().sqrt() - sgm = s * np.sqrt(2) * 10 ** (-snr_db / 20.) 
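The `sgm = s * 10 ** (-snr_db / 20.0)` pattern reflowed throughout these noise transforms converts a target SNR in decibels into a noise amplitude: since SNR_dB = 20 * log10(s / sigma), the noise scale is sigma = s * 10^(-SNR_dB / 20); the `AddSine` transform in this hunk additionally multiplies by sqrt(2) because a sinusoid's RMS is its peak amplitude divided by sqrt(2). A quick sanity check of the arithmetic, with illustrative values only:

    import math

    s = 1.0        # signal RMS
    snr_db = 30.0  # target signal-to-noise ratio in dB
    sgm = s * 10 ** (-snr_db / 20.0)  # noise RMS, about 0.0316
    assert abs(20 * math.log10(s / sgm) - snr_db) < 1e-9  # recovers 30 dB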
+ t = n * 1.0 / self.fs + s = (sample**2).mean().sqrt() + sgm = s * np.sqrt(2) * 10 ** (-snr_db / 20.0) b = sgm * torch.sin(2 * np.pi * f * t + torch.rand(1) * np.pi) sample.add_(b) @@ -399,7 +409,7 @@ def __call__(self, sample): self.max_len = sample.shape[-1] // 10 idx = random.randint(0, self.max_len) amp = torch.FloatTensor(1).uniform_(self.low, self.high) - sample[idx: idx + self.max_len].mul_(amp) + sample[idx : idx + self.max_len].mul_(amp) return sample @@ -413,18 +423,22 @@ def __call__(self, sample): if random.random() < self.p: win_idx = random.randint(0, len(self.fft_params['win_len']) - 1) sgm_noise = self.sgm + 0.01 * torch.rand(1) - X = torch.stft(sample, - win_length=self.fft_params['win_len'][win_idx], - hop_length=self.fft_params['hop_len'][win_idx], - n_fft=self.fft_params['n_fft'][win_idx], - return_complex=True) + X = torch.stft( + sample, + win_length=self.fft_params['win_len'][win_idx], + hop_length=self.fft_params['hop_len'][win_idx], + n_fft=self.fft_params['n_fft'][win_idx], + return_complex=True, + ) w = sgm_noise * torch.rand_like(X) phn = torch.exp(1j * w) X.mul_(phn) - sample = torch.istft(X, - win_length=self.fft_params['win_len'][win_idx], - hop_length=self.fft_params['hop_len'][win_idx], - n_fft=self.fft_params['n_fft'][win_idx]) + sample = torch.istft( + X, + win_length=self.fft_params['win_len'][win_idx], + hop_length=self.fft_params['hop_len'][win_idx], + n_fft=self.fft_params['n_fft'][win_idx], + ) return sample @@ -446,7 +460,7 @@ def __call__(self, sample): @TRANSFORMS.register_module() -class AudioAugs(): +class AudioAugs: def __init__(self, k_augs): self.noise_vec = ['awgn', 'abgn', 'apgn', 'argn', 'avgn', 'aun', 'phn', 'sine'] self.k_augs = k_augs diff --git a/edgelab/datasets/data_preprocessors/pointpreprocessor.py b/edgelab/datasets/data_preprocessors/pointpreprocessor.py index 53ecbc8a..8c9eaa3b 100644 --- a/edgelab/datasets/data_preprocessors/pointpreprocessor.py +++ b/edgelab/datasets/data_preprocessors/pointpreprocessor.py @@ -10,21 +10,21 @@ @MODELS.register_module() class ETADataPreprocessor(BaseDataPreprocessor): - - def __init__(self, - n_cls, - multilabel, - seq_len, - sampling_rate, - augs_mix, - mix_ratio, - local_rank, - epoch_mix, - mix_loss, - non_blocking: Optional[bool] = False): + def __init__( + self, + n_cls, + multilabel, + seq_len, + sampling_rate, + augs_mix, + mix_ratio, + local_rank, + epoch_mix, + mix_loss, + non_blocking: Optional[bool] = False, + ): self.n_cls = n_cls - self._device = torch.device( - "cuda" if torch.cuda.is_available() else "cpu") + self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu") ba_params = { 'seq_len': seq_len, 'fs': sampling_rate, @@ -35,7 +35,7 @@ def __init__(self, 'epoch_mix': epoch_mix, 'resample_factors': [0.8, 0.9, 1.1, 1.2], 'multilabel': True if multilabel else False, - 'mix_loss': mix_loss + 'mix_loss': mix_loss, } super().__init__(non_blocking) @@ -50,8 +50,7 @@ def forward(self, data: dict, training: bool = False) -> Union[dict, list]: epoch = MessageHub.get_current_instance().get_info('epoch') - x, target, ismixed = self.audio_augs(x.to(self.device), - y.to(self.device), epoch) + x, target, ismixed = self.audio_augs(x.to(self.device), y.to(self.device), epoch) self.messbus.update_info('target', target) self.messbus.update_info('ismixed', ismixed) diff --git a/edgelab/datasets/fomo.py b/edgelab/datasets/fomo.py index 4cd18356..d314f530 100644 --- a/edgelab/datasets/fomo.py +++ b/edgelab/datasets/fomo.py @@ -2,15 +2,14 @@ from edgelab.registry import DATASETS + 
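The classes touched throughout this patch are exposed through registry decorators such as the `@DATASETS.register_module()` below; once registered, a dataset can be instantiated from a plain config dict. A rough sketch assuming the standard mmengine registry behaviour, with hypothetical config values:

    from edgelab.registry import DATASETS

    # 'type' selects the registered class; the remaining keys are
    # forwarded to its __init__.
    cfg = dict(
        type='MeterData',
        data_root='data/meter',  # hypothetical dataset root
        index_file='train.txt',  # hypothetical annotation index
        pipeline=[dict(type='Resize', height=112, width=112)],
    )
    dataset = DATASETS.build(cfg)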
@DATASETS.register_module() class FoMoDataset(Dataset): def __init__(self) -> None: super().__init__() - - def get_preceicn_recall_f1(self): - """ Calculate the predicted evaluation index through the output of the model """ + """Compute precision, recall and F1 from the model output.""" pass def eval(self): diff --git a/edgelab/datasets/fomodataset.py b/edgelab/datasets/fomodataset.py index 4e701040..cc07e369 100644 --- a/edgelab/datasets/fomodataset.py +++ b/edgelab/datasets/fomodataset.py @@ -17,16 +17,17 @@ @DATASETS.register_module() class FomoDatasets(CocoDataset): - def __init__(self, - data_root, - pipeline, - classes=None, - use_alb=True, - bbox_params: dict = dict(format='coco', - label_fields=['class_labels']), - ann_file: str = None, - img_prefix: str = None) -> None: - super().__init__(ann_file=ann_file,data_root=data_root,pipeline=pipeline,data_prefix=dict(img='train/')) + def __init__( + self, + data_root, + pipeline, + classes=None, + use_alb=True, + bbox_params: dict = dict(format='coco', label_fields=['class_labels']), + ann_file: str = None, + img_prefix: str = None, + ) -> None: + super().__init__(ann_file=ann_file, data_root=data_root, pipeline=pipeline, data_prefix=dict(img='train/')) if not osp.isabs(img_prefix): img_dir = os.path.join(data_root, img_prefix) @@ -35,8 +36,7 @@ def __init__(self, self.bbox_params = bbox_params - self.transform = AlbCompose(pipeline, - bbox_params=A.BboxParams(**bbox_params)) + self.transform = AlbCompose(pipeline, bbox_params=A.BboxParams(**bbox_params)) # load data with coco format self.data = torchvision.datasets.CocoDetection( img_dir, @@ -46,9 +46,9 @@ def __init__(self, self.parse_cats() # Offset of the ground truth box self.posit_offset = torch.tensor( - [[0, -1, 0], [0, -1, -1], [0, 0, -1], [0, 1, 0], [0, 1, 1], - [0, 0, 1], [0, 1, -1], [0, -1, 1], [0, 0, 0]], - dtype=torch.long) + [[0, -1, 0], [0, -1, -1], [0, 0, -1], [0, 1, 0], [0, 1, 1], [0, 0, 1], [0, 1, -1], [0, -1, 1], [0, 0, 0]], + dtype=torch.long, + ) # TODO self.flag = np.zeros(len(self), dtype=np.uint8) @@ -56,7 +56,7 @@ def __init__(self, self.flag[i] = 1 def parse_cats(self): - """ parse dataset is roboflow """ + """Parse categories and detect whether the dataset is a Roboflow export.""" self.roboflow = False self.CLASSES = [] @@ -70,12 +70,12 @@ def parse_cats(self): self.CLASSES.append(value['name']) def __len__(self): - """ return datasets len""" + """Return the dataset length.""" return len(self.data) def __getitem__(self, index): image, ann = self.data[index] - + self.prepare_data(idx=index) image = np.asarray(image) return self.pipeline() @@ -97,11 +97,7 @@ def __getitem__(self, index): bboxes = np.array(bboxes) labels = np.array(labels) - trans_param = { - 'image': image, - 'bboxes': bboxes, - self.bbox_params['label_fields'][0]: labels - } + trans_param = {'image': image, 'bboxes': bboxes, self.bbox_params['label_fields'][0]: labels} result = self.transform(**trans_param) image = result['image'] @@ -111,22 +107,16 @@ def __getitem__(self, index): H, W, C = image.shape bbl = [] for bbox, l in zip(bboxes, labels): - bbl.append([ - 0, l, (bbox[0] + (bbox[2] / 2)) / W, - (bbox[1] + (bbox[3] / 2)) / H, bbox[2] / W, bbox[3] / H - ]) + bbl.append([0, l, (bbox[0] + (bbox[2] / 2)) / W, (bbox[1] + (bbox[3] / 2)) / H, bbox[2] / W, bbox[3] / H]) - return { - 'inputs': ToTensor()(image), - 'data_samples': torch.from_numpy(np.asarray(bbl)) - } + return {'inputs': ToTensor()(image), 'data_samples': torch.from_numpy(np.asarray(bbl))} def get_ann_info(self, idx): ann = self.__getitem__(idx)["target"]
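        # Illustration of the FOMO grid mapping implemented by bboxe2cell()
        # below (example numbers, not taken from this repo): with a 96x96
        # image and a 12x12 target grid, a box centred at (50, 30) lands in
        # cell x = int(50 / 96 * 12) = 6, y = int(30 / 96 * 12) = 3.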
return ann def bboxe2cell(self, bboxe, img_h, img_w, H, W): - """ transform the bbox to ground cell """ + """Map a bbox to its ground-truth grid cell.""" w = bboxe[0] + (bboxe[2] / 2) h = bboxe[1] + (bboxe[3] / 2) w = w / img_w @@ -157,7 +147,7 @@ def build_target(self, preds, targets): for i in targets: h, w = int(i[3].item() * H), int(i[2].item() * W) target_data[int(i[0]), h, w, 0] = 0 # background - target_data[int(i[0]), h, w, int(i[1])] = 1 #label + target_data[int(i[0]), h, w, int(i[1])] = 1 # label return target_data @@ -183,14 +173,13 @@ def compute_ftp(self, preds, target): if torch.any(site < 0) or torch.any(site >= H): continue # The prediction is considered to be correct if it is near the ground truth box - if site in preds_index and preds_max[site.chunk( - 3)] == target_max[ti.chunk(3)]: + if site in preds_index and preds_max[site.chunk(3)] == target_max[ti.chunk(3)]: preds_max[site.chunk(3)] = target_max[ti.chunk(3)] target_max[site.chunk(3)] = target_max[ti.chunk(3)] # Calculate the confusion matrix - confusion = confusion_matrix(target_max.flatten().cpu().numpy(), - preds_max.flatten().cpu().numpy(), - labels=range(preds.shape[-1])) + confusion = confusion_matrix( + target_max.flatten().cpu().numpy(), preds_max.flatten().cpu().numpy(), labels=range(preds.shape[-1]) + ) # Calculate the value of P, R, F1 based on the confusion matrix tn = confusion[0, 0] tp = np.diagonal(confusion).sum() - tn diff --git a/edgelab/datasets/meter.py b/edgelab/datasets/meter.py index 6044db89..ff109967 100644 --- a/edgelab/datasets/meter.py +++ b/edgelab/datasets/meter.py @@ -17,12 +17,11 @@ def calc_angle(x1, y1, x2, y2): - x = (x1 - x2) - y = (y1 - y2) + x = x1 - x2 + y = y1 - y2 z = math.sqrt(x * x + y * y) try: - angle = math.acos( - (z**2 + 1 - (x - 1)**2 - y**2) / (2 * z * 1)) / math.pi * 180 + angle = math.acos((z**2 + 1 - (x - 1) ** 2 - y**2) / (2 * z * 1)) / math.pi * 180 except: angle = 0 @@ -35,35 +34,38 @@ def calc_angle(x1, y1, x2, y2): @DATASETS.register_module() class MeterData(Dataset, metaclass=ABCMeta): """ - The meter data set class, this class is mainly for the data set of - the pointer table, the data set is marked in a format similar to the + The meter dataset class. It is mainly intended for pointer-meter + datasets, which are annotated in a format similar to that used for key point detection - + Args: data_root: The root path of the dataset index_file: The path of the annotation file or the folder path of the annotation file - img_dir: The folder path of the image data, which needs to be - the image file name that can be found in the corresponding + img_dir: The folder containing the image data; the image file names + found here must match those referenced in the corresponding annotation file pipeline: The option to do data enhancement on image data, which needs to be in list format format: format of keypoints.
Should be 'xy', 'yx', 'xya', 'xys', 'xyas', 'xysa' - + """ - CLASSES = ('meter') - - def __init__(self, - data_root: str, - index_file: str, - img_dir: Optional[str] = None, - pipeline: Optional[Sequence[dict]] = None, - format: str = 'xy'): + + CLASSES = 'meter' + + def __init__( + self, + data_root: str, + index_file: str, + img_dir: Optional[str] = None, + pipeline: Optional[Sequence[dict]] = None, + format: str = 'xy', + ): super(MeterData, self).__init__() self.metainfo = dict() self.data_root = check_file(data_root) - self.img_dir = img_dir #todo + self.img_dir = img_dir # todo if img_dir and not osp.isabs(img_dir) and self.data_root: self.img_dir = osp.join(self.data_root, img_dir) @@ -88,7 +90,8 @@ def __init__(self, raise ValueError( 'The parameter index_file must be a folder path', ' or a file in txt or json format, but the received ', - f'value is {index_file}') + f'value is {index_file}', + ) self.transforms = AlbCompose(pipeline, keypoint_params=format) self.totensor = transforms.Compose([transforms.ToTensor()]) @@ -108,8 +111,7 @@ def __getitem__(self, item: int) -> dict: result = self.transforms(image=self.img, keypoints=landmark) if len(result['keypoints']) == point_num: break - img, keypoints = self.totensor(result['image']), np.asarray( - result['keypoints']).flatten() + img, keypoints = self.totensor(result['image']), np.asarray(result['keypoints']).flatten() h, w = img.shape[1:] keypoints[::2] = keypoints[::2] / w keypoints[1::2] = keypoints[1::2] / h @@ -162,11 +164,7 @@ def parse_txt(self, index_file: str) -> None: img_file = os.path.join(self.img_dir, line[0]) points = np.asarray(line[1:], dtype=np.float32) point_num = len(points) // 2 - self.ann_ls.append({ - 'image_file': img_file, - 'keypoints': points, - 'point_num': point_num - }) + self.ann_ls.append({'image_file': img_file, 'keypoints': points, 'point_num': point_num}) def parse_json(self, json_path: str) -> None: - pass #todo \ No newline at end of file + pass # todo diff --git a/edgelab/datasets/pipelines/albu.py b/edgelab/datasets/pipelines/albu.py index 437a191b..9a77973a 100644 --- a/edgelab/datasets/pipelines/albu.py +++ b/edgelab/datasets/pipelines/albu.py @@ -5,227 +5,215 @@ from edgelab.registry import TRANSFORMS - @TRANSFORMS.register_module() class ColorJitter(A.ColorJitter): - - def __init__(self, - brightness=0.2, - contrast=0.2, - saturation=0.2, - hue=0.2, - always_apply=False, - p=0.5): - super().__init__(brightness, contrast, saturation, hue, always_apply, - p) + def __init__(self, brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2, always_apply=False, p=0.5): + super().__init__(brightness, contrast, saturation, hue, always_apply, p) @TRANSFORMS.register_module() class HorizontalFlip(A.HorizontalFlip): - def __init__(self, always_apply: bool = False, p: float = 0.5): super().__init__(always_apply, p) @TRANSFORMS.register_module() class VerticalFlip(A.VerticalFlip): - def __init__(self, always_apply: bool = False, p: float = 0.5): super().__init__(always_apply, p) @TRANSFORMS.register_module() class Rotate(A.Rotate): - - def __init__(self, - limit=90, - interpolation=cv2.INTER_LINEAR, - border_mode=cv2.BORDER_REFLECT_101, - value=None, - mask_value=None, - rotate_method="largest_box", - crop_border=False, - always_apply=False, - p=0.5): - super().__init__(limit, interpolation, border_mode, value, mask_value, - rotate_method, crop_border, always_apply, p) + def __init__( + self, + limit=90, + interpolation=cv2.INTER_LINEAR, + border_mode=cv2.BORDER_REFLECT_101, + value=None, + 
mask_value=None, + rotate_method="largest_box", + crop_border=False, + always_apply=False, + p=0.5, + ): + super().__init__( + limit, interpolation, border_mode, value, mask_value, rotate_method, crop_border, always_apply, p + ) @TRANSFORMS.register_module() class Affine(A.Affine): - - def __init__(self, - scale: Optional[Union[float, Sequence[float], dict]] = None, - translate_percent: Optional[Union[float, Sequence[float], - dict]] = None, - translate_px: Optional[Union[int, Sequence[int], - dict]] = None, - rotate: Optional[Union[float, Sequence[float]]] = None, - shear: Optional[Union[float, Sequence[float], dict]] = None, - interpolation: int = cv2.INTER_LINEAR, - mask_interpolation: int = cv2.INTER_NEAREST, - cval: Union[int, float, Sequence[int], Sequence[float]] = 0, - cval_mask: Union[int, float, Sequence[int], - Sequence[float]] = 0, - mode: int = cv2.BORDER_CONSTANT, - fit_output: bool = False, - keep_ratio: bool = False, - always_apply: bool = False, - p: float = 0.5): - super().__init__(scale, translate_percent, translate_px, rotate, shear, - interpolation, mask_interpolation, cval, cval_mask, - mode, fit_output, keep_ratio, always_apply, p) + def __init__( + self, + scale: Optional[Union[float, Sequence[float], dict]] = None, + translate_percent: Optional[Union[float, Sequence[float], dict]] = None, + translate_px: Optional[Union[int, Sequence[int], dict]] = None, + rotate: Optional[Union[float, Sequence[float]]] = None, + shear: Optional[Union[float, Sequence[float], dict]] = None, + interpolation: int = cv2.INTER_LINEAR, + mask_interpolation: int = cv2.INTER_NEAREST, + cval: Union[int, float, Sequence[int], Sequence[float]] = 0, + cval_mask: Union[int, float, Sequence[int], Sequence[float]] = 0, + mode: int = cv2.BORDER_CONSTANT, + fit_output: bool = False, + keep_ratio: bool = False, + always_apply: bool = False, + p: float = 0.5, + ): + super().__init__( + scale, + translate_percent, + translate_px, + rotate, + shear, + interpolation, + mask_interpolation, + cval, + cval_mask, + mode, + fit_output, + keep_ratio, + always_apply, + p, + ) @TRANSFORMS.register_module() class ChannelShuffle(A.ChannelShuffle): - def __init__(self, always_apply: bool = False, p: float = 0.5): super().__init__(always_apply, p) @TRANSFORMS.register_module() class OneOf(A.OneOf): - def __init__(self, transforms, p: float = 0.5): super().__init__(transforms, p) @TRANSFORMS.register_module() class IAAAdditiveGaussianNoise(A.IAAAdditiveGaussianNoise): - - def __init__(self, - loc=0, - scale=..., - per_channel=False, - always_apply=False, - p=0.5): + def __init__(self, loc=0, scale=..., per_channel=False, always_apply=False, p=0.5): super().__init__(loc, scale, per_channel, always_apply, p) @TRANSFORMS.register_module() class GaussNoise(A.GaussNoise): - - def __init__(self, - var_limit=..., - mean=0, - per_channel=True, - always_apply=False, - p=0.5): + def __init__(self, var_limit=..., mean=0, per_channel=True, always_apply=False, p=0.5): super().__init__(var_limit, mean, per_channel, always_apply, p) @TRANSFORMS.register_module() class Blur(A.Blur): - - def __init__(self, - blur_limit=7, - always_apply: bool = False, - p: float = 0.5): + def __init__(self, blur_limit=7, always_apply: bool = False, p: float = 0.5): super().__init__(blur_limit, always_apply, p) @TRANSFORMS.register_module() class MotionBlur(A.MotionBlur): - - def __init__(self, - blur_limit=7, - allow_shifted: bool = True, - always_apply: bool = False, - p: float = 0.5): + def __init__(self, blur_limit=7, allow_shifted: bool = True, 
always_apply: bool = False, p: float = 0.5): super().__init__(blur_limit, allow_shifted, always_apply, p) @TRANSFORMS.register_module() class MedianBlur(A.MedianBlur): - def __init__(self, blur_limit, always_apply: bool = False, p: float = 0.5): super().__init__(blur_limit, always_apply, p) @TRANSFORMS.register_module() class SafeRotate(A.SafeRotate): - - def __init__(self, - limit: Union[float, Tuple[float, float]] = 90, - interpolation: int = cv2.INTER_LINEAR, - border_mode: int = cv2.BORDER_REFLECT_101, - value=None, - mask_value: Optional[Union[int, float, Sequence[int], - Sequence[float]]] = None, - always_apply: bool = False, - p: float = 0.5): - super().__init__(limit, interpolation, border_mode, value, mask_value, - always_apply, p) + def __init__( + self, + limit: Union[float, Tuple[float, float]] = 90, + interpolation: int = cv2.INTER_LINEAR, + border_mode: int = cv2.BORDER_REFLECT_101, + value=None, + mask_value: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, + always_apply: bool = False, + p: float = 0.5, + ): + super().__init__(limit, interpolation, border_mode, value, mask_value, always_apply, p) @TRANSFORMS.register_module() class RandomCrop(A.RandomCrop): - def __init__(self, height, width, always_apply=False, p=1): super().__init__(height, width, always_apply, p) @TRANSFORMS.register_module() class Resize(A.Resize): - - def __init__(self, - height, - width, - interpolation=cv2.INTER_LINEAR, - always_apply=False, - p=1): + def __init__(self, height, width, interpolation=cv2.INTER_LINEAR, always_apply=False, p=1): super().__init__(height, width, interpolation, always_apply, p) @TRANSFORMS.register_module() class ToGray(A.ToGray): - def __init__(self, always_apply: bool = False, p: float = 0.5): super().__init__(always_apply, p) @TRANSFORMS.register_module() class CoarseDropout(A.CoarseDropout): - - def __init__(self, - max_holes: int = 8, - max_height: int = 8, - max_width: int = 8, - min_holes: Optional[int] = None, - min_height: Optional[int] = None, - min_width: Optional[int] = None, - fill_value: int = 0, - mask_fill_value: Optional[int] = None, - always_apply: bool = False, - p: float = 0.5): - super().__init__(max_holes, max_height, max_width, min_holes, - min_height, min_width, fill_value, mask_fill_value, - always_apply, p) + def __init__( + self, + max_holes: int = 8, + max_height: int = 8, + max_width: int = 8, + min_holes: Optional[int] = None, + min_height: Optional[int] = None, + min_width: Optional[int] = None, + fill_value: int = 0, + mask_fill_value: Optional[int] = None, + always_apply: bool = False, + p: float = 0.5, + ): + super().__init__( + max_holes, + max_height, + max_width, + min_holes, + min_height, + min_width, + fill_value, + mask_fill_value, + always_apply, + p, + ) @TRANSFORMS.register_module() class CoraseDropout(A.CoarseDropout): - - def __init__(self, - max_holes: int = 8, - max_height: int = 8, - max_width: int = 8, - min_holes: Optional[int] = None, - min_height: Optional[int] = None, - min_width: Optional[int] = None, - fill_value: int = 0, - mask_fill_value: Optional[int] = None, - always_apply: bool = False, - p: float = 0.5): - super().__init__(max_holes, max_height, max_width, min_holes, - min_height, min_width, fill_value, mask_fill_value, - always_apply, p) + def __init__( + self, + max_holes: int = 8, + max_height: int = 8, + max_width: int = 8, + min_holes: Optional[int] = None, + min_height: Optional[int] = None, + min_width: Optional[int] = None, + fill_value: int = 0, + mask_fill_value: Optional[int] = None, 
+ always_apply: bool = False, + p: float = 0.5, + ): + super().__init__( + max_holes, + max_height, + max_width, + min_holes, + min_height, + min_width, + fill_value, + mask_fill_value, + always_apply, + p, + ) @TRANSFORMS.register_module() class RandomResizedCrop(A.RandomResizedCrop): - def __init__( self, height, @@ -236,18 +224,10 @@ def __init__( always_apply=False, p=1.0, ): - super().__init__(height, width, scale, ratio, interpolation, - always_apply, p) + super().__init__(height, width, scale, ratio, interpolation, always_apply, p) @TRANSFORMS.register_module() class RandomBrightnessContrast(A.RandomBrightnessContrast): - - def __init__(self, - brightness_limit=0.2, - contrast_limit=0.2, - brightness_by_max=True, - always_apply=False, - p=0.5): - super().__init__(brightness_limit, contrast_limit, brightness_by_max, - always_apply, p) \ No newline at end of file + def __init__(self, brightness_limit=0.2, contrast_limit=0.2, brightness_by_max=True, always_apply=False, p=0.5): + super().__init__(brightness_limit, contrast_limit, brightness_by_max, always_apply, p) diff --git a/edgelab/datasets/pipelines/audio_augs.py b/edgelab/datasets/pipelines/audio_augs.py index 540297b9..c60c6989 100644 --- a/edgelab/datasets/pipelines/audio_augs.py +++ b/edgelab/datasets/pipelines/audio_augs.py @@ -42,8 +42,8 @@ def rir(self, mic, n, r, rm, src): yj = srcs * src[1] + rms * rm[1] - mic[1] zk = srcs * src[2] + rms * rm[2] - mic[2] [i, j, k] = np.meshgrid(xi, yj, zk) - d = np.sqrt(i ** 2 + j ** 2 + k ** 2) - t = np.round(self.fs * d / 343.) + 1 + d = np.sqrt(i**2 + j**2 + k**2) + t = np.round(self.fs * d / 343.0) + 1 [e, f, g] = np.meshgrid(nn, nn, nn) c = np.power(r, np.abs(e) + np.abs(f) + np.abs(g)) e = c / d @@ -82,8 +82,9 @@ def __call__(self, sample): h = torch.from_numpy(h).float() sample = sample[None, None, :] sample = F.pad(sample, (h.shape[-1] // 2, h.shape[-1] // 2), "reflect") - sample = F.conv1d(sample, h[None, None, :], bias=None, stride=1, padding=0, dilation=1, - groups=sample.shape[1]) + sample = F.conv1d( + sample, h[None, None, :], bias=None, stride=1, padding=0, dilation=1, groups=sample.shape[1] + ) return sample, h @@ -160,7 +161,7 @@ def __call__(self, sample): if random.random() < self.p: max_start = sample.size(-1) - self.n_mask idx_rand = random.randint(0, max_start) - sample[idx_rand:idx_rand + self.n_mask] = torch.randn(self.n_mask) * 1e-6 + sample[idx_rand : idx_rand + self.n_mask] = torch.randn(self.n_mask) * 1e-6 return sample @@ -195,7 +196,12 @@ def __init__(self, p=0.5): def __call__(self, sample): if random.random() < self.p: - sample.data = torch.flip(sample.data, dims=[-1, ]) + sample.data = torch.flip( + sample.data, + dims=[ + -1, + ], + ) return sample @@ -217,9 +223,9 @@ def __init__(self, p=0.5, snr_db=30): def __call__(self, sample): if random.random() < self.p: - s = torch.sqrt(torch.mean(sample ** 2)) + s = torch.sqrt(torch.mean(sample**2)) snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - self.min_snr_db) - sgm = s * 10 ** (-snr_db / 20.) + sgm = s * 10 ** (-snr_db / 20.0) w = torch.randn_like(sample).mul_(sgm) sample.add_(w) return sample @@ -233,9 +239,9 @@ def __init__(self, snr_db=35, p=0.5): def __call__(self, sample): if random.random() < self.p: - s = torch.sqrt(torch.mean(sample ** 2)) + s = torch.sqrt(torch.mean(sample**2)) snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - self.min_snr_db) - sgm = s * 10 ** (-snr_db / 20.) 
* np.sqrt(3) + sgm = s * 10 ** (-snr_db / 20.0) * np.sqrt(3) w = torch.rand_like(sample).mul_(2 * sgm).add_(-sgm) sample.add_(w) return sample @@ -249,7 +255,7 @@ def __init__(self, snr_db=35, p=0.5): def __call__(self, sample): if random.random() < self.p: - s = torch.sqrt(torch.mean(sample ** 2)) + s = torch.sqrt(torch.mean(sample**2)) n = sample.shape[-1] w = torch.randn(n) nn = n // 2 + 1 @@ -260,7 +266,7 @@ def __call__(self, sample): w = torch.fft.ifft(W).real w.add_(w.mean()).div_(w.std()) snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - self.min_snr_db) - sgm = s * 10 ** (-snr_db / 20.) + sgm = s * 10 ** (-snr_db / 20.0) sample.add_(w.mul_(sgm)) return sample @@ -273,7 +279,7 @@ def __init__(self, p=0.5, snr_db=35): def __call__(self, sample): if random.random() < self.p: - s = torch.sqrt(torch.mean(sample ** 2)) + s = torch.sqrt(torch.mean(sample**2)) n = sample.shape[-1] w = torch.randn(n) nn = n // 2 + 1 @@ -284,7 +290,7 @@ def __call__(self, sample): w = torch.fft.ifft(W).real w.add_(w.mean()).div_(w.std()) snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - self.min_snr_db) - sgm = s * 10 ** (-snr_db / 20.) + sgm = s * 10 ** (-snr_db / 20.0) sample.add_(w.mul_(sgm)) return sample @@ -297,7 +303,7 @@ def __init__(self, p=0.5, snr_db=35): def __call__(self, sample): if random.random() < self.p: - s = torch.sqrt(torch.mean(sample ** 2)) + s = torch.sqrt(torch.mean(sample**2)) n = sample.shape[-1] w = torch.randn(n) nn = n // 2 + 1 @@ -308,7 +314,7 @@ def __call__(self, sample): w = torch.fft.ifft(W).real w.add_(w.mean()).div_(w.std()) snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - self.min_snr_db) - sgm = s * 10 ** (-snr_db / 20.) + sgm = s * 10 ** (-snr_db / 20.0) sample.add_(w.mul_(sgm)) return sample @@ -321,7 +327,7 @@ def __init__(self, p=0.5, snr_db=35): def __call__(self, sample): if random.random() < self.p: - s = torch.sqrt(torch.mean(sample ** 2)) + s = torch.sqrt(torch.mean(sample**2)) n = sample.shape[-1] w = torch.randn(n) nn = n // 2 + 1 @@ -332,7 +338,7 @@ def __call__(self, sample): w = torch.fft.ifft(W).real w.add_(w.mean()).div_(w.std()) snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - self.min_snr_db) - sgm = s * 10 ** (-snr_db / 20.) + sgm = s * 10 ** (-snr_db / 20.0) sample.add_(w.mul_(sgm)) return sample @@ -350,16 +356,20 @@ def __call__(self, sample): f_shift = torch.randn(1).mul_(self.sgm * df) t = torch.arange(0, self.fft_params['win_len'][win_idx], 1).float() w = torch.real(torch.exp(-1j * 2 * np.pi * t * f_shift)) - X = torch.stft(sample, - win_length=self.fft_params['win_len'][win_idx], - hop_length=self.fft_params['hop_len'][win_idx], - n_fft=self.fft_params['n_fft'][win_idx], - window=w, - return_complex=True) - sample = torch.istft(X, - win_length=self.fft_params['win_len'][win_idx], - hop_length=self.fft_params['hop_len'][win_idx], - n_fft=self.fft_params['n_fft'][win_idx]) + X = torch.stft( + sample, + win_length=self.fft_params['win_len'][win_idx], + hop_length=self.fft_params['hop_len'][win_idx], + n_fft=self.fft_params['n_fft'][win_idx], + window=w, + return_complex=True, + ) + sample = torch.istft( + X, + win_length=self.fft_params['win_len'][win_idx], + hop_length=self.fft_params['hop_len'][win_idx], + n_fft=self.fft_params['n_fft'][win_idx], + ) return sample @@ -377,9 +387,9 @@ def __call__(self, sample): f = self.max_freq * torch.rand(1) + 3 * torch.randn(1) if random.random() < self.p: snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - self.min_snr_db) - t = n * 1. 
/ self.fs - s = (sample ** 2).mean().sqrt() - sgm = s * np.sqrt(2) * 10 ** (-snr_db / 20.) + t = n * 1.0 / self.fs + s = (sample**2).mean().sqrt() + sgm = s * np.sqrt(2) * 10 ** (-snr_db / 20.0) b = sgm * torch.sin(2 * np.pi * f * t + torch.rand(1) * np.pi) sample.add_(b) @@ -399,7 +409,7 @@ def __call__(self, sample): self.max_len = sample.shape[-1] // 10 idx = random.randint(0, self.max_len) amp = torch.FloatTensor(1).uniform_(self.low, self.high) - sample[idx: idx + self.max_len].mul_(amp) + sample[idx : idx + self.max_len].mul_(amp) return sample @@ -413,18 +423,22 @@ def __call__(self, sample): if random.random() < self.p: win_idx = random.randint(0, len(self.fft_params['win_len']) - 1) sgm_noise = self.sgm + 0.01 * torch.rand(1) - X = torch.stft(sample, - win_length=self.fft_params['win_len'][win_idx], - hop_length=self.fft_params['hop_len'][win_idx], - n_fft=self.fft_params['n_fft'][win_idx], - return_complex=True) + X = torch.stft( + sample, + win_length=self.fft_params['win_len'][win_idx], + hop_length=self.fft_params['hop_len'][win_idx], + n_fft=self.fft_params['n_fft'][win_idx], + return_complex=True, + ) w = sgm_noise * torch.rand_like(X) phn = torch.exp(1j * w) X.mul_(phn) - sample = torch.istft(X, - win_length=self.fft_params['win_len'][win_idx], - hop_length=self.fft_params['hop_len'][win_idx], - n_fft=self.fft_params['n_fft'][win_idx]) + sample = torch.istft( + X, + win_length=self.fft_params['win_len'][win_idx], + hop_length=self.fft_params['hop_len'][win_idx], + n_fft=self.fft_params['n_fft'][win_idx], + ) return sample @@ -446,7 +460,7 @@ def __call__(self, sample): @TRANSFORMS.register_module() -class AudioAugs(): +class AudioAugs: def __init__(self, k_augs): self.noise_vec = ['awgn', 'abgn', 'apgn', 'argn', 'avgn', 'aun', 'phn', 'sine'] self.k_augs = k_augs @@ -517,5 +531,3 @@ def __call__(self, sample, fs, p=0.5, snr_db=30, **kwargs): plt.plot(x) plt.plot(y[0].view(-1), 'r') plt.show() - - diff --git a/edgelab/datasets/pipelines/composition.py b/edgelab/datasets/pipelines/composition.py index fd05e661..12c9bc74 100644 --- a/edgelab/datasets/pipelines/composition.py +++ b/edgelab/datasets/pipelines/composition.py @@ -10,27 +10,28 @@ class AlbCompose(A.Compose): """ - The packaging of the compose class of alb, the purpose + A wrapper around the albumentations Compose class whose purpose is to parse the pipeline in the configuration file - + Args: - tranforms(list):The packaging of the compose class of alb, the purpose is to + transforms(list): The list of transform configs to build, used to parse the pipeline in the configuration file bbox_params (BboxParams): Parameters for bounding boxes transforms keypoint_params (KeypointParams): Parameters for keypoints transforms - additional_targets (dict): Dict with keys - new target name, values - old + additional_targets (dict): Dict with keys - new target name, values - old target name. ex: {'image2': 'image'} p (float): probability of applying all list of transforms. Default: 1.0.
- + """ - def __init__(self, - transforms: Sequence[Dict], - bbox_params: Optional[Union[dict, "BboxParams"]] = None, - keypoint_params: Optional[Union[dict, - "KeypointParams"]] = None, - additional_targets: Optional[Dict[str, str]] = None, - p: float = 1): + def __init__( + self, + transforms: Sequence[Dict], + bbox_params: Optional[Union[dict, "BboxParams"]] = None, + keypoint_params: Optional[Union[dict, "KeypointParams"]] = None, + additional_targets: Optional[Dict[str, str]] = None, + p: float = 1, + ): pose_trans = [] for transform in transforms: if isinstance(transform, dict): @@ -39,13 +40,14 @@ def __init__(self, elif isinstance(transforms, (BaseCompose, BasicTransform)): pose_trans.append(transform) else: - raise TypeError('transform must be callable or a dict, but got' - f' {type(transform)}') + raise TypeError('transform must be callable or a dict, but got' f' {type(transform)}') - if isinstance(keypoint_params,str): + if isinstance(keypoint_params, str): keypoint_params = A.KeypointParams(keypoint_params) - super().__init__(transforms=pose_trans, - bbox_params=bbox_params, - keypoint_params=keypoint_params, - additional_targets=additional_targets, - p=p) + super().__init__( + transforms=pose_trans, + bbox_params=bbox_params, + keypoint_params=keypoint_params, + additional_targets=additional_targets, + p=p, + ) diff --git a/edgelab/datasets/pipelines/transforms.py b/edgelab/datasets/pipelines/transforms.py index da168731..dffe2038 100644 --- a/edgelab/datasets/pipelines/transforms.py +++ b/edgelab/datasets/pipelines/transforms.py @@ -6,18 +6,16 @@ @TRANSFORMS.register_module() class Bbox2FomoMask(BaseTransform): - def __init__( self, - downsample_factor: Tuple[int, ...] = (8, ), + downsample_factor: Tuple[int, ...] = (8,), classes_num: int = 80, ) -> None: super().__init__() self.downsample_factor = downsample_factor self.classes_num = classes_num - def transform(self, - results: Dict) -> Optional[Union[Dict, Tuple[List, List]]]: + def transform(self, results: Dict) -> Optional[Union[Dict, Tuple[List, List]]]: H, W = results['img_shape'] bbox = results['gt_bboxes'] print(bbox) @@ -38,6 +36,6 @@ def build_target(self, targets, shape): for i in targets: h, w = int(i[3].item() * H), int(i[2].item() * W) target_data[int(i[0]), h, w, 0] = 0 # background - target_data[int(i[0]), h, w, int(i[1])] = 1 #label + target_data[int(i[0]), h, w, int(i[1])] = 1 # label - return target_data \ No newline at end of file + return target_data diff --git a/edgelab/datasets/sensordataset.py b/edgelab/datasets/sensordataset.py index 380fbb9c..62b293c7 100644 --- a/edgelab/datasets/sensordataset.py +++ b/edgelab/datasets/sensordataset.py @@ -12,42 +12,32 @@ @DATASETS.register_module() class SensorDataset(CustomDataset): CLASSES = [] - - def __init__(self, - ann_file: str = '', - metainfo: Optional[dict] = None, - data_root: str = '', - data_prefix: Union[str, dict] = '', - multi_label: bool = False, - **kwargs): - + + def __init__( + self, + ann_file: str = '', + metainfo: Optional[dict] = None, + data_root: str = '', + data_prefix: Union[str, dict] = '', + multi_label: bool = False, + **kwargs, + ): if multi_label: - raise NotImplementedError( - 'The `multi_label` option is not supported by now.') + raise NotImplementedError('The `multi_label` option is not supported yet.') self.multi_label = multi_label self.data_root = data_root self.ann_file = ann_file self.data_prefix = data_prefix - - self.info_lables = json.load( - open(os.path.join(self.data_root, self.data_prefix, self.ann_file))) - + + 
self.info_lables = json.load(open(os.path.join(self.data_root, self.data_prefix, self.ann_file))) for i in range(len(self.info_lables['files'])): if self.info_lables['files'][i]['label']['label'] not in self.CLASSES: - self.CLASSES.append( - self.info_lables['files'][i]['label']['label']) - - super().__init__( - ann_file=ann_file, - metainfo=metainfo, - data_root=data_root, - data_prefix=data_prefix, - **kwargs) - + self.CLASSES.append(self.info_lables['files'][i]['label']['label']) - def get_classes(self, classes=None): + super().__init__(ann_file=ann_file, metainfo=metainfo, data_root=data_root, data_prefix=data_prefix, **kwargs) + def get_classes(self, classes=None): if classes is not None: return classes @@ -55,9 +45,8 @@ def get_classes(self, classes=None): for i in range(len(self.info_lables['files'])): if self.info_lables['files'][i]['label']['label'] not in class_names: - class_names.append( - self.info_lables['files'][i]['label']['label']) - + class_names.append(self.info_lables['files'][i]['label']['label']) + return class_names def _find_samples(self): @@ -75,7 +64,6 @@ def _find_samples(self): return samples def load_data_list(self): - samples = [] for i in range(len(self.info_lables['files'])): filename = self.info_lables['files'][i]['path'] @@ -86,13 +74,13 @@ def load_data_list(self): gt_label = j break samples.append((filename, gt_label)) - + data_list = [] for filename, gt_label in samples: img_path = os.path.join(self.img_prefix, filename) info = {'file_path': img_path, 'gt_label': int(gt_label)} data_list.append(info) - + return data_list def is_valid_file(self, filename: str) -> bool: diff --git a/edgelab/datasets/speechcommand.py b/edgelab/datasets/speechcommand.py index 30704533..79efe467 100644 --- a/edgelab/datasets/speechcommand.py +++ b/edgelab/datasets/speechcommand.py @@ -16,23 +16,55 @@ @DATASETS.register_module() class Speechcommand(Dataset): CLASSES = [ - 'backward', 'bed', 'bird', 'cat', 'dog', 'down', 'eight', 'five', - 'follow', 'forward', 'four', 'go', 'happy', 'house', 'learn', 'left', - 'marvin', 'nine', 'no', 'off', 'on', 'one', 'right', 'seven', 'sheila', - 'six', 'stop', 'three', 'tree', 'two', 'up', 'visual', 'wow', 'yes', - 'zero' + 'backward', + 'bed', + 'bird', + 'cat', + 'dog', + 'down', + 'eight', + 'five', + 'follow', + 'forward', + 'four', + 'go', + 'happy', + 'house', + 'learn', + 'left', + 'marvin', + 'nine', + 'no', + 'off', + 'on', + 'one', + 'right', + 'seven', + 'sheila', + 'six', + 'stop', + 'three', + 'tree', + 'two', + 'up', + 'visual', + 'wow', + 'yes', + 'zero', ] - def __init__(self, - root, - mode, - segment_length, - sampling_rate, - test_mode=None, - pipeline=None, - use_background=False, - lower_volume=False, - words=['yes', 'off', 'on', 'no']): + def __init__( + self, + root, + mode, + segment_length, + sampling_rate, + test_mode=None, + pipeline=None, + use_background=False, + lower_volume=False, + words=['yes', 'off', 'on', 'no'], + ): self.sampling_rate = sampling_rate self.segment_length = segment_length self.lower_volume = lower_volume @@ -54,15 +86,9 @@ def __init__(self, self.pipeline = TRANSFORMS.build(pipeline) self.use_background = use_background if self.use_background: - self.bg_aug = glob.glob(root + - f"{sep}_background_noise_{sep}*.wav") - self.bg_aug = [ - f for f in self.bg_aug if 'noise' not in os.path.basename(f) - ] - self.bg_aug = [ - torch.from_numpy(torchaudio.load(f)[0][0].detach().numpy()) - for f in self.bg_aug - ] + self.bg_aug = glob.glob(root + f"{sep}_background_noise_{sep}*.wav") + self.bg_aug = 
[f for f in self.bg_aug if 'noise' not in os.path.basename(f)] + self.bg_aug = [torch.from_numpy(torchaudio.load(f)[0][0].detach().numpy()) for f in self.bg_aug] self.bg_aug = [x for x in self.bg_aug] def load_meta_file(self, root, f_meta): @@ -70,17 +96,15 @@ def load_meta_file(self, root, f_meta): with open(filepath) as fileobj: # fnames = [os.path.join(root, line.strip()) for line in fileobj if os.path.basename(os.path.dirname(os.path.join(root, line.strip()))) in ['yes','off','on','no']] fnames = [ - os.path.join(root, line.strip()) for line in fileobj - if os.path.basename( - os.path.dirname(os.path.join(root, line.strip()))) in - self.words + os.path.join(root, line.strip()) + for line in fileobj + if os.path.basename(os.path.dirname(os.path.join(root, line.strip()))) in self.words ] return fnames def _get_labels(self, root): f_names = glob.glob(root + f"{sep}**{sep}*.wav") - self.labels = sorted( - list(set([f.split(f'{os.path.sep}')[-2] for f in f_names]))) + self.labels = sorted(list(set([f.split(f'{os.path.sep}')[-2] for f in f_names]))) self.labels = sorted([l for l in self.labels if l in self.words]) def __getitem__(self, index): @@ -91,26 +115,23 @@ def __getitem__(self, index): label = self.label2idx[label] audio, sampling_rate = torchaudio.load(fname, normalize=True) - self.trans = torch.nn.Sequential( - torchaudio.transforms.Resample(sampling_rate, 8000, rolloff=0.5)) + self.trans = torch.nn.Sequential(torchaudio.transforms.Resample(sampling_rate, 8000, rolloff=0.5)) if self.lower_volume: - self.trans.add_module( - 'Vol', torchaudio.transforms.Vol(0.5, gain_type='db')) + self.trans.add_module('Vol', torchaudio.transforms.Vol(0.5, gain_type='db')) audio = self.trans(audio) audio.squeeze_() audio = (audio / audio.__abs__().max()).float() - assert ( - "sampling rate of the file is not as configured in dataset, will cause slow fetch {}" - .format(sampling_rate != self.sampling_rate)) + assert "sampling rate of the file is not as configured in dataset, will cause slow fetch {}".format( + sampling_rate != self.sampling_rate + ) if audio.shape[0] >= self.segment_length: max_audio_start = audio.size(0) - self.segment_length audio_start = random.randint(0, max_audio_start) - audio = audio[audio_start:audio_start + self.segment_length] + audio = audio[audio_start : audio_start + self.segment_length] else: - audio = F.pad(audio, (0, self.segment_length - audio.size(0)), - "constant").data + audio = F.pad(audio, (0, self.segment_length - audio.size(0)), "constant").data if self.use_background and random.random() < 0.5: i = random.randint(0, len(self.bg_aug) - 1) @@ -119,13 +140,12 @@ def __getitem__(self, index): if bg.shape[0] >= self.segment_length: max_bg_start = bg.size(0) - self.segment_length bg_start = random.randint(0, max_bg_start) - bg = bg[bg_start:bg_start + self.segment_length] + bg = bg[bg_start : bg_start + self.segment_length] else: - bg = F.pad(bg, (0, self.segment_length - bg.size(0)), - "constant").data + bg = F.pad(bg, (0, self.segment_length - bg.size(0)), "constant").data bg_level = (bg**2).mean().sqrt() snr_db = 20 + random.random() * 5 - sgm = s_level * 10**(-snr_db / 10) + sgm = s_level * 10 ** (-snr_db / 10) bg /= bg_level audio.add_(bg * sgm) @@ -138,10 +158,7 @@ def __len__(self): def get_training_list(self, root): f_names = glob.glob(root + f"{sep}**{sep}*.wav") - f_names = [ - f for f in f_names - if os.path.basename(os.path.dirname(f)) in self.words - ] + f_names = [f for f in f_names if os.path.basename(os.path.dirname(f)) in self.words] # 
print(f_names[:100]) val = self.load_meta_file(root, f"{sep}validation_list.txt") test = self.load_meta_file(root, f"{sep}testing_list.txt") @@ -151,9 +168,6 @@ def get_training_list(self, root): def evaluate(self, results, **kwargs): return { - 'loss': - torch.mean(torch.tensor([i['loss'] - for i in results])).cpu().item(), - 'acc': - torch.mean(torch.tensor([i['acc'] for i in results])).cpu().item() - } \ No newline at end of file + 'loss': torch.mean(torch.tensor([i['loss'] for i in results])).cpu().item(), + 'acc': torch.mean(torch.tensor([i['acc'] for i in results])).cpu().item(), + } diff --git a/edgelab/datasets/transforms/__init__.py b/edgelab/datasets/transforms/__init__.py index 98363588..797403bb 100644 --- a/edgelab/datasets/transforms/__init__.py +++ b/edgelab/datasets/transforms/__init__.py @@ -1,4 +1,4 @@ -from .formatting import (PackSensorInputs) -from .loading import (LoadSensorFromFile) +from .formatting import PackSensorInputs +from .loading import LoadSensorFromFile -__all__ = ['PackSensorInputs', 'LoadSensorFromFile'] \ No newline at end of file +__all__ = ['PackSensorInputs', 'LoadSensorFromFile'] diff --git a/edgelab/datasets/transforms/formatting.py b/edgelab/datasets/transforms/formatting.py index c0d06a8c..b6a00133 100644 --- a/edgelab/datasets/transforms/formatting.py +++ b/edgelab/datasets/transforms/formatting.py @@ -11,6 +11,7 @@ from edgelab.registry import TRANSFORMS from mmcls.structures import ClsDataSample + def to_tensor(data): """Convert objects of various python types to :obj:`torch.Tensor`. @@ -31,31 +32,32 @@ def to_tensor(data): raise TypeError( f'Type {type(data)} cannot be converted to tensor.' 'Supported types are: `numpy.ndarray`, `torch.Tensor`, ' - '`Sequence`, `int` and `float`') + '`Sequence`, `int` and `float`' + ) @TRANSFORMS.register_module() class PackSensorInputs(BaseTransform): def __init__(self, meta_keys={'sample_idx', 'file_path', 'sensors'}): self.meta_keys = meta_keys - - def transform(self, results: dict) -> dict: + + def transform(self, results: dict) -> dict: """Pack sensor inputs into a single tensor.""" packed_results = dict() if 'data' in results: data = results['data'] packed_results['inputs'] = to_tensor(data).to(dtype=torch.float32) - + data_sample = ClsDataSample() - + if 'gt_label' in results: gt_label = results['gt_label'] data_sample.set_gt_label(gt_label) - + if self.meta_keys is not None: data_meta = {k: results[k] for k in self.meta_keys if k in results} data_sample.set_metainfo(data_meta) - + packed_results['data_samples'] = data_sample - + return packed_results diff --git a/edgelab/datasets/transforms/loading.py b/edgelab/datasets/transforms/loading.py index 42b5da36..d0bb3609 100644 --- a/edgelab/datasets/transforms/loading.py +++ b/edgelab/datasets/transforms/loading.py @@ -9,38 +9,38 @@ from mmcv.transforms.base import BaseTransform from edgelab.registry import TRANSFORMS + @TRANSFORMS.register_module() class LoadSensorFromFile(BaseTransform): """Load an Sensor sample data from file. - + Required keys: - + - "file_path": Path to the Sensor sample data file. - + Modified keys: - + - "data": Sensor sample data loaded from the file. - "sensor": Sensor type and unit loaded from the file. 
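A note on the reformatted Speechcommand.__getitem__ above: the assert there tests a non-empty format string, so it always passes and the sampling-rate mismatch it describes is never actually caught. Separately, the background-noise mix scales by 10 ** (-snr_db / 10), while every transform in audio_augs.py uses 10 ** (-snr_db / 20) for amplitude scaling. A minimal sketch of what the check was presumably meant to express (the helper name is illustrative, not part of the file):

def check_sampling_rate(file_rate: int, expected_rate: int) -> None:
    # The original asserts the message string itself, which is always truthy.
    # The presumably intended condition compares the two rates:
    assert file_rate == expected_rate, (
        f'sampling rate of the file ({file_rate}) is not as configured in '
        f'dataset ({expected_rate}), will cause slow fetch'
    )

check_sampling_rate(8000, 8000)      # passes
# check_sampling_rate(44100, 8000)   # would raise AssertionError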
- + """ - - def __init__(self, file_client_args: Optional[dict] = None, backend_args: Optional[dict] = None) -> None: + + def __init__(self, file_client_args: Optional[dict] = None, backend_args: Optional[dict] = None) -> None: self.file_client_args: Optional[dict] = None self.backend_args: Optional[dict] = None if file_client_args is not None: warnings.warn( - '"file_client_args" will be deprecated in future. ' - 'Please use "backend_args" instead', DeprecationWarning) + '"file_client_args" will be deprecated in future. ' 'Please use "backend_args" instead', + DeprecationWarning, + ) if backend_args is not None: - raise ValueError( - '"file_client_args" and "backend_args" cannot be set ' - 'at the same time.') + raise ValueError('"file_client_args" and "backend_args" cannot be set ' 'at the same time.') self.file_client_args = file_client_args.copy() if backend_args is not None: self.backend_args = backend_args.copy() - + def transform(self, results: dict) -> Optional[dict]: """Functions to load Axes. @@ -51,17 +51,15 @@ def transform(self, results: dict) -> Optional[dict]: Returns: dict: The dict contains loaded image and meta information. """ - + filename = results['file_path'] - + try: if self.file_client_args is not None: - file_client = fileio.FileClient.infer_client( - self.file_client_args, filename) + file_client = fileio.FileClient.infer_client(self.file_client_args, filename) lable_bytes = file_client.get(filename) else: - lable_bytes = fileio.get( - filename, backend_args=self.backend_args) + lable_bytes = fileio.get(filename, backend_args=self.backend_args) lable = json.loads(lable_bytes) sensors = lable['payload']["sensors"] data = np.array([], np.float32) @@ -69,9 +67,8 @@ def transform(self, results: dict) -> Optional[dict]: data = np.append(data, value) except Exception as e: raise e - + results['data'] = data results['sensors'] = sensors - + return results - \ No newline at end of file diff --git a/edgelab/datasets/utils/audio_augs.py b/edgelab/datasets/utils/audio_augs.py index 13d1ab61..124c5fd6 100644 --- a/edgelab/datasets/utils/audio_augs.py +++ b/edgelab/datasets/utils/audio_augs.py @@ -8,7 +8,6 @@ class AugBasic: - def __init__(self, fs): super().__init__() self.fs = fs @@ -30,7 +29,6 @@ def __init__(self, fs): class RandomRIR(AugBasic): - def __init__(self, fs, p=0.5): self.p = p self.fs = fs @@ -44,7 +42,7 @@ def rir(self, mic, n, r, rm, src): zk = srcs * src[2] + rms * rm[2] - mic[2] [i, j, k] = np.meshgrid(xi, yj, zk) d = np.sqrt(i**2 + j**2 + k**2) - t = np.round(self.fs * d / 343.) 
+ 1 + t = np.round(self.fs * d / 343.0) + 1 [e, f, g] = np.meshgrid(nn, nn, nn) c = np.power(r, np.abs(e) + np.abs(f) + np.abs(g)) e = c / d @@ -82,20 +80,14 @@ def __call__(self, sample): h = self.rir(mic, n, r, rm, src) h = torch.from_numpy(h).float() sample = sample[None, None, :] - sample = F.pad(sample, (h.shape[-1] // 2, h.shape[-1] // 2), - "reflect") - sample = F.conv1d(sample, - h[None, None, :], - bias=None, - stride=1, - padding=0, - dilation=1, - groups=sample.shape[1]) + sample = F.pad(sample, (h.shape[-1] // 2, h.shape[-1] // 2), "reflect") + sample = F.conv1d( + sample, h[None, None, :], bias=None, stride=1, padding=0, dilation=1, groups=sample.shape[1] + ) return sample, h class RandomLPHPFilter(AugBasic): - def __init__(self, fs, p=0.5, fc_lp=None, fc_hp=None): self.p = p self.fs = fs @@ -111,22 +103,16 @@ def __call__(self, sample): filt = scipy.signal.firwin(self.num_taps, fc, window='hamming') else: fc = random.random() * 0.25 - filt = scipy.signal.firwin(self.num_taps, - fc, - window='hamming', - pass_zero=False) + filt = scipy.signal.firwin(self.num_taps, fc, window='hamming', pass_zero=False) filt = torch.from_numpy(filt).float() filt = filt / filt.sum() - sample = F.pad(sample.view(1, 1, -1), - (filt.shape[0] // 2, filt.shape[0] // 2), - mode="reflect") + sample = F.pad(sample.view(1, 1, -1), (filt.shape[0] // 2, filt.shape[0] // 2), mode="reflect") sample = F.conv1d(sample, filt.view(1, 1, -1), stride=1, groups=1) sample = sample.view(-1) return sample class RandomTimeShift(AugBasic): - def __init__(self, p=0.5, max_time_shift=None): self.p = p self.max_time_shift = max_time_shift @@ -135,8 +121,7 @@ def __call__(self, sample): if random.random() < self.p: if self.max_time_shift is None: self.max_time_shift = sample.shape[-1] // 10 - int_d = 2 * random.randint( - 0, self.max_time_shift) - self.max_time_shift + int_d = 2 * random.randint(0, self.max_time_shift) - self.max_time_shift frac_d = np.round(100 * (random.random() - 0.5)) / 100 if int_d + frac_d == 0: return sample @@ -154,19 +139,17 @@ def __call__(self, sample): dw = 2 * np.pi / n if n % 2 == 1: wp = torch.arange(0, np.pi, dw) - wn = torch.arange(-dw, -np.pi, -dw).flip(dims=(-1, )) + wn = torch.arange(-dw, -np.pi, -dw).flip(dims=(-1,)) else: wp = torch.arange(0, np.pi, dw) - wn = torch.arange(-dw, -np.pi - dw, -dw).flip(dims=(-1, )) + wn = torch.arange(-dw, -np.pi - dw, -dw).flip(dims=(-1,)) w = torch.cat((wp, wn), dim=-1) phi = frac_d * w - sample = torch.fft.ifft( - torch.fft.fft(sample) * torch.exp(-1j * phi)).real + sample = torch.fft.ifft(torch.fft.fft(sample) * torch.exp(-1j * phi)).real return sample class RandomTimeMasking(AugBasic): - def __init__(self, p=0.5, n_mask=None): self.n_mask = n_mask self.p = p @@ -177,13 +160,11 @@ def __call__(self, sample): if random.random() < self.p: max_start = sample.size(-1) - self.n_mask idx_rand = random.randint(0, max_start) - sample[idx_rand:idx_rand + - self.n_mask] = torch.randn(self.n_mask) * 1e-6 + sample[idx_rand : idx_rand + self.n_mask] = torch.randn(self.n_mask) * 1e-6 return sample class RandomMuLawCompression(AugBasic): - def __init__(self, p=0.5, n_channels=256): self.n_channels = n_channels self.p = p @@ -196,7 +177,6 @@ def __call__(self, sample): class RandomAmp(AugBasic): - def __init__(self, low, high, p=0.5): self.low = low self.high = high @@ -210,20 +190,21 @@ def __call__(self, sample): class RandomFlip(AugBasic): - def __init__(self, p=0.5): self.p = p def __call__(self, sample): if random.random() < self.p: - sample.data = 
torch.flip(sample.data, dims=[ - -1, - ]) + sample.data = torch.flip( + sample.data, + dims=[ + -1, + ], + ) return sample class RandomAdd180Phase(AugBasic): - def __init__(self, p=0.5): self.p = p @@ -234,7 +215,6 @@ def __call__(self, sample): class RandomAdditiveWhiteGN(AugBasic): - def __init__(self, p=0.5, snr_db=30): self.snr_db = snr_db self.min_snr_db = 30 @@ -243,16 +223,14 @@ def __init__(self, p=0.5, snr_db=30): def __call__(self, sample): if random.random() < self.p: s = torch.sqrt(torch.mean(sample**2)) - snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - - self.min_snr_db) - sgm = s * 10**(-snr_db / 20.) + snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - self.min_snr_db) + sgm = s * 10 ** (-snr_db / 20.0) w = torch.randn_like(sample).mul_(sgm) sample.add_(w) return sample class RandomAdditiveUN(AugBasic): - def __init__(self, snr_db=35, p=0.5): self.snr_db = snr_db self.min_snr_db = 30 @@ -261,16 +239,14 @@ def __init__(self, snr_db=35, p=0.5): def __call__(self, sample): if random.random() < self.p: s = torch.sqrt(torch.mean(sample**2)) - snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - - self.min_snr_db) - sgm = s * 10**(-snr_db / 20.) * np.sqrt(3) + snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - self.min_snr_db) + sgm = s * 10 ** (-snr_db / 20.0) * np.sqrt(3) w = torch.rand_like(sample).mul_(2 * sgm).add_(-sgm) sample.add_(w) return sample class RandomAdditivePinkGN(AugBasic): - def __init__(self, snr_db=35, p=0.5): self.snr_db = snr_db self.min_snr_db = 30 @@ -285,18 +261,16 @@ def __call__(self, sample): k = torch.arange(1, nn + 1, 1).float() W = torch.fft.fft(w) W = W[:nn] / k.sqrt() - W = torch.cat((W, W.flip(dims=(-1, ))[1:-1].conj()), dim=-1) + W = torch.cat((W, W.flip(dims=(-1,))[1:-1].conj()), dim=-1) w = torch.fft.ifft(W).real w.add_(w.mean()).div_(w.std()) - snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - - self.min_snr_db) - sgm = s * 10**(-snr_db / 20.) + snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - self.min_snr_db) + sgm = s * 10 ** (-snr_db / 20.0) sample.add_(w.mul_(sgm)) return sample class RandomAdditiveVioletGN(AugBasic): - def __init__(self, p=0.5, snr_db=35): self.snr_db = snr_db self.min_snr_db = 30 @@ -311,18 +285,16 @@ def __call__(self, sample): k = torch.arange(1, nn + 1, 1).float() W = torch.fft.fft(w) W = W[:nn] * k - W = torch.cat((W, W.flip(dims=(-1, ))[1:-1].conj()), dim=-1) + W = torch.cat((W, W.flip(dims=(-1,))[1:-1].conj()), dim=-1) w = torch.fft.ifft(W).real w.add_(w.mean()).div_(w.std()) - snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - - self.min_snr_db) - sgm = s * 10**(-snr_db / 20.) + snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - self.min_snr_db) + sgm = s * 10 ** (-snr_db / 20.0) sample.add_(w.mul_(sgm)) return sample class RandomAdditiveRedGN(AugBasic): - def __init__(self, p=0.5, snr_db=35): self.snr_db = snr_db self.min_snr_db = 30 @@ -337,18 +309,16 @@ def __call__(self, sample): k = torch.arange(1, nn + 1, 1).float() W = torch.fft.fft(w) W = W[:nn] / k - W = torch.cat((W, W.flip(dims=(-1, ))[1:-1].conj()), dim=-1) + W = torch.cat((W, W.flip(dims=(-1,))[1:-1].conj()), dim=-1) w = torch.fft.ifft(W).real w.add_(w.mean()).div_(w.std()) - snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - - self.min_snr_db) - sgm = s * 10**(-snr_db / 20.) 
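The RandomAdditive*GN transforms above all share one pattern: draw an SNR uniformly between min_snr_db and snr_db, convert it to a noise amplitude with 10 ** (-snr_db / 20) (divide by 20, not 10, because sgm scales amplitude rather than power), and add the scaled noise in place. A condensed, self-contained sketch of the white-noise case (the function name is illustrative):

import torch

def add_white_noise(sample: torch.Tensor, snr_db: float) -> torch.Tensor:
    # The RMS of the clean signal sets the reference level for the SNR.
    rms = torch.sqrt(torch.mean(sample ** 2))
    sgm = rms * 10 ** (-snr_db / 20.0)  # amplitude scale for the target SNR
    return sample + torch.randn_like(sample) * sgm

noisy = add_white_noise(torch.randn(8000), snr_db=30.0)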
+ snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - self.min_snr_db) + sgm = s * 10 ** (-snr_db / 20.0) sample.add_(w.mul_(sgm)) return sample class RandomAdditiveBlueGN(AugBasic): - def __init__(self, p=0.5, snr_db=35): self.snr_db = snr_db self.min_snr_db = 30 @@ -363,18 +333,16 @@ def __call__(self, sample): k = torch.arange(1, nn + 1, 1).float() W = torch.fft.fft(w) W = W[:nn] * k.sqrt() - W = torch.cat((W, W.flip(dims=(-1, ))[1:-1].conj()), dim=-1) + W = torch.cat((W, W.flip(dims=(-1,))[1:-1].conj()), dim=-1) w = torch.fft.ifft(W).real w.add_(w.mean()).div_(w.std()) - snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - - self.min_snr_db) - sgm = s * 10**(-snr_db / 20.) + snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - self.min_snr_db) + sgm = s * 10 ** (-snr_db / 20.0) sample.add_(w.mul_(sgm)) return sample class RandomFreqShift(AugBasic): - def __init__(self, sgm, fs, p=0.5): super().__init__(fs=fs) self.sgm = sgm @@ -387,23 +355,25 @@ def __call__(self, sample): f_shift = torch.randn(1).mul_(self.sgm * df) t = torch.arange(0, self.fft_params['win_len'][win_idx], 1).float() w = torch.real(torch.exp(-1j * 2 * np.pi * t * f_shift)) - X = torch.stft(sample, - win_length=self.fft_params['win_len'][win_idx], - hop_length=self.fft_params['hop_len'][win_idx], - n_fft=self.fft_params['n_fft'][win_idx], - window=w, - return_complex=True) + X = torch.stft( + sample, + win_length=self.fft_params['win_len'][win_idx], + hop_length=self.fft_params['hop_len'][win_idx], + n_fft=self.fft_params['n_fft'][win_idx], + window=w, + return_complex=True, + ) sample = torch.istft( X, win_length=self.fft_params['win_len'][win_idx], hop_length=self.fft_params['hop_len'][win_idx], - n_fft=self.fft_params['n_fft'][win_idx]) + n_fft=self.fft_params['n_fft'][win_idx], + ) return sample class RandomAddSine(AugBasic): - def __init__(self, fs, snr_db=35, max_freq=50, p=0.5): self.snr_db = snr_db self.max_freq = max_freq @@ -415,11 +385,10 @@ def __call__(self, sample): n = torch.arange(0, sample.shape[-1], 1) f = self.max_freq * torch.rand(1) + 3 * torch.randn(1) if random.random() < self.p: - snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - - self.min_snr_db) - t = n * 1. / self.fs + snr_db = self.min_snr_db + torch.rand(1) * (self.snr_db - self.min_snr_db) + t = n * 1.0 / self.fs s = (sample**2).mean().sqrt() - sgm = s * np.sqrt(2) * 10**(-snr_db / 20.) 
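The pink, red, blue, and violet variants above differ only in how they shape a white spectrum before the inverse FFT: 1/sqrt(k), 1/k, sqrt(k), and k respectively. A condensed sketch of the shared recipe for an even-length signal; nn is reconstructed here as n // 2 + 1 (its assignment sits outside the visible hunks), and the noise is normalized by subtracting the mean, where the classes above call w.add_(w.mean()), presumably a typo for sub_:

import torch

def colored_noise(n: int, alpha: float) -> torch.Tensor:
    # alpha = 0.5 -> pink, 1.0 -> red, -0.5 -> blue, -1.0 -> violet.
    w = torch.randn(n)
    nn = n // 2 + 1
    k = torch.arange(1, nn + 1).float()
    W = torch.fft.fft(w)[:nn] * k ** (-alpha)  # shape the half spectrum
    # Rebuild the conjugate-symmetric half so the inverse FFT is real.
    W = torch.cat((W, W.flip(dims=(-1,))[1:-1].conj()), dim=-1)
    w = torch.fft.ifft(W).real
    return (w - w.mean()) / w.std()

pink = colored_noise(8000, alpha=0.5)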
+ sgm = s * np.sqrt(2) * 10 ** (-snr_db / 20.0) b = sgm * torch.sin(2 * np.pi * f * t + torch.rand(1) * np.pi) sample.add_(b) @@ -427,7 +396,6 @@ def __call__(self, sample): class RandomAmpSegment(AugBasic): - def __init__(self, low, high, max_len=None, p=0.5): self.low = low self.high = high @@ -440,12 +408,11 @@ def __call__(self, sample): self.max_len = sample.shape[-1] // 10 idx = random.randint(0, self.max_len) amp = torch.FloatTensor(1).uniform_(self.low, self.high) - sample[idx:idx + self.max_len].mul_(amp) + sample[idx : idx + self.max_len].mul_(amp) return sample class RandomPhNoise(AugBasic): - def __init__(self, fs, sgm=0.01, p=0.5): super().__init__(fs=fs) self.sgm = sgm @@ -455,11 +422,13 @@ def __call__(self, sample): if random.random() < self.p: win_idx = random.randint(0, len(self.fft_params['win_len']) - 1) sgm_noise = self.sgm + 0.01 * torch.rand(1) - X = torch.stft(sample, - win_length=self.fft_params['win_len'][win_idx], - hop_length=self.fft_params['hop_len'][win_idx], - n_fft=self.fft_params['n_fft'][win_idx], - return_complex=True) + X = torch.stft( + sample, + win_length=self.fft_params['win_len'][win_idx], + hop_length=self.fft_params['hop_len'][win_idx], + n_fft=self.fft_params['n_fft'][win_idx], + return_complex=True, + ) w = sgm_noise * torch.rand_like(X) phn = torch.exp(1j * w) X.mul_(phn) @@ -467,12 +436,12 @@ def __call__(self, sample): X, win_length=self.fft_params['win_len'][win_idx], hop_length=self.fft_params['hop_len'][win_idx], - n_fft=self.fft_params['n_fft'][win_idx]) + n_fft=self.fft_params['n_fft'][win_idx], + ) return sample class RandomCyclicShift(AugBasic): - def __init__(self, max_time_shift=None, p=0.5): self.max_time_shift = max_time_shift self.p = p @@ -489,12 +458,9 @@ def __call__(self, sample): return sample -class AudioAugs(): - +class AudioAugs: def __init__(self, k_augs, fs, p=0.5, snr_db=30): - self.noise_vec = [ - 'awgn', 'abgn', 'apgn', 'argn', 'avgn', 'aun', 'phn', 'sine' - ] + self.noise_vec = ['awgn', 'abgn', 'apgn', 'argn', 'avgn', 'aun', 'phn', 'sine'] augs = {} for aug in k_augs: if aug == 'amp': @@ -518,17 +484,13 @@ def __init__(self, k_augs, fs, p=0.5, snr_db=30): elif aug == 'tmask': augs['tmask'] = RandomTimeMasking(p=p, n_mask=int(0.1 * fs)) elif aug == 'tshift': - augs['tshift'] = RandomTimeShift(p=p, - max_time_shift=int(0.1 * fs)) + augs['tshift'] = RandomTimeShift(p=p, max_time_shift=int(0.1 * fs)) elif aug == 'sine': augs['sine'] = RandomAddSine(p=p, fs=fs) elif aug == 'cycshift': augs['cycshift'] = RandomCyclicShift(p=p, max_time_shift=None) elif aug == 'ampsegment': - augs['ampsegment'] = RandomAmpSegment(p=p, - low=0.5, - high=1.3, - max_len=int(0.1 * fs)) + augs['ampsegment'] = RandomAmpSegment(p=p, low=0.5, high=1.3, max_len=int(0.1 * fs)) elif aug == 'aun': augs['aun'] = RandomAdditiveUN(p=p, snr_db=snr_db) elif aug == 'phn': diff --git a/edgelab/datasets/utils/download.py b/edgelab/datasets/utils/download.py index a63a958a..6f42bc66 100644 --- a/edgelab/datasets/utils/download.py +++ b/edgelab/datasets/utils/download.py @@ -9,9 +9,10 @@ def check_compress(file): "gz": "gzip -d {} ..", "tgz": "tar -zxf {} -C ..", "zip": "unzip -n {} -d ..", - "rar": "unrar e -o- -y {} .." 
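AudioAugs in audio_augs.py above is a small registry that maps short keys ('awgn', 'tshift', 'ampsegment', and so on) onto the transform classes with fixed defaults. A hedged usage sketch, assuming AudioAugs applies its configured transforms to a 1-D waveform when called (its __call__ lies outside the visible hunks):

import torch
from edgelab.datasets.utils.audio_augs import AudioAugs

augs = AudioAugs(k_augs=['amp', 'flip', 'awgn', 'tshift'], fs=8000, p=0.5, snr_db=30)
waveform = torch.randn(8000)
augmented = augs(waveform)  # assumes __call__ applies each registered transform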
+ "rar": "unrar e -o- -y {} ..", } - if 'tar.gz' in file: return ["tar -zxf {} -C .."] + if 'tar.gz' in file: + return ["tar -zxf {} -C .."] fls = file.split('.') res = [] for f in fls[::-1]: @@ -27,11 +28,9 @@ def defile(files, store_dir): res.append(cmd) -def download(links: List or AnyStr, - store_path: AnyStr or __path__, - unzip_dir=None): - if isinstance(links,str): - links=[links] +def download(links: List or AnyStr, store_path: AnyStr or __path__, unzip_dir=None): + if isinstance(links, str): + links = [links] os.chdir(store_path) if not os.path.exists('download'): os.mkdir('download') @@ -63,13 +62,16 @@ def check_file(path, store_dir=None, data_name=None): download_dir = None if isinstance(path, (list, tuple)): if 'https://' in path[0] or 'http://' in path[0]: - download_dir = f"{os.environ['HOME']}/datasets" if platform.system( - ) == 'Linux' and not store_dir else 'D:\datasets' if not store_dir else store_dir - download_dir = os.path.join( - download_dir, data_name) if data_name else download_dir + download_dir = ( + f"{os.environ['HOME']}/datasets" + if platform.system() == 'Linux' and not store_dir + else 'D:\datasets' + if not store_dir + else store_dir + ) + download_dir = os.path.join(download_dir, data_name) if data_name else download_dir if not os.path.exists(download_dir): - os.makedirs(download_dir, - exist_ok=True) # makedir the datasets + os.makedirs(download_dir, exist_ok=True) # makedir the datasets download(path, download_dir, data_name) elif isinstance(path, str): @@ -77,19 +79,24 @@ def check_file(path, store_dir=None, data_name=None): path = path.replace('~', os.environ['HOME']) download_dir = path if 'https://' in path or 'http://' in path: - download_dir = f"{os.environ['HOME']}/datasets" if platform.system( - ) == 'Linux' and not store_dir else 'D:\datasets' if not store_dir else store_dir - download_dir = os.path.join( - download_dir, data_name) if data_name else download_dir + download_dir = ( + f"{os.environ['HOME']}/datasets" + if platform.system() == 'Linux' and not store_dir + else 'D:\datasets' + if not store_dir + else store_dir + ) + download_dir = os.path.join(download_dir, data_name) if data_name else download_dir if not os.path.exists(download_dir): - os.makedirs(download_dir, - exist_ok=True) # makedir the datasets + os.makedirs(download_dir, exist_ok=True) # makedir the datasets download(path, download_dir, data_name) else: raise TypeError( - 'The download link must be a list or a string, but got {} type'. 
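One caveat in check_file above that black cannot fix: the Windows fallback 'D:\datasets' contains an unescaped backslash. Today '\d' is kept as a literal backslash plus 'd' but raises a DeprecationWarning, and a future default beginning with a recognized escape such as '\t' would silently become a tab. A sketch of the same branching with the escape made explicit (the function name is illustrative):

import os
import platform

def default_dataset_dir(store_dir=None, data_name=None):
    # Mirrors the nested conditional expression in check_file above.
    if store_dir:
        base = store_dir
    elif platform.system() == 'Linux':
        base = os.path.join(os.environ['HOME'], 'datasets')
    else:
        base = 'D:\\datasets'  # escaped; r'D:\datasets' would also work
    return os.path.join(base, data_name) if data_name else base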
- format(getattr(type(path), '__name__', repr(type(path))))) + 'The download link must be a list or a string, but got {} type'.format( + getattr(type(path), '__name__', repr(type(path))) + ) + ) return download_dir diff --git a/edgelab/datasets/utils/functions.py b/edgelab/datasets/utils/functions.py index ba95d78d..96ad7f32 100644 --- a/edgelab/datasets/utils/functions.py +++ b/edgelab/datasets/utils/functions.py @@ -2,6 +2,7 @@ from mmengine.registry import FUNCTIONS from mmdet.structures import DetDataSample + @FUNCTIONS.register_module() def fomo_collate(batch): img, label = [x['inputs'] for x in batch], [y['data_samples'] for y in batch] @@ -9,4 +10,3 @@ def fomo_collate(batch): if l.shape[0] > 0: l[:, 0] = i return dict(inputs=torch.stack(img), data_samples=[DetDataSample(labels=torch.cat(label, 0))]) - \ No newline at end of file diff --git a/edgelab/datasets/vocdataset.py b/edgelab/datasets/vocdataset.py index 6c72bc42..7f70b7e9 100644 --- a/edgelab/datasets/vocdataset.py +++ b/edgelab/datasets/vocdataset.py @@ -12,11 +12,8 @@ @DATASETS.register_module() class CustomVocdataset(VOCDataset): - def __init__(self, **kwargs): - kwargs['data_root'] = os.path.join( - check_file(kwargs['data_root'], data_name='voc'), 'VOCdevkit', - 'VOC2012') + kwargs['data_root'] = os.path.join(check_file(kwargs['data_root'], data_name='voc'), 'VOCdevkit', 'VOC2012') super(CustomVocdataset, self).__init__(**kwargs) @@ -36,19 +33,17 @@ def build_target(self, preds, targets, img_h, img_w): bboxes = targets['bboxes'] labels = targets['labels'] - bboxes = [ - self.bboxe2cell(bboxe, img_h, img_w, H, W) for bboxe in bboxes - ] + bboxes = [self.bboxe2cell(bboxe, img_h, img_w, H, W) for bboxe in bboxes] for bboxe, label in zip(bboxes, labels): - target_data[0, bboxe[1], bboxe[0]] = label + 1 #label + target_data[0, bboxe[1], bboxe[0]] = label + 1 # label return target_data def compute_FTP(self, pred, target): - confusion = confusion_matrix(target.flatten().cpu().numpy(), - pred.flatten().cpu().numpy(), - labels=range(len(self.CLASSES)+1)) + confusion = confusion_matrix( + target.flatten().cpu().numpy(), pred.flatten().cpu().numpy(), labels=range(len(self.CLASSES) + 1) + ) tn = confusion[0, 0] tp = np.diagonal(confusion).sum() - tn fn = np.tril(confusion, k=-1).sum() @@ -57,7 +52,6 @@ def compute_FTP(self, pred, target): return tp, fp, fn def computer_prf(self, tp, fp, fn): - if tp == 0 and fn == 0 and fp == 0: return 1.0, 1.0, 1.0 @@ -66,16 +60,17 @@ def computer_prf(self, tp, fp, fn): f1 = 0.0 if (p + r == 0) else 2 * (p * r) / (p + r) return p, r, f1 - - def evaluate(self, - results, - metric='mAP', - logger=None, - proposal_nums=(100, 300, 1000), - iou_thr=0.5, - scale_ranges=None, - fomo=False): - if fomo: #just with here evaluate for fomo data + def evaluate( + self, + results, + metric='mAP', + logger=None, + proposal_nums=(100, 300, 1000), + iou_thr=0.5, + scale_ranges=None, + fomo=False, + ): + if fomo: # just with here evaluate for fomo data annotations = [self.get_ann_info(i) for i in range(len(self))] eval_results = OrderedDict() tmp = [] @@ -102,9 +97,11 @@ def evaluate(self, return eval_results else: # object evaluate - return super().evaluate(results, - metric=metric, - logger=logger, - proposal_nums=proposal_nums, - iou_thr=iou_thr, - scale_ranges=scale_ranges) + return super().evaluate( + results, + metric=metric, + logger=logger, + proposal_nums=proposal_nums, + iou_thr=iou_thr, + scale_ranges=scale_ranges, + ) diff --git a/edgelab/datasets/yolodataset.py b/edgelab/datasets/yolodataset.py index 
f06ffc32..28c37077 100644 --- a/edgelab/datasets/yolodataset.py +++ b/edgelab/datasets/yolodataset.py @@ -11,55 +11,119 @@ class CustomYOLOv5CocoDataset(YOLOv5CocoDataset): METAINFO = { 'classes': (), # palette is a list of color tuples, which is used for visualization. - 'palette': - [(220, 20, 60), (119, 11, 32), (0, 0, 142), (0, 0, 230), (106, 0, 228), - (0, 60, 100), (0, 80, 100), (0, 0, 70), (0, 0, 192), (250, 170, 30), - (100, 170, 30), (220, 220, 0), (175, 116, 175), (250, 0, 30), - (165, 42, 42), (255, 77, 255), (0, 226, 252), (182, 182, 255), - (0, 82, 0), (120, 166, 157), (110, 76, 0), (174, 57, 255), - (199, 100, 0), (72, 0, 118), (255, 179, 240), (0, 125, 92), - (209, 0, 151), (188, 208, 182), (0, 220, 176), (255, 99, 164), - (92, 0, 73), (133, 129, 255), (78, 180, 255), (0, 228, 0), - (174, 255, 243), (45, 89, 255), (134, 134, 103), (145, 148, 174), - (255, 208, 186), (197, 226, 255), (171, 134, 1), (109, 63, 54), - (207, 138, 255), (151, 0, 95), (9, 80, 61), (84, 105, 51), - (74, 65, 105), (166, 196, 102), (208, 195, 210), (255, 109, 65), - (0, 143, 149), (179, 0, 194), (209, 99, 106), (5, 121, 0), - (227, 255, 205), (147, 186, 208), (153, 69, 1), (3, 95, 161), - (163, 255, 0), (119, 0, 170), (0, 182, 199), (0, 165, 120), - (183, 130, 88), (95, 32, 0), (130, 114, 135), (110, 129, 133), - (166, 74, 118), (219, 142, 185), (79, 210, 114), (178, 90, 62), - (65, 70, 15), (127, 167, 115), (59, 105, 106), (142, 108, 45), - (196, 172, 0), (95, 54, 80), (128, 76, 255), (201, 57, 1), - (246, 0, 122), (191, 162, 208)] + 'palette': [ + (220, 20, 60), + (119, 11, 32), + (0, 0, 142), + (0, 0, 230), + (106, 0, 228), + (0, 60, 100), + (0, 80, 100), + (0, 0, 70), + (0, 0, 192), + (250, 170, 30), + (100, 170, 30), + (220, 220, 0), + (175, 116, 175), + (250, 0, 30), + (165, 42, 42), + (255, 77, 255), + (0, 226, 252), + (182, 182, 255), + (0, 82, 0), + (120, 166, 157), + (110, 76, 0), + (174, 57, 255), + (199, 100, 0), + (72, 0, 118), + (255, 179, 240), + (0, 125, 92), + (209, 0, 151), + (188, 208, 182), + (0, 220, 176), + (255, 99, 164), + (92, 0, 73), + (133, 129, 255), + (78, 180, 255), + (0, 228, 0), + (174, 255, 243), + (45, 89, 255), + (134, 134, 103), + (145, 148, 174), + (255, 208, 186), + (197, 226, 255), + (171, 134, 1), + (109, 63, 54), + (207, 138, 255), + (151, 0, 95), + (9, 80, 61), + (84, 105, 51), + (74, 65, 105), + (166, 196, 102), + (208, 195, 210), + (255, 109, 65), + (0, 143, 149), + (179, 0, 194), + (209, 99, 106), + (5, 121, 0), + (227, 255, 205), + (147, 186, 208), + (153, 69, 1), + (3, 95, 161), + (163, 255, 0), + (119, 0, 170), + (0, 182, 199), + (0, 165, 120), + (183, 130, 88), + (95, 32, 0), + (130, 114, 135), + (110, 129, 133), + (166, 74, 118), + (219, 142, 185), + (79, 210, 114), + (178, 90, 62), + (65, 70, 15), + (127, 167, 115), + (59, 105, 106), + (142, 108, 45), + (196, 172, 0), + (95, 54, 80), + (128, 76, 255), + (201, 57, 1), + (246, 0, 122), + (191, 162, 208), + ], } - def __init__(self, - *args, - ann_file: str = '', - metainfo: Optional[dict] = None, - data_root: str = '', - filter_supercat: bool = True, - batch_shapes_cfg: Optional[dict] = None, - classes: Optional[Sequence[str]] = None, - **kwargs): + def __init__( + self, + *args, + ann_file: str = '', + metainfo: Optional[dict] = None, + data_root: str = '', + filter_supercat: bool = True, + batch_shapes_cfg: Optional[dict] = None, + classes: Optional[Sequence[str]] = None, + **kwargs, + ): if metainfo is None and not self.METAINFO['classes']: if not osp.isabs(ann_file) and ann_file: self.ann_file = 
osp.join(data_root, ann_file) with open(self.ann_file, 'r') as f: data = json.load(f) if filter_supercat: - catgories = tuple(cat['name'] for cat in data['categories'] - if cat['supercategory'] != 'none') + catgories = tuple(cat['name'] for cat in data['categories'] if cat['supercategory'] != 'none') else: catgories = tuple(cat['name'] for cat in data['categories']) self.METAINFO['classes'] = catgories elif classes: self.METAINFO['classes'] = classes - super().__init__(*args, - ann_file=ann_file, - metainfo=metainfo, - data_root=data_root, - batch_shapes_cfg=batch_shapes_cfg, - **kwargs) + super().__init__( + *args, + ann_file=ann_file, + metainfo=metainfo, + data_root=data_root, + batch_shapes_cfg=batch_shapes_cfg, + **kwargs, + ) diff --git a/edgelab/engine/__init__.py b/edgelab/engine/__init__.py index af84d5b3..17a9cd06 100644 --- a/edgelab/engine/__init__.py +++ b/edgelab/engine/__init__.py @@ -1,9 +1,13 @@ from .runner import GetEpochBasedTrainLoop -from .hooks import (TextLoggerHook, TensorboardLoggerHook, WandbLoggerHook, - Posevisualization, DetFomoVisualizationHook) +from .hooks import TextLoggerHook, TensorboardLoggerHook, WandbLoggerHook, Posevisualization, DetFomoVisualizationHook __all__ = [ - 'TextLoggerHook', 'TensorboardLoggerHook', 'WandbLoggerHook', - 'PaviLoggerHook', 'ClearMLLoggerHook', 'GetEpochBasedTrainLoop', - 'Posevisualization', 'DetFomoVisualizationHook' + 'TextLoggerHook', + 'TensorboardLoggerHook', + 'WandbLoggerHook', + 'PaviLoggerHook', + 'ClearMLLoggerHook', + 'GetEpochBasedTrainLoop', + 'Posevisualization', + 'DetFomoVisualizationHook', ] diff --git a/edgelab/engine/apis/mmdet/__init__.py b/edgelab/engine/apis/mmdet/__init__.py index 664ea069..a4e5be28 100644 --- a/edgelab/engine/apis/mmdet/__init__.py +++ b/edgelab/engine/apis/mmdet/__init__.py @@ -2,7 +2,13 @@ from .train import init_random_seed, set_random_seed, auto_scale_lr, train_detector __all__ = [ - 'single_gpu_test_mmcls', 'single_gpu_test_fomo', 'init_random_seed', - 'set_random_seed','auto_scale_lr', 'train_detector', - 'multi_gpu_test', 'collect_results_cpu', 'collect_results_gpu', -] \ No newline at end of file + 'single_gpu_test_mmcls', + 'single_gpu_test_fomo', + 'init_random_seed', + 'set_random_seed', + 'auto_scale_lr', + 'train_detector', + 'multi_gpu_test', + 'collect_results_cpu', + 'collect_results_gpu', +] diff --git a/edgelab/engine/apis/mmdet/test.py b/edgelab/engine/apis/mmdet/test.py index 8cb1216b..d7c9df8f 100644 --- a/edgelab/engine/apis/mmdet/test.py +++ b/edgelab/engine/apis/mmdet/test.py @@ -35,8 +35,12 @@ def single_gpu_test_mmcls(model, data_loader, audio): for data in data_loader: data.dataset[name] = data.dataset[name].unsqueeze(0) if name == 'img': - data.dataset.update({'image_file': data.dataset['image_file'].data, - 'keypoints': torch.tensor(data.dataset['keypoints']).unsqueeze(0)}) # pfld + data.dataset.update( + { + 'image_file': data.dataset['image_file'].data, + 'keypoints': torch.tensor(data.dataset['keypoints']).unsqueeze(0), + } + ) # pfld with torch.no_grad(): result = model(return_loss=False, **data.dataset) results.append(result) @@ -103,16 +107,12 @@ def collect_results_cpu(result_part, size, tmpdir=None): if tmpdir is None: MAX_LEN = 512 # 32 is whitespace - dir_tensor = torch.full((MAX_LEN, ), - 32, - dtype=torch.uint8, - device='cuda') + dir_tensor = torch.full((MAX_LEN,), 32, dtype=torch.uint8, device='cuda') if rank == 0: mmcv.mkdir_or_exist('.dist_test') tmpdir = tempfile.mkdtemp(dir='.dist_test') - tmpdir = torch.tensor( - 
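CustomYOLOv5CocoDataset above fills METAINFO['classes'] lazily by reading the COCO annotation file and, when filter_supercat is set, dropping categories whose supercategory is 'none'. The same lookup in isolation (the helper name is illustrative):

import json

def coco_classes(ann_file: str, filter_supercat: bool = True) -> tuple:
    with open(ann_file, 'r') as f:
        data = json.load(f)
    cats = data['categories']
    if filter_supercat:
        return tuple(c['name'] for c in cats if c['supercategory'] != 'none')
    return tuple(c['name'] for c in cats)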
bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda') - dir_tensor[:len(tmpdir)] = tmpdir + tmpdir = torch.tensor(bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda') + dir_tensor[: len(tmpdir)] = tmpdir dist.broadcast(dir_tensor, 0) tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip() else: @@ -156,8 +156,7 @@ def collect_results_gpu(result_part, size): rank, world_size = get_dist_info() # dump result part to tensor with pickle - part_tensor = torch.tensor( - bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda') + part_tensor = torch.tensor(bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda') # gather all result part tensor shape shape_tensor = torch.tensor(part_tensor.shape, device='cuda') shape_list = [shape_tensor.clone() for _ in range(world_size)] @@ -165,18 +164,15 @@ def collect_results_gpu(result_part, size): # padding result part tensor to max length shape_max = torch.tensor(shape_list).max() part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda') - part_send[:shape_tensor[0]] = part_tensor - part_recv_list = [ - part_tensor.new_zeros(shape_max) for _ in range(world_size) - ] + part_send[: shape_tensor[0]] = part_tensor + part_recv_list = [part_tensor.new_zeros(shape_max) for _ in range(world_size)] # gather all result part dist.all_gather(part_recv_list, part_send) if rank == 0: part_list = [] for recv, shape in zip(part_recv_list, shape_list): - part_list.append( - pickle.loads(recv[:shape[0]].cpu().numpy().tobytes())) + part_list.append(pickle.loads(recv[: shape[0]].cpu().numpy().tobytes())) # sort the results ordered_results = [] for res in zip(*part_list): @@ -184,4 +180,4 @@ def collect_results_gpu(result_part, size): # the dataloader may pad some samples ordered_results = ordered_results[:size] return ordered_results - return None \ No newline at end of file + return None diff --git a/edgelab/engine/apis/mmdet/train.py b/edgelab/engine/apis/mmdet/train.py index 1f6d4a1d..458958a1 100644 --- a/edgelab/engine/apis/mmdet/train.py +++ b/edgelab/engine/apis/mmdet/train.py @@ -5,19 +5,23 @@ import numpy as np import torch import torch.distributed as dist -from mmcv.runner import (DistSamplerSeedHook, EpochBasedRunner, - Fp16OptimizerHook, OptimizerHook, build_runner, - get_dist_info) +from mmcv.runner import ( + DistSamplerSeedHook, + EpochBasedRunner, + Fp16OptimizerHook, + OptimizerHook, + build_runner, + get_dist_info, +) from mmdet.datasets.utils import replace_ImageToTensor -from mmdet.core import DistEvalHook, build_optimizer +from mmdet.core import DistEvalHook, build_optimizer from mmdet.datasets import build_dataset -from mmdet.utils import (build_ddp, build_dp, compat_cfg, - find_latest_checkpoint, get_root_logger) +from mmdet.utils import build_ddp, build_dp, compat_cfg, find_latest_checkpoint, get_root_logger from edgelab.engine.hooks.evalhook import Feval from edgelab.datasets.builder import build_dataloader - + def init_random_seed(seed=None, device='cuda'): """Initialize random seed. @@ -80,10 +84,8 @@ def auto_scale_lr(cfg, distributed, logger): logger (logging.Logger): Logger. 
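collect_results_gpu above moves arbitrary picklable results between ranks by serializing them into uint8 tensors, padding each part to the longest length, and calling dist.all_gather; the shape tensors gathered first are what let each receiver strip the padding again. A single-process sketch of that round-trip with the dist calls elided:

import pickle
import torch

part = [{'bboxes': [[0, 0, 10, 10]], 'score': 0.9}]

# Sender side: pickle into a uint8 tensor, then pad to a common length.
buf = torch.tensor(bytearray(pickle.dumps(part)), dtype=torch.uint8)
true_len = buf.shape[0]
padded = torch.zeros(true_len + 16, dtype=torch.uint8)  # 16 simulates padding
padded[:true_len] = buf

# Receiver side: slice back to the true length before unpickling.
restored = pickle.loads(padded[:true_len].numpy().tobytes())
assert restored == part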
""" # Get flag from config - if ('auto_scale_lr' not in cfg) or \ - (not cfg.auto_scale_lr.get('enable', False)): - logger.info('Automatic scaling of learning rate (LR)' - ' has been disabled.') + if ('auto_scale_lr' not in cfg) or (not cfg.auto_scale_lr.get('enable', False)): + logger.info('Automatic scaling of learning rate (LR)' ' has been disabled.') return # Get base batch size from config @@ -101,38 +103,33 @@ def auto_scale_lr(cfg, distributed, logger): # calculate the batch size samples_per_gpu = cfg.data.train_dataloader.samples_per_gpu batch_size = num_gpus * samples_per_gpu - logger.info(f'Training with {num_gpus} GPU(s) with {samples_per_gpu} ' - f'samples per GPU. The total batch size is {batch_size}.') + logger.info( + f'Training with {num_gpus} GPU(s) with {samples_per_gpu} ' + f'samples per GPU. The total batch size is {batch_size}.' + ) if batch_size != base_batch_size: # scale LR with # [linear scaling rule](https://arxiv.org/abs/1706.02677) scaled_lr = (batch_size / base_batch_size) * cfg.optimizer.lr - logger.info('LR has been automatically scaled ' - f'from {cfg.optimizer.lr} to {scaled_lr}') + logger.info('LR has been automatically scaled ' f'from {cfg.optimizer.lr} to {scaled_lr}') cfg.optimizer.lr = scaled_lr else: - logger.info('The batch size match the ' - f'base batch size: {base_batch_size}, ' - f'will not scaling the LR ({cfg.optimizer.lr}).') - + logger.info( + 'The batch size match the ' + f'base batch size: {base_batch_size}, ' + f'will not scaling the LR ({cfg.optimizer.lr}).' + ) -def train_detector(model, - dataset, - cfg, - distributed=False, - validate=False, - timestamp=None, - meta=None): +def train_detector(model, dataset, cfg, distributed=False, validate=False, timestamp=None, meta=None): cfg = compat_cfg(cfg) logger = get_root_logger(log_level=cfg.log_level) # prepare data loaders dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset] - runner_type = 'EpochBasedRunner' if 'runner' not in cfg else cfg.runner[ - 'type'] + runner_type = 'EpochBasedRunner' if 'runner' not in cfg else cfg.runner['type'] train_dataloader_default_args = dict( samples_per_gpu=2, @@ -142,12 +139,10 @@ def train_detector(model, dist=distributed, seed=cfg.seed, runner_type=runner_type, - persistent_workers=False) + persistent_workers=False, + ) - train_loader_cfg = { - **train_dataloader_default_args, - **cfg.data.get('train_dataloader', {}) - } + train_loader_cfg = {**train_dataloader_default_args, **cfg.data.get('train_dataloader', {})} data_loaders = [build_dataloader(ds, **train_loader_cfg) for ds in dataset] @@ -161,7 +156,8 @@ def train_detector(model, cfg.device, device_ids=[int(os.environ['LOCAL_RANK'])], broadcast_buffers=False, - find_unused_parameters=find_unused_parameters) + find_unused_parameters=find_unused_parameters, + ) else: model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids) @@ -170,13 +166,8 @@ def train_detector(model, optimizer = build_optimizer(model, cfg.optimizer) runner = build_runner( - cfg.runner, - default_args=dict( - model=model, - optimizer=optimizer, - work_dir=cfg.work_dir, - logger=logger, - meta=meta)) + cfg.runner, default_args=dict(model=model, optimizer=optimizer, work_dir=cfg.work_dir, logger=logger, meta=meta) + ) # an ugly workaround to make .log and .log.json filenames the same runner.timestamp = timestamp @@ -184,8 +175,7 @@ def train_detector(model, # fp16 setting fp16_cfg = cfg.get('fp16', None) if fp16_cfg is not None: - optimizer_config = Fp16OptimizerHook( - **cfg.optimizer_config, **fp16_cfg, 
distributed=distributed) + optimizer_config = Fp16OptimizerHook(**cfg.optimizer_config, **fp16_cfg, distributed=distributed) elif distributed and 'type' not in cfg.optimizer_config: optimizer_config = OptimizerHook(**cfg.optimizer_config) else: @@ -198,7 +188,8 @@ def train_detector(model, cfg.checkpoint_config, cfg.log_config, cfg.get('momentum_config', None), - custom_hooks_config=cfg.get('custom_hooks', None)) + custom_hooks_config=cfg.get('custom_hooks', None), + ) if distributed: if isinstance(runner, EpochBasedRunner): @@ -207,22 +198,15 @@ def train_detector(model, # register eval hooks if validate: val_dataloader_default_args = dict( - samples_per_gpu=1, - workers_per_gpu=2, - dist=distributed, - shuffle=False, - persistent_workers=False) - - val_dataloader_args = { - **val_dataloader_default_args, - **cfg.data.get('val_dataloader', {}) - } + samples_per_gpu=1, workers_per_gpu=2, dist=distributed, shuffle=False, persistent_workers=False + ) + + val_dataloader_args = {**val_dataloader_default_args, **cfg.data.get('val_dataloader', {})} # Support batch_size > 1 in validation if val_dataloader_args['samples_per_gpu'] > 1: # Replace 'ImageToTensor' to 'DefaultFormatBundle' - cfg.data.val.pipeline = replace_ImageToTensor( - cfg.data.val.pipeline) + cfg.data.val.pipeline = replace_ImageToTensor(cfg.data.val.pipeline) val_dataset = build_dataset(cfg.data.val, dict(test_mode=True)) val_dataloader = build_dataloader(val_dataset, **val_dataloader_args) @@ -232,8 +216,7 @@ def train_detector(model, # In this PR (https://github.com/open-mmlab/mmcv/pull/1193), the # priority of IterTimerHook has been modified from 'NORMAL' to 'LOW'. print(eval_cfg) - runner.register_hook( - eval_hook(val_dataloader, **eval_cfg), priority='LOW') + runner.register_hook(eval_hook(val_dataloader, **eval_cfg), priority='LOW') resume_from = None if cfg.resume_from is None and cfg.get('auto_resume'): diff --git a/edgelab/engine/hooks/__init__.py b/edgelab/engine/hooks/__init__.py index ef8d3d96..3b5233a2 100644 --- a/edgelab/engine/hooks/__init__.py +++ b/edgelab/engine/hooks/__init__.py @@ -2,7 +2,11 @@ from .visualization_hook import Posevisualization, DetFomoVisualizationHook __all__ = [ - 'TextLoggerHook', 'TensorboardLoggerHook', 'WandbLoggerHook', - 'PaviLoggerHook', 'ClearMLLoggerHook', 'Posevisualization', - 'DetFomoVisualizationHook' + 'TextLoggerHook', + 'TensorboardLoggerHook', + 'WandbLoggerHook', + 'PaviLoggerHook', + 'ClearMLLoggerHook', + 'Posevisualization', + 'DetFomoVisualizationHook', ] diff --git a/edgelab/engine/hooks/evalhook.py b/edgelab/engine/hooks/evalhook.py index b07c39dd..126e7ba1 100644 --- a/edgelab/engine/hooks/evalhook.py +++ b/edgelab/engine/hooks/evalhook.py @@ -17,36 +17,50 @@ def show_result(result, img_path, classes): w, h, label = i label = classes[label - 1] cv2.circle(img, (int(W * w), int(H * h)), 5, (0, 0, 255), 1) - cv2.putText(img, - str(label), - org=(int(W * w), int(H * h)), - color=(255, 0, 0), - fontScale=1, - fontFace=cv2.FONT_HERSHEY_SIMPLEX) + cv2.putText( + img, + str(label), + org=(int(W * w), int(H * h)), + color=(255, 0, 0), + fontScale=1, + fontFace=cv2.FONT_HERSHEY_SIMPLEX, + ) cv2.imshow('img', img) cv2.waitKey(0) @HOOKS.register_module() class Feval(EvalHook): - - def __init__(self, - dataloader: DataLoader, - start: Optional[int] = None, - interval: int = 1, - by_epoch: bool = True, - fomo: bool = False, - save_best: Optional[str] = None, - rule: Optional[str] = None, - test_fn: Optional[Callable] = None, - greater_keys: Optional[List[str]] = None, - 
less_keys: Optional[List[str]] = None, - out_dir: Optional[str] = None, - file_client_args: Optional[dict] = None, - **eval_kwargs): - super().__init__(dataloader, start, interval, by_epoch, save_best, - rule, test_fn, greater_keys, less_keys, out_dir, - file_client_args, **eval_kwargs) + def __init__( + self, + dataloader: DataLoader, + start: Optional[int] = None, + interval: int = 1, + by_epoch: bool = True, + fomo: bool = False, + save_best: Optional[str] = None, + rule: Optional[str] = None, + test_fn: Optional[Callable] = None, + greater_keys: Optional[List[str]] = None, + less_keys: Optional[List[str]] = None, + out_dir: Optional[str] = None, + file_client_args: Optional[dict] = None, + **eval_kwargs, + ): + super().__init__( + dataloader, + start, + interval, + by_epoch, + save_best, + rule, + test_fn, + greater_keys, + less_keys, + out_dir, + file_client_args, + **eval_kwargs, + ) self.gts, self.pts = [], [] self.fomo = fomo @@ -65,10 +79,7 @@ def evaluate(self, runner, results): runner (:obj:`mmcv.Runner`): The underlined training runner. results (list): Output results. """ - eval_res = self.dataloader.dataset.evaluate(results, - logger=runner.logger, - fomo=self.fomo, - **self.eval_kwargs) + eval_res = self.dataloader.dataset.evaluate(results, logger=runner.logger, fomo=self.fomo, **self.eval_kwargs) for name, val in eval_res.items(): runner.log_buffer.output[name] = val @@ -82,7 +93,8 @@ def evaluate(self, runner, results): if not eval_res: warnings.warn( 'Since `eval_res` is an empty dict, the behavior to save ' - 'the best checkpoint will be skipped in this evaluation.') + 'the best checkpoint will be skipped in this evaluation.' + ) return None if self.key_indicator == 'auto': @@ -90,4 +102,4 @@ def evaluate(self, runner, results): self._init_rule(self.rule, list(eval_res.keys())[0]) return eval_res[self.key_indicator] - return None \ No newline at end of file + return None diff --git a/edgelab/engine/hooks/logger/__init__.py b/edgelab/engine/hooks/logger/__init__.py index c9f170e3..b9592496 100644 --- a/edgelab/engine/hooks/logger/__init__.py +++ b/edgelab/engine/hooks/logger/__init__.py @@ -4,7 +4,4 @@ from .clearml import ClearMLLoggerHook from .tensorboard import TensorboardLoggerHook -__all__ = [ - 'TextLoggerHook', 'PaviLoggerHook', 'WandbLoggerHook', 'ClearMLLoggerHook', - 'TensorboardLoggerHook' -] +__all__ = ['TextLoggerHook', 'PaviLoggerHook', 'WandbLoggerHook', 'ClearMLLoggerHook', 'TensorboardLoggerHook'] diff --git a/edgelab/engine/hooks/logger/clearml.py b/edgelab/engine/hooks/logger/clearml.py index 2319d123..b47ad7f1 100644 --- a/edgelab/engine/hooks/logger/clearml.py +++ b/edgelab/engine/hooks/logger/clearml.py @@ -2,6 +2,7 @@ from edgelab.registry import HOOKS from mmengine.dist.utils import master_only + # from mmcv.runner import HOOKS # from mmcv.runner.dist_utils import master_only @@ -10,27 +11,36 @@ @HOOKS.register_module(force=True) class ClearMLLoggerHook(TextLoggerHook): - - def __init__(self, - by_epoch: bool = True, - interval: int = 10, - ignore_last: bool = True, - reset_flag: bool = False, - interval_exp_name: int = 1000, - out_dir: Optional[str] = None, - out_suffix: Union[str, tuple] = ..., - keep_local: bool = True, - ndigits: int = 4, - init_kwargs: Optional[Dict] = None, - file_client_args: Optional[Dict] = None): - super().__init__(by_epoch, interval, ignore_last, reset_flag, - interval_exp_name, out_dir, out_suffix, keep_local, - ndigits, file_client_args) + def __init__( + self, + by_epoch: bool = True, + interval: int = 10, + 
ignore_last: bool = True, + reset_flag: bool = False, + interval_exp_name: int = 1000, + out_dir: Optional[str] = None, + out_suffix: Union[str, tuple] = ..., + keep_local: bool = True, + ndigits: int = 4, + init_kwargs: Optional[Dict] = None, + file_client_args: Optional[Dict] = None, + ): + super().__init__( + by_epoch, + interval, + ignore_last, + reset_flag, + interval_exp_name, + out_dir, + out_suffix, + keep_local, + ndigits, + file_client_args, + ) try: import clearml except ImportError: - raise ImportError( - 'Please run "pip install clearml" to install clearml') + raise ImportError('Please run "pip install clearml" to install clearml') self.clearml = clearml self.init_kwargs = init_kwargs @@ -45,7 +55,6 @@ def before_run(self, runner) -> None: def log(self, runner) -> None: tags = self.get_loggable_tags(runner) for tag, val in tags.items(): - self.task_logger.report_scalar(tag, tag, val, - self.get_iter(runner)) + self.task_logger.report_scalar(tag, tag, val, self.get_iter(runner)) - return super().log(runner) \ No newline at end of file + return super().log(runner) diff --git a/edgelab/engine/hooks/logger/pavi.py b/edgelab/engine/hooks/logger/pavi.py index 365c1d95..99562429 100644 --- a/edgelab/engine/hooks/logger/pavi.py +++ b/edgelab/engine/hooks/logger/pavi.py @@ -8,6 +8,7 @@ import mmcv from edgelab.registry import HOOKS from mmengine.dist.utils import master_only + # from mmcv.runner import HOOKS # from mmcv.parallel.scatter_gather import scatter @@ -18,27 +19,37 @@ @HOOKS.register_module(force=True) class PaviLoggerHook(TextLoggerHook): - - def __init__(self, - init_kwargs: Optional[Dict] = None, - add_graph: Optional[bool] = None, - img_key: Optional[str] = None, - add_last_ckpt: bool = False, - interval: int = 10, - ignore_last: bool = True, - reset_flag: bool = False, - by_epoch: bool = True, - add_graph_kwargs: Optional[Dict] = None, - add_ckpt_kwargs: Optional[Dict] = None, - interval_exp_name: int = 1000, - out_dir: Optional[str] = None, - out_suffix: Union[str, tuple] = ..., - keep_local: bool = True, - ndigits: int = 4, - file_client_args: Optional[Dict] = None): - super().__init__(by_epoch, interval, ignore_last, reset_flag, - interval_exp_name, out_dir, out_suffix, keep_local, - ndigits, file_client_args) + def __init__( + self, + init_kwargs: Optional[Dict] = None, + add_graph: Optional[bool] = None, + img_key: Optional[str] = None, + add_last_ckpt: bool = False, + interval: int = 10, + ignore_last: bool = True, + reset_flag: bool = False, + by_epoch: bool = True, + add_graph_kwargs: Optional[Dict] = None, + add_ckpt_kwargs: Optional[Dict] = None, + interval_exp_name: int = 1000, + out_dir: Optional[str] = None, + out_suffix: Union[str, tuple] = ..., + keep_local: bool = True, + ndigits: int = 4, + file_client_args: Optional[Dict] = None, + ): + super().__init__( + by_epoch, + interval, + ignore_last, + reset_flag, + interval_exp_name, + out_dir, + out_suffix, + keep_local, + ndigits, + file_client_args, + ) self.init_kwargs = init_kwargs @@ -48,19 +59,20 @@ def __init__(self, self.add_graph_interval = add_graph_kwargs.get('interval', 1) self.img_key = add_graph_kwargs.get('img_key', 'img') self.opset_version = add_graph_kwargs.get('opset_version', 11) - self.dummy_forward_kwargs = add_graph_kwargs.get( - 'dummy_forward_kwargs', {}) + self.dummy_forward_kwargs = add_graph_kwargs.get('dummy_forward_kwargs', {}) if add_graph is not None: warnings.warn( '"add_graph" is deprecated in `PaviLoggerHook`, please use ' 'the key "active" of add_graph_kwargs instead', - 
DeprecationWarning) + DeprecationWarning, + ) self.add_graph = add_graph if img_key is not None: warnings.warn( '"img_key" is deprecated in `PaviLoggerHook`, please use ' 'the key "img_key" of add_graph_kwargs instead', - DeprecationWarning) + DeprecationWarning, + ) self.img_key = img_key add_ckpt_kwargs = {} if add_ckpt_kwargs is None else add_ckpt_kwargs @@ -76,8 +88,8 @@ def before_run(self, runner) -> None: from pavi import SummaryWriter except ImportError: raise ImportError( - 'No module named pavi, please contact pavi team or visit' - 'document for pavi installation instructions.') + 'No module named pavi, please contact pavi team or visit' 'document for pavi installation instructions.' + ) self.run_name = runner.work_dir.split('/')[-1] @@ -88,10 +100,9 @@ def before_run(self, runner) -> None: if runner.meta is not None: if 'config_dict' in runner.meta: config_dict = runner.meta['config_dict'] - assert isinstance( - config_dict, - dict), ('meta["config_dict"] has to be of a dict, ' - f'but got {type(config_dict)}') + assert isinstance(config_dict, dict), ( + 'meta["config_dict"] has to be of a dict, ' f'but got {type(config_dict)}' + ) elif 'config_file' in runner.meta: config_file = runner.meta['config_file'] config_dict = dict(mmcv.Config.fromfile(config_file)) @@ -104,8 +115,7 @@ def before_run(self, runner) -> None: config_dict.setdefault('max_iter', runner.max_iters) # non-serializable values are first converted in # mmcv.dump to json - config_dict = json.loads( - mmcv.dump(config_dict, file_format='json')) + config_dict = json.loads(mmcv.dump(config_dict, file_format='json')) session_text = yaml.dump(config_dict) self.init_kwargs.setdefault('session_text', session_text) self.writer = SummaryWriter(**self.init_kwargs) @@ -118,14 +128,11 @@ def get_step(self, runner) -> int: return self.get_iter(runner) def _add_ckpt(self, runner, ckpt_path: str, step: int) -> None: - if osp.islink(ckpt_path): ckpt_path = osp.join(runner.work_dir, os.readlink(ckpt_path)) if osp.isfile(ckpt_path): - self.writer.add_snapshot_file(tag=self.run_name, - snapshot_file_path=ckpt_path, - iteration=step) + self.writer.add_snapshot_file(tag=self.run_name, snapshot_file_path=ckpt_path, iteration=step) # def _add_graph(self, runner, step: int) -> None: # from mmcv.runner.iter_based_runner import IterLoader @@ -159,8 +166,7 @@ def _add_ckpt(self, runner, ckpt_path: str, step: int) -> None: def log(self, runner) -> None: tags = self.get_loggable_tags(runner, add_mode=False) if tags: - self.writer.add_scalars(self.get_mode(runner), tags, - self.get_step(runner)) + self.writer.add_scalars(self.get_mode(runner), tags, self.get_step(runner)) return super().log(runner) @master_only @@ -182,9 +188,11 @@ def before_train_epoch(self, runner) -> None: return None step = self.get_epoch(runner) - if (self.add_graph and step >= self.add_graph_start - and ((step - self.add_graph_start) % self.add_graph_interval - == 0)): # noqa: E129 + if ( + self.add_graph + and step >= self.add_graph_start + and ((step - self.add_graph_start) % self.add_graph_interval == 0) + ): # noqa: E129 self._add_graph(runner, step) @master_only @@ -195,9 +203,11 @@ def before_train_iter(self, runner) -> None: return None step = self.get_iter(runner) - if (self.add_graph and step >= self.add_graph_start - and ((step - self.add_graph_start) % self.add_graph_interval - == 0)): # noqa: E129 + if ( + self.add_graph + and step >= self.add_graph_start + and ((step - self.add_graph_start) % self.add_graph_interval == 0) + ): # noqa: E129 
self._add_graph(runner, step) @master_only @@ -209,10 +219,11 @@ def after_train_epoch(self, runner) -> None: step = self.get_epoch(runner) - if (self.add_ckpt and step >= self.add_ckpt_start - and ((step - self.add_ckpt_start) % self.add_ckpt_interval - == 0)): # noqa: E129 - + if ( + self.add_ckpt + and step >= self.add_ckpt_start + and ((step - self.add_ckpt_start) % self.add_ckpt_interval == 0) + ): # noqa: E129 ckpt_path = osp.join(runner.work_dir, f'epoch_{step}.pth') self._add_ckpt(runner, ckpt_path, step) @@ -226,10 +237,11 @@ def after_train_iter(self, runner) -> None: step = self.get_iter(runner) - if (self.add_ckpt and step >= self.add_ckpt_start - and ((step - self.add_ckpt_start) % self.add_ckpt_interval - == 0)): # noqa: E129 - + if ( + self.add_ckpt + and step >= self.add_ckpt_start + and ((step - self.add_ckpt_start) % self.add_ckpt_interval == 0) + ): # noqa: E129 ckpt_path = osp.join(runner.work_dir, f'iter_{step}.pth') - self._add_ckpt(runner, ckpt_path, step) \ No newline at end of file + self._add_ckpt(runner, ckpt_path, step) diff --git a/edgelab/engine/hooks/logger/tensorboard.py b/edgelab/engine/hooks/logger/tensorboard.py index 7fa0c32b..bc1650e5 100644 --- a/edgelab/engine/hooks/logger/tensorboard.py +++ b/edgelab/engine/hooks/logger/tensorboard.py @@ -3,10 +3,12 @@ from edgelab.registry import HOOKS from mmengine.dist.utils import master_only + # from mmcv.runner import HOOKS # from mmcv.runner.dist_utils import master_only from mmengine.utils.dl_utils import TORCH_VERSION + # from mmcv.runner.hooks.logger.text import TextLoggerHook from .text import TextLoggerHook @@ -14,21 +16,31 @@ @HOOKS.register_module(force=True) class TensorboardLoggerHook(TextLoggerHook): - - def __init__(self, - by_epoch: bool = True, - interval: int = 10, - ignore_last: bool = True, - reset_flag: bool = False, - interval_exp_name: int = 1000, - out_dir: Optional[str] = None, - out_suffix: Union[str, tuple] = ..., - keep_local: bool = True, - ndigits: int = 4, - file_client_args: Optional[Dict] = None): - super().__init__(by_epoch, interval, ignore_last, reset_flag, - interval_exp_name, out_dir, out_suffix, keep_local, - ndigits, file_client_args) + def __init__( + self, + by_epoch: bool = True, + interval: int = 10, + ignore_last: bool = True, + reset_flag: bool = False, + interval_exp_name: int = 1000, + out_dir: Optional[str] = None, + out_suffix: Union[str, tuple] = ..., + keep_local: bool = True, + ndigits: int = 4, + file_client_args: Optional[Dict] = None, + ): + super().__init__( + by_epoch, + interval, + ignore_last, + reset_flag, + interval_exp_name, + out_dir, + out_suffix, + keep_local, + ndigits, + file_client_args, + ) self.log_dir = out_dir @@ -41,8 +53,7 @@ def before_run(self, runner) -> None: try: from tensorboardX import SummaryWriter except ImportError: - raise ImportError('Please install tensorboardX to use ' - 'TensorboardLoggerHook.') + raise ImportError('Please install tensorboardX to use ' 'TensorboardLoggerHook.') else: try: from torch.utils.tensorboard import SummaryWriter @@ -50,7 +61,8 @@ def before_run(self, runner) -> None: raise ImportError( 'Please run "pip install future tensorboard" to install ' 'the dependencies to use torch.utils.tensorboard ' - '(applicable to PyTorch 1.1 or higher)') + '(applicable to PyTorch 1.1 or higher)' + ) self._tensorboard = SummaryWriter(self.log_dir) @master_only diff --git a/edgelab/engine/hooks/logger/text.py b/edgelab/engine/hooks/logger/text.py index 812523ed..1bef5bb8 100644 --- a/edgelab/engine/hooks/logger/text.py 
+++ b/edgelab/engine/hooks/logger/text.py
@@ -14,20 +14,29 @@

 @HOOKS.register_module(force=True)
 class TextLoggerHook(LoggerHook):
-
-    def __init__(self,
-                 interval: int = 10,
-                 ignore_last: bool = False,
-                 interval_exp_name: int = 1000,
-                 out_dir: Optional[Union[str, Path]] = None,
-                 out_suffix: list(str()) = [''],
-                 keep_local: bool = True,
-                 file_client_args: Optional[dict] = None,
-                 log_metric_by_epoch: bool = True,
-                 backend_args: Optional[dict] = None):
-        super().__init__(interval, ignore_last, interval_exp_name, out_dir,
-                         out_suffix, keep_local, file_client_args,
-                         log_metric_by_epoch, backend_args)
+    def __init__(
+        self,
+        interval: int = 10,
+        ignore_last: bool = False,
+        interval_exp_name: int = 1000,
+        out_dir: Optional[Union[str, Path]] = None,
+        out_suffix: Union[str, list] = [''],
+        keep_local: bool = True,
+        file_client_args: Optional[dict] = None,
+        log_metric_by_epoch: bool = True,
+        backend_args: Optional[dict] = None,
+    ):
+        super().__init__(
+            interval,
+            ignore_last,
+            interval_exp_name,
+            out_dir,
+            out_suffix,
+            keep_local,
+            file_client_args,
+            log_metric_by_epoch,
+            backend_args,
+        )
         self.ndigits = 4
         self.handltype = []

@@ -56,36 +65,21 @@ def before_train_epoch(self, runner):
             print('-' * 120)
         super().before_train_epoch(runner)

-    def after_train_iter(self,
-                         runner: Runner,
-                         batch_idx: int,
-                         data_batch: Optional[Union[dict, tuple, list]] = None,
-                         outputs: Optional[dict] = None) -> None:
+    def after_train_iter(
+        self,
+        runner: Runner,
+        batch_idx: int,
+        data_batch: Optional[Union[dict, tuple, list]] = None,
+        outputs: Optional[dict] = None,
+    ) -> None:
         super().after_train_iter(runner, batch_idx, data_batch, outputs)
-        self._progress_log(outputs,
-                           runner,
-                           runner.train_dataloader,
-                           batch_idx,
-                           mode='train')
-
-    def after_val_iter(self,
-                       runner: Runner,
-                       batch_idx: int,
-                       data_batch=None,
-                       outputs=None) -> None:
-        super().after_val_iter(runner=runner,
-                               batch_idx=batch_idx,
-                               data_batch=data_batch,
-                               outputs=outputs)
-        parsed_cfg = runner.log_processor._parse_windows_size(
-            runner, batch_idx, runner.log_processor.custom_cfg)
-        log_tag = runner.log_processor._collect_scalars(
-            parsed_cfg, runner, 'val')
-        self._progress_log(log_tag,
-                           runner,
-                           runner.val_dataloader,
-                           batch_idx,
-                           mode='val')
+        self._progress_log(outputs, runner, runner.train_dataloader, batch_idx, mode='train')
+
+    def after_val_iter(self, runner: Runner, batch_idx: int, data_batch=None, outputs=None) -> None:
+        super().after_val_iter(runner=runner, batch_idx=batch_idx, data_batch=data_batch, outputs=outputs)
+        parsed_cfg = runner.log_processor._parse_windows_size(runner, batch_idx, runner.log_processor.custom_cfg)
+        log_tag = runner.log_processor._collect_scalars(parsed_cfg, runner, 'val')
+        self._progress_log(log_tag, runner, runner.val_dataloader, batch_idx, mode='val')

     def after_train_epoch(self, runner) -> None:
         super().after_train_epoch(runner)
@@ -93,15 +87,9 @@ def after_train_epoch(self, runner) -> None:
             print('')

     def _after_epoch(self, runner, mode: str = 'train') -> None:
-
         return super()._after_epoch(runner, mode)

-    def _progress_log(self,
-                      log_dict: dict,
-                      runner: Runner,
-                      dataloader,
-                      idx: int,
-                      mode='train'):
+    def _progress_log(self, log_dict: dict, runner: Runner, dataloader, idx: int, mode='train'):
         head = '\n'
         end = ''
         current_epoch = runner.epoch
@@ -113,8 +101,7 @@ def _progress_log(self,
         head += "Mode".center(10)
         end += f"{mode:^10}"
         head += "Epoch".center(10)
-        end += f"{(current_epoch+1) if mode=='train' else current_epoch}/{max_epochs}".center(
-            10)
+        end
+= f"{(current_epoch+1) if mode=='train' else current_epoch}/{max_epochs}".center(10) for key, value in log_dict.items(): if isinstance(value, torch.Tensor): @@ -127,8 +114,7 @@ def _progress_log(self, value = sum(self.logData.get(key)) / len(self.logData.get(key)) head += f'{key:^10}' - end += f'{self._round_float(value):^10}' if isinstance( - value, float) else f'{value:^10}' + end += f'{self._round_float(value):^10}' if isinstance(value, float) else f'{value:^10}' eta_sec = runner.message_hub.get_info('eta') eta_str = str(datetime.timedelta(seconds=int(eta_sec))) @@ -149,10 +135,7 @@ def _progress_log(self, if self.bar.n == len(dataloader): del self.bar - def setloglevel(self, - runner: Runner, - handler: logging.Handler = logging.StreamHandler, - level: int = logging.ERROR): + def setloglevel(self, runner: Runner, handler: logging.Handler = logging.StreamHandler, level: int = logging.ERROR): if handler in self.handltype: return for i, hand in enumerate(runner.logger.handlers): @@ -173,9 +156,7 @@ def _round_float(self, items): def _get_max_memory(self, runner: Runner) -> int: device = getattr(runner.model, 'output_device', None) mem = torch.cuda.max_memory_allocated(device=device) - mem_mb = torch.tensor([int(mem) // (1048576)], - dtype=torch.int, - device=device) + mem_mb = torch.tensor([int(mem) // (1048576)], dtype=torch.int, device=device) if runner.world_size > 1: dist.reduce(mem_mb, 0, op=dist.ReduceOp.MAX) return f'{mem_mb.item()}MB' @@ -201,4 +182,4 @@ def progressInterval(self, idx: int, mode: str = 'train'): def iterInterval(self, runner: Runner): interval = runner.iter - self.currentIter self.currentIter = runner.iter - return interval \ No newline at end of file + return interval diff --git a/edgelab/engine/hooks/logger/wandb.py b/edgelab/engine/hooks/logger/wandb.py index b3f9a7da..d8019e99 100644 --- a/edgelab/engine/hooks/logger/wandb.py +++ b/edgelab/engine/hooks/logger/wandb.py @@ -4,6 +4,7 @@ from mmengine.utils import scandir from edgelab.registry import HOOKS from mmengine.dist.utils import master_only + # from mmcv.utils import scandir # from mmcv.runner import HOOKS # from mmcv.runner.dist_utils import master_only @@ -12,30 +13,39 @@ @HOOKS.register_module(force=True) class WandbLoggerHook(TextLoggerHook): - - def __init__(self, - init_kwargs: Optional[Dict] = None, - commit: bool = True, - by_epoch: bool = True, - with_step: bool = True, - log_artifact: bool = True, - interval: int = 10, - ignore_last: bool = True, - reset_flag: bool = False, - interval_exp_name: int = 1000, - out_dir: Optional[str] = None, - out_suffix: Union[str, tuple] = ..., - keep_local: bool = True, - ndigits: int = 4, - file_client_args: Optional[Dict] = None): - super().__init__(by_epoch, interval, ignore_last, reset_flag, - interval_exp_name, out_dir, out_suffix, keep_local, - ndigits, file_client_args) + def __init__( + self, + init_kwargs: Optional[Dict] = None, + commit: bool = True, + by_epoch: bool = True, + with_step: bool = True, + log_artifact: bool = True, + interval: int = 10, + ignore_last: bool = True, + reset_flag: bool = False, + interval_exp_name: int = 1000, + out_dir: Optional[str] = None, + out_suffix: Union[str, tuple] = ..., + keep_local: bool = True, + ndigits: int = 4, + file_client_args: Optional[Dict] = None, + ): + super().__init__( + by_epoch, + interval, + ignore_last, + reset_flag, + interval_exp_name, + out_dir, + out_suffix, + keep_local, + ndigits, + file_client_args, + ) try: import wandb except ImportError: - raise ImportError( - 'Please run "pip install 
wandb" to install wandb') + raise ImportError('Please run "pip install wandb" to install wandb') self.wandb = wandb self.init_kwargs = init_kwargs self.commit = commit @@ -58,8 +68,7 @@ def log(self, runner) -> None: tags = self.get_loggable_tags(runner) if tags: if self.with_step: - self.wandb.log( - tags, step=self.get_iter(runner), commit=self.commit) + self.wandb.log(tags, step=self.get_iter(runner), commit=self.commit) else: tags['global_step'] = self.get_iter(runner) self.wandb.log(tags, commit=self.commit) @@ -69,10 +78,9 @@ def log(self, runner) -> None: def after_run(self, runner) -> None: super().after_run(runner) if self.log_artifact: - wandb_artifact = self.wandb.Artifact( - name='artifacts', type='model') + wandb_artifact = self.wandb.Artifact(name='artifacts', type='model') for filename in scandir(runner.work_dir, self.out_suffix, True): local_filepath = osp.join(runner.work_dir, filename) wandb_artifact.add_file(local_filepath) self.wandb.log_artifact(wandb_artifact) - self.wandb.join() \ No newline at end of file + self.wandb.join() diff --git a/edgelab/engine/hooks/visualization_hook.py b/edgelab/engine/hooks/visualization_hook.py index 244634c1..4c1c5985 100644 --- a/edgelab/engine/hooks/visualization_hook.py +++ b/edgelab/engine/hooks/visualization_hook.py @@ -17,14 +17,13 @@ @HOOKS.register_module() class Posevisualization(Hook): - def __init__( self, enable: bool = False, interval: int = 50, kpt_thr: float = 0.3, show: bool = False, - wait_time: float = 0., + wait_time: float = 0.0, out_dir: Optional[str] = None, backend_args: Optional[dict] = None, ): @@ -35,10 +34,12 @@ def __init__( if self.show: # No need to think about vis backends. self._visualizer._vis_backends = {} - warnings.warn('The show is True, it means that only ' - 'the prediction results are visualized ' - 'without storing data, so vis_backends ' - 'needs to be excluded.') + warnings.warn( + 'The show is True, it means that only ' + 'the prediction results are visualized ' + 'without storing data, so vis_backends ' + 'needs to be excluded.' + ) self.wait_time = wait_time self.enable = enable @@ -46,8 +47,9 @@ def __init__( self._test_index = 0 self.backend_args = backend_args - def after_test_iter(self, runner: Runner, batch_idx: int, data_batch: dict, - outputs: Sequence[PoseDataSample]) -> None: + def after_test_iter( + self, runner: Runner, batch_idx: int, data_batch: dict, outputs: Sequence[PoseDataSample] + ) -> None: """Run after every testing iterations. 
Args: @@ -60,8 +62,7 @@ def after_test_iter(self, runner: Runner, batch_idx: int, data_batch: dict, return if self.out_dir is not None: - self.out_dir = os.path.join(runner.work_dir, runner.timestamp, - self.out_dir) + self.out_dir = os.path.join(runner.work_dir, runner.timestamp, self.out_dir) mmengine.mkdir_or_exist(self.out_dir) self._visualizer.set_dataset_meta(runner.test_evaluator.dataset_meta) @@ -77,12 +78,8 @@ def after_test_iter(self, runner: Runner, batch_idx: int, data_batch: dict, out_file = None if self.out_dir is not None: - out_file_name, postfix = os.path.basename(img_path).rsplit( - '.', 1) - index = len([ - fname for fname in os.listdir(self.out_dir) - if fname.startswith(out_file_name) - ]) + out_file_name, postfix = os.path.basename(img_path).rsplit('.', 1) + index = len([fname for fname in os.listdir(self.out_dir) if fname.startswith(out_file_name)]) out_file = f'{out_file_name}_{index}.{postfix}' out_file = os.path.join(self.out_dir, out_file) self._visualizer.add_datasample( @@ -96,21 +93,19 @@ def after_test_iter(self, runner: Runner, batch_idx: int, data_batch: dict, wait_time=self.wait_time, kpt_thr=self.kpt_thr, out_file=out_file, - step=self._test_index) + step=self._test_index, + ) @HOOKS.register_module() class DetFomoVisualizationHook(DetVisualizationHook): - def __init__(self, *args, fomo: bool = False, **kwarg): super().__init__(*args, **kwarg) self.fomo = fomo - def after_val_iter(self, - runner, - batch_idx: int, - data_batch: DATA_BATCH = None, - outputs: Optional[Sequence] = None) -> None: + def after_val_iter( + self, runner, batch_idx: int, data_batch: DATA_BATCH = None, outputs: Optional[Sequence] = None + ) -> None: if self.fomo: if self.draw is False: return @@ -132,17 +127,15 @@ def after_val_iter(self, show=self.show, wait_time=self.wait_time, pred_score_thr=self.score_thr, - step=total_curr_iter) + step=total_curr_iter, + ) else: return super().after_val_iter(runner, batch_idx, data_batch, outputs) - def after_test_iter(self, - runner, - batch_idx: int, - data_batch: DATA_BATCH = None, - outputs: Optional[Sequence] = None) -> None: - + def after_test_iter( + self, runner, batch_idx: int, data_batch: DATA_BATCH = None, outputs: Optional[Sequence] = None + ) -> None: if self.fomo: pass else: - return super().after_test_iter(runner, batch_idx, data_batch, outputs) \ No newline at end of file + return super().after_test_iter(runner, batch_idx, data_batch, outputs) diff --git a/edgelab/engine/runner/loops.py b/edgelab/engine/runner/loops.py index c1255d15..a0f7d208 100644 --- a/edgelab/engine/runner/loops.py +++ b/edgelab/engine/runner/loops.py @@ -12,7 +12,6 @@ @LOOPS.register_module() class GetEpochBasedTrainLoop(EpochBasedTrainLoop): - def run_iter(self, idx, data_batch: Sequence[dict]) -> None: """Iterate one min-batch. @@ -20,36 +19,30 @@ def run_iter(self, idx, data_batch: Sequence[dict]) -> None: data_batch (Sequence[dict]): Batch of data from dataloader. """ self.data_batch = data_batch - self.runner.call_hook('before_train_iter', - batch_idx=idx, - data_batch=self.data_batch) + self.runner.call_hook('before_train_iter', batch_idx=idx, data_batch=self.data_batch) # Enable gradient accumulation mode and avoid unnecessary gradient # synchronization during gradient accumulation process. # outputs should be a dict of loss. 
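+        # `train_step` wraps the forward pass, loss computation, backward pass
+        # and optimizer update in one call; the loss dict it returns is what
+        # `after_train_iter` receives as `outputs` below.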
- self.outputs = self.runner.model.train_step( - self.data_batch, optim_wrapper=self.runner.optim_wrapper) + self.outputs = self.runner.model.train_step(self.data_batch, optim_wrapper=self.runner.optim_wrapper) - self.runner.call_hook('after_train_iter', - batch_idx=idx, - data_batch=self.data_batch, - outputs=self.outputs) + self.runner.call_hook('after_train_iter', batch_idx=idx, data_batch=self.data_batch, outputs=self.outputs) self._iter += 1 @LOOPS.register_module() class EdgeTestLoop(BaseLoop): - - def __init__(self, - runner: Runner, - dataloader: Union[DataLoader, Dict], - evaluator: Union[Evaluator, Dict, List], - fp16: bool = False): + def __init__( + self, + runner: Runner, + dataloader: Union[DataLoader, Dict], + evaluator: Union[Evaluator, Dict, List], + fp16: bool = False, + ): super().__init__(runner, dataloader) @LOOPS.register_module() class EdgeTestRunner: - def __init__( self, model: Union[str, List], @@ -63,9 +56,9 @@ def __init__( ) net = ncnn.Net() for p in model: - if p.endswith('param'): + if p.endswith('param'): param = p - if p.endswith('bin'): + if p.endswith('bin'): bin = p net.load_param(param) net.load_model(bin) @@ -82,11 +75,12 @@ def __init__( net = onnx.load(model) onnx.checker.check_model(net) except ValueError: - raise ValueError( - 'onnx file have error,please check your onnx export code!') - providers = [ - 'CUDAExecutionProvider', 'CPUExecutionProvider' - ] if torch.cuda.is_available() else ['CPUExecutionProvider'] + raise ValueError('onnx file have error,please check your onnx export code!') + providers = ( + ['CUDAExecutionProvider', 'CPUExecutionProvider'] + if torch.cuda.is_available() + else ['CPUExecutionProvider'] + ) net = onnxruntime.InferenceSession(model, providers=providers) self.engine = 'onnx' elif model.endswith('tflite'): @@ -106,7 +100,7 @@ def __init__( def show(self): pass - + def metric(self): pass diff --git a/edgelab/engine/utils/batch_augs.py b/edgelab/engine/utils/batch_augs.py index 7ca0685b..33391a43 100644 --- a/edgelab/engine/utils/batch_augs.py +++ b/edgelab/engine/utils/batch_augs.py @@ -9,11 +9,9 @@ def pad_sample_seq_batch(x, n_samples): if x.size(0) >= n_samples: max_x_start = x.size(0) - n_samples x_start = random.randint(0, max_x_start) - x = x[:, x_start: x_start + n_samples] + x = x[:, x_start : x_start + n_samples] else: - x = F.pad( - x, (0, n_samples - x.size(1)), "constant" - ).data + x = F.pad(x, (0, n_samples - x.size(1)), "constant").data return x @@ -41,20 +39,24 @@ def __init__(self, params): if len(params['resample_factors']) > 0: self.random_resample = [ Resampler(input_sr=params['fs'], output_sr=int(params['fs'] * fac), dtype=torch.float32).to( - params['device']) for fac in params['resample_factors']] + params['device'] + ) + for fac in params['resample_factors'] + ] else: self.random_resample = [] def __call__(self, x, y, epoch): '''resample''' if len(self.random_resample) > 0 and random.random() < 0.5: - R = self.random_resample[random.randint( - 0, len(self.random_resample) - 1)] + R = self.random_resample[random.randint(0, len(self.random_resample) - 1)] x = batch_resample(R, x, self.params['seq_len']) '''mix''' - if len(self.params['augs']) > 0 and \ - random.random() <= self.params['mix_ratio'] \ - and epoch > self.params['epoch_mix']: + if ( + len(self.params['augs']) > 0 + and random.random() <= self.params['mix_ratio'] + and epoch > self.params['epoch_mix'] + ): is_mixed = True i = random.randint(0, len(self.params['augs']) - 1) aug = self.params['augs'][i] @@ -76,16 +78,11 @@ def mixup(self, 
data, target): idx = torch.randperm(data.size(0)) data_shuffled = data[idx, ...].clone() target_shuffled = target[idx].clone() - lam = 0.1 + 0.9 * \ - torch.rand(data.shape[0], 1, 1, - device=data.device, requires_grad=False) - G = 10 * \ - torch.log10(torch.clamp( - (data ** 2).mean(-1, keepdims=True), min=1e-5)) + lam = 0.1 + 0.9 * torch.rand(data.shape[0], 1, 1, device=data.device, requires_grad=False) + G = 10 * torch.log10(torch.clamp((data**2).mean(-1, keepdims=True), min=1e-5)) G_shuffled = G[idx] p = 1 / (1 + 10 ** ((G - G_shuffled) / 20) * (1 - lam) / lam) - data = (data * p + data_shuffled * (1 - p)) / \ - torch.sqrt(p ** 2 + (1 - p) ** 2) + data = (data * p + data_shuffled * (1 - p)) / torch.sqrt(p**2 + (1 - p) ** 2) targets = (target, target_shuffled, p.view(-1)) targets = [t.to(data.device) for t in targets] return data, targets @@ -95,14 +92,12 @@ def timemix(self, data, target): data_shuffled = data[idx].clone() target_shuffled = target[idx].clone() a = 0.5 - lam = a * \ - torch.rand(data.shape[0], 1, 1, device=data.device, - requires_grad=False) + (1 - a) + lam = a * torch.rand(data.shape[0], 1, 1, device=data.device, requires_grad=False) + (1 - a) n = data.shape[-1] n1 = (n * (1 - lam)).view(-1).int() for k, nn in enumerate(n1): if random.random() < 0.5: - data[k, :, n - nn:] = data_shuffled[k, :, n - nn:].clone() + data[k, :, n - nn :] = data_shuffled[k, :, n - nn :].clone() else: data[k, :, :nn] = data_shuffled[k, :, :nn].clone() del data_shuffled @@ -111,65 +106,69 @@ def timemix(self, data, target): return data, targets def freqmix(self, data, target): - data=data.squeeze(1) + data = data.squeeze(1) idx = torch.randperm(data.size(0)) - idx_win = random.randint( - 0, len(self.params['fft_params']['win_len']) - 1) - win = torch.hann_window( - self.params['fft_params']['win_len'][idx_win]).to(data.device) - X = torch.stft(data, - win_length=self.params['fft_params']['win_len'][idx_win], - hop_length=self.params['fft_params']['hop_len'][idx_win], - n_fft=self.params['fft_params']['n_fft'][idx_win], - return_complex=True, window=win) + idx_win = random.randint(0, len(self.params['fft_params']['win_len']) - 1) + win = torch.hann_window(self.params['fft_params']['win_len'][idx_win]).to(data.device) + X = torch.stft( + data, + win_length=self.params['fft_params']['win_len'][idx_win], + hop_length=self.params['fft_params']['hop_len'][idx_win], + n_fft=self.params['fft_params']['n_fft'][idx_win], + return_complex=True, + window=win, + ) X_shuffled = X[idx, ...].clone() target_shuffled = target[idx].clone() a = 0.5 - lam = a * \ - torch.rand(X.shape[0], 1, 1, device=X.device, - requires_grad=False) + (1 - a) + lam = a * torch.rand(X.shape[0], 1, 1, device=X.device, requires_grad=False) + (1 - a) n = X.shape[1] n1 = (n * (1 - lam)).view(-1).int() for k in range(X.shape[0]): nn = n1[k] if random.random() < 0.5: - X[k, n - nn:, :] = X_shuffled[k, n - nn:, :].clone() + X[k, n - nn :, :] = X_shuffled[k, n - nn :, :].clone() else: X[k, :nn, :] = X_shuffled[k, :nn, :].clone() del X_shuffled - data = torch.istft(X, - win_length=self.params['fft_params']['win_len'][idx_win], - hop_length=self.params['fft_params']['hop_len'][idx_win], - n_fft=self.params['fft_params']['n_fft'][idx_win], - window=win) + data = torch.istft( + X, + win_length=self.params['fft_params']['win_len'][idx_win], + hop_length=self.params['fft_params']['hop_len'][idx_win], + n_fft=self.params['fft_params']['n_fft'][idx_win], + window=win, + ) targets = (target, target_shuffled, lam.view(-1)) - data=data.unsqueeze(1) + 
data = data.unsqueeze(1) targets = [t.to(data.device) for t in targets] return data, targets def phmix(self, data, target): - data=data.squeeze(1) + data = data.squeeze(1) b, device = data.shape[0], data.device idx = torch.randperm(data.size(0)) - idx_win = random.randint( - 0, len(self.params['fft_params']['win_len']) - 1) + idx_win = random.randint(0, len(self.params['fft_params']['win_len']) - 1) target_shuffled = target[idx].clone() - X = torch.stft(data, - win_length=self.params['fft_params']['win_len'][idx_win], - hop_length=self.params['fft_params']['hop_len'][idx_win], - n_fft=self.params['fft_params']['n_fft'][idx_win], - return_complex=True) + X = torch.stft( + data, + win_length=self.params['fft_params']['win_len'][idx_win], + hop_length=self.params['fft_params']['hop_len'][idx_win], + n_fft=self.params['fft_params']['n_fft'][idx_win], + return_complex=True, + ) X_ph = X.angle() X_shuffled_ph = X_ph[idx, ...].clone() lam = torch.rand(b, 1, 1, device=device, requires_grad=False) ph = X_ph.mul_(lam).add_((1 - lam) * X_shuffled_ph) X = X.abs() * torch.exp(1j * ph) - data = torch.istft(X, - win_length=self.params['fft_params']['win_len'][idx_win], - hop_length=self.params['fft_params']['hop_len'][idx_win], - n_fft=self.params['fft_params']['n_fft'][idx_win]) + data = torch.istft( + X, + win_length=self.params['fft_params']['win_len'][idx_win], + hop_length=self.params['fft_params']['hop_len'][idx_win], + n_fft=self.params['fft_params']['n_fft'][idx_win], + ) targets = (target, target_shuffled, lam.view(-1) * 0.5 + 0.5) - data=data.unsqueeze(1) + data = data.unsqueeze(1) targets = [t.to(data.device) for t in targets] return data, targets @@ -184,12 +183,11 @@ def mix_loss(self, logits, target, n_classes=None, pred_one_hot=None): elif self.params['mix_loss'] == 'bce': if not pred_one_hot: target = F.one_hot(target, n_classes).float() - target_shuffled = F.one_hot( - target_shuffled, n_classes).float() * (lam < 0.9) + target_shuffled = F.one_hot(target_shuffled, n_classes).float() * (lam < 0.9) one_h_mix = torch.clamp(target + target_shuffled, max=1) loss = self.loss(logits, one_h_mix) else: - target_shuffled *= (lam < 0.9) + target_shuffled *= lam < 0.9 one_h_mix = torch.clamp(target + target_shuffled, max=1) loss = self.loss(logits, one_h_mix) return loss diff --git a/edgelab/engine/utils/helper_funcs.py b/edgelab/engine/utils/helper_funcs.py index e0bd0101..c068018c 100644 --- a/edgelab/engine/utils/helper_funcs.py +++ b/edgelab/engine/utils/helper_funcs.py @@ -8,8 +8,8 @@ epsilon = 1e-8 -class AugBasic: +class AugBasic: def __init__(self, fs): super().__init__() self.fs = fs @@ -39,7 +39,7 @@ def make_weights_for_balanced_classes(samples, nclasses): count = [0] * nclasses for item in samples: count[item[1]] += 1 - weight_per_class = [0.] * nclasses + weight_per_class = [0.0] * nclasses N = float(sum(count)) for i in range(nclasses): weight_per_class[i] = N / float(count[i]) @@ -105,7 +105,7 @@ def find_first_nnz(t, q, dim=1): return mask_max_indices -def accuracy(output, target, topk=(1, )): +def accuracy(output, target, topk=(1,)): """Computes the precision@k for the specified values of k""" maxk = max(topk) batch_size = target.size(0) @@ -113,9 +113,7 @@ def accuracy(output, target, topk=(1, )): pred = pred.t() with torch.no_grad(): correct = pred.eq(target.view(1, -1).expand_as(pred)) - return [ - correct[:k].view(-1).float().sum(0) * 100. 
/ batch_size for k in topk - ] + return [correct[:k].view(-1).float().sum(0) * 100.0 / batch_size for k in topk] def average_precision(output, target): @@ -156,7 +154,7 @@ def pad_sample_seq(x, n_samples): if x.size(-1) >= n_samples: max_x_start = x.size(-1) - n_samples x_start = random.randint(0, max_x_start) - x = x[x_start:x_start + n_samples] + x = x[x_start : x_start + n_samples] else: x = F.pad(x, (0, n_samples - x.size(-1)), "constant").data return x @@ -166,7 +164,7 @@ def pad_sample_seq_batch(x, n_samples): if x.size(0) >= n_samples: max_x_start = x.size(0) - n_samples x_start = random.randint(0, max_x_start) - x = x[:, x_start:x_start + n_samples] + x = x[:, x_start : x_start + n_samples] else: x = F.pad(x, (0, n_samples - x.size(1)), "constant").data return x @@ -183,21 +181,14 @@ def add_weight_decay(model, weight_decay=1e-5, skip_list=()): no_decay.append(param) else: decay.append(param) - return [{ - 'params': no_decay, - 'weight_decay': 0. - }, { - 'params': decay, - 'weight_decay': weight_decay - }] + return [{'params': no_decay, 'weight_decay': 0.0}, {'params': decay, 'weight_decay': weight_decay}] def _get_bn_param_ids(net): bn_ids = [] for m in net.modules(): print(m) - if isinstance(m, torch.nn.BatchNorm1d) or isinstance( - m, torch.nn.LayerNorm): + if isinstance(m, torch.nn.BatchNorm1d) or isinstance(m, torch.nn.LayerNorm): bn_ids.append(id(m.weight)) bn_ids.append(id(m.bias)) elif isinstance(m, torch.nn.Conv1d) or isinstance(m, torch.nn.Linear): @@ -215,10 +206,7 @@ def reduce_tensor(tensor, n): def gather_tensor(tensor, n): rt = tensor.clone() - tensor_list = [ - torch.zeros(n, device=tensor.device, dtype=torch.cuda.float()) - for _ in range(n) - ] + tensor_list = [torch.zeros(n, device=tensor.device, dtype=torch.cuda.float()) for _ in range(n)] dist.all_gather(tensor_list, rt) return tensor_list @@ -234,7 +222,7 @@ def representative_dataset(dataset): for i, fn in enumerate(dataset): if 'img' in fn.keys(): data = fn['img'] - if not isinstance(data, torch.Tensor): # for yolov3 + if not isinstance(data, torch.Tensor): # for yolov3 data = data[0].data data = data.permute(1, 2, 0) else: @@ -264,4 +252,4 @@ def check_type(type): from mmpose.models import build_posenet as build_model from mmpose.datasets import build_dataset, build_dataloader - return setup_multi_processes, build_model, build_dataset, build_dataloader \ No newline at end of file + return setup_multi_processes, build_model, build_dataset, build_dataloader diff --git a/edgelab/engine/utils/resample.py b/edgelab/engine/utils/resample.py index dbaeb170..8d287798 100644 --- a/edgelab/engine/utils/resample.py +++ b/edgelab/engine/utils/resample.py @@ -16,14 +16,7 @@ class Resampler(torch.nn.Module): with improvements to include additional filter types and input parameters that align with the librosa api """ - def __init__(self, - input_sr, - output_sr, - dtype, - num_zeros=64, - cutoff_ratio=0.95, - filter='kaiser', - beta=14.0): + def __init__(self, input_sr, output_sr, dtype, num_zeros=64, cutoff_ratio=0.95, filter='kaiser', beta=14.0): super().__init__() # init the base class """ This creates an object that can apply a symmetric FIR filter @@ -59,7 +52,7 @@ def __init__(self, return def gcd(a, b): - """ Return the greatest common divisor of a and b""" + """Return the greatest common divisor of a and b""" assert isinstance(a, int) and isinstance(b, int) if b == 0: return a @@ -111,8 +104,7 @@ def gcd(a, b): # We want the weights as used by torch's conv1d code; format is # (out_channels, in_channels, 
kernel_width) # https://pytorch.org/docs/stable/nn.functional.html - weights = torch.tensor((output_sr, input_sr, kernel_width), - dtype=dtype) + weights = torch.tensor((output_sr, input_sr, kernel_width), dtype=dtype) # Computations involving time will be in units of 1 block. Actually this # is the same as the `canonical` time axis since each block has input_sr @@ -130,12 +122,11 @@ def gcd(a, b): # convolution or correlation, and the logic is tricky.. I will just find # which sign works. - times = (np.arange(output_sr, dtype=np_dtype).reshape( - (output_sr, 1, 1)) / output_sr - - np.arange(input_sr, dtype=np_dtype).reshape( - (1, input_sr, 1)) / input_sr - - (np.arange(kernel_width, dtype=np_dtype).reshape( - (1, 1, kernel_width)) - blocks_per_side)) + times = ( + np.arange(output_sr, dtype=np_dtype).reshape((output_sr, 1, 1)) / output_sr + - np.arange(input_sr, dtype=np_dtype).reshape((1, input_sr, 1)) / input_sr + - (np.arange(kernel_width, dtype=np_dtype).reshape((1, 1, kernel_width)) - blocks_per_side) + ) def hann_window(a): """ @@ -145,13 +136,10 @@ def hann_window(a): The heaviside function returns (a > 0 ? 1 : 0). """ - return np.heaviside(1 - np.abs(a), - 0.0) * (0.5 + 0.5 * np.cos(a * np.pi)) + return np.heaviside(1 - np.abs(a), 0.0) * (0.5 + 0.5 * np.cos(a * np.pi)) def kaiser_window(a, beta): - w = special.i0( - beta * np.sqrt(np.clip(1 - ( - (a - 0.0) / 1.0)**2.0, 0.0, 1.0))) / special.i0(beta) + w = special.i0(beta * np.sqrt(np.clip(1 - ((a - 0.0) / 1.0) ** 2.0, 0.0, 1.0))) / special.i0(beta) return np.heaviside(1 - np.abs(a), 0.0) * w # The weights below are a sinc function times a Hann-window function. @@ -164,13 +152,19 @@ def kaiser_window(a, beta): # in order to have the same magnitude as the original input function, # we need to divide by the number of those deltas per unit time. 
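+        # Net effect: weights = sinc(times * zeros_per_block)
+        # * window(times / window_radius_in_blocks) * zeros_per_block / input_sr,
+        # i.e. a windowed-sinc low-pass filter with (roughly) unit passband gain.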
if filter == 'hann': - weights = (np.sinc(times * zeros_per_block) * - hann_window(times / window_radius_in_blocks) * - zeros_per_block / input_sr) + weights = ( + np.sinc(times * zeros_per_block) + * hann_window(times / window_radius_in_blocks) + * zeros_per_block + / input_sr + ) else: - weights = (np.sinc(times * zeros_per_block) * - kaiser_window(times / window_radius_in_blocks, beta) * - zeros_per_block / input_sr) + weights = ( + np.sinc(times * zeros_per_block) + * kaiser_window(times / window_radius_in_blocks, beta) + * zeros_per_block + / input_sr + ) self.input_sr = input_sr self.output_sr = output_sr @@ -185,8 +179,7 @@ def kaiser_window(a, beta): self.resample_type = 'integer_downsample' self.padding = input_sr * blocks_per_side weights = torch.tensor(weights, dtype=dtype, requires_grad=False) - self.weights = weights.transpose(1, 2).contiguous().view( - 1, 1, input_sr * kernel_width) + self.weights = weights.transpose(1, 2).contiguous().view(1, 1, input_sr * kernel_width) elif input_sr == 1: # In this case we'll be doing conv_transpose, so we want the same weights that @@ -195,15 +188,12 @@ def kaiser_window(a, beta): self.resample_type = 'integer_upsample' self.padding = output_sr * blocks_per_side weights = torch.tensor(weights, dtype=dtype, requires_grad=False) - self.weights = weights.flip(2).transpose(0, 2).contiguous().view( - 1, 1, output_sr * kernel_width) + self.weights = weights.flip(2).transpose(0, 2).contiguous().view(1, 1, output_sr * kernel_width) else: self.resample_type = 'general' self.reshaped = False self.padding = blocks_per_side - self.weights = torch.tensor(weights, - dtype=dtype, - requires_grad=False) + self.weights = torch.tensor(weights, dtype=dtype, requires_grad=False) self.weights = torch.nn.Parameter(self.weights, requires_grad=False) @@ -229,20 +219,14 @@ def forward(self, data): (minibatch_size, seq_len) = data.shape # will be shape (minibatch_size, in_channels, seq_len) with in_channels == 1 data.unsqueeze_(1) - data = torch.nn.functional.conv1d(data, - self.weights, - stride=self.input_sr, - padding=self.padding) + data = torch.nn.functional.conv1d(data, self.weights, stride=self.input_sr, padding=self.padding) # shape will be (minibatch_size, out_channels = 1, seq_len); # return as (minibatch_size, seq_len) return data.squeeze_(1) elif self.resample_type == 'integer_upsample': data.unsqueeze_(1) - data = torch.nn.functional.conv_transpose1d(data, - self.weights, - stride=self.output_sr, - padding=self.padding) + data = torch.nn.functional.conv_transpose1d(data, self.weights, stride=self.output_sr, padding=self.padding) return data.squeeze_(1) else: @@ -253,19 +237,15 @@ def forward(self, data): # TODO: pad with zeros. 
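+                # Roughly: the input is shorter than one resampling block of
+                # `input_sr` samples, so the blockwise conv below would have
+                # nothing to process.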
raise RuntimeError("Signal is too short to resample") # data = data[:, 0:(num_blocks*self.input_sr)] # Truncate input - data = data[:, 0:(num_blocks * self.input_sr)].view( - minibatch_size, num_blocks, self.input_sr) + data = data[:, 0 : (num_blocks * self.input_sr)].view(minibatch_size, num_blocks, self.input_sr) # Torch's conv1d expects input data with shape (minibatch, in_channels, time_steps), so transpose data.transpose_(1, 2) - data = torch.nn.functional.conv1d(data, - self.weights, - padding=self.padding) + data = torch.nn.functional.conv1d(data, self.weights, padding=self.padding) assert data.shape == (minibatch_size, self.output_sr, num_blocks) - return data.transpose(1, 2).contiguous().view( - minibatch_size, num_blocks * self.output_sr) + return data.transpose(1, 2).contiguous().view(minibatch_size, num_blocks * self.output_sr) if __name__ == '__main__': diff --git a/edgelab/evaluation/__init__.py b/edgelab/evaluation/__init__.py index bd3f6e41..7a9e4f1e 100644 --- a/edgelab/evaluation/__init__.py +++ b/edgelab/evaluation/__init__.py @@ -1,4 +1,4 @@ from .point_metric import PointMetric from .fomo_metric import FomoMetric -__all__ = ['PointMetric','FomoMetric'] +__all__ = ['PointMetric', 'FomoMetric'] diff --git a/edgelab/evaluation/fomo_metric.py b/edgelab/evaluation/fomo_metric.py index 6f8a17b1..3d8e581b 100644 --- a/edgelab/evaluation/fomo_metric.py +++ b/edgelab/evaluation/fomo_metric.py @@ -10,19 +10,15 @@ @METRICS.register_module() class FomoMetric(BaseMetric): - - def __init__(self, - collect_device: str = 'cpu', - prefix: Optional[str] = None) -> None: + def __init__(self, collect_device: str = 'cpu', prefix: Optional[str] = None) -> None: super().__init__(collect_device, prefix) self.posit_offset = torch.tensor( - [[0, -1, 0], [0, -1, -1], [0, 0, -1], [0, 1, 0], [0, 1, 1], - [0, 0, 1], [0, 1, -1], [0, -1, 1], [0, 0, 0]], - dtype=torch.long) + [[0, -1, 0], [0, -1, -1], [0, 0, -1], [0, 1, 0], [0, 1, 1], [0, 0, 1], [0, 1, -1], [0, -1, 1], [0, 0, 0]], + dtype=torch.long, + ) def compute_ftp(self, preds, target): - preds, target = preds.to(torch.device('cpu')), target.to( - torch.device('cpu')) + preds, target = preds.to(torch.device('cpu')), target.to(torch.device('cpu')) preds = torch.softmax(preds, dim=-1) B, H, W, C = preds.shape # Get the category id of each box @@ -45,14 +41,13 @@ def compute_ftp(self, preds, target): if torch.any(site < 0) or torch.any(site >= H): continue # The prediction is considered to be correct if it is near the ground truth box - if site in preds_index and preds_max[site.chunk( - 3)] == target_max[ti.chunk(3)]: + if site in preds_index and preds_max[site.chunk(3)] == target_max[ti.chunk(3)]: preds_max[site.chunk(3)] = target_max[ti.chunk(3)] target_max[site.chunk(3)] = target_max[ti.chunk(3)] # Calculate the confusion matrix - confusion = confusion_matrix(target_max.flatten().cpu().numpy(), - preds_max.flatten().cpu().numpy(), - labels=range(preds.shape[-1])) + confusion = confusion_matrix( + target_max.flatten().cpu().numpy(), preds_max.flatten().cpu().numpy(), labels=range(preds.shape[-1]) + ) # Calculate the value of P、R、F1 based on the confusion matrix tn = confusion[0, 0] tp = np.diagonal(confusion).sum() - tn @@ -73,8 +68,7 @@ def computer_prf(self, tp, fp, fn): def process(self, data_batch, data_samples) -> None: TP = FP = FN = [] - preds, target = data_samples[0]['pred_instances'][ - 'pred'], data_samples[0]['pred_instances']['labels'] + preds, target = data_samples[0]['pred_instances']['pred'], 
data_samples[0]['pred_instances']['labels']
+        preds, target = data_samples[0]['pred_instances']['pred'], data_samples[0]['pred_instances']['labels']
         preds = tuple([pred.permute(0, 2, 3, 1) for pred in preds])
         tp, fp, fn = multi_apply(self.compute_ftp, preds, target)
diff --git a/edgelab/evaluation/point_metric.py b/edgelab/evaluation/point_metric.py
index a68953e0..95ae4700 100644
--- a/edgelab/evaluation/point_metric.py
+++ b/edgelab/evaluation/point_metric.py
@@ -19,7 +19,7 @@ def pose_acc(pred, target, hw, th=10):
     th = th
     acc = []
     for p, t in zip(pred, target):
-        distans = ((t[0] - p[0])**2 + (t[1] - p[1])**2)**0.5
+        distans = ((t[0] - p[0]) ** 2 + (t[1] - p[1]) ** 2) ** 0.5
         if distans > th:
             acc.append(0)
         elif distans > 1:
@@ -31,29 +31,17 @@

 @METRICS.register_module()
 class PointMetric(BaseMetric):
-
-    def __init__(self,
-                 collect_device: str = 'cpu',
-                 prefix: Optional[str] = 'keypoint') -> None:
+    def __init__(self, collect_device: str = 'cpu', prefix: Optional[str] = 'keypoint') -> None:
         super().__init__(collect_device, prefix)

     def process(self, data_batch: Any, data_samples: Sequence[dict]) -> None:
         target = data_batch['data_samples']['keypoints']
-        size = data_batch['data_samples']['hw']  #.cpu().numpy()
-        result = np.array(
-            [i.cpu().numpy() for i in data_samples[0]['results']])
+        size = data_batch['data_samples']['hw']  # .cpu().numpy()
+        result = np.array([i.cpu().numpy() for i in data_samples[0]['results']])

-        result = result if len(result.shape) == 2 else result[
-            None, :]  # onnx shape(2,), tflite shape(1,2)
+        result = result if len(result.shape) == 2 else result[None, :]  # onnx shape(2,), tflite shape(1,2)
         acc = pose_acc(result.copy(), target, size)
-        self.results.append({
-            'Acc':
-            acc,
-            'pred':
-            result,
-            'image_file':
-            data_batch['data_samples']['image_file']
-        })
+        self.results.append({'Acc': acc, 'pred': result, 'image_file': data_batch['data_samples']['image_file']})

     def compute_metrics(self, results: list) -> dict:
         return {'Acc': sum([i['Acc'] for i in results]) / len(results)}
diff --git a/edgelab/models/backbones/AxesNet.py b/edgelab/models/backbones/AxesNet.py
index b9b7ef8b..7a510b2b 100644
--- a/edgelab/models/backbones/AxesNet.py
+++ b/edgelab/models/backbones/AxesNet.py
@@ -6,39 +6,30 @@

 @MODELS.register_module()
 class AxesNet(nn.Module):
-
-    def __init__(self,
-                 num_axes=3,  # axes number
-                 frequency=62.5,  # sample frequency
-                 window=1000,  # window size
-                 num_classes=-1
-                 ):
+    def __init__(
+        self,
+        num_axes=3,  # axes number
+        frequency=62.5,  # sample frequency
+        window=1000,  # window size
+        num_classes=-1,
+    ):
         super().__init__()
         self.num_classes = num_classes
         self.intput_feature = num_axes * int(frequency * window / 1000)
         liner_feature = self.liner_feature_fit()
-        self.fc1 = nn.Linear(in_features=self.intput_feature,
-                             out_features=liner_feature, bias=True)
-        self.fc2 = nn.Linear(
-            in_features=liner_feature, out_features=liner_feature, bias=True)
+        self.fc1 = nn.Linear(in_features=self.intput_feature, out_features=liner_feature, bias=True)
+        self.fc2 = nn.Linear(in_features=liner_feature, out_features=liner_feature, bias=True)

         if self.num_classes > 0:
             self.classifier = nn.Linear(in_features=liner_feature, out_features=num_classes, bias=True)
-

     def liner_feature_fit(self):
-
         return (int(self.intput_feature / 1024) + 1) * 256

     def forward(self, x):
-
         x = F.relu(self.fc1(x))
         x = F.relu(self.fc2(x))
-
+
         if self.num_classes > 0:
             x = self.classifier(x)
-
-        return (x, )
+
+        return (x,)


 if __name__ == '__main__':
diff --git a/edgelab/models/backbones/EfficientNet.py b/edgelab/models/backbones/EfficientNet.py
index 7ab25dd2..2484667c
100644 --- a/edgelab/models/backbones/EfficientNet.py +++ b/edgelab/models/backbones/EfficientNet.py @@ -13,9 +13,17 @@ class MBConvConfig: # Stores information listed at Table 1 of the EfficientNet paper - def __init__(self, expand_ratio: float, kernel: int, stride: int, - input_channels: int, out_channels: int, num_layers: int, - width_mult: float, depth_mult: float) -> None: + def __init__( + self, + expand_ratio: float, + kernel: int, + stride: int, + input_channels: int, + out_channels: int, + num_layers: int, + width_mult: float, + depth_mult: float, + ) -> None: self.expand_ratio = expand_ratio self.kernel = kernel self.stride = stride @@ -24,9 +32,7 @@ def __init__(self, expand_ratio: float, kernel: int, stride: int, self.num_layers = self.adjust_depth(num_layers, depth_mult) @staticmethod - def adjust_channels(channels: int, - width_mult: float, - min_value: Optional[int] = None) -> int: + def adjust_channels(channels: int, width_mult: float, min_value: Optional[int] = None) -> int: return make_divisible(channels * width_mult, 8, min_value) @staticmethod @@ -35,13 +41,14 @@ def adjust_depth(num_layers: int, depth_mult: float): class MBConv(nn.Module): - - def __init__(self, - cnf: MBConvConfig, - stochastic_depth_prob: float, - norm_layer: Callable[..., nn.Module], - se_layer: Callable[..., nn.Module] = SqueezeExcitation, - rep: bool = False) -> None: + def __init__( + self, + cnf: MBConvConfig, + stochastic_depth_prob: float, + norm_layer: Callable[..., nn.Module], + se_layer: Callable[..., nn.Module] = SqueezeExcitation, + rep: bool = False, + ) -> None: super().__init__() if not (1 <= cnf.stride <= 2): @@ -53,48 +60,45 @@ def __init__(self, activation_layer = nn.ReLU # expand - expanded_channels = cnf.adjust_channels(cnf.input_channels, - cnf.expand_ratio) + expanded_channels = cnf.adjust_channels(cnf.input_channels, cnf.expand_ratio) if expanded_channels != cnf.input_channels: layers.append( - ConvNormActivation(cnf.input_channels, - expanded_channels, - kernel_size=1, - norm_layer=norm_layer, - activation_layer=activation_layer)) + ConvNormActivation( + cnf.input_channels, + expanded_channels, + kernel_size=1, + norm_layer=norm_layer, + activation_layer=activation_layer, + ) + ) # depthwise if rep: - layers.append( - RepConv1x1(expanded_channels, - expanded_channels, - stride=cnf.stride, - act_cfg=activation_layer)) + layers.append(RepConv1x1(expanded_channels, expanded_channels, stride=cnf.stride, act_cfg=activation_layer)) else: layers.append( - ConvNormActivation(expanded_channels, - expanded_channels, - kernel_size=cnf.kernel, - stride=cnf.stride, - groups=expanded_channels, - norm_layer=norm_layer, - activation_layer=activation_layer)) + ConvNormActivation( + expanded_channels, + expanded_channels, + kernel_size=cnf.kernel, + stride=cnf.stride, + groups=expanded_channels, + norm_layer=norm_layer, + activation_layer=activation_layer, + ) + ) # squeeze and excitation squeeze_channels = max(1, cnf.input_channels // 4) - layers.append( - se_layer(expanded_channels, - squeeze_channels, - activation=partial(nn.ReLU, inplace=True))) + layers.append(se_layer(expanded_channels, squeeze_channels, activation=partial(nn.ReLU, inplace=True))) # project layers.append( - ConvNormActivation(expanded_channels, - cnf.out_channels, - kernel_size=1, - norm_layer=norm_layer, - activation_layer=None)) + ConvNormActivation( + expanded_channels, cnf.out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=None + ) + ) self.block = nn.Sequential(*layers) self.stochastic_depth = 
StochasticDepth(stochastic_depth_prob, "row") @@ -130,18 +134,20 @@ class EfficientNet(BaseModule): 'b4': [1.4, 1.8, 0.4], 'b5': [1.6, 2.2, 0.5], 'b6': [1.8, 2.6, 0.5], - 'b7': [2.0, 3.1, 0.5] + 'b7': [2.0, 3.1, 0.5], } - def __init__(self, - arch='b0', - input_channels=3, - out_indices=(2, ), - norm_cfg='BN', - frozen_stages=-1, - norm_eval=False, - rep=False, - init_cfg: Optional[dict] = None): + def __init__( + self, + arch='b0', + input_channels=3, + out_indices=(2,), + norm_cfg='BN', + frozen_stages=-1, + norm_eval=False, + rep=False, + init_cfg: Optional[dict] = None, + ): super().__init__(init_cfg) assert arch in self.width_depth_mult.keys() @@ -153,20 +159,15 @@ def __init__(self, width_depth_setting = self.width_depth_mult[arch] self.layer_name = [f'layer{i}' for i in range(1, len(self.arch) + 1)] - block_conf = partial(MBConvConfig, - width_mult=width_depth_setting[0], - depth_mult=width_depth_setting[1]) + block_conf = partial(MBConvConfig, width_mult=width_depth_setting[0], depth_mult=width_depth_setting[1]) stochastic_depth_prob = width_depth_setting[-1] arch_param = [block_conf(*i) for i in self.arch] - self.conv1 = ConvNormActivation(input_channels, - arch_param[0].input_channels, - 3, - 2, - norm_layer=norm_cfg, - activation_layer='ReLU') + self.conv1 = ConvNormActivation( + input_channels, arch_param[0].input_channels, 3, 2, norm_layer=norm_cfg, activation_layer='ReLU' + ) total_stage_blocks = sum([cnf.num_layers for cnf in arch_param]) stage_block_id = 0 @@ -178,10 +179,8 @@ def __init__(self, if layer: conf.input_channels = conf.out_channels conf.stride = 1 - sd_prob = stochastic_depth_prob * float( - stage_block_id) / total_stage_blocks - layer.append( - MBConv(conf, sd_prob, norm_layer=norm_cfg, rep=rep)) + sd_prob = stochastic_depth_prob * float(stage_block_id) / total_stage_blocks + layer.append(MBConv(conf, sd_prob, norm_layer=norm_cfg, rep=rep)) stage_block_id += 1 self.add_module(name, nn.Sequential(*layer)) diff --git a/edgelab/models/backbones/MobileNetv2.py b/edgelab/models/backbones/MobileNetv2.py index d7a4380a..33839c7e 100644 --- a/edgelab/models/backbones/MobileNetv2.py +++ b/edgelab/models/backbones/MobileNetv2.py @@ -5,23 +5,24 @@ from mmengine.model import BaseModule from edgelab.registry import BACKBONES, MODELS from torchvision.models._utils import _make_divisible -from edgelab.models.layers.rep import RepBlock,RepConv1x1 +from edgelab.models.layers.rep import RepBlock, RepConv1x1 from ..base.general import InvertedResidual, ConvNormActivation @BACKBONES.register_module() class MobileNetv2(BaseModule): - - def __init__(self, - widen_factor: float = 1.0, - inverted_residual_setting: Optional[List[List[int]]] = None, - round_nearest: int = 8, - block: Optional[dict] = None, - norm_layer: Optional[dict] = None, - out_indices: Tuple[int, ...] = (1, 2, 3), - gray_input: bool = False, - rep: bool = False, - init_cfg: Union[dict, List[dict], None] = None): + def __init__( + self, + widen_factor: float = 1.0, + inverted_residual_setting: Optional[List[List[int]]] = None, + round_nearest: int = 8, + block: Optional[dict] = None, + norm_layer: Optional[dict] = None, + out_indices: Tuple[int, ...] 
= (1, 2, 3), + gray_input: bool = False, + rep: bool = False, + init_cfg: Union[dict, List[dict], None] = None, + ): super().__init__(init_cfg) self.out_indices = out_indices if block is None and not rep: @@ -49,21 +50,20 @@ def __init__(self, [6, 320, 1, 1], ] - assert len(inverted_residual_setting) and len( - inverted_residual_setting[0] - ) == 4, ValueError( + assert len(inverted_residual_setting) and len(inverted_residual_setting[0]) == 4, ValueError( f"inverted_residual_setting should be non-empty or a 4-element list, got {inverted_residual_setting}" ) - in_channels = _make_divisible(in_channels * widen_factor, - round_nearest) + in_channels = _make_divisible(in_channels * widen_factor, round_nearest) - self.conv1 = ConvNormActivation(1 if gray_input else 3, - in_channels, - kernel_size=3, - stride=2, - norm_layer=norm_layer, - activation_layer='ReLU6') + self.conv1 = ConvNormActivation( + 1 if gray_input else 3, + in_channels, + kernel_size=3, + stride=2, + norm_layer=norm_layer, + activation_layer='ReLU6', + ) self.layers = [] for idx, (t, c, n, s) in enumerate(inverted_residual_setting): @@ -72,19 +72,12 @@ def __init__(self, for i in range(n): stride = s if i == 0 else 1 if block is RepBlock: - layer = block(in_channels, - out_channels, - stride=stride, - groups=in_channels, - norm_layer=norm_layer) - + layer = block(in_channels, out_channels, stride=stride, groups=in_channels, norm_layer=norm_layer) + elif block is RepConv1x1: - layer=block(in_channels,out_channels,stride=stride,depth=6) + layer = block(in_channels, out_channels, stride=stride, depth=6) else: - layer = block(in_channels, - out_channels, - stride, - expand_ratio=t) + layer = block(in_channels, out_channels, stride, expand_ratio=t) in_channels = out_channels tmp_layers.append(layer) @@ -113,4 +106,4 @@ def init_weights(self): nn.init.zeros_(m.bias) elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): nn.init.ones_(m.weight) - nn.init.zeros_(m.bias) \ No newline at end of file + nn.init.zeros_(m.bias) diff --git a/edgelab/models/backbones/MobileNetv3.py b/edgelab/models/backbones/MobileNetv3.py index 0348697e..bd0208e3 100644 --- a/edgelab/models/backbones/MobileNetv3.py +++ b/edgelab/models/backbones/MobileNetv3.py @@ -12,15 +12,21 @@ class InvertedResidualConfig: # Analytic model configuration table - def __init__(self, input_channels: int, kernel: int, - expanded_channels: int, out_channels: int, use_se: bool, - activation: str, stride: int, dilation: int, - widen_factor: float): - self.input_channels = self.adjust_channels(input_channels, - widen_factor) + def __init__( + self, + input_channels: int, + kernel: int, + expanded_channels: int, + out_channels: int, + use_se: bool, + activation: str, + stride: int, + dilation: int, + widen_factor: float, + ): + self.input_channels = self.adjust_channels(input_channels, widen_factor) self.kernel = kernel - self.expanded_channels = self.adjust_channels(expanded_channels, - widen_factor) + self.expanded_channels = self.adjust_channels(expanded_channels, widen_factor) self.out_channels = self.adjust_channels(out_channels, widen_factor) self.use_se = use_se self.use_hs = activation == "HS" @@ -34,11 +40,12 @@ def adjust_channels(channels: int, widen_factor: float): class InvertedResidual(nn.Module): # Main details of MobileNetV3 - def __init__(self, - cnf: InvertedResidualConfig, - norm_layer: Callable[..., nn.Module], - se_layer: Callable[..., nn.Module] = partial( - SElayer, scale_activation=nn.Hardsigmoid)): + def __init__( + self, + cnf: InvertedResidualConfig, + 
norm_layer: Callable[..., nn.Module], + se_layer: Callable[..., nn.Module] = partial(SElayer, scale_activation=nn.Hardsigmoid), + ): super().__init__() if not (1 <= cnf.stride <= 2): @@ -52,34 +59,39 @@ def __init__(self, # expand if cnf.expanded_channels != cnf.input_channels: layers.append( - ConvNormActivation(cnf.input_channels, - cnf.expanded_channels, - kernel_size=1, - norm_layer=norm_layer, - activation_layer=activation_layer)) + ConvNormActivation( + cnf.input_channels, + cnf.expanded_channels, + kernel_size=1, + norm_layer=norm_layer, + activation_layer=activation_layer, + ) + ) # depthwise stride = 1 if cnf.dilation > 1 else cnf.stride layers.append( - ConvNormActivation(cnf.expanded_channels, - cnf.expanded_channels, - kernel_size=cnf.kernel, - stride=stride, - dilation=cnf.dilation, - groups=cnf.expanded_channels, - norm_layer=norm_layer, - activation_layer=activation_layer)) + ConvNormActivation( + cnf.expanded_channels, + cnf.expanded_channels, + kernel_size=cnf.kernel, + stride=stride, + dilation=cnf.dilation, + groups=cnf.expanded_channels, + norm_layer=norm_layer, + activation_layer=activation_layer, + ) + ) if cnf.use_se: squeeze_channels = make_divisible(cnf.expanded_channels // 4, 8) layers.append(se_layer(cnf.expanded_channels, squeeze_channels)) # project layers.append( - ConvNormActivation(cnf.expanded_channels, - cnf.out_channels, - kernel_size=1, - norm_layer=norm_layer, - activation_layer=None)) + ConvNormActivation( + cnf.expanded_channels, cnf.out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=None + ) + ) self.block = nn.Sequential(*layers) self.out_channels = cnf.out_channels @@ -94,22 +106,23 @@ def forward(self, input: Tensor) -> Tensor: @VISBACKENDS.register_module() class MobileNetV3(BaseModule): - - def __init__(self, - arch='small', - widen_factor=1, - out_indices=(1, ), - frozen_stages=-1, - input_channels: int = 3, - conv_cfg=dict(type='Conv'), - norm_cfg=None, - act_cfg=dict(type='Hardswish'), - norm_eval=False, - reduced_tail: bool = False, - dilated: bool = False, - pretrained=None, - init_cfg=None, - **kwargs): + def __init__( + self, + arch='small', + widen_factor=1, + out_indices=(1,), + frozen_stages=-1, + input_channels: int = 3, + conv_cfg=dict(type='Conv'), + norm_cfg=None, + act_cfg=dict(type='Hardswish'), + norm_eval=False, + reduced_tail: bool = False, + dilated: bool = False, + pretrained=None, + init_cfg=None, + **kwargs, + ): super(MobileNetV3, self).__init__(init_cfg) reduce_divider = 2 if reduced_tail else 1 @@ -130,12 +143,13 @@ def __init__(self, ir_conf(80, 3, 184, 80, False, "HS", 1, 1), ir_conf(80, 3, 480, 112, True, "HS", 1, 1), ir_conf(112, 3, 672, 112, True, "HS", 1, 1), - ir_conf(112, 5, 672, 160 // reduce_divider, True, "HS", 2, - dilation), # C4 - ir_conf(160 // reduce_divider, 5, 960 // reduce_divider, - 160 // reduce_divider, True, "HS", 1, dilation), - ir_conf(160 // reduce_divider, 5, 960 // reduce_divider, - 160 // reduce_divider, True, "HS", 1, dilation), + ir_conf(112, 5, 672, 160 // reduce_divider, True, "HS", 2, dilation), # C4 + ir_conf( + 160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, "HS", 1, dilation + ), + ir_conf( + 160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, "HS", 1, dilation + ), ] elif arch == "small": inverted_residual_setting = [ @@ -147,12 +161,9 @@ def __init__(self, ir_conf(40, 5, 240, 40, True, "HS", 1, 1), ir_conf(40, 5, 120, 48, True, "HS", 1, 1), ir_conf(48, 5, 144, 48, True, "HS", 1, 1), - ir_conf(48, 5, 288, 96 
// reduce_divider, True, "HS", 2, - dilation), # C4 - ir_conf(96 // reduce_divider, 5, 576 // reduce_divider, - 96 // reduce_divider, True, "HS", 1, dilation), - ir_conf(96 // reduce_divider, 5, 576 // reduce_divider, - 96 // reduce_divider, True, "HS", 1, dilation), + ir_conf(48, 5, 288, 96 // reduce_divider, True, "HS", 2, dilation), # C4 + ir_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, "HS", 1, dilation), + ir_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, "HS", 1, dilation), ] else: raise ValueError("Unsupported model type {}".format(arch)) @@ -176,7 +187,8 @@ def __init__(self, kernel_size=3, stride=2, norm_layer=norm_layer, - activation_layer=act_cfg if act_cfg else nn.Hardswish) + activation_layer=act_cfg if act_cfg else nn.Hardswish, + ) self.layers = [] diff --git a/edgelab/models/backbones/ShuffleNetV2.py b/edgelab/models/backbones/ShuffleNetV2.py index 131c7ed3..2971128a 100644 --- a/edgelab/models/backbones/ShuffleNetV2.py +++ b/edgelab/models/backbones/ShuffleNetV2.py @@ -25,7 +25,6 @@ def channel_shuffle(x: Tensor, groups: int) -> Tensor: class InvertedResidual(nn.Module): - def __init__(self, inp: int, oup: int, stride: int) -> None: super(InvertedResidual, self).__init__() @@ -38,18 +37,9 @@ def __init__(self, inp: int, oup: int, stride: int) -> None: if self.stride > 1: self.branch1 = nn.Sequential( - self.depthwise_conv(inp, - inp, - kernel_size=3, - stride=self.stride, - padding=1), + self.depthwise_conv(inp, inp, kernel_size=3, stride=self.stride, padding=1), nn.BatchNorm2d(inp), - nn.Conv2d(inp, - branch_features, - kernel_size=1, - stride=1, - padding=0, - bias=False), + nn.Conv2d(inp, branch_features, kernel_size=1, stride=1, padding=0, bias=False), nn.BatchNorm2d(branch_features), nn.ReLU(inplace=True), ) @@ -57,44 +47,28 @@ def __init__(self, inp: int, oup: int, stride: int) -> None: self.branch1 = nn.Identity() self.branch2 = nn.Sequential( - nn.Conv2d(inp if (self.stride > 1) else branch_features, - branch_features, - kernel_size=1, - stride=1, - padding=0, - bias=False), + nn.Conv2d( + inp if (self.stride > 1) else branch_features, + branch_features, + kernel_size=1, + stride=1, + padding=0, + bias=False, + ), nn.BatchNorm2d(branch_features), nn.ReLU(inplace=True), - self.depthwise_conv(branch_features, - branch_features, - kernel_size=3, - stride=self.stride, - padding=1), + self.depthwise_conv(branch_features, branch_features, kernel_size=3, stride=self.stride, padding=1), nn.BatchNorm2d(branch_features), - nn.Conv2d(branch_features, - branch_features, - kernel_size=1, - stride=1, - padding=0, - bias=False), + nn.Conv2d(branch_features, branch_features, kernel_size=1, stride=1, padding=0, bias=False), nn.BatchNorm2d(branch_features), nn.ReLU(inplace=True), ) @staticmethod - def depthwise_conv(i: int, - o: int, - kernel_size: int, - stride: int = 1, - padding: int = 0, - bias: bool = False) -> nn.Conv2d: - return nn.Conv2d(i, - o, - kernel_size, - stride, - padding, - bias=bias, - groups=i) + def depthwise_conv( + i: int, o: int, kernel_size: int, stride: int = 1, padding: int = 0, bias: bool = False + ) -> nn.Conv2d: + return nn.Conv2d(i, o, kernel_size, stride, padding, bias=bias, groups=i) def forward(self, x: Tensor) -> Tensor: if self.stride == 1: @@ -109,27 +83,28 @@ def forward(self, x: Tensor) -> Tensor: @BACKBONES.register_module(name='TmpShuffleNetV2') class ShuffleNetV2(BaseModule): - - def __init__(self, - widen_factor=1, - out_indices=(2, ), - frozen_stages=-1, - input_channels: 
int = 3, - conv_cfg=dict(type='Conv'), - norm_cfg=None, - act_cfg=dict(type='ReLu'), - norm_eval=False, - reduced_tail=False, - dilate=False, - pretrained=None, - init_cfg: Optional[dict] = None): + def __init__( + self, + widen_factor=1, + out_indices=(2,), + frozen_stages=-1, + input_channels: int = 3, + conv_cfg=dict(type='Conv'), + norm_cfg=None, + act_cfg=dict(type='ReLu'), + norm_eval=False, + reduced_tail=False, + dilate=False, + pretrained=None, + init_cfg: Optional[dict] = None, + ): super(ShuffleNetV2, self).__init__(init_cfg) arch = { "0.25": [24, 24, 48, 96, 512], "0.5": [24, 48, 96, 192, 1024], "1.0": [24, 116, 232, 464, 1024], "1.5": [24, 176, 352, 704, 1024], - "2.0": [24, 244, 488, 976, 2048] + "2.0": [24, 244, 488, 976, 2048], } layer_repeats = [4, 8, 4] @@ -137,8 +112,7 @@ def __init__(self, tmp_channel = arch['1.0'] perchannel_widen = [] for i in tmp_channel: - perchannel_widen.append( - make_divisible(i * float(widen_factor), 8)) + perchannel_widen.append(make_divisible(i * float(widen_factor), 8)) else: perchannel_widen = arch[str(widen_factor)] @@ -146,24 +120,23 @@ def __init__(self, self.out_indices = out_indices self.frozen_stages = frozen_stages self.norm_eval = norm_eval - self.conv1 = ConvNormActivation(input_channels, - output_channels, - kernel_size=3, - stride=2, - padding=1, - bias=False, - norm_layer='BatchNorm2d', - activation_layer='ReLU') + self.conv1 = ConvNormActivation( + input_channels, + output_channels, + kernel_size=3, + stride=2, + padding=1, + bias=False, + norm_layer='BatchNorm2d', + activation_layer='ReLU', + ) input_channels = output_channels self.layer_names = [f'layer{i}' for i in [1, 2, 3]] - for name, repeats, output_channels in zip(self.layer_names, - layer_repeats, - perchannel_widen[1:]): + for name, repeats, output_channels in zip(self.layer_names, layer_repeats, perchannel_widen[1:]): layer = [InvertedResidual(input_channels, output_channels, 2)] for _ in range(repeats - 1): - layer.append( - InvertedResidual(output_channels, output_channels, 1)) + layer.append(InvertedResidual(output_channels, output_channels, 1)) input_channels = output_channels self.add_module(name, nn.Sequential(*layer)) diff --git a/edgelab/models/backbones/SoundNet.py b/edgelab/models/backbones/SoundNet.py index ba1125e4..bc8f23e1 100644 --- a/edgelab/models/backbones/SoundNet.py +++ b/edgelab/models/backbones/SoundNet.py @@ -5,24 +5,26 @@ class ResBlock1dTF(nn.Module): - def __init__(self, dim, dilation=1, kernel_size=3): super().__init__() self.block_t = nn.Sequential( # nn.ReflectionPad1d(dilation * (kernel_size//2)), - nn.Conv1d(dim, - dim, - kernel_size=kernel_size, - stride=1, - padding=dilation * (kernel_size // 2), - bias=False, - dilation=dilation, - groups=dim), + nn.Conv1d( + dim, + dim, + kernel_size=kernel_size, + stride=1, + padding=dilation * (kernel_size // 2), + bias=False, + dilation=dilation, + groups=dim, + ), nn.BatchNorm1d(dim), - nn.LeakyReLU(0.2, True)) - self.block_f = nn.Sequential(nn.Conv1d(dim, dim, 1, 1, bias=False), - nn.BatchNorm1d(dim), - nn.LeakyReLU(0.2, True)) + nn.LeakyReLU(0.2, True), + ) + self.block_f = nn.Sequential( + nn.Conv1d(dim, dim, 1, 1, bias=False), nn.BatchNorm1d(dim), nn.LeakyReLU(0.2, True) + ) self.shortcut = nn.Conv1d(dim, dim, 1, 1) def forward(self, x): @@ -30,47 +32,43 @@ def forward(self, x): class AADownsample(nn.Module): - def __init__(self, filt_size=3, stride=2, channels=None): super(AADownsample, self).__init__() self.filt_size = filt_size self.stride = stride self.channels = channels ha = 
torch.arange(1, filt_size // 2 + 1 + 1, 1) - a = torch.cat((ha, ha.flip(dims=[ - -1, - ])[1:])).float() + a = torch.cat( + ( + ha, + ha.flip( + dims=[ + -1, + ] + )[1:], + ) + ).float() a = a / a.sum() filt = a[None, :] - self.register_buffer('filt', filt[None, :, :].repeat( - (self.channels, 1, 1))) + self.register_buffer('filt', filt[None, :, :].repeat((self.channels, 1, 1))) def forward(self, x): # x_pad = F.pad(x, (self.filt_size//2, self.filt_size//2)) - y = F.conv1d(x, - self.filt, - stride=self.stride, - padding=self.filt_size // 2, - groups=x.shape[1]) + y = F.conv1d(x, self.filt, stride=self.stride, padding=self.filt_size // 2, groups=x.shape[1]) return y class Down(nn.Module): - def __init__(self, channels, d=2, k=3): super().__init__() kk = d + 1 self.down = nn.Sequential( # nn.ReflectionPad1d(kk // 2), - nn.Conv1d(channels, - channels * 2, - kernel_size=kk, - stride=1, - padding=kk // 2, - bias=False), + nn.Conv1d(channels, channels * 2, kernel_size=kk, stride=1, padding=kk // 2, bias=False), nn.BatchNorm1d(channels * 2), nn.LeakyReLU(0.2, True), - AADownsample(channels=channels * 2, stride=d, filt_size=k)) + AADownsample(channels=channels * 2, stride=d, filt_size=k), + ) def forward(self, x): x = self.down(x) @@ -79,12 +77,7 @@ def forward(self, x): @BACKBONES.register_module() class SoundNetRaw(nn.Module): - - def __init__(self, - nf=2, - clip_length=None, - factors=[4, 4, 4], - out_channel=32): + def __init__(self, nf=2, clip_length=None, factors=[4, 4, 4], out_channel=32): super().__init__() model = [ # nn.ReflectionPad1d(3), @@ -106,9 +99,7 @@ def __init__(self, for _, f in enumerate(factors): for i in range(1): for j in range(3): - model += [ - ResBlock1dTF(dim=nf, dilation=3**j, kernel_size=7) - ] + model += [ResBlock1dTF(dim=nf, dilation=3**j, kernel_size=7)] model += [Down(channels=nf, d=f, k=f * 2 + 1)] nf *= 2 self.down2 = nn.Sequential(*model) diff --git a/edgelab/models/backbones/SqueezeNet.py b/edgelab/models/backbones/SqueezeNet.py index 007cb449..64389082 100644 --- a/edgelab/models/backbones/SqueezeNet.py +++ b/edgelab/models/backbones/SqueezeNet.py @@ -9,28 +9,18 @@ class Squeeze(nn.Module): - - def __init__(self, inplanes: int, squeeze_planes: int, - expand_planes: int) -> None: + def __init__(self, inplanes: int, squeeze_planes: int, expand_planes: int) -> None: super(Squeeze, self).__init__() self.inplanes = inplanes expand1x1_planes = expand_planes // 2 expand3x3_planes = expand_planes // 2 - self.squeeze = ConvNormActivation(inplanes, - squeeze_planes, - kernel_size=1, - activation_layer='ReLU') - - self.expand1x1 = ConvNormActivation(squeeze_planes, - expand1x1_planes, - kernel_size=1, - activation_layer='ReLU') - - self.expand3x3 = ConvNormActivation(squeeze_planes, - expand3x3_planes, - kernel_size=3, - padding=1, - activation_layer='ReLU') + self.squeeze = ConvNormActivation(inplanes, squeeze_planes, kernel_size=1, activation_layer='ReLU') + + self.expand1x1 = ConvNormActivation(squeeze_planes, expand1x1_planes, kernel_size=1, activation_layer='ReLU') + + self.expand3x3 = ConvNormActivation( + squeeze_planes, expand3x3_planes, kernel_size=3, padding=1, activation_layer='ReLU' + ) def forward(self, x: torch.Tensor) -> torch.Tensor: x = self.squeeze(x) @@ -39,16 +29,25 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: @BACKBONES.register_module() class SqueezeNet(BaseModule): - arch = [[128, 16, 128], [128, 32, 256], [256, 32, 256], [256, 48, 384], - [384, 48, 384], [384, 64, 512], [512, 64, 512]] - - def __init__(self, - input_channels: int = 3, - 
widen_factor: float = 1.0, - out_indices=(1, ), - frozen_stages=-1, - norm_eval=False, - init_cfg: Optional[dict] = None): + arch = [ + [128, 16, 128], + [128, 32, 256], + [256, 32, 256], + [256, 48, 384], + [384, 48, 384], + [384, 64, 512], + [512, 64, 512], + ] + + def __init__( + self, + input_channels: int = 3, + widen_factor: float = 1.0, + out_indices=(1,), + frozen_stages=-1, + norm_eval=False, + init_cfg: Optional[dict] = None, + ): super().__init__(init_cfg) arch_setting = self.arch if widen_factor == 1.0: @@ -66,10 +65,9 @@ def __init__(self, arch_setting.insert(0, frist_setting) for i, setting in enumerate(arch_setting): arch_setting[i] = [ - make_divisible(setting[0] * - widen_factor, 8) if i != 0 else frist_out, + make_divisible(setting[0] * widen_factor, 8) if i != 0 else frist_out, setting[1], - make_divisible(setting[-1] * widen_factor, 8) + make_divisible(setting[-1] * widen_factor, 8), ] self.out_indices = out_indices @@ -80,12 +78,8 @@ def __init__(self, self.conv1 = ConvNormActivation(*frist_conv, activation_layer='ReLU') self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True) - self.layer_name = [ - f'layer{i}' for i in range(1, - len(arch_setting) + 1) - ] + self.layer_name = [f'layer{i}' for i in range(1, len(arch_setting) + 1)] for name, param in zip(self.layer_name, arch_setting): - layer = Squeeze(*param) self.add_module(name, layer) diff --git a/edgelab/models/backbones/__init__.py b/edgelab/models/backbones/__init__.py index bf40e8fb..66b61b1f 100644 --- a/edgelab/models/backbones/__init__.py +++ b/edgelab/models/backbones/__init__.py @@ -9,6 +9,13 @@ from .MobileNetv2 import MobileNetv2 __all__ = [ - 'PfldMobileNetV2', 'SoundNetRaw', 'CustomShuffleNetV2', 'AxesNet', - 'MobileNetV3', 'ShuffleNetV2', 'SqueezeNet','EfficientNet','MobileNetv2' + 'PfldMobileNetV2', + 'SoundNetRaw', + 'CustomShuffleNetV2', + 'AxesNet', + 'MobileNetV3', + 'ShuffleNetV2', + 'SqueezeNet', + 'EfficientNet', + 'MobileNetv2', ] diff --git a/edgelab/models/backbones/pfld_mobilenet_v2.py b/edgelab/models/backbones/pfld_mobilenet_v2.py index 2eaf4e1c..d6ccc3a5 100644 --- a/edgelab/models/backbones/pfld_mobilenet_v2.py +++ b/edgelab/models/backbones/pfld_mobilenet_v2.py @@ -6,27 +6,11 @@ @BACKBONES.register_module() class PfldMobileNetV2(nn.Module): - - def __init__(self, - inchannel=3, - layer1=[16, 16, 16, 16, 16], - layer2=[32, 32, 32, 32, 32, 32], - out_channel=16): + def __init__(self, inchannel=3, layer1=[16, 16, 16, 16, 16], layer2=[32, 32, 32, 32, 32, 32], out_channel=16): super(PfldMobileNetV2, self).__init__() inp = 32 - self.conv1 = CBR(inchannel, - inp, - kernel=3, - stride=2, - padding=1, - bias=False) - self.conv2 = CBR(inp, - inp, - kernel=3, - stride=1, - padding=1, - groups=inp, - bias=False) + self.conv1 = CBR(inchannel, inp, kernel=3, stride=2, padding=1, bias=False) + self.conv2 = CBR(inp, inp, kernel=3, stride=1, padding=1, groups=inp, bias=False) layer = [] for idx, oup in enumerate(layer1): @@ -47,7 +31,6 @@ def __init__(self, self.block2 = InvertedResidual(inp, out_channel, 1, False, 2) def forward(self, x): - x = self.conv1(x) x = self.conv2(x) diff --git a/edgelab/models/backbones/shufflenetv2.py b/edgelab/models/backbones/shufflenetv2.py index d2ac9baa..12d612b7 100644 --- a/edgelab/models/backbones/shufflenetv2.py +++ b/edgelab/models/backbones/shufflenetv2.py @@ -8,16 +8,17 @@ @BACKBONES.register_module() class CustomShuffleNetV2(ShuffleNetV2): - - def __init__(self, - widen_factor=1.0, - out_indices=(3, ), - frozen_stages=-1, - conv_cfg=None, - 
norm_cfg=dict(type='BN'),
-                 act_cfg=dict(type='ReLU'),
-                 norm_eval=False,
-                 with_cp=False):
+    def __init__(
+        self,
+        widen_factor=1.0,
+        out_indices=(3,),
+        frozen_stages=-1,
+        conv_cfg=None,
+        norm_cfg=dict(type='BN'),
+        act_cfg=dict(type='ReLU'),
+        norm_eval=False,
+        with_cp=False,
+    ):
         # super().__init__(widen_factor, out_indices, frozen_stages, conv_cfg,
         #                  norm_cfg, act_cfg, norm_eval, with_cp)
 
@@ -27,12 +28,10 @@ def __init__(self,
         self.stage_blocks = [4, 8, 4]
         for index in out_indices:
             if index not in range(0, 4):
-                raise ValueError('the item in out_indices must in '
-                                 f'range(0, 4). But received {index}')
+                raise ValueError('the item in out_indices must be in ' f'range(0, 4). But received {index}')
 
         if frozen_stages not in range(-1, 4):
-            raise ValueError('frozen_stages must be in range(-1, 4). '
-                             f'But received {frozen_stages}')
+            raise ValueError('frozen_stages must be in range(-1, 4). ' f'But received {frozen_stages}')
         self.out_indices = out_indices
         self.frozen_stages = frozen_stages
         self.conv_cfg = conv_cfg
@@ -52,19 +51,19 @@ def __init__(self,
         elif widen_factor == 2.0:
             channels = [244, 488, 976, 2048]
         else:
-            raise ValueError(
-                'widen_factor must be in [0.25, 0.5, 1.0, 1.5, 2.0]. '
-                f'But received {widen_factor}')
+            raise ValueError('widen_factor must be in [0.25, 0.5, 1.0, 1.5, 2.0]. ' f'But received {widen_factor}')
 
         self.in_channels = 24
-        self.conv1 = ConvModule(in_channels=3,
-                                out_channels=self.in_channels,
-                                kernel_size=3,
-                                stride=2,
-                                padding=1,
-                                conv_cfg=conv_cfg,
-                                norm_cfg=norm_cfg,
-                                act_cfg=act_cfg)
+        self.conv1 = ConvModule(
+            in_channels=3,
+            out_channels=self.in_channels,
+            kernel_size=3,
+            stride=2,
+            padding=1,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg,
+        )
 
         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
 
@@ -75,9 +74,12 @@ def __init__(self,
         output_channels = channels[-1]
         self.layers.append(
-            ConvModule(in_channels=self.in_channels,
-                       out_channels=output_channels,
-                       kernel_size=1,
-                       conv_cfg=conv_cfg,
-                       norm_cfg=norm_cfg,
-                       act_cfg=act_cfg))
+            ConvModule(
+                in_channels=self.in_channels,
+                out_channels=output_channels,
+                kernel_size=1,
+                conv_cfg=conv_cfg,
+                norm_cfg=norm_cfg,
+                act_cfg=act_cfg,
+            )
+        )
diff --git a/edgelab/models/base/general.py b/edgelab/models/base/general.py
index 91404ddb..17c243e5 100644
--- a/edgelab/models/base/general.py
+++ b/edgelab/models/base/general.py
@@ -11,8 +11,7 @@ def get_conv(conv):
         conv = getattr(nn, conv)
     elif isinstance(conv, str) and conv in MODELS.module_dict:
         conv = MODELS.get(conv)
-    elif (isinstance(conv, type.__class__)
-          and issubclass(conv, nn.Module)) or hasattr(conv, '__call__'):
+    elif (isinstance(conv, type.__class__) and issubclass(conv, nn.Module)) or hasattr(conv, '__call__'):
         pass
     else:
         raise ValueError(
@@ -30,8 +29,7 @@ def get_norm(norm):
         norm = getattr(nn, norm)
     elif isinstance(norm, str) and norm in MODELS.module_dict:
         norm = MODELS.get(norm)
-    elif (isinstance(norm, type.__class__)
-          and issubclass(norm, nn.Module)) or hasattr(norm, '__call__'):
+    elif (isinstance(norm, type.__class__) and issubclass(norm, nn.Module)) or hasattr(norm, '__call__'):
         pass
     else:
         raise ValueError(
@@ -47,8 +45,7 @@ def get_act(act):
         act = getattr(nn, act)
     elif isinstance(act, str) and act in MODELS.module_dict:
         act = MODELS.get(act)
-    elif (isinstance(act, type.__class__)
-          and issubclass(act, nn.Module)) or hasattr(act, '__call__'):
+    elif (isinstance(act, type.__class__) and issubclass(act, nn.Module)) or hasattr(act, '__call__'):
         pass
     else:
         raise ValueError(
@@ -58,7 +55,6 @@ def 
get_act(act): class ConvNormActivation(nn.Sequential): - def __init__( self, in_channels: int, @@ -68,12 +64,9 @@ def __init__( padding: Optional[int] = None, bias: Optional[bool] = None, groups: int = 1, - norm_layer: Optional[Callable[..., nn.Module]] or Dict - or AnyStr = nn.BatchNorm2d, - activation_layer: Optional[Callable[..., nn.Module]] or Dict - or AnyStr = nn.ReLU, - conv_layer: Optional[Callable[..., nn.Module]] or Dict - or AnyStr = None, + norm_layer: Optional[Callable[..., nn.Module]] or Dict or AnyStr = nn.BatchNorm2d, + activation_layer: Optional[Callable[..., nn.Module]] or Dict or AnyStr = nn.ReLU, + conv_layer: Optional[Callable[..., nn.Module]] or Dict or AnyStr = None, dilation: int = 1, inplace: bool = True, ) -> None: @@ -84,14 +77,16 @@ def __init__( conv_layer = nn.Conv2d else: conv_layer = get_conv(conv_layer) - conv = conv_layer(in_channels, - out_channels, - kernel_size, - stride, - padding, - dilation=dilation, - groups=groups, - bias=norm_layer is None if bias is None else bias) + conv = conv_layer( + in_channels, + out_channels, + kernel_size, + stride, + padding, + dilation=dilation, + groups=groups, + bias=norm_layer is None if bias is None else bias, + ) self.add_module('conv', conv) if norm_layer is not None: norm_layer = get_norm(norm_layer) @@ -104,7 +99,6 @@ def __init__( class SqueezeExcitation(torch.nn.Module): - def __init__( self, input_channels: int, @@ -115,18 +109,12 @@ def __init__( super().__init__() self.avgpool = torch.nn.AdaptiveAvgPool2d(1) self.activation = get_act(activation)(inplace=True) - self.conv1 = ConvNormActivation(input_channels, - squeeze_channels, - 1, - padding=0, - norm_layer=None, - activation_layer=activation) - self.conv2 = ConvNormActivation(squeeze_channels, - input_channels, - 1, - padding=0, - norm_layer=None, - activation_layer=activation) + self.conv1 = ConvNormActivation( + input_channels, squeeze_channels, 1, padding=0, norm_layer=None, activation_layer=activation + ) + self.conv2 = ConvNormActivation( + squeeze_channels, input_channels, 1, padding=0, norm_layer=None, activation_layer=activation + ) self.scale_activation = get_act(scale_activation)() def _scale(self, input: torch.Tensor) -> torch.Tensor: @@ -144,11 +132,11 @@ def CBR(inp, oup, kernel, stride, bias=False, padding=1, groups=1, act='ReLU'): return nn.Sequential( nn.Conv2d(inp, oup, kernel, stride, padding, groups=groups, bias=bias), nn.BatchNorm2d(oup), - nn.Identity() if not act else getattr(nn, act)(inplace=True)) + nn.Identity() if not act else getattr(nn, act)(inplace=True), + ) class InvertedResidual(nn.Module): - def __init__(self, inp, oup, stride, residual, expand_ratio=6): super(InvertedResidual, self).__init__() self.stride = stride @@ -160,13 +148,7 @@ def __init__(self, inp, oup, stride, residual, expand_ratio=6): nn.Conv2d(inp, inp * expand_ratio, 1, 1, 0, bias=False), nn.BatchNorm2d(inp * expand_ratio), nn.ReLU(inplace=True), - nn.Conv2d(inp * expand_ratio, - inp * expand_ratio, - 3, - stride, - 1, - groups=inp * expand_ratio, - bias=False), + nn.Conv2d(inp * expand_ratio, inp * expand_ratio, 3, stride, 1, groups=inp * expand_ratio, bias=False), nn.BatchNorm2d(inp * expand_ratio), nn.ReLU(inplace=True), nn.Conv2d(inp * expand_ratio, oup, 1, 1, 0, bias=False), @@ -177,4 +159,4 @@ def forward(self, x): if self.residual: return x + self.conv(x) else: - return self.conv(x) \ No newline at end of file + return self.conv(x) diff --git a/edgelab/models/classifiers/Audio_speech.py b/edgelab/models/classifiers/Audio_speech.py index 
b1ce1f9b..2c4b11c4 100644 --- a/edgelab/models/classifiers/Audio_speech.py +++ b/edgelab/models/classifiers/Audio_speech.py @@ -13,15 +13,17 @@ class Audio_classify(BaseClassifier): CLASSIFICATION NETWORK """ - def __init__(self, - backbone, - n_cls, - loss=dict(), - multilabel=False, - data_preprocessor=None, - head=None, - loss_cls=None, - pretrained=None): + def __init__( + self, + backbone, + n_cls, + loss=dict(), + multilabel=False, + data_preprocessor=None, + head=None, + loss_cls=None, + pretrained=None, + ): super(BaseClassifier, self).__init__() self.backbone = MODELS.build(backbone) self.cls_head = MODELS.build(head) @@ -48,11 +50,11 @@ def loss(self, img, **kwargs): if MessageHub.get_current_instance().get_info('ismixed'): target = MessageHub.get_current_instance().get_info('target') - loss = MessageHub.get_current_instance().get_info( - 'audio_loss').mix_loss(result, - target, - self.n_cls, - pred_one_hot=self.mutilabel) + loss = ( + MessageHub.get_current_instance() + .get_info('audio_loss') + .mix_loss(result, target, self.n_cls, pred_one_hot=self.mutilabel) + ) else: loss = self._loss(result, kwargs['labels']) @@ -62,11 +64,4 @@ def predict(self, img, **kwargs): features = self.backbone(img) result = self.sm(self.cls_head(features)) # return [{'pred_label':{"score":result},"gt_label":{"label":kwargs['labels']}}] - return [{ - 'pred_label': { - "label": torch.max(result, dim=1)[1] - }, - "gt_label": { - "label": kwargs['labels'] - } - }] + return [{'pred_label': {"label": torch.max(result, dim=1)[1]}, "gt_label": {"label": kwargs['labels']}}] diff --git a/edgelab/models/classifiers/__init__.py b/edgelab/models/classifiers/__init__.py index 3829e0be..29655b97 100644 --- a/edgelab/models/classifiers/__init__.py +++ b/edgelab/models/classifiers/__init__.py @@ -2,4 +2,4 @@ from .accelerometer import AccelerometerClassifier from .image import ImageClassifier -__all__=['Audio_classify', 'AccelerometerClassifier', 'ImageClassifier'] \ No newline at end of file +__all__ = ['Audio_classify', 'AccelerometerClassifier', 'ImageClassifier'] diff --git a/edgelab/models/classifiers/accelerometer.py b/edgelab/models/classifiers/accelerometer.py index 48f36900..3c219c3b 100644 --- a/edgelab/models/classifiers/accelerometer.py +++ b/edgelab/models/classifiers/accelerometer.py @@ -40,15 +40,17 @@ class AccelerometerClassifier(BaseClassifier): Defaults to None. 
""" - def __init__(self, - backbone: dict, - neck: Optional[dict] = None, - head: Optional[dict] = None, - pretrained: Optional[str] = None, - train_cfg: Optional[dict] = None, - data_preprocessor: Optional[dict] = None, - softmax: bool = True, - init_cfg: Optional[dict] = None): + def __init__( + self, + backbone: dict, + neck: Optional[dict] = None, + head: Optional[dict] = None, + pretrained: Optional[str] = None, + train_cfg: Optional[dict] = None, + data_preprocessor: Optional[dict] = None, + softmax: bool = True, + init_cfg: Optional[dict] = None, + ): if pretrained is not None: init_cfg = dict(type='Pretrained', checkpoint=pretrained) @@ -61,8 +63,7 @@ def __init__(self, # Set batch augmentations by `train_cfg` data_preprocessor['batch_augments'] = train_cfg - super(AccelerometerClassifier, self).__init__( - init_cfg=init_cfg, data_preprocessor=data_preprocessor) + super(AccelerometerClassifier, self).__init__(init_cfg=init_cfg, data_preprocessor=data_preprocessor) if not isinstance(backbone, nn.Module): backbone = MODELS.build(backbone) @@ -74,14 +75,10 @@ def __init__(self, self.backbone = backbone self.neck = neck self.head = head - - self.softmax = softmax - def forward(self, - inputs: torch.Tensor, - data_samples: Optional[List[ClsDataSample]] = None, - mode: str = 'tensor'): + self.softmax = softmax + def forward(self, inputs: torch.Tensor, data_samples: Optional[List[ClsDataSample]] = None, mode: str = 'tensor'): if mode == 'tensor': feats = self.extract_feat(inputs) head_out = self.head(feats) if self.with_head else feats @@ -94,14 +91,11 @@ def forward(self, raise RuntimeError(f'Invalid mode "{mode}".') def extract_feat(self, inputs, stage='neck'): - - assert stage in ['backbone', 'neck', 'pre_logits'], \ - (f'Invalid output stage "{stage}", please choose from "backbone", ' - '"neck" and "pre_logits"') + assert stage in ['backbone', 'neck', 'pre_logits'], ( + f'Invalid output stage "{stage}", please choose from "backbone", ' '"neck" and "pre_logits"' + ) x = self.backbone(inputs) - - if stage == 'backbone': return x @@ -111,23 +105,18 @@ def extract_feat(self, inputs, stage='neck'): if stage == 'neck': return x - assert self.with_head and hasattr(self.head, 'pre_logits'), \ - "No head or the head doesn't implement `pre_logits` method." + assert self.with_head and hasattr( + self.head, 'pre_logits' + ), "No head or the head doesn't implement `pre_logits` method." 
return self.head.pre_logits(x) - def loss(self, inputs: torch.Tensor, - data_samples: List[ClsDataSample]) -> dict: - - + def loss(self, inputs: torch.Tensor, data_samples: List[ClsDataSample]) -> dict: feats = self.extract_feat(inputs) - return self.head.loss(feats, data_samples) - def predict(self, - inputs: torch.Tensor, - data_samples: Optional[List[ClsDataSample]] = None, - **kwargs) -> List[ClsDataSample]: - + def predict( + self, inputs: torch.Tensor, data_samples: Optional[List[ClsDataSample]] = None, **kwargs + ) -> List[ClsDataSample]: feats = self.extract_feat(inputs) return self.head.predict(feats, data_samples, **kwargs) diff --git a/edgelab/models/classifiers/image.py b/edgelab/models/classifiers/image.py index b6feee45..31993784 100644 --- a/edgelab/models/classifiers/image.py +++ b/edgelab/models/classifiers/image.py @@ -9,27 +9,25 @@ from mmcls.structures import ClsDataSample from mmcls.models.classifiers import ImageClassifier as MMImageClassifier + @MODELS.register_module() class ImageClassifier(MMImageClassifier): - def __init__(self, - backbone: dict, - neck: Optional[dict] = None, - head: Optional[dict] = None, - pretrained: Optional[str] = None, - train_cfg: Optional[dict] = None, - data_preprocessor: Optional[dict] = None, - softmax: bool = True, - init_cfg: Optional[dict] = None): - + def __init__( + self, + backbone: dict, + neck: Optional[dict] = None, + head: Optional[dict] = None, + pretrained: Optional[str] = None, + train_cfg: Optional[dict] = None, + data_preprocessor: Optional[dict] = None, + softmax: bool = True, + init_cfg: Optional[dict] = None, + ): super(ImageClassifier, self).__init__(backbone, neck, head, pretrained, train_cfg, data_preprocessor, init_cfg) - + self.softmax = softmax - - def forward(self, - inputs: torch.Tensor, - data_samples: Optional[List[ClsDataSample]] = None, - mode: str = 'tensor'): - + + def forward(self, inputs: torch.Tensor, data_samples: Optional[List[ClsDataSample]] = None, mode: str = 'tensor'): if mode == 'tensor': feats = self.extract_feat(inputs) head_out = self.head(feats) if self.with_head else feats @@ -40,4 +38,3 @@ def forward(self, return self.predict(inputs, data_samples) else: raise RuntimeError(f'Invalid mode "{mode}".') - diff --git a/edgelab/models/detectors/fastestdet.py b/edgelab/models/detectors/fastestdet.py index 4f9b0094..3145f680 100644 --- a/edgelab/models/detectors/fastestdet.py +++ b/edgelab/models/detectors/fastestdet.py @@ -7,24 +7,24 @@ @MODELS.register_module() class FastestDet(SingleStageDetector): - def __init__( self, backbone, neck=None, bbox_head=None, train_cfg=None, - test_cfg=dict(nms_pre=1000, - min_bbox_size=0, - score_thr=0.05, - conf_thr=0.005, - nms=dict(type='nms', iou_threshold=0.45), - max_per_img=100), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + conf_thr=0.005, + nms=dict(type='nms', iou_threshold=0.45), + max_per_img=100, + ), pretrained=None, init_cfg=None, ): - super().__init__(backbone, neck, bbox_head, train_cfg, test_cfg, - pretrained, init_cfg) + super().__init__(backbone, neck, bbox_head, train_cfg, test_cfg, pretrained, init_cfg) self.backbone = MODELS.build(backbone) self.neck = MODELS.build(neck) self.bbox_head = MODELS.build(bbox_head) @@ -64,10 +64,8 @@ def forward_test(self, img, img_metas, **kwargs): if 'fomo' in kwargs.keys(): return self.bbox_head.post_handle(result) - results_list = self.bbox_head.handle_preds( - result, result.device, img_metas[0][0]['ori_shape'][:2]) + results_list = self.bbox_head.handle_preds(result, 
result.device, img_metas[0][0]['ori_shape'][:2])
         bbox_results = [
-            bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes)
-            for det_bboxes, det_labels in results_list
+            bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes) for det_bboxes, det_labels in results_list
         ]
         return bbox_results
diff --git a/edgelab/models/detectors/pfld.py b/edgelab/models/detectors/pfld.py
index 041a4418..023bebb1 100644
--- a/edgelab/models/detectors/pfld.py
+++ b/edgelab/models/detectors/pfld.py
@@ -18,16 +18,13 @@ class PFLD(BasePoseEstimator):
         pretrained: Model pre-training weight path
     """
 
-    def __init__(self,
-                 backbone: dict,
-                 head: dict,
-                 pretrained: Optional[str] = None):
+    def __init__(self, backbone: dict, head: dict, pretrained: Optional[str] = None):
         super(PFLD, self).__init__(backbone, head=head)
         self.backbone = MODELS.build(backbone)
         self.head = MODELS.build(head)
         self.pretrained = pretrained
 
-    def forward(self, inputs, data_samples = None, mode='tensor'):
+    def forward(self, inputs, data_samples=None, mode='tensor'):
         if mode == 'loss':
             return self.loss(inputs, data_samples)
         elif mode == 'predict':
@@ -35,8 +32,7 @@ def forward(self, inputs, data_samples = None, mode='tensor'):
         elif mode == 'tensor':
             return self.forward_(inputs, data_samples)
         else:
-            raise ValueError(
-                f'params mode recive a not exception params:{mode}')
+            raise ValueError(f'param "mode" received an unexpected value: {mode}')
 
     def loss(self, inputs, data_samples):
         x = self.extract_feat(inputs)
@@ -50,11 +46,11 @@ def predict(self, inputs, data_samples):
         res = PoseDataSample(**data_samples)
         res.results = x
         res.pred_instances = InstanceData(
-            keypoints=np.array([x.reshape(-1, 2).cpu().numpy()]) *
-            data_samples['init_size'][1].cpu().numpy())
+            keypoints=np.array([x.reshape(-1, 2).cpu().numpy()]) * data_samples['init_size'][1].cpu().numpy()
+        )
 
         return [res]
 
     def forward_(self, inputs, data_samples):
         x = self.extract_feat(inputs)
-        return self.head(x)
\ No newline at end of file
+        return self.head(x)
diff --git a/edgelab/models/heads/__init__.py b/edgelab/models/heads/__init__.py
index 3811d680..6e51f390 100644
--- a/edgelab/models/heads/__init__.py
+++ b/edgelab/models/heads/__init__.py
@@ -7,6 +7,11 @@
 from .yolo_head import YOLOV5Head
 
 __all__ = [
-    'Audio_head', 'TAggregate', 'PFLDhead', 'Fastest_Head', 'FomoHead',
-    'AxesClsHead', 'YOLOV5Head',
+    'Audio_head',
+    'TAggregate',
+    'PFLDhead',
+    'Fastest_Head',
+    'FomoHead',
+    'AxesClsHead',
+    'YOLOV5Head',
 ]
diff --git a/edgelab/models/heads/axes_head.py b/edgelab/models/heads/axes_head.py
index 7c84cdf8..0733a887 100644
--- a/edgelab/models/heads/axes_head.py
+++ b/edgelab/models/heads/axes_head.py
@@ -3,25 +3,22 @@
 from mmcls.models.heads import ClsHead
 from typing import Optional, Tuple, Union
 
+
 @MODELS.register_module()
 class AxesClsHead(ClsHead):
-    def __init__(self,
-                 loss: dict = dict(type='CrossEntropyLoss', loss_weight=1.0),
-                 topk: Union[int, Tuple[int]] = (1, ),
-                 cal_acc: bool = False,
-                 init_cfg: Optional[dict] = None):
+    def __init__(
+        self,
+        loss: dict = dict(type='CrossEntropyLoss', loss_weight=1.0),
+        topk: Union[int, Tuple[int]] = (1,),
+        cal_acc: bool = False,
+        init_cfg: Optional[dict] = None,
+    ):
         super(AxesClsHead, self).__init__(loss, topk, cal_acc, init_cfg=init_cfg)
-
-
+
     def forward(self, feats: Tuple[torch.Tensor]) -> torch.Tensor:
         """The forward process."""
-
+
         pre_logits = self.pre_logits(feats)
 
         # The ClsHead doesn't have the final classification head,
         # just return the unpacked inputs.
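        # (In mmcls, `ClsHead.pre_logits` simply returns the last element of
        # the feature tuple, so this head is effectively a pass-through
        # feature extractor.)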
return pre_logits - - - - - diff --git a/edgelab/models/heads/cls_head.py b/edgelab/models/heads/cls_head.py index bada41b6..825a5304 100644 --- a/edgelab/models/heads/cls_head.py +++ b/edgelab/models/heads/cls_head.py @@ -5,7 +5,6 @@ @MODELS.register_module() class Audio_head(nn.Module): - def __init__(self, in_channels, n_classes, drop=0.5): super(Audio_head, self).__init__() self.avg = nn.AdaptiveAvgPool1d(1) @@ -15,5 +14,3 @@ def __init__(self, in_channels, n_classes, drop=0.5): def forward(self, x): return self.fc1(self.dp(self.fc(self.avg(x).flatten(1)))) - - \ No newline at end of file diff --git a/edgelab/models/heads/fastestdet_head.py b/edgelab/models/heads/fastestdet_head.py index ec6a1ca2..f20428e1 100644 --- a/edgelab/models/heads/fastestdet_head.py +++ b/edgelab/models/heads/fastestdet_head.py @@ -5,7 +5,7 @@ import torch.nn as nn from mmcv.cnn import is_norm from mmengine.model import BaseModule -from mmengine.model import normal_init, constant_init +from mmengine.model import normal_init, constant_init from edgelab.registry import MODELS from ..base.general import CBR @@ -14,7 +14,6 @@ @MODELS.register_module() class Fastest_Head(BaseModule): - def __init__( self, input_channels, @@ -23,7 +22,7 @@ def __init__( loss_cls: dict = dict(type='NLLLoss'), train_cfg: dict = None, test_cfg: dict = None, - init_cfg: Optional[dict] = dict(type='Normal', std=0.01) + init_cfg: Optional[dict] = dict(type='Normal', std=0.01), ) -> None: super(Fastest_Head, self).__init__(init_cfg) @@ -44,9 +43,11 @@ def __init__( def _make_layer(self, inp, oup): return nn.Sequential( nn.Conv2d(inp, inp, 5, 1, 2, groups=inp, bias=False), - nn.BatchNorm2d(inp), nn.ReLU(inplace=True), + nn.BatchNorm2d(inp), + nn.ReLU(inplace=True), nn.Conv2d(inp, oup, 1, stride=1, padding=0, bias=False), - nn.BatchNorm2d(oup)) + nn.BatchNorm2d(oup), + ) def forward(self, x): x = self.conv1(x) @@ -55,38 +56,22 @@ def forward(self, x): cls = self.softmax(self.cls_layers(x)) return torch.concat((obj, reg, cls), dim=1) - def forward_train(self, - x, - img_metas, - gt_bboxes, - gt_labels=None, - gt_bboxes_ignore=None, - proposal_cfg=None, - **kwargs): + def forward_train( + self, x, img_metas, gt_bboxes, gt_labels=None, gt_bboxes_ignore=None, proposal_cfg=None, **kwargs + ): results = self(x) - loss = self.loss(results, - gt_bboxes=gt_bboxes, - gt_labels=gt_labels, - gt_bbox_ignore=gt_bboxes_ignore, - img_metas=img_metas) + loss = self.loss( + results, gt_bboxes=gt_bboxes, gt_labels=gt_labels, gt_bbox_ignore=gt_bboxes_ignore, img_metas=img_metas + ) return loss def simple_test(self, img, img_metas, rescale=False): result = self(img) - results_list = self.handle_preds(result, result.device, - img_metas[0][0]['ori_shape'][:2]) + results_list = self.handle_preds(result, result.device, img_metas[0][0]['ori_shape'][:2]) return results_list - def loss(self, - pred_maps, - gt_bboxes, - gt_labels, - img_metas, - gt_bbox_ignore=None): - - target = self.merge_gt(gt_bboxes=gt_bboxes, - gt_labels=gt_labels, - img_metas=img_metas) + def loss(self, pred_maps, gt_bboxes, gt_labels, img_metas, gt_bbox_ignore=None): + target = self.merge_gt(gt_bboxes=gt_bboxes, gt_labels=gt_labels, img_metas=img_metas) gt_box, gt_cls, ps_index = self.build_target(pred_maps, target) ft = torch.cuda.FloatTensor if pred_maps[0].is_cuda else torch.Tensor @@ -128,27 +113,26 @@ def loss(self, tobj[b, gy, gx] = iou.float() # Count the number of positive samples for each image n = torch.bincount(b) - factor[b, gy, gx] = (1. 
/ (n[b] / (H * W))) * 0.25 + factor[b, gy, gx] = (1.0 / (n[b] / (H * W))) * 0.25 - obj_loss = (self.loss_conf( - pobj, - tobj, - ) * factor).mean() + obj_loss = ( + self.loss_conf( + pobj, + tobj, + ) + * factor + ).mean() loss = (iou_loss * 8) + (obj_loss * 16) + cls_loss - return dict(loss=loss, - iou_loss=iou_loss, - cls_loss=cls_loss, - obj_loss=obj_loss) + return dict(loss=loss, iou_loss=iou_loss, cls_loss=cls_loss, obj_loss=obj_loss) def build_target(self, preds, targets): N, C, H, W = preds.shape gt_box, gt_cls, ps_index = [], [], [] # The four vertices of each grid are the reference points where the center point of the box will return - quadrant = torch.tensor([[0, 0], [1, 0], [0, 1], [1, 1]], - device=preds.device) + quadrant = torch.tensor([[0, 0], [1, 0], [0, 1], [1, 1]], device=preds.device) if targets.shape[0] > 0: # Map the coordinates onto the feature map scale scale = torch.ones(6).to(preds.device) @@ -202,12 +186,7 @@ def merge_gt(self, gt_bboxes, gt_labels, img_metas): return target - def handle_preds(self, - preds, - device, - shape, - conf_thresh=0.25, - nms_thresh=0.45): + def handle_preds(self, preds, device, shape, conf_thresh=0.25, nms_thresh=0.45): total_bboxes, output_bboxes = [], [] # Convert the feature map to the coordinates of the detection box N, C, H, W = preds.shape @@ -222,7 +201,7 @@ def handle_preds(self, pcls = pred[:, :, :, 5:] # bboxe confidence - bboxes[..., 4] = (pobj.squeeze(-1)**0.6) * (pcls.max(dim=-1)[0]**0.4) + bboxes[..., 4] = (pobj.squeeze(-1) ** 0.6) * (pcls.max(dim=-1)[0] ** 0.4) bboxes[..., 5] = pcls.argmax(dim=-1) # bboxes coordinate @@ -267,14 +246,15 @@ def handle_preds(self, coord = torch.Tensor(coord).to(device) idxs = torch.Tensor(idxs).squeeze(1).to(device) scores = torch.Tensor(scores).squeeze(1).to(device) - keep = torchvision.ops.batched_nms(coord, scores, idxs, - nms_thresh) + keep = torchvision.ops.batched_nms(coord, scores, idxs, nms_thresh) for i in keep: output.append(temp[i]) output_bboxes.append( - (torch.Tensor(output)[..., :5] * - torch.Tensor([shape[1], shape[0], shape[1], shape[0], 1]), - torch.Tensor(output)[..., 5])) + ( + torch.Tensor(output)[..., :5] * torch.Tensor([shape[1], shape[0], shape[1], shape[0], 1]), + torch.Tensor(output)[..., 5], + ) + ) return output_bboxes diff --git a/edgelab/models/heads/pfld_head.py b/edgelab/models/heads/pfld_head.py index 77c3d831..4c50902e 100644 --- a/edgelab/models/heads/pfld_head.py +++ b/edgelab/models/heads/pfld_head.py @@ -11,9 +11,9 @@ class PFLDhead(nn.Module): """ The head of the pfld model mainly uses convolution and global average pooling - + Args: - num_point: The model needs to predict the number of key points, + num_point: The model needs to predict the number of key points, and set the output of the model according to this value input_channel: The number of channels of the head input feature map feature_num: Number of channels in the middle feature map of the head @@ -27,24 +27,12 @@ def __init__( input_channel: int = 16, feature_num: Sequence[int] = [32, 32], act_cfg: Union[dict, str, None] = "ReLU", - loss_cfg: dict = dict(type='PFLDLoss') + loss_cfg: dict = dict(type='PFLDLoss'), ) -> None: super().__init__() - self.conv1 = CBR(input_channel, - feature_num[0], - 3, - 2, - padding=1, - bias=False, - act=act_cfg) - self.conv2 = CBR(feature_num[0], - feature_num[1], - 2, - 1, - bias=False, - padding=0, - act=act_cfg) + self.conv1 = CBR(input_channel, feature_num[0], 3, 2, padding=1, bias=False, act=act_cfg) + self.conv2 = CBR(feature_num[0], feature_num[1], 2, 1, 
bias=False, padding=0, act=act_cfg) self.avg_pool = nn.AdaptiveAvgPool2d(1) self.fc = nn.Linear(input_channel + sum(feature_num), num_point * 2) @@ -73,13 +61,10 @@ def forward(self, x): def loss(self, features, data_samples): preds = self.forward(features) - labels = torch.as_tensor(data_samples['keypoints'], - device=preds.device, - dtype=torch.float32) + labels = torch.as_tensor(data_samples['keypoints'], device=preds.device, dtype=torch.float32) loss = self.lossFunction(preds, labels) - acc = pose_acc(preds.cpu().detach().numpy(), labels, - data_samples['hw']) + acc = pose_acc(preds.cpu().detach().numpy(), labels, data_samples['hw']) return {"loss": loss, "Acc": torch.as_tensor(acc, dtype=torch.float32)} def predict(self, features): - return self.forward(features) \ No newline at end of file + return self.forward(features) diff --git a/edgelab/models/heads/taggregate_head.py b/edgelab/models/heads/taggregate_head.py index 1186098d..22aea393 100644 --- a/edgelab/models/heads/taggregate_head.py +++ b/edgelab/models/heads/taggregate_head.py @@ -9,8 +9,9 @@ def __init__(self, clip_length=None, embed_dim=64, n_layers=6, nhead=6, n_classe super(TAggregate, self).__init__() self.num_tokens = 1 drop_rate = 0.1 - enc_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=nhead, activation="gelu", - dim_feedforward=dim_feedforward, dropout=drop_rate) + enc_layer = nn.TransformerEncoderLayer( + d_model=embed_dim, nhead=nhead, activation="gelu", dim_feedforward=dim_feedforward, dropout=drop_rate + ) self.transformer_enc = nn.TransformerEncoder(enc_layer, num_layers=n_layers, norm=nn.LayerNorm(embed_dim)) self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) self.pos_embed = nn.Parameter(torch.zeros(1, clip_length + self.num_tokens, embed_dim)) diff --git a/edgelab/models/heads/yolo_head.py b/edgelab/models/heads/yolo_head.py index cb2d34d1..bfd6b102 100644 --- a/edgelab/models/heads/yolo_head.py +++ b/edgelab/models/heads/yolo_head.py @@ -16,17 +16,16 @@ @MODELS.register_module() class DetHead(BaseModel): - - def __init__(self, - num_classes: int, - in_channels: Union[int, Sequence], - widen_factor: float = 1.0, - num_base_priors: int = 3, - featmap_strides: Sequence[int] = (8, 16, 32), - anchors=[[(10, 13), (16, 30), (33, 23)], - [(30, 61), (62, 45), (59, 119)], - [(116, 90), (156, 198), (373, 326)]], - init_cfg: OptMultiConfig = None): + def __init__( + self, + num_classes: int, + in_channels: Union[int, Sequence], + widen_factor: float = 1.0, + num_base_priors: int = 3, + featmap_strides: Sequence[int] = (8, 16, 32), + anchors=[[(10, 13), (16, 30), (33, 23)], [(30, 61), (62, 45), (59, 119)], [(116, 90), (156, 198), (373, 326)]], + init_cfg: OptMultiConfig = None, + ): super().__init__(init_cfg=init_cfg) self.num_classes = num_classes self.widen_factor = widen_factor @@ -40,12 +39,9 @@ def __init__(self, self.num_base_priors = num_base_priors if isinstance(in_channels, int): - self.in_channels = [make_divisible(in_channels, widen_factor) - ] * self.num_levels + self.in_channels = [make_divisible(in_channels, widen_factor)] * self.num_levels else: - self.in_channels = [ - make_divisible(i, widen_factor) for i in in_channels - ] + self.in_channels = [make_divisible(i, widen_factor) for i in in_channels] self._init_layers() @@ -53,9 +49,7 @@ def _init_layers(self): """initialize conv layers in YOLOv5 head.""" self.convs_pred = nn.ModuleList() for i in range(self.num_levels): - conv_pred = nn.Conv2d(self.in_channels[i], - self.num_base_priors * self.num_out_attrib, - 1) + conv_pred = 
nn.Conv2d(self.in_channels[i], self.num_base_priors * self.num_out_attrib, 1) self.convs_pred.append(conv_pred) @@ -65,7 +59,7 @@ def init_weights(self): for mi, s in zip(self.convs_pred, self.featmap_strides): # from b = mi.bias.data.view(self.num_base_priors, -1) # obj (8 objects per 640 image) - b.data[:, 4] += math.log(8 / (640 / s)**2) + b.data[:, 4] += math.log(8 / (640 / s) ** 2) b.data[:, 5:] += math.log(0.6 / (self.num_classes - 0.999999)) mi.bias.data = b.view(-1) @@ -94,36 +88,32 @@ def _forward(self, x) -> List[Tensor]: def forward_split(self, x: Tensor, convs: nn.Module): pred_map = convs(x) bs, _, ny, nx = pred_map.shape - pred_map = pred_map.view(bs, self.num_base_priors, self.num_out_attrib, - ny, nx) + pred_map = pred_map.view(bs, self.num_base_priors, self.num_out_attrib, ny, nx) cls_score = pred_map[:, :, 5:, ...].reshape(bs, -1, ny, nx) bbox_pred = pred_map[:, :, :4, ...].reshape(bs, -1, ny, nx) objectness = pred_map[:, :, 4:5, ...].reshape(bs, -1, ny, nx) return cls_score, bbox_pred, objectness - def forward_single(self, x: Tensor, - convs: nn.Module) -> Tensor: + def forward_single(self, x: Tensor, convs: nn.Module) -> Tensor: """Forward feature of a single scale level.""" pred_map = convs(x) pred_map = pred_map.sigmoid() bs, _, ny, nx = pred_map.shape - pred_map = pred_map.view(bs, self.num_base_priors, self.num_out_attrib, - ny*nx) + pred_map = pred_map.view(bs, self.num_base_priors, self.num_out_attrib, ny * nx) return pred_map.permute(0, 1, 3, 2).contiguous(), nx, ny - def process(self, pred_map) -> Tuple[Tensor,Tensor]: + def process(self, pred_map) -> Tuple[Tensor, Tensor]: res = [] - + for idx, (feat_, nx, ny) in enumerate(pred_map): bs = feat_.shape[0] grid, grid_ = self.get_grid(nx, ny, idx, feat_.device) - - feat_xy, feat_wh, feat_cls = torch.split(feat_, [2, 2, self.num_classes+1], dim=-1) + + feat_xy, feat_wh, feat_cls = torch.split(feat_, [2, 2, self.num_classes + 1], dim=-1) xy = (feat_xy * 2 - 0.5 + grid) * torch.as_tensor( - self.featmap_strides[idx], - dtype=torch.float, - device=feat_.device) - wh = 2 * feat_wh + self.featmap_strides[idx], dtype=torch.float, device=feat_.device + ) + wh = 2 * feat_wh wh *= wh wh *= grid_ cls = feat_cls * 100 @@ -131,24 +121,22 @@ def process(self, pred_map) -> Tuple[Tensor,Tensor]: res.append(out.view(bs, -1, self.num_out_attrib)) return torch.cat(res, 1) - + def get_grid(self, x, y, idx, device): if torch.__version__ > '1.10.0': - dy, dx = torch.meshgrid([ - torch.arange(y, device=device), - torch.arange(x, device=device) - ], - indexing='ij') + dy, dx = torch.meshgrid([torch.arange(y, device=device), torch.arange(x, device=device)], indexing='ij') else: - dy, dx = torch.meshgrid([ - torch.arange(y, device=device), - torch.arange(x, device=device) - ]) + dy, dx = torch.meshgrid([torch.arange(y, device=device), torch.arange(x, device=device)]) grid = torch.stack((dx, dy), dim=2).expand(1, 1, y, x, 2).view(1, 1, -1, 2).float().to(device) - grid_ = self.anchors[idx].clone().view( - (1, self.num_base_priors, 1, 2)).expand( - (1, self.num_base_priors, y*x, 2)).float().to(device) + grid_ = ( + self.anchors[idx] + .clone() + .view((1, self.num_base_priors, 1, 2)) + .expand((1, self.num_base_priors, y * x, 2)) + .float() + .to(device) + ) return grid, grid_ @@ -157,31 +145,21 @@ def get_grid(self, x, y, idx, device): class YOLOV5Head(YOLOv5Head): head_module: DetHead - def loss(self, x: Tuple[Tensor], batch_data_samples: Union[list, - dict]) -> dict: + def loss(self, x: Tuple[Tensor], batch_data_samples: Union[list, dict]) 
-> dict: outs = self.head_module(x) outputs = unpack_gt_instances(batch_data_samples) - (batch_gt_instances, batch_gt_instances_ignore, - batch_img_metas) = outputs + (batch_gt_instances, batch_gt_instances_ignore, batch_img_metas) = outputs - loss_inputs = outs + (batch_gt_instances, batch_img_metas, - batch_gt_instances_ignore) + loss_inputs = outs + (batch_gt_instances, batch_img_metas, batch_gt_instances_ignore) losses = self.loss_by_feat(*loss_inputs) return losses - def predict(self, - x: Tuple[Tensor], - batch_data_samples: SampleList, - rescale: bool = False) -> InstanceList: - batch_img_metas = [ - data_samples.metainfo for data_samples in batch_data_samples - ] + def predict(self, x: Tuple[Tensor], batch_data_samples: SampleList, rescale: bool = False) -> InstanceList: + batch_img_metas = [data_samples.metainfo for data_samples in batch_data_samples] outs = self.head_module(x) - predictions = self.predict_by_feat(*outs, - batch_img_metas=batch_img_metas, - rescale=rescale) + predictions = self.predict_by_feat(*outs, batch_img_metas=batch_img_metas, rescale=rescale) return predictions def forward(self, x) -> Tuple[Tensor, Tensor]: - return self.head_module._forward(x) \ No newline at end of file + return self.head_module._forward(x) diff --git a/edgelab/models/layers/attention.py b/edgelab/models/layers/attention.py index 010970d8..83440c29 100644 --- a/edgelab/models/layers/attention.py +++ b/edgelab/models/layers/attention.py @@ -8,26 +8,12 @@ @MODELS.register_module() class SEAttention(BaseModule): - - def __init__(self, - in_channels: int, - r: int = 4, - init_cfg: Union[dict, List[dict], None] = None): + def __init__(self, in_channels: int, r: int = 4, init_cfg: Union[dict, List[dict], None] = None): super().__init__(init_cfg) middle_channels = in_channels // r self.avgPool = nn.AdaptiveAvgPool2d((1, 1)) - self.conv1 = ConvNormActivation(in_channels, - middle_channels, - 1, - 1, - bias=True, - activation_layer='ReLU') - self.conv2 = ConvNormActivation(middle_channels, - in_channels, - 1, - 1, - bias=True, - activation_layer='Sigmoid') + self.conv1 = ConvNormActivation(in_channels, middle_channels, 1, 1, bias=True, activation_layer='ReLU') + self.conv2 = ConvNormActivation(middle_channels, in_channels, 1, 1, bias=True, activation_layer='Sigmoid') def forward(self, inputs): x = self.conv2(self.conv1(self.avgPool(inputs))) @@ -36,11 +22,7 @@ def forward(self, inputs): @MODELS.register_module() class SpatialAttention(BaseModule): - - def __init__(self, - kernel_size: int = 3, - stride: int = 1, - init_cfg: Union[dict, List[dict], None] = None): + def __init__(self, kernel_size: int = 3, stride: int = 1, init_cfg: Union[dict, List[dict], None] = None): super().__init__(init_cfg) if isinstance(kernel_size, int): kernel_size = (kernel_size, kernel_size) @@ -49,13 +31,14 @@ def __init__(self, else: raise ValueError() - self.conv = ConvNormActivation(2, - 1, - kernel_size, - stride, - padding=((kernel_size[0] - 1) // 2, - (kernel_size[1] - 1) // 2), - activation_layer='Sigmoid') + self.conv = ConvNormActivation( + 2, + 1, + kernel_size, + stride, + padding=((kernel_size[0] - 1) // 2, (kernel_size[1] - 1) // 2), + activation_layer='Sigmoid', + ) def forward(self, inputs): max_x, _ = torch.max(inputs, dim=1, keepdim=True) @@ -67,20 +50,12 @@ def forward(self, inputs): @MODELS.register_module() class ChannelAttention(BaseModule): - - def __init__(self, - in_channels: int, - r: int = 4, - init_cfg: Union[dict, List[dict], None] = None): + def __init__(self, in_channels: int, r: int = 4, 
init_cfg: Union[dict, List[dict], None] = None): super().__init__(init_cfg) self.avgPool = nn.AdaptiveAvgPool2d((1, 1)) self.maxPool = nn.AdaptiveMaxPool2d((1, 1)) - self.conv1 = ConvNormActivation(in_channels, - in_channels // 2, - 1, - bias=False, - activation_layer='ReLU') + self.conv1 = ConvNormActivation(in_channels, in_channels // 2, 1, bias=False, activation_layer='ReLU') self.conv2 = nn.Conv2d(in_channels // 2, in_channels, 1, bias=False) self.act = nn.Sigmoid() @@ -95,12 +70,9 @@ def forward(self, inputs): @MODELS.register_module() class CBAMAttention(BaseModule): - - def __init__(self, - in_channels: int, - kernel_size: int = 3, - r: int = 4, - init_cfg: Union[dict, List[dict], None] = None): + def __init__( + self, in_channels: int, kernel_size: int = 3, r: int = 4, init_cfg: Union[dict, List[dict], None] = None + ): super().__init__(init_cfg) self.ca = ChannelAttention(in_channels, r=r) self.sa = SpatialAttention() @@ -114,51 +86,37 @@ def forward(self, inputs): @MODELS.register_module() class SERes(BaseModule): - def __init__(self, init_cfg: Union[dict, List[dict], None] = None): super().__init__(init_cfg) @MODELS.register_module() class CA(BaseModule): - def __init__(self, init_cfg: Union[dict, List[dict], None] = None): super().__init__(init_cfg) @MODELS.register_module() class ECAMAttention(BaseModule): - - def __init__(self, - in_channels: int, - kernel_size: int = 3, - init_cfg: Union[dict, List[dict], None] = None): + def __init__(self, in_channels: int, kernel_size: int = 3, init_cfg: Union[dict, List[dict], None] = None): super().__init__(init_cfg) self.avgPool = nn.AdaptiveAvgPool2d((1, 1)) - self.conv = nn.Conv1d(1, - 1, - kernel_size, - padding=(kernel_size - 1) // 2, - bias=False) + self.conv = nn.Conv1d(1, 1, kernel_size, padding=(kernel_size - 1) // 2, bias=False) self.act = nn.Sigmoid() def forward(self, inputs): x: torch.Tensor = self.avgPool(inputs) - x = self.conv(x.squeeze(-1).transpose(-1, - -2)).transpose(-1, - -2).unsqueeze(-1) + x = self.conv(x.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) x = self.act(x) return inputs * x.expand_as(inputs) @MODELS.register_module() class ECA(BaseModule): - - def __init__(self, - in_channels: int, - kernel_size: Union[int, Tuple[int]] = 3, - init_cfg: Union[dict, List[dict], None] = None): + def __init__( + self, in_channels: int, kernel_size: Union[int, Tuple[int]] = 3, init_cfg: Union[dict, List[dict], None] = None + ): super().__init__(init_cfg) if isinstance(kernel_size, int): kernel_size = (kernel_size, kernel_size) @@ -168,14 +126,9 @@ def __init__(self, raise ValueError() self.avgPool = nn.AdaptiveAvgPool2d((1, 1)) - self.unfold = nn.Unfold(kernel_size=(1, kernel_size[1]), - padding=(0, (kernel_size[1] - 1) // 2)) - - self.conv = nn.Conv1d(in_channels, - in_channels, - kernel_size=kernel_size[1], - bias=False, - groups=in_channels) + self.unfold = nn.Unfold(kernel_size=(1, kernel_size[1]), padding=(0, (kernel_size[1] - 1) // 2)) + + self.conv = nn.Conv1d(in_channels, in_channels, kernel_size=kernel_size[1], bias=False, groups=in_channels) self.act = nn.Sigmoid() def forward(self, inputs): @@ -189,6 +142,7 @@ def forward(self, inputs): if __name__ == '__main__': import torch + se = CBAMAttention(32, 3, 4) input = torch.rand((16, 32, 192, 192)) - print(se(input).shape) \ No newline at end of file + print(se(input).shape) diff --git a/edgelab/models/layers/rep.py b/edgelab/models/layers/rep.py index 9f783ec6..558a5da3 100644 --- a/edgelab/models/layers/rep.py +++ b/edgelab/models/layers/rep.py @@ 
-79,9 +79,7 @@ def fuse_conv_norm( std = (norm_var + norm_eps).sqrt() t = (norm_gamm / std).reshape(-1, 1, 1, 1) - return conv_weight * t, norm_beta + ( - (0 if conv_bias is None else conv_bias) - - norm_mean) * norm_gamm / std + return conv_weight * t, norm_beta + ((0 if conv_bias is None else conv_bias) - norm_mean) * norm_gamm / std elif isinstance(block, nn.BatchNorm2d): in_channels = block.num_features b = in_channels // groups @@ -106,16 +104,18 @@ def fuse_conv_norm( @MODELS.register_module(force=True) class RepConv1x1(BaseModule): - def __init__(self, - in_channels: int, - out_channels: int, - use_res: bool = True, - use_dense: bool = True, - stride: int = 1, - depth: int = 6, - groups: int = 1, - act_cfg: dict = dict(type="ReLU"), - init_cfg: Union[dict, List[dict], None] = None): + def __init__( + self, + in_channels: int, + out_channels: int, + use_res: bool = True, + use_dense: bool = True, + stride: int = 1, + depth: int = 6, + groups: int = 1, + act_cfg: dict = dict(type="ReLU"), + init_cfg: Union[dict, List[dict], None] = None, + ): super().__init__(init_cfg) self.depth = depth @@ -123,13 +123,7 @@ def __init__(self, self.use_dense = use_dense self.groups = groups - self.conv3x3 = ConvNormActivation(in_channels, - out_channels, - 3, - stride, - 1, - bias=True, - activation_layer=None) + self.conv3x3 = ConvNormActivation(in_channels, out_channels, 3, stride, 1, bias=True, activation_layer=None) self.conv = nn.ModuleList() for i in range(depth): @@ -143,12 +137,7 @@ def __init__(self, self.dense_norm = nn.BatchNorm2d(out_channels) - self.fuse_conv = nn.Conv2d(in_channels, - out_channels, - 3, - padding=1, - stride=stride, - bias=True) + self.fuse_conv = nn.Conv2d(in_channels, out_channels, 3, padding=1, stride=stride, bias=True) self.act = get_act(act_cfg)() def forward(self, x) -> None: diff --git a/edgelab/models/layers/test.py b/edgelab/models/layers/test.py index 6d071928..5a83ccbf 100644 --- a/edgelab/models/layers/test.py +++ b/edgelab/models/layers/test.py @@ -1,34 +1,33 @@ import torch import torchvision - -def fuse(conv, bn): + +def fuse(conv, bn): fused = torch.nn.Conv2d( conv.in_channels, conv.out_channels, kernel_size=conv.kernel_size, stride=conv.stride, padding=conv.padding, - bias=True + bias=True, ) # setting weights w_conv = conv.weight.clone().view(conv.out_channels, -1) - w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps+bn.running_var))) - fused.weight.copy_( torch.mm(w_bn, w_conv).view(fused.weight.size()) ) - + w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) + fused.weight.copy_(torch.mm(w_bn, w_conv).view(fused.weight.size())) + # setting bias if conv.bias is not None: b_conv = conv.bias else: - b_conv = torch.zeros( conv.weight.size(0) ) - b_bn = bn.bias - bn.weight.mul(bn.running_mean).div( - torch.sqrt(bn.running_var + bn.eps) - ) - fused.bias.copy_( b_conv + b_bn ) + b_conv = torch.zeros(conv.weight.size(0)) + b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) + fused.bias.copy_(b_conv + b_bn) return fused + # Testing # we need to turn off gradient calculation because we didn't write it torch.set_grad_enabled(False) @@ -36,12 +35,9 @@ def fuse(conv, bn): resnet18 = torchvision.models.resnet18(pretrained=True) # removing all learning variables, etc resnet18.eval() -model = torch.nn.Sequential( - resnet18.conv1, - resnet18.bn1 -) +model = torch.nn.Sequential(resnet18.conv1, resnet18.bn1) f1 = model.forward(x) fused = fuse(model[0], model[1]) f2 = fused.forward(x) d = (f1 - f2).mean().item() 
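# d is the mean signed difference between the unfused and fused outputs; for a
# correct fusion it should be on the order of float32 rounding error. A hedged
# sanity check (illustrative, not part of the original script) could be:
#   assert abs(d) < 1e-5, "conv+bn fusion diverged"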
-print("error:",d) \ No newline at end of file +print("error:", d) diff --git a/edgelab/models/losses/__init__.py b/edgelab/models/losses/__init__.py index 622a641c..a3d18386 100644 --- a/edgelab/models/losses/__init__.py +++ b/edgelab/models/losses/__init__.py @@ -3,6 +3,4 @@ from .nll_loss import NLLLoss from .bce_withlogits_loss import BCEWithLogitsLoss -__all__ = [ - 'LabelSmoothCrossEntropyLoss', 'PFLDLoss', 'NLLLoss', 'BCEWithLogitsLoss' -] +__all__ = ['LabelSmoothCrossEntropyLoss', 'PFLDLoss', 'NLLLoss', 'BCEWithLogitsLoss'] diff --git a/edgelab/models/losses/bce_withlogits_loss.py b/edgelab/models/losses/bce_withlogits_loss.py index a807a41e..01e20b21 100644 --- a/edgelab/models/losses/bce_withlogits_loss.py +++ b/edgelab/models/losses/bce_withlogits_loss.py @@ -15,13 +15,14 @@ def bcewithlogits_loss(pred, target): @LOSSES.register_module() class BCEWithLogitsLoss(nn.BCEWithLogitsLoss): - - def __init__(self, - weight: Union[Tuple[int or float, ...], Tensor, None] = None, - size_average=None, - reduce=None, - reduction: str = 'mean', - pos_weight: Tensor or int or None = None) -> None: + def __init__( + self, + weight: Union[Tuple[int or float, ...], Tensor, None] = None, + size_average=None, + reduce=None, + reduction: str = 'mean', + pos_weight: Tensor or int or None = None, + ) -> None: if isinstance(weight, (int, float)): weight = Tensor([weight]) diff --git a/edgelab/models/losses/classfication_loss.py b/edgelab/models/losses/classfication_loss.py index 95ac7cb5..d25a45c1 100644 --- a/edgelab/models/losses/classfication_loss.py +++ b/edgelab/models/losses/classfication_loss.py @@ -16,16 +16,16 @@ def __init__(self, weight=None, reduction='mean', smoothing=0.0): def _smooth_one_hot(targets: torch.Tensor, n_classes: int, smoothing=0.0): assert 0 <= smoothing < 1 with torch.no_grad(): - targets = torch.empty(size=(targets.size(0), n_classes), - device=targets.device) \ - .fill_(smoothing / (n_classes - 1)) \ - .scatter_(1, torch.tensor(targets.data.unsqueeze(1),dtype=torch.int64), 1. 
- smoothing) + targets = ( + torch.empty(size=(targets.size(0), n_classes), device=targets.device) + .fill_(smoothing / (n_classes - 1)) + .scatter_(1, torch.tensor(targets.data.unsqueeze(1), dtype=torch.int64), 1.0 - smoothing) + ) return targets def forward(self, inputs, targets): - targets=targets.cuda() - targets = LabelSmoothCrossEntropyLoss._smooth_one_hot(targets, inputs.size(-1), - self.smoothing) + targets = targets.cuda() + targets = LabelSmoothCrossEntropyLoss._smooth_one_hot(targets, inputs.size(-1), self.smoothing) lsm = F.log_softmax(inputs, -1) if self.weight is not None: diff --git a/edgelab/models/losses/nll_loss.py b/edgelab/models/losses/nll_loss.py index aa9fab32..18dbd074 100644 --- a/edgelab/models/losses/nll_loss.py +++ b/edgelab/models/losses/nll_loss.py @@ -12,20 +12,13 @@ def nll_loss(pred, target): @LOSSES.register_module() class NLLLoss(nn.Module): - def __init__(self, reduction='mean', loss_weight=1.0) -> None: super().__init__() self.reduction = reduction self.loss_weight = loss_weight - def forward(self, - pred, - target, - weight=None, - reduction_override=None, - avg_factor=None): + def forward(self, pred, target, weight=None, reduction_override=None, avg_factor=None): assert reduction_override in (None, 'none', 'mean', 'sum') reduction = reduction_override if reduction_override else self.reduction - loss = self.loss_weight * nll_loss( - pred, target, weight, reduction=reduction, avg_factor=avg_factor) + loss = self.loss_weight * nll_loss(pred, target, weight, reduction=reduction, avg_factor=avg_factor) return loss diff --git a/edgelab/models/losses/pfld_loss.py b/edgelab/models/losses/pfld_loss.py index eb26c132..333c3a3e 100644 --- a/edgelab/models/losses/pfld_loss.py +++ b/edgelab/models/losses/pfld_loss.py @@ -9,7 +9,6 @@ @LOSSES.register_module() class PFLDLoss(nn.Module): - def __init__(self): super(PFLDLoss, self).__init__() @@ -17,4 +16,4 @@ def forward(self, landmarks, landmark_gt): # angle_loss = torch.sum(1-torch.cos((angle-angle_gt)),axis=0) l2_distant = torch.sum((landmark_gt - landmarks) * (landmark_gt - landmarks), axis=1) - return torch.mean(l2_distant) \ No newline at end of file + return torch.mean(l2_distant) diff --git a/edgelab/models/mot/bytetrack.py b/edgelab/models/mot/bytetrack.py index 6b971ffb..2666023d 100644 --- a/edgelab/models/mot/bytetrack.py +++ b/edgelab/models/mot/bytetrack.py @@ -6,28 +6,20 @@ @MODELS.register_module() class ByteTrack(BaseMultiObjectTracker): - - def __init__(self, - detector=None, - motion=None, - tracker=None, - init_cfg=None): + def __init__(self, detector=None, motion=None, tracker=None, init_cfg=None): super().__init__(init_cfg) if detector is not None: - self.detector:BaseDetector = build_detector(detector) + self.detector: BaseDetector = build_detector(detector) if motion is not None: self.motion = motion if tracker is not None: self.tracker = tracker - - + def forward_train(self, imgs, img_metas, **kwargs): return self.detector.forward_train(imgs, img_metas, **kwargs) - + def simple_test(self, img, img_metas, **kwargs): pass - - diff --git a/edgelab/models/necks/__init__.py b/edgelab/models/necks/__init__.py index 1fce78b4..96ec2251 100644 --- a/edgelab/models/necks/__init__.py +++ b/edgelab/models/necks/__init__.py @@ -1,4 +1,4 @@ from .spp import SPP from .fpn import FPN -__all__ = ['SPP',"FPN"] +__all__ = ['SPP', "FPN"] diff --git a/edgelab/models/necks/fpn.py b/edgelab/models/necks/fpn.py index fb501830..26bd420f 100644 --- a/edgelab/models/necks/fpn.py +++ b/edgelab/models/necks/fpn.py @@ 
-10,7 +10,6 @@ @NECKS.register_module() class FPN(_FPN): - def __init__( self, in_channels: List[int], @@ -20,22 +19,31 @@ def __init__( start_level: int = 0, end_level: int = -1, add_extra_convs: Union[bool, str] = False, - relu_before_extra_convs: bool =True, + relu_before_extra_convs: bool = True, no_norm_on_lateral: bool = False, conv_cfg: Optional[Union[ConfigDict, dict]] = None, norm_cfg: Optional[Union[ConfigDict, dict]] = None, act_cfg: Optional[Union[ConfigDict, dict]] = None, upsample_cfg: Optional[Union[ConfigDict, dict]] = dict(mode='nearest'), - init_cfg: Union[Union[ConfigDict, dict], - List[Union[ConfigDict, - dict]]] = dict(type='Xavier', - layer='Conv2d', - distribution='uniform') + init_cfg: Union[Union[ConfigDict, dict], List[Union[ConfigDict, dict]]] = dict( + type='Xavier', layer='Conv2d', distribution='uniform' + ), ) -> None: - super().__init__(in_channels, out_channels, num_outs, start_level, - end_level, add_extra_convs, relu_before_extra_convs, - no_norm_on_lateral, conv_cfg, norm_cfg, act_cfg, - upsample_cfg, init_cfg) + super().__init__( + in_channels, + out_channels, + num_outs, + start_level, + end_level, + add_extra_convs, + relu_before_extra_convs, + no_norm_on_lateral, + conv_cfg, + norm_cfg, + act_cfg, + upsample_cfg, + init_cfg, + ) self.out_idx = out_idx assert len(out_idx) <= num_outs @@ -52,10 +60,7 @@ def forward(self, inputs: Tuple[Tensor]) -> tuple: assert len(inputs) == len(self.in_channels) # build laterals - laterals = [ - lateral_conv(inputs[i + self.start_level]) - for i, lateral_conv in enumerate(self.lateral_convs) - ] + laterals = [lateral_conv(inputs[i + self.start_level]) for i, lateral_conv in enumerate(self.lateral_convs)] # build top-down path used_backbone_levels = len(laterals) @@ -64,12 +69,10 @@ def forward(self, inputs: Tuple[Tensor]) -> tuple: # it cannot co-exist with `size` in `F.interpolate`. 
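            # e.g. upsample_cfg=dict(scale_factor=2, mode='nearest') takes this
            # branch and doubles the spatial size at each step, while the
            # default dict(mode='nearest') falls through to the `size=`-based
            # resize against the next lateral's shape below.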
if 'scale_factor' in self.upsample_cfg: # fix runtime error of "+=" inplace operation in PyTorch 1.10 - laterals[i - 1] = laterals[i - 1] + F.interpolate( - laterals[i], **self.upsample_cfg) + laterals[i - 1] = laterals[i - 1] + F.interpolate(laterals[i], **self.upsample_cfg) else: prev_shape = laterals[i - 1].shape[2:] - laterals[i - 1] = laterals[i - 1] + F.interpolate( - laterals[i], size=prev_shape, **self.upsample_cfg) + laterals[i - 1] = laterals[i - 1] + F.interpolate(laterals[i], size=prev_shape, **self.upsample_cfg) # build outputs # part 1: from original levels @@ -80,7 +83,6 @@ def forward(self, inputs: Tuple[Tensor]) -> tuple: # (e.g., Faster R-CNN, Mask R-CNN) if not self.add_extra_convs: for i in range(self.num_outs - used_backbone_levels): - outs.append(F.max_pool2d(outs[-1], 1, stride=2)) # add conv layers on top of original feature maps (RetinaNet) else: @@ -100,9 +102,8 @@ def forward(self, inputs: Tuple[Tensor]) -> tuple: outs.append(self.fpn_convs[i](outs[-1])) return tuple(outs) + @NECKS.register_module() class LiteFPN(BaseModule): - def __init__(self, init_cfg: Union[dict,List[dict],None] = None): + def __init__(self, init_cfg: Union[dict, List[dict], None] = None): super().__init__(init_cfg) - - \ No newline at end of file diff --git a/edgelab/models/necks/spp.py b/edgelab/models/necks/spp.py index a7d80791..0478aec9 100644 --- a/edgelab/models/necks/spp.py +++ b/edgelab/models/necks/spp.py @@ -8,29 +8,17 @@ @MODELS.register_module() class SPP(nn.Module): - - def __init__(self, - input_channels, - output_channels, - layers=[1, 2, 3]) -> None: + def __init__(self, input_channels, output_channels, layers=[1, 2, 3]) -> None: super(SPP, self).__init__() self.layers = layers self.conv = CBR(input_channels, output_channels, 1, 1, padding=0) for idx, value in enumerate(layers): - layer = self._make_layer(output_channels, - output_channels, - groups=output_channels, - number=value) + layer = self._make_layer(output_channels, output_channels, groups=output_channels, number=value) setattr(self, f'layer{idx}', layer) self.CB = nn.Sequential( - nn.Conv2d(output_channels * len(layers), - output_channels, - 1, - 1, - 0, - bias=False), + nn.Conv2d(output_channels * len(layers), output_channels, 1, 1, 0, bias=False), nn.BatchNorm2d(output_channels), ) @@ -40,7 +28,7 @@ def forward(self, x): x = self.conv(x) out = [] - for idx,_ in enumerate(self.layers): + for idx, _ in enumerate(self.layers): tmp = getattr(self, f'layer{idx}')(x) out.append(tmp) @@ -49,23 +37,8 @@ def forward(self, x): result = self.relu(x + y) return result - def _make_layer(self, - inp, - oup, - kernel=5, - stride=1, - padding=2, - groups=1, - bias=False, - number=1): + def _make_layer(self, inp, oup, kernel=5, stride=1, padding=2, groups=1, bias=False, number=1): layer = [] for _ in range(number): - layer.append( - CBR(inp, - oup, - kernel, - stride, - padding=padding, - bias=bias, - groups=groups)) + layer.append(CBR(inp, oup, kernel, stride, padding=padding, bias=bias, groups=groups)) return nn.Sequential(*layer) diff --git a/edgelab/models/tf/tf_common.py b/edgelab/models/tf/tf_common.py index 856e6c4b..d90fae67 100644 --- a/edgelab/models/tf/tf_common.py +++ b/edgelab/models/tf/tf_common.py @@ -12,7 +12,8 @@ def __init__(self, w=None): moving_mean_initializer=keras.initializers.Constant(w.running_mean.detach().numpy()), moving_variance_initializer=keras.initializers.Constant(w.running_var.detach().numpy()), epsilon=w.eps, - momentum=w.momentum) + momentum=w.momentum, + ) def call(self, inputs): return 
self.bn(inputs) @@ -72,7 +73,10 @@ class TFBaseConv2d(keras.layers.Layer): # Standard convolution2d or depthwiseconv2d depends on 'g' argument. def __init__(self, w=None): super().__init__() - assert w.groups in [1, w.in_channels], "Argument(g) only be 1 for conv2d, or be in_channels for depthwise conv2d" + assert w.groups in [ + 1, + w.in_channels, + ], "Argument(g) only be 1 for conv2d, or be in_channels for depthwise conv2d" bias = True if w.bias is not None else False pad = True if (w.stride[0] == 1 and w.padding[0] == w.kernel_size[0] // 2) else False @@ -86,7 +90,7 @@ def __init__(self, w=None): use_bias=bias, # torch[out, in, h, w] to TF[h, w, in, out] kernel_initializer=keras.initializers.Constant(w.weight.permute(2, 3, 1, 0).detach().numpy()), - bias_initializer=keras.initializers.Constant(w.bias.detach().numpy()) if bias else 'zeros' + bias_initializer=keras.initializers.Constant(w.bias.detach().numpy()) if bias else 'zeros', ) else: conv = keras.layers.DepthwiseConv2D( @@ -96,7 +100,7 @@ def __init__(self, w=None): dilation_rate=w.dilation, use_bias=bias, depthwise_initializer=keras.initializers.Constant(w.weight.permute(2, 3, 0, 1).detach().numpy()), - bias_initializer=keras.initializers.Constant(w.bias.detach().numpy()) if bias else 'zeros' + bias_initializer=keras.initializers.Constant(w.bias.detach().numpy()) if bias else 'zeros', ) self.conv = conv if pad else keras.Sequential([TFPad(autopad(w.kernel_size[0], w.padding[0])), conv]) @@ -108,13 +112,12 @@ class TFDense(keras.layers.Layer): def __init__(self, w=None): super().__init__() bias = False if w.bias is None else True - self.fc = keras.layers.Dense(w.out_features, - use_bias=True if bias else False, - kernel_initializer=keras.initializers.Constant( - w.weight.permute(1, 0).detach().numpy()), - bias_initializer=keras.initializers.Constant( - w.bias.detach().numpy()) if bias else 'zeros', - ) + self.fc = keras.layers.Dense( + w.out_features, + use_bias=True if bias else False, + kernel_initializer=keras.initializers.Constant(w.weight.permute(1, 0).detach().numpy()), + bias_initializer=keras.initializers.Constant(w.bias.detach().numpy()) if bias else 'zeros', + ) def call(self, inputs): return self.fc(inputs) @@ -122,6 +125,7 @@ def call(self, inputs): class TFBaseConv1d(keras.layers.Layer): """Standard convolution1d or depthwiseconv1d depends on 'g' argument""" + def __init__(self, w=None): super().__init__() assert w.groups in [1, w.in_channels], "Argument(g) only be 1 for conv1d, or be inp for depthwise conv1d" @@ -137,7 +141,7 @@ def __init__(self, w=None): dilation_rate=w.dilation, use_bias=bias, kernel_initializer=keras.initializers.Constant(w.weight.permute(2, 1, 0).detach().numpy()), - bias_initializer=keras.initializers.Constant(w.bias.detach().numpy()) if bias else 'zeros' + bias_initializer=keras.initializers.Constant(w.bias.detach().numpy()) if bias else 'zeros', ) else: conv = keras.layers.DepthwiseConv1D( @@ -147,7 +151,7 @@ def __init__(self, w=None): dilation_rate=w.dilation, use_bias=bias, depthwise_initializer=keras.initializers.Constant(w.weight.permute(2, 0, 1).detach().numpy()), - bias_initializer=keras.initializers.Constant(w.bias.detach().numpy()) if bias else 'zeros' + bias_initializer=keras.initializers.Constant(w.bias.detach().numpy()) if bias else 'zeros', ) padding = keras.layers.ZeroPadding1D(padding=autopad(w.kernel_size[0], w.padding[0])) self.conv = conv if pad else keras.Sequential([padding, conv]) @@ -158,6 +162,7 @@ def call(self, inputs): class TFAADownsample(keras.layers.Layer): 
"""DepthwiseConv1D with fixed weights only for audio model.""" + def __init__(self, w=None): super().__init__() pad = True if w.stride == 1 else False @@ -167,7 +172,7 @@ def __init__(self, w=None): w.stride, 'SAME' if pad else 'VALID', use_bias=False, - depthwise_initializer=keras.initializers.Constant(w.filt.permute(2, 0, 1).detach().numpy()) + depthwise_initializer=keras.initializers.Constant(w.filt.permute(2, 0, 1).detach().numpy()), ) padding = keras.layers.ZeroPadding1D(padding=autopad(w.filt_size, None)) self.filt = filt if pad else keras.Sequential([padding, filt]) @@ -178,6 +183,7 @@ def call(self, inputs): class TFActivation(keras.layers.Layer): """Activation functions""" + def __init__(self, w=None): super().__init__() @@ -188,7 +194,7 @@ def __init__(self, w=None): elif isinstance(w, nn.LeakyReLU): act = keras.layers.LeakyReLU(w.negative_slope) elif isinstance(w, nn.Sigmoid): - act = lambda x: keras.activations.sigmoid(x) # noqa + act = lambda x: keras.activations.sigmoid(x) # noqa else: raise Exception(f'no matching TensorFlow activation found for PyTorch activation {w}') self.act = act @@ -215,8 +221,11 @@ class TFUpsample(keras.layers.Layer): # TF version of torch.nn.Upsample() def __init__(self, w=None): super().__init__() - scale_factor, mode = (int(w.scale_factor), w.mode) if isinstance(w, nn.Upsample) \ - else (int(w.kwargs["scale_factor"]), w.kwargs["mode"]) + scale_factor, mode = ( + (int(w.scale_factor), w.mode) + if isinstance(w, nn.Upsample) + else (int(w.kwargs["scale_factor"]), w.kwargs["mode"]) + ) assert scale_factor % 2 == 0, "scale_factor must be multiple of 2" self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * scale_factor, x.shape[2] * scale_factor), mode) # self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode) @@ -247,4 +256,4 @@ def tf_method(node): elif 'mul' in node.name: return node.args[0] * eval(str(node.args[1])) else: - raise Exception(f'No match method found for {node.name}') \ No newline at end of file + raise Exception(f'No match method found for {node.name}') diff --git a/edgelab/models/utils/computer_acc.py b/edgelab/models/utils/computer_acc.py index 30591164..a4e85c30 100644 --- a/edgelab/models/utils/computer_acc.py +++ b/edgelab/models/utils/computer_acc.py @@ -1,6 +1,5 @@ - def pose_acc(pred, target, hw, th=10): - h = hw[0] if isinstance(hw[0], int) else int(hw[0][0]) + h = hw[0] if isinstance(hw[0], int) else int(hw[0][0]) w = hw[1] if isinstance(hw[1], int) else int(hw[1][0]) pred[:, 0::2] = pred[:, 0::2] * w pred[:, 1::2] = pred[:, 1::2] * h @@ -12,7 +11,7 @@ def pose_acc(pred, target, hw, th=10): th = th acc = [] for p, t in zip(pred, target): - distans = ((t[0] - p[0])**2 + (t[1] - p[1])**2)**0.5 + distans = ((t[0] - p[0]) ** 2 + (t[1] - p[1]) ** 2) ** 0.5 if distans > th: acc.append(0) elif distans > 1: @@ -23,9 +22,9 @@ def pose_acc(pred, target, hw, th=10): def audio_acc(pred, target): - pred = pred[0] if len(pred.shape)==2 else pred # onnx shape(d,), tflite shape(1,d) + pred = pred[0] if len(pred.shape) == 2 else pred # onnx shape(d,), tflite shape(1,d) pred = pred.argsort()[::-1][:5] - correct = (target==pred).astype(float) - acc = (correct[0], correct.max()) # (top1, top5) accuracy + correct = (target == pred).astype(float) + acc = (correct[0], correct.max()) # (top1, top5) accuracy - return acc \ No newline at end of file + return acc diff --git a/edgelab/models/utils/metrics.py b/edgelab/models/utils/metrics.py index a74f604c..a78135af 100644 --- a/edgelab/models/utils/metrics.py +++ 
b/edgelab/models/utils/metrics.py
@@ -3,14 +3,7 @@
 import torch
 
 
-def bbox_iou(box1,
-             box2,
-             x1y1x2y2=True,
-             SIoU=True,
-             GIoU=False,
-             DIoU=False,
-             CIoU=False,
-             eps=1e-7):
+def bbox_iou(box1, box2, x1y1x2y2=True, SIoU=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
     # Returns the IoU of box1 to box2. box1 is nx4, box2 is nx4
     box1 = box1.T
     box2 = box2.T
@@ -26,8 +19,9 @@ def bbox_iou(box1,
         b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
 
     # Intersection area
-    inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
-            (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
+    inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * (
+        torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)
+    ).clamp(0)
 
     # Union Area
     w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
@@ -36,8 +30,7 @@ def bbox_iou(box1,
     iou = inter / union
 
     if SIoU or CIoU or DIoU or GIoU:
-        cw = torch.max(b1_x2, b2_x2) - torch.min(
-            b1_x1, b2_x1)  # convex (smallest enclosing box) width
+        cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)  # convex (smallest enclosing box) width
         ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)  # convex height
         if SIoU:  # SIoU Loss https://arxiv.org/pdf/2205.12740.pdf
             s_cw = (b2_x1 + b2_x2 - b1_x1 - b1_x2) * 0.5
@@ -46,32 +39,27 @@ def bbox_iou(box1,
             sin_alpha_1 = torch.abs(s_cw) / sigma
             sin_alpha_2 = torch.abs(s_ch) / sigma
             threshold = pow(2, 0.5) / 2
-            sin_alpha = torch.where(sin_alpha_1 > threshold, sin_alpha_2,
-                                    sin_alpha_1)
+            sin_alpha = torch.where(sin_alpha_1 > threshold, sin_alpha_2, sin_alpha_1)
             angle_cost = torch.cos(torch.arcsin(sin_alpha) * 2 - math.pi / 2)
-            rho_x = (s_cw / cw)**2
-            rho_y = (s_ch / ch)**2
+            rho_x = (s_cw / cw) ** 2
+            rho_y = (s_ch / ch) ** 2
             gamma = angle_cost - 2
-            distance_cost = 2 - torch.exp(gamma * rho_x) - torch.exp(
-                gamma * rho_y)
+            distance_cost = 2 - torch.exp(gamma * rho_x) - torch.exp(gamma * rho_y)
             omiga_w = torch.abs(w1 - w2) / torch.max(w1, w2)
             omiga_h = torch.abs(h1 - h2) / torch.max(h1, h2)
-            shape_cost = torch.pow(1 - torch.exp(-1 * omiga_w), 4) + torch.pow(
-                1 - torch.exp(-1 * omiga_h), 4)
+            shape_cost = torch.pow(1 - torch.exp(-1 * omiga_w), 4) + torch.pow(1 - torch.exp(-1 * omiga_h), 4)
             return iou - 0.5 * (distance_cost + shape_cost)
         if CIoU or DIoU:  # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
             c2 = cw**2 + ch**2 + eps  # convex diagonal squared
-            rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2)**2 +
-                    (b2_y1 + b2_y2 - b1_y1 - b1_y2)**
-                    2) / 4  # center distance squared
+            rho2 = (
+                (b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2
+            ) / 4  # center distance squared
             if CIoU:  # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
-                v = (4 / math.pi**2) * torch.pow(
-                    torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
+                v = (4 / math.pi**2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
                 with torch.no_grad():
                     alpha = v / (v - iou + (1 + eps))
                 return iou - (rho2 / c2 + v * alpha)  # CIoU
             return iou - rho2 / c2  # DIoU
         c_area = cw * ch + eps  # convex area
-        return iou - (c_area - union
-                      ) / c_area  # GIoU https://arxiv.org/pdf/1902.09630.pdf
-    return iou  # IoU
\ No newline at end of file
+        return iou - (c_area - union) / c_area  # GIoU https://arxiv.org/pdf/1902.09630.pdf
+    return iou  # IoU
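A quick usage sketch for the reformatted `bbox_iou` above (illustrative, not part of this patch): with the default flags it returns the SIoU-penalised IoU, and with every variant flag off it falls back to plain IoU.

import torch

box1 = torch.tensor([[10.0, 10.0, 50.0, 50.0]])  # xyxy, shape (n, 4)
box2 = torch.tensor([[12.0, 14.0, 48.0, 52.0]])

siou = bbox_iou(box1, box2, x1y1x2y2=True)             # default: IoU with SIoU penalty
iou = bbox_iou(box1, box2, x1y1x2y2=True, SIoU=False)  # plain IoU

diff --git a/edgelab/registry.py b/edgelab/registry.py
index b717bff8..6d59452b 100644
--- a/edgelab/registry.py
+++ b/edgelab/registry.py
@@ -23,63 +23,34 @@ LOSSES = MMENGINE_MODELS
 POSE_ESTIMATORS = MMENGINE_MODELS
 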
-LOG_PROCESSORS = Registry('log processors',
-                          parent=MMENGINE_LOG_PROCESSORS,
-                          locations=['edgelab'])
-
-VISBACKENDS = Registry('visbackends',
-                       parent=MMENGINE_VISBACKENDS,
-                       locations=['edgelab'])
-VISUALIZERS = Registry('visualizers',
-                       parent=MMENGINE_VISUALIZERS,
-                       locations=['edgelab'])
-
-OPTIM_WRAPPERS = Registry('optim_wrapper',
-                          parent=MMENGINE_OPTIM_WRAPPERS,
-                          locations=['edgelab'])
-
-OPTIMIZERS = Registry('optimizer',
-                      parent=MMENGINE_OPTIMIZERS,
-                      locations=['edgelab'])
+LOG_PROCESSORS = Registry('log processors', parent=MMENGINE_LOG_PROCESSORS, locations=['edgelab'])
+
+VISBACKENDS = Registry('visbackends', parent=MMENGINE_VISBACKENDS, locations=['edgelab'])
+VISUALIZERS = Registry('visualizers', parent=MMENGINE_VISUALIZERS, locations=['edgelab'])
+
+OPTIM_WRAPPERS = Registry('optim_wrapper', parent=MMENGINE_OPTIM_WRAPPERS, locations=['edgelab'])
+
+OPTIMIZERS = Registry('optimizer', parent=MMENGINE_OPTIMIZERS, locations=['edgelab'])
 
 OPTIM_WRAPPER_CONSTRUCTORS = Registry(
-    'optimizer wrapper constructor',
-    parent=MMENGINE_OPTIM_WRAPPER_CONSTRUCTORS,
-    locations=['edgelab'])
+    'optimizer wrapper constructor', parent=MMENGINE_OPTIM_WRAPPER_CONSTRUCTORS, locations=['edgelab']
+)
 
-PARAM_SCHEDULERS = Registry('param schedulers',
-                            parent=MMENGINE_PARAM_SCHEDULERS,
-                            locations=['edgelab'])
+PARAM_SCHEDULERS = Registry('param schedulers', parent=MMENGINE_PARAM_SCHEDULERS, locations=['edgelab'])
 
-LOOPS = Registry('loop',
-                 parent=MMENGINE_LOOPS,
-                 locations=['edgelab.engine.runner'])
+LOOPS = Registry('loop', parent=MMENGINE_LOOPS, locations=['edgelab.engine.runner'])
 
-MODELS = Registry('model',
-                  parent=MMENGINE_MODELS,
-                  locations=['edgelab.models'])
+MODELS = Registry('model', parent=MMENGINE_MODELS, locations=['edgelab.models'])
 
-DATASETS = Registry('dataset',
-                    parent=MMENGINE_DATASETS,
-                    locations=['edgelab.datasets'])
+DATASETS = Registry('dataset', parent=MMENGINE_DATASETS, locations=['edgelab.datasets'])
 
-EVALUATOR = Registry('evaluator',
-                     parent=MMENGINE_EVALUATOR,
-                     locations=['edgelab.evaluation'])
+EVALUATOR = Registry('evaluator', parent=MMENGINE_EVALUATOR, locations=['edgelab.evaluation'])
 
 HOOKS = Registry('hook', parent=MMENGINE_HOOKS, locations=['edgelab.engine'])
 
-DATA_SANPLERS = Registry('data_samplers',
-                         parent=MMENGINE_DATA_SAMPLERS,
-                         locations=['edgelab.datasets.pipelines'])
+DATA_SANPLERS = Registry('data_samplers', parent=MMENGINE_DATA_SAMPLERS, locations=['edgelab.datasets.pipelines'])
 
-METRICS = Registry('metrics',
-                   parent=MMENGINE_METRICS,
-                   locations=['edgelab.datasets'])
+METRICS = Registry('metrics', parent=MMENGINE_METRICS, locations=['edgelab.datasets'])
 
-TRANSFORMS = Registry('transforms',
-                      parent=MMENGINE_TRANSFORMS,
-                      locations=['edgelab.datasets'])
+TRANSFORMS = Registry('transforms', parent=MMENGINE_TRANSFORMS, locations=['edgelab.datasets'])
 
-FUNCTIONS = Registry('functions',
-                     parent=MMENGINE_FUNCTIONS,
-                     locations=['edgelab.datasets'])
+FUNCTIONS = Registry('functions', parent=MMENGINE_FUNCTIONS, locations=['edgelab.datasets'])
diff --git a/edgelab/utils/__init__.py b/edgelab/utils/__init__.py
index 087b1b7a..a7d4157d 100644
--- a/edgelab/utils/__init__.py
+++ b/edgelab/utils/__init__.py
@@ -1,3 +1,3 @@
-from .cv import (NMS, xywh2xyxy, xyxy2cocoxywh, load_image)
+from .cv import NMS, xywh2xyxy, xyxy2cocoxywh, load_image
 
 __all__ = ['NMS', 'xywh2xyxy', 'xyxy2cocoxywh', 'load_image']
diff --git a/edgelab/utils/cv.py b/edgelab/utils/cv.py
index 141140e3..014d0cf1 100644
--- a/edgelab/utils/cv.py
+++ b/edgelab/utils/cv.py
@@ -27,19 +27,18 @@ def 
xyxy2cocoxywh(x, coco_format: bool = False): return y -def NMS(bbox: Union[np.ndarray, torch.Tensor], - confiden: Union[np.ndarray, torch.Tensor], - classer: Union[np.ndarray, torch.Tensor], - bbox_format="xyxy", - max_det=300, - iou_thres=0.4, - conf_thres=0.25): - +def NMS( + bbox: Union[np.ndarray, torch.Tensor], + confiden: Union[np.ndarray, torch.Tensor], + classer: Union[np.ndarray, torch.Tensor], + bbox_format="xyxy", + max_det=300, + iou_thres=0.4, + conf_thres=0.25, +): bbox = bbox if isinstance(bbox, torch.Tensor) else torch.from_numpy(bbox) - confiden = confiden if isinstance( - confiden, torch.Tensor) else torch.from_numpy(confiden) - classer = classer if isinstance( - classer, torch.Tensor) else torch.from_numpy(classer) + confiden = confiden if isinstance(confiden, torch.Tensor) else torch.from_numpy(confiden) + classer = classer if isinstance(classer, torch.Tensor) else torch.from_numpy(classer) assert bbox.shape[0] == confiden.shape[0] == classer.shape[0] @@ -54,8 +53,7 @@ def NMS(bbox: Union[np.ndarray, torch.Tensor], elif bbox_format == "xywh": bbox = xywh2xyxy(bbox) - pred = torch.cat((bbox, confiden.view( - -1, 1), torch.argmax(classer, dim=1, keepdim=True)), 1) + pred = torch.cat((bbox, confiden.view(-1, 1), torch.argmax(classer, dim=1, keepdim=True)), 1) if pred.shape[0] > max_det: pred = pred[pred[:, 4].argsort(descending=True)[:max_det]] @@ -76,7 +74,6 @@ def load_image( normalized: bool = False, format: str = 'np', ) -> Union[np.ndarray, Image.Image]: - assert format in ['np', "pil"], ValueError img = cv2.imread(path) diff --git a/edgelab/version.py b/edgelab/version.py index 2745548e..155d80a1 100644 --- a/edgelab/version.py +++ b/edgelab/version.py @@ -14,4 +14,4 @@ def parse_version_info(version_str): return tuple(version_info) -version_info = parse_version_info(__version__) \ No newline at end of file +version_info = parse_version_info(__version__) diff --git a/edgelab/visualization/__init__.py b/edgelab/visualization/__init__.py index 2bf68d6d..f1156244 100644 --- a/edgelab/visualization/__init__.py +++ b/edgelab/visualization/__init__.py @@ -1,3 +1,3 @@ -from .visualizer import (FomoLocalVisualizer, SensorClsVisualizer) +from .visualizer import FomoLocalVisualizer, SensorClsVisualizer __all__ = ['FomoLocalVisualizer', 'SensorClsVisualizer'] diff --git a/edgelab/visualization/visualizer.py b/edgelab/visualization/visualizer.py index d1a9d61c..d949fc4a 100644 --- a/edgelab/visualization/visualizer.py +++ b/edgelab/visualization/visualizer.py @@ -15,7 +15,7 @@ class FomoLocalVisualizer(DetLocalVisualizer): """ Unified Fomo and target detection visualization classes - + """ def __init__(self, name='v', *args, fomo=False, **kwargs) -> None: @@ -59,19 +59,14 @@ def fomo_add_datasample( if draw_gt and data_sample is not None: gt_img = image if 'gt_instances' in data_sample: - gt_img = self._draw_fomo_instances(gt_img, - data_sample, - classes=classes, - plaettle=plaettle) + gt_img = self._draw_fomo_instances(gt_img, data_sample, classes=classes, plaettle=plaettle) if draw_pred and data_sample is not None: pred_img = image if 'pred_instances' in data_sample: - pred_img = self._draw_fomo_instances(pred_img, - data_sample, - bbox=False, - classes=classes, - plaettle=plaettle) + pred_img = self._draw_fomo_instances( + pred_img, data_sample, bbox=False, classes=classes, plaettle=plaettle + ) if gt_img is not None and pred_img is not None: drawn_img = np.concatenate((gt_img, pred_img), axis=1) @@ -92,14 +87,13 @@ def fomo_add_datasample( self.add_image(name, drawn_img, 
step) def _draw_fomo_instances( - self, - img: np.ndarray, - data_sample: DetDataSample, - bbox: bool = True, - classes: Optional[Sequence[str]] = None, - plaettle: Optional[Sequence[Tuple[int, - ...]]] = None) -> np.ndarray: - + self, + img: np.ndarray, + data_sample: DetDataSample, + bbox: bool = True, + classes: Optional[Sequence[str]] = None, + plaettle: Optional[Sequence[Tuple[int, ...]]] = None, + ) -> np.ndarray: self.set_image(img) if bbox: instances: InstanceData = data_sample.gt_instances @@ -134,11 +128,7 @@ def _draw_fomo_instances( mask = pred[..., 1:] > self.pred_score_thr mask = np.any(mask, axis=2) mask = np.repeat(np.expand_dims(mask, -1), 3, axis=-1) - pred = np.ma.array(pred, - mask=~mask, - keep_mask=True, - copy=True, - fill_value=0) + pred = np.ma.array(pred, mask=~mask, keep_mask=True, copy=True, fill_value=0) pred_max = np.argmax(pred, axis=-1) @@ -149,8 +139,7 @@ def _draw_fomo_instances( idx = pred_max[i[0], i[1]] texts.append(classes[idx - 1]) if len(pred_index): - points = (pred_index + 0.5) / np.asarray( - [H, W]) * np.asarray(ori_shape) + points = (pred_index + 0.5) / np.asarray([H, W]) * np.asarray(ori_shape) self.draw_points(points, colors='r') self.draw_texts(texts, points, font_sizes=30, colors='r') @@ -203,18 +192,20 @@ class SensorClsVisualizer(Visualizer): """ @master_only - def add_datasample(self, - name: str, - data: np.ndarray, - data_sample: Optional[ClsDataSample] = None, - draw_gt: bool = True, - draw_pred: bool = True, - draw_score: bool = True, - show: bool = False, - text_cfg: dict = dict(), - wait_time: float = 0, - out_file: Optional[str] = None, - step: int = 0) -> None: + def add_datasample( + self, + name: str, + data: np.ndarray, + data_sample: Optional[ClsDataSample] = None, + draw_gt: bool = True, + draw_pred: bool = True, + draw_score: bool = True, + show: bool = False, + text_cfg: dict = dict(), + wait_time: float = 0, + out_file: Optional[str] = None, + step: int = 0, + ) -> None: """Draw datasample and save to all backends. 
- If ``out_file`` is specified, all storage backends are ignored @@ -251,13 +242,10 @@ def add_datasample(self, sensors = data_sample.sensors uints = [] - uints = [ - sensor['units'] for sensor in sensors - if sensor['units'] not in uints - ] + uints = [sensor['units'] for sensor in sensors if sensor['units'] not in uints] # slice the data into different sensors - inputs = [data[i::len(sensors)] for i in range(len(sensors))] + inputs = [data[i :: len(sensors)] for i in range(len(sensors))] _, axs = plt.subplots(len(uints), 1) @@ -288,17 +276,12 @@ def add_datasample(self, score_labels = [''] * len(idx) class_labels = [''] * len(idx) if draw_score and 'score' in pred_label: - score_labels = [ - f', {pred_label.score[i].item():.2f}' for i in idx - ] + score_labels = [f', {pred_label.score[i].item():.2f}' for i in idx] if classes is not None: class_labels = [f' ({classes[i]})' for i in idx] - labels = [ - str(idx[i]) + score_labels[i] + class_labels[i] - for i in range(len(idx)) - ] + labels = [str(idx[i]) + score_labels[i] + class_labels[i] for i in range(len(idx))] prefix = 'Prediction: ' texts.append(prefix + ('\n' + ' ' * len(prefix)).join(labels)) @@ -309,8 +292,7 @@ def add_datasample(self, fig.canvas.draw() buf = fig.canvas.tostring_rgb() w, h = fig.canvas.get_width_height() - image = np.frombuffer(buf, dtype=np.uint8, - count=h * w * 3).reshape(h, w, 3) + image = np.frombuffer(buf, dtype=np.uint8, count=h * w * 3).reshape(h, w, 3) self.set_image(image) drawn_img = self.get_image() @@ -321,4 +303,4 @@ def add_datasample(self, # save the image to the target file instead of vis_backends mmcv.imwrite(drawn_img[..., ::-1], out_file) else: - self.add_image(name, drawn_img, step=step) \ No newline at end of file + self.add_image(name, drawn_img, step=step) diff --git a/tools/analysis/get_featmap.py b/tools/analysis/get_featmap.py index e2544ca1..ca61be04 100644 --- a/tools/analysis/get_featmap.py +++ b/tools/analysis/get_featmap.py @@ -15,48 +15,35 @@ from tools.utils.config import load_config + def parse_args(): parser = argparse.ArgumentParser(description='Visualize feature map') parser.add_argument('config', help='Config file') - parser.add_argument( - '--img', help='Image path, include image file, dir and URL.') + parser.add_argument('--img', help='Image path, include image file, dir and URL.') parser.add_argument('--checkpoint', help='Checkpoint file') - parser.add_argument( - '--out-dir', default='./output', help='Path to output file') + parser.add_argument('--out-dir', default='./output', help='Path to output file') parser.add_argument( '--target-layers', default=['backbone'], nargs='+', type=str, - help='The target layers to get feature map, if not set, the tool will ' - 'specify the backbone') - parser.add_argument( - '--preview-model', - default=False, - action='store_true', - help='To preview all the model layers') - parser.add_argument( - '--device', default='cuda:0', help='Device used for inference') + help='The target layers to get feature map, if not set, the tool will ' 'specify the backbone', + ) + parser.add_argument('--preview-model', default=False, action='store_true', help='To preview all the model layers') + parser.add_argument('--device', default='cuda:0', help='Device used for inference') + parser.add_argument('--score-thr', type=float, default=0.3, help='Bbox score threshold') + parser.add_argument('--show', action='store_true', help='Show the featmap results') parser.add_argument( - '--score-thr', type=float, default=0.3, help='Bbox score threshold') - 
parser.add_argument(
-        '--show', action='store_true', help='Show the featmap results')
-    parser.add_argument(
-        '--channel-reduction',
-        default='select_max',
-        help='Reduce multiple channels to a single channel')
-    parser.add_argument(
-        '--topk',
-        type=int,
-        default=4,
-        help='Select topk channel to show by the sum of each channel')
+        '--channel-reduction', default='select_max', help='Reduce multiple channels to a single channel'
+    )
+    parser.add_argument('--topk', type=int, default=4, help='Select topk channel to show by the sum of each channel')
     parser.add_argument(
         '--arrangement',
         nargs='+',
         type=int,
         default=[2, 2],
-        help='The arrangement of featmap when channel_reduction is '
-        'not None and topk > 0')
+        help='The arrangement of featmap when channel_reduction is ' 'not None and topk > 0',
+    )
     parser.add_argument(
         '--cfg-options',
         nargs='+',
@@ -66,21 +53,20 @@ def parse_args():
         'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
         'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
         'Note that the quotation marks are necessary and that no white space '
-        'is allowed.')
+        'is allowed.',
+    )
     args = parser.parse_args()
     return args
 
 
 class ActivationsWrapper:
-
     def __init__(self, model, target_layers):
         self.model = model
         self.activations = []
        self.handles = []
         self.image = None
         for target_layer in target_layers:
-            self.handles.append(
-                target_layer.register_forward_hook(self.save_activation))
+            self.handles.append(target_layer.register_forward_hook(self.save_activation))
 
     def save_activation(self, module, input, output):
         self.activations.append(output)
@@ -101,9 +87,7 @@ def main():
     # load config
     tmp_folder = tempfile.TemporaryDirectory()
     # Modify and create temporary configuration files
-    config_data = load_config(args.config,
-                              folder=tmp_folder.name,
-                              cfg_options=args.cfg_options)
+    config_data = load_config(args.config, folder=tmp_folder.name, cfg_options=args.cfg_options)
     # load temporary configuration files
     cfg = Config.fromfile(config_data)
     tmp_folder.cleanup()
@@ -122,8 +106,10 @@ def main():
 
     if args.preview_model:
         print(model)
-        print('\n This flag is only show model, if you want to continue, '
-              'please remove `--preview-model` to get the feature map.')
+        print(
+            '\n This flag only shows the model; if you want to continue, '
+            'please remove `--preview-model` to get the feature map.'
+        )
         return
 
     target_layers = []
@@ -175,16 +161,14 @@ def main():
             show=False,
             wait_time=0,
             out_file=None,
-            pred_score_thr=args.score_thr)
+            pred_score_thr=args.score_thr,
+        )
         drawn_img = visualizer.get_image()
 
         for featmap in flatten_featmaps:
             shown_img = visualizer.draw_featmap(
-                featmap[0],
-                drawn_img,
-                channel_reduction=channel_reduction,
-                topk=args.topk,
-                arrangement=args.arrangement)
+                featmap[0], drawn_img, channel_reduction=channel_reduction, topk=args.topk, arrangement=args.arrangement
+            )
             shown_imgs.append(shown_img)
 
     shown_imgs = auto_arrange_images(shown_imgs)
@@ -197,8 +181,7 @@ def main():
         visualizer.show(shown_imgs)
 
     if not args.show:
-        print(f'All done!'
-              f'\nResults have been saved at {os.path.abspath(args.out_dir)}')
+        print(f'All done!' f'\nResults have been saved at {os.path.abspath(args.out_dir)}')
 
 
 # Please refer to the usage tutorial:
diff --git a/tools/analysis/get_flops.py b/tools/analysis/get_flops.py
index e2df3afa..42d11a4b 100644
--- a/tools/analysis/get_flops.py
+++ b/tools/analysis/get_flops.py
@@ -19,20 +19,13 @@
 def parse_args():
     parser = argparse.ArgumentParser(description='Get a detector flops')
     parser.add_argument('config', help='train config file path')
+    parser.add_argument('--shape', type=int, nargs='+', default=[], help='input image size')
     parser.add_argument(
-        '--shape',
-        type=int,
-        nargs='+',
-        default=[],
-        help='input image size')
-    parser.add_argument(
-        '--show-arch',
-        action='store_true',
-        help='whether return the statistics in the form of network layers')
+        '--show-arch', action='store_true', help='whether to return the statistics in the form of network layers'
+    )
     parser.add_argument(
-        '--not-show-table',
-        action='store_true',
-        help='whether return the statistics in the form of table'),
+        '--not-show-table', action='store_true', help='whether to return the statistics in the form of table'
+    ),
     parser.add_argument(
         '--cfg-options',
         nargs='+',
@@ -42,7 +35,8 @@ def parse_args():
         'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
         'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
         'Note that the quotation marks are necessary and that no white space '
-        'is allowed.')
+        'is allowed.',
+    )
 
     return parser.parse_args()
 
@@ -54,20 +48,18 @@ def inference(args, logger):
     # load config
     tmp_folder = tempfile.TemporaryDirectory()
     # Modify and create temporary configuration files
-    config_data = load_config(args.config,
-                              folder=tmp_folder.name,
-                              cfg_options=args.cfg_options)
+    config_data = load_config(args.config, folder=tmp_folder.name, cfg_options=args.cfg_options)
     # load temporary configuration files
     cfg = Config.fromfile(config_data)
     tmp_folder.cleanup()
-
+
     cfg.work_dir = tempfile.TemporaryDirectory().name
     cfg.log_level = 'WARN'
     if args.cfg_options is not None:
         cfg.merge_from_dict(args.cfg_options)
 
     init_default_scope(cfg.get('default_scope', 'mmyolo'))
-
+
     if len(args.shape) == 1:
         h = w = args.shape[0]
     elif len(args.shape) == 2:
@@ -95,17 +87,18 @@ def inference(args, logger):
         inputs = data['inputs'][0]
     else:
         inputs = data['inputs']
-
-    if inputs.shape[0] != 1:
-        inputs = inputs.unsqueeze(0)  # batch size 1
-
+
+    if inputs.shape[0] != 1:
+        inputs = inputs.unsqueeze(0)  # batch size 1
+
     result = {'ori_shape': (h, w), 'pad_shape': inputs.shape[-2:]}
     outputs = get_model_complexity_info(
         model,
         input_shape=None,
         inputs=inputs,  # the input tensor of the model
         show_table=not args.not_show_table,  # show the complexity table
-        show_arch=args.show_arch)  # show the complexity arch
+        show_arch=args.show_arch,
+    )  # show the complexity arch
     result['flops'] = outputs['flops_str']
     result['params'] = outputs['params_str']
 
@@ -131,15 +124,18 @@ def main():
         print(result['out_arch'])  # print related information by network layers
 
     if pad_shape != ori_shape:
-        print(f'{split_line}\nUse size divisor set input shape '
-              f'from {ori_shape} to {pad_shape}')
-
-    print(f'{split_line}\n'
-          f'Input shape: {pad_shape}\nModel Flops: {flops}\n'
-          f'Model Parameters: {params}\n{split_line}')
-    print('!!!Please be cautious if you use the results in papers. 
' - 'You may need to check if all ops are supported and verify ' - 'that the flops computation is correct.') + print(f'{split_line}\nUse size divisor set input shape ' f'from {ori_shape} to {pad_shape}') + + print( + f'{split_line}\n' + f'Input shape: {pad_shape}\nModel Flops: {flops}\n' + f'Model Parameters: {params}\n{split_line}' + ) + print( + '!!!Please be cautious if you use the results in papers. ' + 'You may need to check if all ops are supported and verify ' + 'that the flops computation is correct.' + ) if __name__ == '__main__': diff --git a/tools/dataset_converters/ei2coco.py b/tools/dataset_converters/ei2coco.py index 0d3624c9..330accb6 100644 --- a/tools/dataset_converters/ei2coco.py +++ b/tools/dataset_converters/ei2coco.py @@ -6,8 +6,7 @@ from PIL import Image from datetime import datetime -parser = argparse.ArgumentParser( - description='Edge Impulse => Coco format converter') +parser = argparse.ArgumentParser(description='Edge Impulse => Coco format converter') parser.add_argument('--data-directory', type=str, required=True) parser.add_argument('--out-directory', type=str, required=True) @@ -23,8 +22,8 @@ def current_ms(): return round(time.time() * 1000) -last_printed = current_ms() +last_printed = current_ms() def convert(path, category): @@ -34,7 +33,7 @@ def convert(path, category): converted_images = 0 print('Converting ' + category + ' data...') - + with open(os.path.join(path, 'info.labels'), 'r') as f: X = json.loads(f.read()) @@ -49,14 +48,10 @@ def convert(path, category): "year": datetime.now().strftime("%Y"), "version": "1.0", "description": "Custom model", - "date_created": datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "date_created": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), }, "images": [], - "licenses": [{ - "id": 1, - "name": "Proprietary", - "url": "https://seeedstduio.com" - }], + "licenses": [{"id": 1, "name": "Proprietary", "url": "https://seeedstduio.com"}], "type": "instances", "annotations": [], "categories": [], @@ -76,7 +71,6 @@ def convert(path, category): img_id = len(metadata['images']) + 1 for lable in labels: - if lable['label'] not in classes: classes.append(lable['label']) class_count = class_count + 1 @@ -85,44 +79,43 @@ def convert(path, category): y = lable['y'] w = lable['width'] h = lable['height'] - - - metadata['annotations'].append({ - "id": len(metadata['annotations']) + 1, - "image_id": img_id, - "category_id": classes.index(lable['label']) + 1, - "bbox": [x, y, w, h], - "area": w * h, - "segmentation": [], - "iscrowd": 0 - }) - + + metadata['annotations'].append( + { + "id": len(metadata['annotations']) + 1, + "image_id": img_id, + "category_id": classes.index(lable['label']) + 1, + "bbox": [x, y, w, h], + "area": w * h, + "segmentation": [], + "iscrowd": 0, + } + ) + im = Image.open(os.path.join(path, img_file)) image_height = im.height image_width = im.width - new_img_file = os.path.join(out_dir, category, str(ix + 1).zfill(12) + '.jpg') + new_img_file = os.path.join(out_dir, category, str(ix + 1).zfill(12) + '.jpg') im.save(new_img_file) im.close() - - metadata['images'].append({ - "id": img_id, - "file_name": os.path.basename(new_img_file), - "height": image_height, - "width": image_width, - "date_captured": datetime.now().strftime("%Y-%m-%d %H:%M:%S") - }) + + metadata['images'].append( + { + "id": img_id, + "file_name": os.path.basename(new_img_file), + "height": image_height, + "width": image_width, + "date_captured": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + } + ) converted_images = converted_images + 1 - if 
(converted_images == 1 or current_ms() - last_printed > 3000): + if converted_images == 1 or current_ms() - last_printed > 3000: print('[' + str(converted_images).rjust(zf) + '/' + str(total_images) + '] Converting images...') last_printed = current_ms() for c in range(0, class_count): - metadata['categories'].append({ - "id": c + 1, - "name": classes[c], - "supercategory": "" - }) + metadata['categories'].append({"id": c + 1, "name": classes[c], "supercategory": ""}) with open(annotations_file, 'w') as f: f.write(json.dumps(metadata, indent=4)) diff --git a/tools/export_quantize.py b/tools/export_quantize.py index 7087b93e..ff4ab847 100644 --- a/tools/export_quantize.py +++ b/tools/export_quantize.py @@ -17,8 +17,7 @@ def log_init(): hd = logging.StreamHandler() hd.setLevel(logging.INFO) - formatter = logging.Formatter( - '%(asctime)s - %(name)s - %(levelname)s - %(message)s') + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') hd.setFormatter(formatter) loger.addHandler(hd) return loger @@ -38,28 +37,24 @@ def command(cmd, retry_num=3): def onnx_quant_static(onnx_path): onnx_dir = osp.dirname(onnx_path) onnx_name = osp.basename(onnx_path) - quant_file = osp.join(onnx_dir, - onnx_name.replace('.onnx', '_quant_static.onnx')) - - quantize_static(onnx_path, - quant_file, - quant_format=QuantFormat.QDQ, - optimize_model=False, - calibration_data_reader=Quan_Reader( - './img_e', (112, 112), 'images'), - weight_type=QuantType.QInt8) + quant_file = osp.join(onnx_dir, onnx_name.replace('.onnx', '_quant_static.onnx')) + + quantize_static( + onnx_path, + quant_file, + quant_format=QuantFormat.QDQ, + optimize_model=False, + calibration_data_reader=Quan_Reader('./img_e', (112, 112), 'images'), + weight_type=QuantType.QInt8, + ) loger.info('onnx static succeeded!\nfile in: {}'.format(quant_file)) def onnx_quant_dynamic(onnx_path): onnx_dir = osp.dirname(onnx_path) onnx_name = osp.basename(onnx_path) - quant_file = osp.join(onnx_dir, - onnx_name.replace('.onnx', '_quant_dynamic.onnx')) - quantize_dynamic(onnx_path, - quant_file, - per_channel=True, - weight_type=QuantType.QUInt8) + quant_file = osp.join(onnx_dir, onnx_name.replace('.onnx', '_quant_dynamic.onnx')) + quantize_dynamic(onnx_path, quant_file, per_channel=True, weight_type=QuantType.QUInt8) loger.info('onnx dynamic succeeded!\nfile in: {}'.format(quant_file)) @@ -77,12 +72,10 @@ def ncnn_quant(onnx_path, image_dir='./img_e', img_size=[112, 112, 3]): onnx_name = osp.basename(onnx_path) ncnn_param = osp.join(onnx_dir, onnx_name.replace('.onnx', '.param')) ncnn_bin = osp.join(onnx_dir, onnx_name.replace('.onnx', '.bin')) - ncnn_param_opt = osp.join(onnx_dir, - onnx_name.replace('.onnx', '_opt.param')) + ncnn_param_opt = osp.join(onnx_dir, onnx_name.replace('.onnx', '_opt.param')) ncnn_bin_opt = osp.join(onnx_dir, onnx_name.replace('.onnx', '_opt.bin')) ncnn_table = osp.join(onnx_dir, onnx_name.replace('.onnx', '.table')) - ncnn_param_int8 = osp.join(onnx_dir, - onnx_name.replace('.onnx', '_int8.param')) + ncnn_param_int8 = osp.join(onnx_dir, onnx_name.replace('.onnx', '_int8.param')) ncnn_bin_int8 = osp.join(onnx_dir, onnx_name.replace('.onnx', '_int8.bin')) # check ncnn's .bin and param @@ -90,9 +83,7 @@ def ncnn_quant(onnx_path, image_dir='./img_e', img_size=[112, 112, 3]): export_ncnn(onnx_path) # optimizer model - if command( - f"{ncnnoptimize} {ncnn_param} {ncnn_bin} {ncnn_param_opt} {ncnn_bin_opt} 0" - ): + if command(f"{ncnnoptimize} {ncnn_param} {ncnn_bin} {ncnn_param_opt} {ncnn_bin_opt} 0"): 
loger.info('export optimizer ncnn succeeded!')
     else:
         loger.warning('export optimizer ncnn fail!')
@@ -110,7 +101,7 @@ def ncnn_quant(onnx_path, image_dir='./img_e', img_size=[112, 112, 3]):
         return
 
     if command(
-            f"{ncnn2int8} {ncnn_param_opt} {ncnn_bin_opt} {ncnn_param_int8} {ncnn_bin_int8} {ncnn_table}"
+        f"{ncnn2int8} {ncnn_param_opt} {ncnn_bin_opt} {ncnn_param_int8} {ncnn_bin_int8} {ncnn_table}"
     ):  # quantize model
         loger.info('ncnn quantize succeeded!')
@@ -134,17 +125,14 @@ def ncnn_fp16(onnx_path):
     onnx_name = osp.basename(onnx_path)
     ncnn_param = osp.join(onnx_dir, onnx_name.replace('.onnx', '.param'))
     ncnn_bin = osp.join(onnx_dir, onnx_name.replace('.onnx', '.bin'))
-    ncnn_param_opt = osp.join(onnx_dir,
-                              onnx_name.replace('.onnx', '_fp16.param'))
+    ncnn_param_opt = osp.join(onnx_dir, onnx_name.replace('.onnx', '_fp16.param'))
     ncnn_bin_opt = osp.join(onnx_dir, onnx_name.replace('.onnx', '_fp16.bin'))
 
     # check ncnn's .bin and param
     if os.path.exists(ncnn_bin) and os.path.exists(ncnn_param):
         export_ncnn(onnx_path)
 
-    if command(
-            f"{ncnnoptimize} {ncnn_param} {ncnn_bin} {ncnn_param_opt} {ncnn_bin_opt} 65536"
-    ):
+    if command(f"{ncnnoptimize} {ncnn_param} {ncnn_bin} {ncnn_param_opt} {ncnn_bin_opt} 65536"):
         loger.info('export ncnn fp16 format succeeded!')
     else:
         loger.error('export ncnn fp16 format fail!')
@@ -152,23 +140,22 @@ def ncnn_fp16(onnx_path):
 
 
 def main(args):
-    global onnx2ncnn, ncnnoptimize, ncnn2table,ncnn2int8,ncnnmerge,ncnn
+    global onnx2ncnn, ncnnoptimize, ncnn2table, ncnn2int8, ncnnmerge, ncnn
     func_dict = {
         'onnx_fp16': onnx_fp16,
         'onnx_quan_st': onnx_quant_static,
         'onnx_quan_dy': onnx_quant_dynamic,
         'ncnn': export_ncnn,
         'ncnn_fp16': ncnn_fp16,
-        'ncnn_quan': ncnn_quant
+        'ncnn_quan': ncnn_quant,
     }
-    home=os.environ['HOME']
+    home = os.environ['HOME']
     ncnn_dir = f"{home}/software/ncnn/build"
-    onnx2ncnn = osp.join(ncnn_dir,'tools','onnx','onnx2ncnn')
-    ncnnoptimize = osp.join(ncnn_dir,'tools','ncnnoptimize')
-    ncnn2table = osp.join(ncnn_dir,'tools','quantize','ncnn2table')
-    ncnn2int8 = osp.join(ncnn_dir,'tools','quantize','ncnn2int8')
-    ncnnmerge = osp.join(ncnn_dir,'tools','ncnnmerge')
-
+    onnx2ncnn = osp.join(ncnn_dir, 'tools', 'onnx', 'onnx2ncnn')
+    ncnnoptimize = osp.join(ncnn_dir, 'tools', 'ncnnoptimize')
+    ncnn2table = osp.join(ncnn_dir, 'tools', 'quantize', 'ncnn2table')
+    ncnn2int8 = osp.join(ncnn_dir, 'tools', 'quantize', 'ncnn2int8')
+    ncnnmerge = osp.join(ncnn_dir, 'tools', 'ncnnmerge')
 
     onnx_path = onnx_path = osp.abspath(args.onnx)
     export_type = args.type
@@ -178,9 +165,7 @@ def main(args):
 
     for f in export_type:
         if f not in func_dict.keys():
-            loger.error(
-                f'{f} not in {func_dict.keys()},Please enter the correct export type'
-            )
+            loger.error(f'{f} not in {func_dict.keys()},Please enter the correct export type')
         if f == 'ncnn_quan' and imags_dir:
             func_dict[f](onnx_path, imags_dir)
         else:
@@ -189,16 +174,13 @@ def main(args):
 
 def args_parse():
     args = argparse.ArgumentParser(description='export onnx to other formats.')
-    args.add_argument('--onnx',
-                      default='./weights/best.onnx',
-                      help='onnx model file path')
+    args.add_argument('--onnx', default='./weights/best.onnx', help='onnx model file path')
     args.add_argument('--images', help='calibration data file path')
     args.add_argument(
         '--type',
         nargs='+',
        default=['onnx_quan_dy', 'onnx_quan_st'],
-        help=
-        'from [onnx_fp16, onnx_quan_st, onnx_quan_dy, ncnn, ncnn_fp16, ncnn_quan]'
+        help='from [onnx_fp16, onnx_quan_st, onnx_quan_dy, ncnn, ncnn_fp16, ncnn_quan]',
     )
 
     return args.parse_args()
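For reference, a minimal sketch (illustrative, not part of this patch) of the onnxruntime call that `onnx_quant_dynamic` above wraps; the model paths are placeholders.

from onnxruntime.quantization import QuantType, quantize_dynamic

quantize_dynamic(
    'weights/best.onnx',                # float32 input model (placeholder path)
    'weights/best_quant_dynamic.onnx',  # quantized output model
    per_channel=True,
    weight_type=QuantType.QUInt8,       # same settings as onnx_quant_dynamic uses
)

diff --git a/tools/quan_test.py 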
b/tools/quan_test.py index a78b19dd..276dacb5 100644 --- a/tools/quan_test.py +++ b/tools/quan_test.py @@ -1,5 +1,5 @@ import os -from typing import List,AnyStr +from typing import List, AnyStr import ncnn import numpy as np @@ -8,7 +8,7 @@ from PIL import Image import onnxruntime import tensorflow as tf -from torchvision.transforms import ToTensor,Resize,Grayscale,Compose +from torchvision.transforms import ToTensor, Resize, Grayscale, Compose input_name = 'images' output_name = 'output' @@ -23,14 +23,14 @@ def read_img(p): return img, mat_img -class Inter(): +class Inter: def __init__(self, model: List or AnyStr): if isinstance(model, list): net = ncnn.Net() for p in model: - if p.endswith('param'): + if p.endswith('param'): param = p - if p.endswith('bin'): + if p.endswith('bin'): bin = p net.load_param(param) net.load_model(bin) @@ -66,7 +66,7 @@ def __call__(self, img: np.array, input_name: AnyStr = 'input', output_name: Any extra = self.inter.create_extractor() extra.input(input_name, ncnn.Mat(img[0])) result = extra.extract(output_name)[1] - result = [result[i]for i in range(len(result))] + result = [result[i] for i in range(len(result))] else: # tf input_, output = self.inter.get_input_details()[0], self.inter.get_output_details()[0] int8 = input_['dtype'] == np.int8 or input_['dtype'] == np.uint8 diff --git a/tools/utils/config.py b/tools/utils/config.py index 6bbfe459..967a3cb2 100644 --- a/tools/utils/config.py +++ b/tools/utils/config.py @@ -15,42 +15,36 @@ def dump_config_to_log_dir(self) -> None: def replace(data: str, args: Optional[dict] = None) -> str: - """ + """ Replace the basic configuration items in the configuration file - + Args: data(str): the string to be replaced args(dict): the replaced value - + Returns: data(str): the replaced string """ - if not args: + if not args: return data for key, value in args.items(): if isinstance(value, (int, float)): - data = re.sub(f"^{key}\s?=\s?[^,{key}].*?[^,{key}].*?$\n", - f'{key}={value}\n', - data, - flags=re.MULTILINE) + data = re.sub(f"^{key}\s?=\s?[^,{key}].*?[^,{key}].*?$\n", f'{key}={value}\n', data, flags=re.MULTILINE) else: value = value.replace('\\', '/') - data = re.sub(f"^{key}\s?=\s?['\"]{{1}}.*?['\"]{{1}}.*?$\n", - f'{key}="{value}"\n', - data, - flags=re.MULTILINE) + data = re.sub(f"^{key}\s?=\s?['\"]{{1}}.*?['\"]{{1}}.*?$\n", f'{key}="{value}"\n', data, flags=re.MULTILINE) return data def replace_base_(data: str, base: Union[str, Sequence[str]]) -> str: """ Replace the _base_ configuration item in the configuration file - + Args: data(str): the string to be replaced base(str|[str]): the replaced value - - Returns: + + Returns: data(str): the replaced string """ if isinstance(base, str): @@ -63,23 +57,21 @@ def replace_base_(data: str, base: Union[str, Sequence[str]]) -> str: return data -def load_config(filename: str, - folder: str, - cfg_options: Optional[dict] = None) -> str: +def load_config(filename: str, folder: str, cfg_options: Optional[dict] = None) -> str: """ - Load the configuration file and modify the value in cfg-options at the - same time, write the modified file to the temporary file, and finally store + Load the configuration file and modify the value in cfg-options at the + same time, write the modified file to the temporary file, and finally store the modified file in the temporary path and return the corresponding path - + Args: filename: configuration file path - cfg_options: Parameters passed on the command line to modify the + cfg_options: Parameters passed on the command line to modify 
the configuration file
-        folder: The path to the temporary folder, all temporary files in 
+        folder: The path to the temporary folder, all temporary files in
            the function will be stored in this folder
-    
+
     Returns:
-        cfg_path: The path of the replaced temporary file is equal to the 
+        cfg_path: The path of the replaced temporary file is equal to the
            path of the corresponding file after filename is modified
     """
     with open(filename, 'r', encoding='gb2312') as f:
@@ -162,12 +154,13 @@ def replace_value(cfg):
             for key, value in zip(keys, values):
                 # the format of string cfg is
                 # "xxx${key}xxx" or "xxx${key1}xxx${key2}xxx"
-                assert not isinstance(value, (dict, list, tuple)), \
-                    f'for the format of string cfg is ' \
-                    f"'xxxxx${key}xxxxx' or 'xxx${key}xxx${key}xxx', " \
-                    f"the type of the value of '${key}' " \
-                    f'can not be dict, list, or tuple' \
+                assert not isinstance(value, (dict, list, tuple)), (
+                    f'for the format of string cfg is '
+                    f"'xxxxx${key}xxxxx' or 'xxx${key}xxx${key}xxx', "
+                    f"the type of the value of '${key}' "
+                    f'can not be dict, list, or tuple'
                     f'but you input {type(value)} in {cfg}'
+                )
                 cfg = cfg.replace(key, str(value))
             return cfg
         else:
@@ -176,10 +169,9 @@ def replace_value(cfg):
     # the pattern of string "${key}"
     pattern_key = re.compile(r'\$\{[a-zA-Z\d_.]*\}')
     # the type of ori_cfg._cfg_dict is mmcv.utils.config.ConfigDict
-    updated_cfg = Config(replace_value(ori_cfg._cfg_dict),
-                         cfg_text=ori_cfg._text)
+    updated_cfg = Config(replace_value(ori_cfg._cfg_dict), cfg_text=ori_cfg._text)
     # replace the model with model_wrapper
     if updated_cfg.get('model_wrapper', None) is not None:
         updated_cfg.model = updated_cfg.model_wrapper
         updated_cfg.pop('model_wrapper')
-    return updated_cfg
\ No newline at end of file
+    return updated_cfg
diff --git a/tools/utils/inference.py b/tools/utils/inference.py
index 3baf8af3..a1f1c69b 100644
--- a/tools/utils/inference.py
+++ b/tools/utils/inference.py
@@ -21,8 +21,7 @@
 from .iot_camera import IoTCamera
 
 
-class Inter():
-
+class Inter:
     def __init__(self, model: List or AnyStr or Tuple):
         if isinstance(model, list):
             try:
@@ -33,9 +32,9 @@ def __init__(self, model: List or AnyStr or Tuple):
                 )
             net = ncnn.Net()
             for p in model:
-                if p.endswith('param'): param = p
-                if p.endswith('bin'): bin = p
+                if p.endswith('param'):
+                    param = p
+                if p.endswith('bin'):
+                    bin = p
             net.load_param(param)
             net.load_model(bin)
@@ -52,11 +51,12 @@ def __init__(self, model: List or AnyStr or Tuple):
                 net = onnx.load(model)
                 onnx.checker.check_model(net)
             except Exception:
-                raise ValueError(
-                    'onnx file have error,please check your onnx export code!')
-            providers = [
-                'CUDAExecutionProvider', 'CPUExecutionProvider'
-            ] if torch.cuda.is_available() else ['CPUExecutionProvider']
+                raise ValueError('onnx file has errors, please check your onnx export code!')
+            providers = (
+                ['CUDAExecutionProvider', 'CPUExecutionProvider']
+                if torch.cuda.is_available()
+                else ['CPUExecutionProvider']
+            )
             net = onnxruntime.InferenceSession(model, providers=providers)
 
             self._input_shape = net.get_inputs()[0].shape[1:]
@@ -84,11 +84,13 @@
     def input_shape(self):
         return self._input_shape
 
-    def __call__(self,
-                 img: Union[np.array, torch.Tensor],
-                 input_name: AnyStr = 'input',
-                 output_name: AnyStr = 'output',
-                 result_num=1):
+    def __call__(
+        self,
+        img: Union[np.array, torch.Tensor],
+        input_name: AnyStr = 'input',
+        output_name: AnyStr = 'output',
+        result_num=1,
+    ):
         # img.resize_(3,192,192)
         if len(img.shape) == 2:  # audio
             if img.shape[1] > 10:  # (1, 8192) to (8192, 1)
@@ -112,18 +114,18 @@ def __call__(self,
             raise ValueError
         results = []
         if self.engine == 'onnx':  # onnx
-            result = self.inter.run([self.inter.get_outputs()[0].name],
-                                    {self.inter.get_inputs()[0].name: img})[0]
+            result = self.inter.run([self.inter.get_outputs()[0].name], {self.inter.get_inputs()[0].name: img})[0]
             results.append(result)
         elif self.engine == 'ncnn':  # ncnn
             self.inter.opt.use_vulkan_compute = False
             extra = self.inter.create_extractor()
-            extra.input(input_name, ncnn.Mat(img[0]))  # noqa
+            extra.input(input_name, ncnn.Mat(img[0]))  # noqa
             result = extra.extract(output_name)[1]
             result = [result[i] for i in range(len(result))]
         else:  # tf
             input_, outputs = self.inter.get_input_details()[0], (
-                self.inter.get_output_details()[0] for i in range(result_num))
+                self.inter.get_output_details()[0] for i in range(result_num)
+            )
             int8 = input_['dtype'] == np.int8 or input_['dtype'] == np.uint8
             img = img.transpose(0, 2, 3, 1) if len(img.shape) == 4 else img
             if int8:
@@ -143,14 +145,11 @@ def __call__(self,
 
 IMG_SUFFIX = ('.jpg', '.png', '.PNG', '.jpeg')
 VIDEO_SUFFIX = ('.avi', '.mp4', '.mkv', '.flv', '.wmv', '.3gp')
-IOT_DEVICE = ('sensorcap', )
+IOT_DEVICE = ('sensorcap',)
 
 
 class DataStream:
-
-    def __init__(self,
-                 source: Union[int, str],
-                 shape: Optional[int or Tuple[int, int]] = None) -> None:
+    def __init__(self, source: Union[int, str], shape: Optional[int or Tuple[int, int]] = None) -> None:
         if shape:
             self.gray = True if shape[-1] == 1 else False
             self.shape = shape[:-1]
@@ -162,10 +161,7 @@ def __init__(self,
 
         if isinstance(source, str):
             if osp.isdir(source):
-                self.file = [
-                    osp.join(source, f) for f in os.listdir(source)
-                    if f.lower().endswith(IMG_SUFFIX)
-                ]
+                self.file = [osp.join(source, f) for f in os.listdir(source) if f.lower().endswith(IMG_SUFFIX)]
                 self.l = len(self.file)
                 self.file = iter(self.file)
@@ -174,8 +170,7 @@ def __init__(self,
                 self.file = [source]
                 self.l = len(self.file)
                 self.file = iter(self.file)
-            elif any(
-                [source.lower().endswith(mat) for mat in VIDEO_SUFFIX]):
+            elif any([source.lower().endswith(mat) for mat in VIDEO_SUFFIX]):
                 self.cap = cv2.VideoCapture(source)
             elif source.isdigit():
                 self.cap = cv2.VideoCapture(int(source))
@@ -197,10 +192,7 @@ def __iter__(self):
     def __next__(self):
         if self.file:
             f = next(self.file)
-            img = load_image(f,
-                             shape=self.shape,
-                             mode='GRAY' if self.gray else 'RGB',
-                             normalized=True)
+            img = load_image(f, shape=self.shape, mode='GRAY' if self.gray else 'RGB', normalized=True)
         else:
             while True:
@@ -224,7 +216,7 @@ def __next__(self):
 
 def build_target(pred_shape, ori_shape, gt_bboxs):
     """
-    The target feature map constructed according to the size 
+    The target feature map constructed according to the size
     of the feature map output by the model
     bbox: xyxy
     """
@@ -233,13 +225,12 @@ def build_target(pred_shape, ori_shape, gt_bboxs):
     target_data = torch.zeros(size=(1, *pred_shape))
     target_data[..., 0] = 1
     for b, bboxs in enumerate(gt_bboxs):
-
         for idx, bbox in enumerate(bboxs.bboxes):
             w = (bbox[2] + bbox[0]) / 2 / ori_shape[1]
             h = (bbox[3] + bbox[1]) / 2 / ori_shape[0]
             h, w = int(h.item() * H), int(w.item() * W)
             target_data[0, h, w, 0] = 0  # background
-            target_data[0, h, w, bboxs.labels[idx] + 1] = 1  #label
+            target_data[0, h, w, bboxs.labels[idx] + 1] = 1  # label
 
     return target_data
 
@@ -249,20 +240,20 @@ class Infernce:
     Run inference with onnx, tflite, ncnn and other models
     """
 
-    def __init__(self,
-                 model: List or AnyStr or Tuple,
-                 dataloader: Union[DataLoader, str, int, None] = None,
-                 cfg: Optional[Config] = None,
-                 runner=None,
-                 source: Optional[str] = None,
-                 task: str = 'det',
-                 show: bool = False,
-                 save_dir: Optional[str] = None,
-                 audio: bool = False) -> None:
-
+    def __init__(
+        self,
+        model: List or AnyStr or Tuple,
+        dataloader: Union[DataLoader, str, int, None] = None,
+        cfg: Optional[Config] = None,
+        runner=None,
+        source: Optional[str] = None,
+        task: str = 'det',
+        show: bool = False,
+        save_dir: Optional[str] = None,
+        audio: bool = False,
+    ) -> None:
         # check source data
-        assert not (source is None and dataloader is None
-                    ), 'Both source and dataload cannot be None'
+        assert not (source is None and dataloader is None), 'Both source and dataloader cannot be None'
         self.class_name = dataloader.dataset.METAINFO['classes']
         # load model
@@ -290,8 +281,7 @@ def __init__(self,
         self.init(cfg)
 
     def init(self, cfg):
-        self.evaluator: Evaluator = self.runner.build_evaluator(
-            self.cfg.get('val_evaluator'))
+        self.evaluator: Evaluator = self.runner.build_evaluator(self.cfg.get('val_evaluator'))
 
         if hasattr(cfg.model, "data_preprocessor"):
             self.data_preprocess = MODELS.build(cfg.model.data_preprocessor)
@@ -306,7 +296,6 @@ def test(self) -> None:
         R = []
         F1 = []
         for data in tqdm(self.dataloader):
-
             if not self.source:
                 if hasattr(self, "data_preprocess"):
                     data = self.data_preprocess(data, True)
@@ -328,7 +317,6 @@ def test(self) -> None:
             if self.task == 'pose':
                 show_point(preds, data['data_samples']['image_file'][0])
             elif self.task == 'det':
-
                 if len(preds[0].shape) > 3:
                     preds = preds[0]
                 elif len(preds[0].shape) > 2:
@@ -343,11 +331,7 @@ def test(self) -> None:
                     mask = pred[..., 1:] > 0.7
                     mask = np.any(mask, axis=2)
                     mask = np.repeat(np.expand_dims(mask, -1), C, axis=-1)
-                    pred = np.ma.array(pred,
-                                       mask=~mask,
-                                       keep_mask=True,
-                                       copy=True,
-                                       fill_value=0)
+                    pred = np.ma.array(pred, mask=~mask, keep_mask=True, copy=True, fill_value=0)
 
                     pred_max = np.argmax(pred, axis=-1)
@@ -358,67 +342,49 @@ def test(self) -> None:
                         idx = pred_max[i[0], i[1]]
                         texts.append(idx - 1)
                     if len(pred_index):
-                        points = (pred_index + 0.5) / np.asarray(
-                            [H, W]) * np.asarray(self.input_shape[:-1])
-                        show_point(points,
-                                   img=img,
-                                   labels=texts,
-                                   show=self.show,
-                                   img_file=img_path)
+                        points = (pred_index + 0.5) / np.asarray([H, W]) * np.asarray(self.input_shape[:-1])
+                        show_point(points, img=img, labels=texts, show=self.show, img_file=img_path)
 
                     if not self.source:
                         ori_shape = data['data_samples'][0].ori_shape
                         bboxes = data['data_samples'][0].gt_instances
-                        target = build_target(preds.shape[1:], (96, 96),
-                                              bboxes)
+                        target = build_target(preds.shape[1:], (96, 96), bboxes)
                         data['data_samples'][0].pred_instances = InstanceData(
-                            pred=tuple(
-                                [torch.from_numpy(preds).permute(0, 3, 1, 2)]),
-                            labels=tuple([target]))
+                            pred=tuple([torch.from_numpy(preds).permute(0, 3, 1, 2)]), labels=tuple([target])
+                        )
 
-                        self.evaluator.process(
-                            data_batch=data, data_samples=data['data_samples'])
+                        self.evaluator.process(data_batch=data, data_samples=data['data_samples'])
                 else:
                     # performs nms
-                    bbox, conf, classes = preds[:, :4], preds[:, 4], preds[:,
-                                                                          5:]
-                    preds = NMS(bbox,
-                                conf,
-                                classes,
-                                conf_thres=50,
-                                bbox_format='xywh')
+                    bbox, conf, classes = preds[:, :4], preds[:, 4], preds[:, 5:]
+                    preds = NMS(bbox, conf, classes, conf_thres=50, bbox_format='xywh')
                     # show det result and save result
-                    show_det(preds,
-                             img=img,
-                             img_file=img_path,
-                             class_name=self.class_name,
-                             shape=self.input_shape[:-1],
-                             show=self.show,
-                             save_path=self.save_dir)
+                    show_det(
+                        preds,
+                        img=img,
+                        img_file=img_path,
+                        class_name=self.class_name,
+                        shape=self.input_shape[:-1],
+                        show=self.show,
+                        save_path=self.save_dir,
+                    )
 
                     if not self.source and not self.fomo:
-
                         ori_shape = data['data_samples'][0].ori_shape
                         tmp = preds[:, :4]
-                        tmp[:,
-                            0::2] = tmp[:,
-                                        0::2] / self.input_shape[1] * ori_shape[1]
-                        tmp[:,
-                            1::2] = tmp[:,
-                                        1::2] / self.input_shape[0] * ori_shape[0]
+                        tmp[:, 0::2] = tmp[:, 0::2] / self.input_shape[1] * ori_shape[1]
+                        tmp[:, 1::2] = tmp[:, 1::2] / self.input_shape[0] * ori_shape[0]
                         result.bboxes = tmp
                         result.scores = preds[:, 4]
                         result.labels = preds[:, 5].type(torch.int)
                         # result.img_id = str(data['data_samples'][0].img_id)
-                        for data_sample, pred_instances in zip(
-                                data['data_samples'], [result]):
+                        for data_sample, pred_instances in zip(data['data_samples'], [result]):
                             data_sample.pred_instances = pred_instances
                         samplelist_boxtype2tensor(data)
-                        self.evaluator.process(data_batch=data,
-                                               data_samples=data['data_samples'])
+                        self.evaluator.process(data_batch=data, data_samples=data['data_samples'])
             else:
                 raise ValueError
@@ -440,18 +406,13 @@ def pfld_inference(model, data_loader):
         # parse data
         input = data.dataset['img']
         target = np.expand_dims(data.dataset['keypoints'], axis=0)
-        size = data.dataset['hw']  #.cpu().numpy()
+        size = data.dataset['hw']  # .cpu().numpy()
         input = input.cpu().numpy()
         result = model(input)
         result = np.array(result)
-        result = result if len(result.shape) == 2 else result[
-            None, :]  # onnx shape(2,), tflite shape(1,2)
+        result = result if len(result.shape) == 2 else result[None, :]  # onnx shape(2,), tflite shape(1,2)
         acc = pose_acc(result.copy(), target, size)
-        results.append({
-            'Acc': acc,
-            'pred': result,
-            'image_file': data.dataset['image_file'].data
-        })
+        results.append({'Acc': acc, 'pred': result, 'image_file': data.dataset['image_file'].data})
         prog_bar.update()
     return results
@@ -469,11 +430,7 @@ def audio_inference(model, data_loader):
         # result = result if len(result.shape)==2 else np.expand_dims(result, 0)  # onnx shape(d,), tflite shape(1,d)
         # result = result[0] if len(result.shape)==2 else result
         acc = audio_acc(result, target)
-        results.append({
-            'acc': acc,
-            'pred': result,
-            'image_file': data.dataset['audio_file']
-        })
+        results.append({'acc': acc, 'pred': result, 'image_file': data.dataset['audio_file']})
         prog_bar.update()
     return results
@@ -486,23 +443,26 @@ def fomo_inference(model, data_loader):
         input = input.cpu().numpy()
         target = data.dataset['target']
         result = model(input)
-        results.append({
-            'pred': result,
-            'target': target,
-        })
+        results.append(
+            {
+                'pred': result,
+                'target': target,
+            }
+        )
         prog_bar.update()
     return results
 
 
-def show_point(keypoints: Union[np.ndarray, Sequence[Sequence[int]],
-                                None] = None,
-               img: Optional[np.ndarray] = None,
-               img_file: Optional[str] = None,
-               shape: Optional[Sequence[int]] = None,
-               labels: Sequence[str] = None,
-               win_name: str = 'test',
-               save_path: bool = False,
-               show: bool = False):
+def show_point(
+    keypoints: Union[np.ndarray, Sequence[Sequence[int]], None] = None,
+    img: Optional[np.ndarray] = None,
+    img_file: Optional[str] = None,
+    shape: Optional[Sequence[int]] = None,
+    labels: Sequence[str] = None,
+    win_name: str = 'test',
+    save_path: bool = False,
+    show: bool = False,
+):
     # load image
     if isinstance(img, np.ndarray):
         img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
@@ -510,15 +470,11 @@ def show_point(
         img = load_image(img_file, shape=shape, mode='BGR').copy()
 
     for idx, point in enumerate(keypoints):
        img = cv2.circle(img, (int(point[0]), int(point[1])),
5, (255, 0, 0), - -1) + img = cv2.circle(img, (int(point[0]), int(point[1])), 5, (255, 0, 0), -1) if labels: - cv2.putText(img, - str(labels[idx]), (int(point[0]), int(point[1])), - 1, - color=(0, 0, 255), - thickness=1, - fontScale=1) + cv2.putText( + img, str(labels[idx]), (int(point[0]), int(point[1])), 1, color=(0, 0, 255), thickness=1, fontScale=1 + ) if show: cv2.imshow(win_name, img) cv2.waitKey(500) @@ -528,17 +484,17 @@ def show_point(keypoints: Union[np.ndarray, Sequence[Sequence[int]], cv2.imwrite(osp.join(save_path, img_name), img) -def show_det(pred: np.ndarray, - img: Optional[np.ndarray] = None, - img_file: Optional[str] = None, - win_name='Detection', - class_name=None, - shape=None, - save_path=False, - show=False) -> np.ndarray: - - assert not (img is None and img_file is None - ), "The img and img_file parameters cannot both be None" +def show_det( + pred: np.ndarray, + img: Optional[np.ndarray] = None, + img_file: Optional[str] = None, + win_name='Detection', + class_name=None, + shape=None, + save_path=False, + show=False, +) -> np.ndarray: + assert not (img is None and img_file is None), "The img and img_file parameters cannot both be None" # load image if isinstance(img, np.ndarray): @@ -550,18 +506,8 @@ def show_det(pred: np.ndarray, for i in pred: x1, y1, x2, y2 = map(int, i[:4]) img = cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 1) - cv2.putText(img, - class_name[int(i[5])], (x1, y1), - 1, - color=(0, 0, 255), - thickness=1, - fontScale=1) - cv2.putText(img, - str(round(i[4].item(), 2)), (x1, y1 - 15), - 1, - color=(0, 0, 255), - thickness=1, - fontScale=1) + cv2.putText(img, class_name[int(i[5])], (x1, y1), 1, color=(0, 0, 255), thickness=1, fontScale=1) + cv2.putText(img, str(round(i[4].item(), 2)), (x1, y1 - 15), 1, color=(0, 0, 255), thickness=1, fontScale=1) print(pred) if show: cv2.imshow(win_name, img) @@ -570,7 +516,7 @@ def show_det(pred: np.ndarray, if save_path: img_name = osp.basename(img_file) cv2.imwrite(osp.join(save_path, img_name), img) - + return pred @@ -579,4 +525,4 @@ def show_det(pred: np.ndarray, data = iter(data) for img in data: cv2.imshow('aaa', img) - cv2.waitKey(0) \ No newline at end of file + cv2.waitKey(0) diff --git a/tools/utils/iot_camera.py b/tools/utils/iot_camera.py index 06e3a783..2463cd89 100644 --- a/tools/utils/iot_camera.py +++ b/tools/utils/iot_camera.py @@ -14,8 +14,7 @@ ProductId = [0x8060, 0x8061] -class IoTCamera(): - +class IoTCamera: def __init__(self, device_id=0): self.ProductId = [] @@ -43,25 +42,16 @@ def start(self): break def read_data(self): - # Device not present, or user is not allowed to access device. with self.handle.claimInterface(2): # Do stuff with endpoints on claimed interface. self.handle.setInterfaceAltSetting(2, 0) - self.handle.controlRead(0x01 << 5, - request=0x22, - value=0x01, - index=2, - length=2048, - timeout=1000) + self.handle.controlRead(0x01 << 5, request=0x22, value=0x01, index=2, length=2048, timeout=1000) # Build a list of transfer objects and submit them to prime the pump. transfer_list = [] for _ in range(1): transfer = self.handle.getTransfer() - transfer.setBulk(usb1.ENDPOINT_IN | 2, - 2048, - callback=self.processReceivedData, - timeout=1000) + transfer.setBulk(usb1.ENDPOINT_IN | 2, 2048, callback=self.processReceivedData, timeout=1000) transfer.submit() transfer_list.append(transfer) # Loop as long as there is at least one submitted transfer. 
@@ -70,13 +60,10 @@ def read_data(self): self.context.handleEvents() def pare_data(self, data: bytearray): - - if len(data) == 8 and int.from_bytes(bytes(data[:4]), - 'big') == WEBUSB_JPEG_MAGIC: + if len(data) == 8 and int.from_bytes(bytes(data[:4]), 'big') == WEBUSB_JPEG_MAGIC: self.expect_size = int.from_bytes(bytes(data[4:]), 'big') self.buff = bytearray() - elif len(data) == 8 and int.from_bytes(bytes(data[:4]), - 'big') == WEBUSB_TEXT_MAGIC: + elif len(data) == 8 and int.from_bytes(bytes(data[:4]), 'big') == WEBUSB_TEXT_MAGIC: self.expect_size = int.from_bytes(bytes(data[4:]), 'big') self.buff = bytearray() else: @@ -113,7 +100,7 @@ def processReceivedData(self, transfer): # transfer.close() return - data = transfer.getBuffer()[:transfer.getActualLength()] + data = transfer.getBuffer()[: transfer.getActualLength()] # Process data... self.pare_data(data) # Resubmit transfer once data is processed. @@ -127,12 +114,7 @@ def connect(self): return False with self.handle.claimInterface(2): self.handle.setInterfaceAltSetting(2, 0) - self.handle.controlRead(0x01 << 5, - request=0x22, - value=0x01, - index=2, - length=2048, - timeout=1000) + self.handle.controlRead(0x01 << 5, request=0x22, value=0x01, index=2, length=2048, timeout=1000) print('device is connected') return True @@ -141,15 +123,9 @@ def disconnect(self): print('Resetting device...') with usb1.USBContext() as context: handle = context.getByVendorIDAndProductID( - VendorId, - self.ProductId[self.device_id], - skip_on_error=False).open() - handle.controlRead(0x01 << 5, - request=0x22, - value=0x00, - index=2, - length=2048, - timeout=1000) + VendorId, self.ProductId[self.device_id], skip_on_error=False + ).open() + handle.controlRead(0x01 << 5, request=0x22, value=0x00, index=2, length=2048, timeout=1000) handle.close() print('Device has been reset!') return True @@ -165,30 +141,24 @@ def get_rlease_device(self, did, get=True): product_id = device.getProductID() vendor_id = device.getVendorID() device_addr = device.getDeviceAddress() - bus = '->'.join( - str(x) for x in ['Bus %03i' % (device.getBusNumber(), )] + - device.getPortNumberList()) + bus = '->'.join(str(x) for x in ['Bus %03i' % (device.getBusNumber(),)] + device.getPortNumberList()) if vendor_id == VendorId and product_id in ProductId and tmp == did: self.ProductId.append(product_id) print( - '\r' + - f'\033[4;31mID {vendor_id:04x}:{product_id:04x} {bus} Device {device_addr} \033[0m', - end='') + '\r' + f'\033[4;31mID {vendor_id:04x}:{product_id:04x} {bus} Device {device_addr} \033[0m', end='' + ) if get: return device.open() else: device.close() print( - '\r' + - f'\033[4;31mID {vendor_id:04x}:{product_id:04x} {bus} Device {device_addr} CLOSED\033[0m', - flush=True) + '\r' + + f'\033[4;31mID {vendor_id:04x}:{product_id:04x} {bus} Device {device_addr} CLOSED\033[0m', + flush=True, + ) elif vendor_id == VendorId and product_id in ProductId: self.ProductId.append(product_id) - print( - f'\033[0;31mID {vendor_id:04x}:{product_id:04x} {bus} Device {device_addr}\033[0m' - ) + print(f'\033[0;31mID {vendor_id:04x}:{product_id:04x} {bus} Device {device_addr}\033[0m') tmp = tmp + 1 else: - print( - f'ID {vendor_id:04x}:{product_id:04x} {bus} Device {device_addr}' - ) + print(f'ID {vendor_id:04x}:{product_id:04x} {bus} Device {device_addr}') diff --git a/tools/utils/quant_read.py b/tools/utils/quant_read.py index 95ee66e6..3783b9e2 100644 --- a/tools/utils/quant_read.py +++ b/tools/utils/quant_read.py @@ -1,43 +1,42 @@ import os import numpy as np from PIL import Image -from 
torchvision.transforms import Compose,ToTensor,Resize,Grayscale +from torchvision.transforms import Compose, ToTensor, Resize, Grayscale from onnxruntime.quantization import CalibrationDataReader -img_format = ['.JPG','.PNG','.JPEG'] +img_format = ['.JPG', '.PNG', '.JPEG'] class Quan_Reader(CalibrationDataReader): - def __init__(self,images_folder,size,input_name,batch_size=1) -> None: + def __init__(self, images_folder, size, input_name, batch_size=1) -> None: # super(CalibrationDataReader).__init__(self) self.images_folder = images_folder self.size = size self.input_name = input_name - self.transfor = Compose([ToTensor(),Grayscale(),Resize(size=size)]) - self.num=0 - + self.transfor = Compose([ToTensor(), Grayscale(), Resize(size=size)]) + self.num = 0 + self.enum_data_dicts = None self.init() - def init(self): file_ls = os.listdir(self.images_folder) - self.file_ls = iter([os.path.join(self.images_folder,i) for i in file_ls if os.path.splitext(i)[-1].upper() in img_format]) - + self.file_ls = iter( + [os.path.join(self.images_folder, i) for i in file_ls if os.path.splitext(i)[-1].upper() in img_format] + ) def get_next(self) -> dict: try: - a= next(self.file_ls) + a = next(self.file_ls) if a is None: raise StopIteration - img =self.process_data(a) - return {self.input_name:np.array([img])} + img = self.process_data(a) + return {self.input_name: np.array([img])} except Exception: return None - - def process_data(self,file): + def process_data(self, file): img = Image.open(file) img = self.transfor(img).cpu().numpy() - return img \ No newline at end of file + return img From 5f458abc8a6ec59485cd23e24ad3ad7d5b7313d5 Mon Sep 17 00:00:00 2001 From: nullptr Date: Fri, 7 Jul 2023 21:08:09 +0800 Subject: [PATCH 2/3] fix: edgelab import scope error Author: nullptr --- tools/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/inference.py b/tools/inference.py index 60069252..03af8580 100644 --- a/tools/inference.py +++ b/tools/inference.py @@ -304,7 +304,7 @@ def after_test_epoch(self, _, metrics=None): runner.register_hook(SaveMetricHook(), "LOWEST") elif checkpoint_ext in {".tflite", ".onnx"}: - from tools.utils.inference import Infernce + from edgelab.tools.utils.inference import Infernce # TODO: Support inference '.tflite', '.onnx' model on different devices # TODO: Support MMEngine metric hooks From d4c5259f0304fffaad923b10632112ab008d18b4 Mon Sep 17 00:00:00 2001 From: nullptr Date: Fri, 7 Jul 2023 21:22:04 +0800 Subject: [PATCH 3/3] refactor: use max_epochs instead of epochs Author: nullptr --- .../3axes_accelerometer_62.5Hz_1s_classify.py | 4 ++-- .../ali_classiyf_small_8k_8192.py | 2 +- .../mobnetv2_0.35_rep_1bx16_300e_cifar10.py | 4 ++-- .../mobnetv2_0.35_rep_1bx16_300e_cifar100.py | 4 ++-- .../mobnetv2_0.35_rep_1bx16_300e_custom.py | 4 ++-- .../mobnetv2_1.0_1bx16_300e_cifar10.py | 4 ++-- .../mobnetv2_1.0_1bx16_300e_cifar100.py | 4 ++-- .../mobnetv2_1.0_1bx16_300e_custom.py | 4 ++-- .../mobnetv3_small_1bx16_300e_cifar10.py | 4 ++-- .../mobnetv3_small_1bx16_300e_cifar100.py | 4 ++-- .../mobnetv3_small_1bx16_300e_custom.py | 4 ++-- .../fastestdet/fastestdet_shuffv2_spp_voc.py | 2 +- configs/fomo/fomo_mobnetv2_0.35_x8_abl_coco.py | 4 ++-- .../fomo/fomo_mobnetv2_0.35_x8_abl_coco_x.py | 4 ++-- configs/fomo/fomo_mobnetv2_x8_coco.py | 2 +- configs/fomo/fomo_mobnetv2_x8_voc.py | 2 +- configs/pfld/pfld_dan_fpn_x8_192.py | 4 ++-- configs/pfld/pfld_mbv2n_112.py | 4 ++-- configs/yolov3/yolov3_mbv2_416_voc.py | 2 +- configs/yolov5/base_arch.py | 2 +- 
configs/yolov5/yolov5_l_1xb16_300e_coco.py | 2 +- configs/yolov5/yolov5_m_1xb16_300e_coco.py | 2 +- configs/yolov5/yolov5_n_1xb16_300e_coco.py | 2 +- configs/yolov5/yolov5_s_1xb16_300e_coco.py | 2 +- configs/yolov5/yolov5_tiny_1xb16_300e_coco.py | 2 +- configs/yolox/base_arch.py | 2 +- configs/yolox/yolox_tiny_1xb16_300e_coco.py | 2 +- docs/edgeimpulse/ei_ml_blocks.md | 2 +- docs/tutorials/config.md | 8 ++++---- docs/tutorials/export/overview.md | 2 +- docs/tutorials/training/fomo.md | 6 +++--- docs/tutorials/training/pfld.md | 6 +++--- docs/tutorials/training/yolov5.md | 4 ++-- docs/zh_cn/edgeimpulse/ei_ml_blocks.md | 2 +- docs/zh_cn/tutorials/config.md | 4 ++-- docs/zh_cn/tutorials/export/overview.md | 2 +- docs/zh_cn/tutorials/training/fomo.md | 6 +++--- docs/zh_cn/tutorials/training/pfld.md | 6 +++--- docs/zh_cn/tutorials/training/yolov5.md | 4 ++-- edgelab/datasets/yolodataset.py | 1 + .../Google-Colab-PFLD-Grove-Example.ipynb | 18 ++++++++---------- .../Google-Colab-YOLOv5-A1101-Example.ipynb | 14 ++++++-------- scripts/test_functional.sh | 4 ++-- 43 files changed, 84 insertions(+), 87 deletions(-) diff --git a/configs/accelerometer/3axes_accelerometer_62.5Hz_1s_classify.py b/configs/accelerometer/3axes_accelerometer_62.5Hz_1s_classify.py index 77f8209b..28ce4d1e 100644 --- a/configs/accelerometer/3axes_accelerometer_62.5Hz_1s_classify.py +++ b/configs/accelerometer/3axes_accelerometer_62.5Hz_1s_classify.py @@ -78,12 +78,12 @@ # optimizer lr = 0.0005 -epochs = 10 +max_epochs = 10 optim_wrapper = dict(type='OptimWrapper', optimizer=dict(type='Adam', lr=lr, betas=[0.9, 0.99], weight_decay=0)) -train_cfg = dict(by_epoch=True, max_epochs=epochs) +train_cfg = dict(by_epoch=True, max_epochs=max_epochs) val_cfg = dict() test_cfg = dict() diff --git a/configs/audio_classify/ali_classiyf_small_8k_8192.py b/configs/audio_classify/ali_classiyf_small_8k_8192.py index aa8a2cb4..c3477714 100644 --- a/configs/audio_classify/ali_classiyf_small_8k_8192.py +++ b/configs/audio_classify/ali_classiyf_small_8k_8192.py @@ -136,7 +136,7 @@ # optimizer lr = 0.0003 -epochs = 1500 +max_epochs = 1500 find_unused_parameters = True optim_wrapper = dict(optimizer=dict(type='AdamW', lr=lr, betas=(0.9, 0.99), weight_decay=5e-4, eps=1e-7)) diff --git a/configs/classification/mobnetv2_0.35_rep_1bx16_300e_cifar10.py b/configs/classification/mobnetv2_0.35_rep_1bx16_300e_cifar10.py index 01bf4d71..a6ee519c 100644 --- a/configs/classification/mobnetv2_0.35_rep_1bx16_300e_cifar10.py +++ b/configs/classification/mobnetv2_0.35_rep_1bx16_300e_cifar10.py @@ -16,7 +16,7 @@ # optimizer lr = 0.01 -epochs = 300 +max_epochs = 300 model = dict( @@ -101,4 +101,4 @@ auto_scale_lr = dict(base_batch_size=batch_size) -train_cfg = dict(by_epoch=True, max_epochs=epochs) +train_cfg = dict(by_epoch=True, max_epochs=max_epochs) diff --git a/configs/classification/mobnetv2_0.35_rep_1bx16_300e_cifar100.py b/configs/classification/mobnetv2_0.35_rep_1bx16_300e_cifar100.py index 9493e226..2adf7853 100644 --- a/configs/classification/mobnetv2_0.35_rep_1bx16_300e_cifar100.py +++ b/configs/classification/mobnetv2_0.35_rep_1bx16_300e_cifar100.py @@ -16,7 +16,7 @@ # optimizer lr = 0.01 -epochs = 300 +max_epochs = 300 model = dict( @@ -101,4 +101,4 @@ auto_scale_lr = dict(base_batch_size=batch_size) -train_cfg = dict(by_epoch=True, max_epochs=epochs) +train_cfg = dict(by_epoch=True, max_epochs=max_epochs) diff --git a/configs/classification/mobnetv2_0.35_rep_1bx16_300e_custom.py b/configs/classification/mobnetv2_0.35_rep_1bx16_300e_custom.py index 
1c0ff8f2..33f1cd2d 100644 --- a/configs/classification/mobnetv2_0.35_rep_1bx16_300e_custom.py +++ b/configs/classification/mobnetv2_0.35_rep_1bx16_300e_custom.py @@ -16,7 +16,7 @@ # optimizer lr = 0.01 -epochs = 300 +max_epochs = 300 data_preprocessor = dict( type='mmcls.ClsDataPreprocessor', @@ -108,4 +108,4 @@ auto_scale_lr = dict(base_batch_size=batch_size) -train_cfg = dict(by_epoch=True, max_epochs=epochs) +train_cfg = dict(by_epoch=True, max_epochs=max_epochs) diff --git a/configs/classification/mobnetv2_1.0_1bx16_300e_cifar10.py b/configs/classification/mobnetv2_1.0_1bx16_300e_cifar10.py index d936acce..7a5a9b7c 100644 --- a/configs/classification/mobnetv2_1.0_1bx16_300e_cifar10.py +++ b/configs/classification/mobnetv2_1.0_1bx16_300e_cifar10.py @@ -16,7 +16,7 @@ # optimizer lr = 0.01 -epochs = 300 +max_epochs = 300 model = dict( type='edgelab.ImageClassifier', @@ -99,4 +99,4 @@ auto_scale_lr = dict(base_batch_size=batch_size) -train_cfg = dict(by_epoch=True, max_epochs=epochs) +train_cfg = dict(by_epoch=True, max_epochs=max_epochs) diff --git a/configs/classification/mobnetv2_1.0_1bx16_300e_cifar100.py b/configs/classification/mobnetv2_1.0_1bx16_300e_cifar100.py index 1f27d0d1..95fdbb55 100644 --- a/configs/classification/mobnetv2_1.0_1bx16_300e_cifar100.py +++ b/configs/classification/mobnetv2_1.0_1bx16_300e_cifar100.py @@ -16,7 +16,7 @@ # optimizer lr = 0.01 -epochs = 300 +max_epochs = 300 model = dict( type='edgelab.ImageClassifier', @@ -99,4 +99,4 @@ auto_scale_lr = dict(base_batch_size=batch_size) -train_cfg = dict(by_epoch=True, max_epochs=epochs) +train_cfg = dict(by_epoch=True, max_epochs=max_epochs) diff --git a/configs/classification/mobnetv2_1.0_1bx16_300e_custom.py b/configs/classification/mobnetv2_1.0_1bx16_300e_custom.py index 21af69ec..ab2476ac 100644 --- a/configs/classification/mobnetv2_1.0_1bx16_300e_custom.py +++ b/configs/classification/mobnetv2_1.0_1bx16_300e_custom.py @@ -16,7 +16,7 @@ # optimizer lr = 0.01 -epochs = 300 +max_epochs = 300 data_preprocessor = dict( type='mmcls.ClsDataPreprocessor', @@ -107,4 +107,4 @@ auto_scale_lr = dict(base_batch_size=batch_size) -train_cfg = dict(by_epoch=True, max_epochs=epochs) +train_cfg = dict(by_epoch=True, max_epochs=max_epochs) diff --git a/configs/classification/mobnetv3_small_1bx16_300e_cifar10.py b/configs/classification/mobnetv3_small_1bx16_300e_cifar10.py index 89dcec51..795e337c 100644 --- a/configs/classification/mobnetv3_small_1bx16_300e_cifar10.py +++ b/configs/classification/mobnetv3_small_1bx16_300e_cifar10.py @@ -16,7 +16,7 @@ # optimizer lr = 0.01 -epochs = 300 +max_epochs = 300 model = dict( type='edgelab.ImageClassifier', @@ -93,4 +93,4 @@ auto_scale_lr = dict(base_batch_size=batch_size) -train_cfg = dict(by_epoch=True, max_epochs=epochs) +train_cfg = dict(by_epoch=True, max_epochs=max_epochs) diff --git a/configs/classification/mobnetv3_small_1bx16_300e_cifar100.py b/configs/classification/mobnetv3_small_1bx16_300e_cifar100.py index 8a4c3106..93b96301 100644 --- a/configs/classification/mobnetv3_small_1bx16_300e_cifar100.py +++ b/configs/classification/mobnetv3_small_1bx16_300e_cifar100.py @@ -16,7 +16,7 @@ # optimizer lr = 0.01 -epochs = 300 +max_epochs = 300 model = dict( type='edgelab.ImageClassifier', @@ -93,4 +93,4 @@ auto_scale_lr = dict(base_batch_size=batch_size) -train_cfg = dict(by_epoch=True, max_epochs=epochs) +train_cfg = dict(by_epoch=True, max_epochs=max_epochs) diff --git a/configs/classification/mobnetv3_small_1bx16_300e_custom.py 
b/configs/classification/mobnetv3_small_1bx16_300e_custom.py index 1ec49a10..8da4007b 100644 --- a/configs/classification/mobnetv3_small_1bx16_300e_custom.py +++ b/configs/classification/mobnetv3_small_1bx16_300e_custom.py @@ -16,7 +16,7 @@ # optimizer lr = 0.01 -epochs = 300 +max_epochs = 300 data_preprocessor = dict( type='mmcls.ClsDataPreprocessor', @@ -110,4 +110,4 @@ auto_scale_lr = dict(base_batch_size=batch_size) -train_cfg = dict(by_epoch=True, max_epochs=epochs) +train_cfg = dict(by_epoch=True, max_epochs=max_epochs) diff --git a/configs/fastestdet/fastestdet_shuffv2_spp_voc.py b/configs/fastestdet/fastestdet_shuffv2_spp_voc.py index 81052bef..a276912c 100644 --- a/configs/fastestdet/fastestdet_shuffv2_spp_voc.py +++ b/configs/fastestdet/fastestdet_shuffv2_spp_voc.py @@ -98,7 +98,7 @@ # optimizer lr = 0.001 -epochs = 300 +max_epochs = 300 optimizer = dict(type='SGD', lr=lr, momentum=0.949, weight_decay=0.0005) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/fomo/fomo_mobnetv2_0.35_x8_abl_coco.py b/configs/fomo/fomo_mobnetv2_0.35_x8_abl_coco.py index d319cdaf..8fbe2f1f 100644 --- a/configs/fomo/fomo_mobnetv2_0.35_x8_abl_coco.py +++ b/configs/fomo/fomo_mobnetv2_0.35_x8_abl_coco.py @@ -99,7 +99,7 @@ # data_preprocessor=dict(type='mmdet.DetDataPreprocessor') # optimizer lr = 0.001 -epochs = 100 +max_epochs = 100 find_unused_parameters = True @@ -109,7 +109,7 @@ val_evaluator = dict(type="FomoMetric") test_evaluator = val_evaluator -train_cfg = dict(by_epoch=True, max_epochs=epochs) +train_cfg = dict(by_epoch=True, max_epochs=max_epochs) # learning policy param_scheduler = [ diff --git a/configs/fomo/fomo_mobnetv2_0.35_x8_abl_coco_x.py b/configs/fomo/fomo_mobnetv2_0.35_x8_abl_coco_x.py index 7c8b2cbc..b332581f 100644 --- a/configs/fomo/fomo_mobnetv2_0.35_x8_abl_coco_x.py +++ b/configs/fomo/fomo_mobnetv2_0.35_x8_abl_coco_x.py @@ -99,7 +99,7 @@ # data_preprocessor=dict(type='mmdet.DetDataPreprocessor') # optimizer lr = 0.001 -epochs = 100 +max_epochs = 100 find_unused_parameters = True @@ -109,7 +109,7 @@ val_evaluator = dict(type="FomoMetric") test_evaluator = val_evaluator -train_cfg = dict(by_epoch=True, max_epochs=epochs) +train_cfg = dict(by_epoch=True, max_epochs=max_epochs) # learning policy param_scheduler = [ diff --git a/configs/fomo/fomo_mobnetv2_x8_coco.py b/configs/fomo/fomo_mobnetv2_x8_coco.py index 300e8d18..a416cc5a 100644 --- a/configs/fomo/fomo_mobnetv2_x8_coco.py +++ b/configs/fomo/fomo_mobnetv2_x8_coco.py @@ -90,7 +90,7 @@ # optimizer lr = 0.001 -epochs = 300 +max_epochs = 300 find_unused_parameters = True diff --git a/configs/fomo/fomo_mobnetv2_x8_voc.py b/configs/fomo/fomo_mobnetv2_x8_voc.py index 3be8525f..bf152bfb 100644 --- a/configs/fomo/fomo_mobnetv2_x8_voc.py +++ b/configs/fomo/fomo_mobnetv2_x8_voc.py @@ -85,7 +85,7 @@ # optimizer lr = 0.001 -epochs = 300 +max_epochs = 300 find_unused_parameters = True diff --git a/configs/pfld/pfld_dan_fpn_x8_192.py b/configs/pfld/pfld_dan_fpn_x8_192.py index 8af9b6c6..80c8e237 100644 --- a/configs/pfld/pfld_dan_fpn_x8_192.py +++ b/configs/pfld/pfld_dan_fpn_x8_192.py @@ -92,7 +92,7 @@ test_dataloader = val_dataloader lr = 0.0001 -epochs = 1000 +max_epochs = 1000 evaluation = dict(save_best='loss') optim_wrapper = dict(optimizer=dict(type='Adam', lr=lr, betas=(0.9, 0.99), weight_decay=1e-6)) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) @@ -101,7 +101,7 @@ find_unused_parameters = True -train_cfg = dict(by_epoch=True, max_epochs=epochs) +train_cfg = dict(by_epoch=True, 
max_epochs=max_epochs)
 # learning policy
 param_scheduler = [
     dict(type='LinearLR', begin=0, end=500, start_factor=0.001, by_epoch=False),  # warm-up
diff --git a/configs/pfld/pfld_mbv2n_112.py b/configs/pfld/pfld_mbv2n_112.py
index 0995c0f3..971b39ef 100644
--- a/configs/pfld/pfld_mbv2n_112.py
+++ b/configs/pfld/pfld_mbv2n_112.py
@@ -71,7 +71,7 @@
 test_dataloader = val_dataloader
 
 lr = 0.0001
-epochs = 1000
+max_epochs = 1000
 evaluation = dict(save_best='loss')
 optim_wrapper = dict(optimizer=dict(type='Adam', lr=lr, betas=(0.9, 0.99), weight_decay=1e-6))
 optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
@@ -80,7 +80,7 @@
 
 find_unused_parameters = True
 
-train_cfg = dict(by_epoch=True, max_epochs=epochs)
+train_cfg = dict(by_epoch=True, max_epochs=max_epochs)
 # learning policy
 param_scheduler = [
     dict(type='LinearLR', begin=0, end=500, start_factor=0.001, by_epoch=False),  # warm-up
diff --git a/configs/yolov3/yolov3_mbv2_416_voc.py b/configs/yolov3/yolov3_mbv2_416_voc.py
index 05e188ff..0b3eac4a 100644
--- a/configs/yolov3/yolov3_mbv2_416_voc.py
+++ b/configs/yolov3/yolov3_mbv2_416_voc.py
@@ -124,7 +124,7 @@
 train_cfg = dict(max_epochs=200, val_interval=1)
 # optimizer
 lr = 0.001
-epochs = 300
+max_epochs = 300
 
 find_unused_parameters = True
 
diff --git a/configs/yolov5/base_arch.py b/configs/yolov5/base_arch.py
index ffa53167..93bec5e0 100644
--- a/configs/yolov5/base_arch.py
+++ b/configs/yolov5/base_arch.py
@@ -29,7 +29,7 @@
 # -----train val related-----
 # Base learning rate for optim_wrapper. Corresponding to 8xb16=128 bs
 base_lr = 0.01
 max_epochs = 300  # Maximum training epochs
 
 model_test_cfg = dict(
     # The config of multi-label for multi-class prediction.
diff --git a/configs/yolov5/yolov5_l_1xb16_300e_coco.py b/configs/yolov5/yolov5_l_1xb16_300e_coco.py
index 02ab4ef2..ac1da1b8 100644
--- a/configs/yolov5/yolov5_l_1xb16_300e_coco.py
+++ b/configs/yolov5/yolov5_l_1xb16_300e_coco.py
@@ -23,7 +23,7 @@
 
 # training
 lr = 0.01
-epochs = 300  # Maximum training epochs
+max_epochs = 300  # Maximum training epochs
 
 # ======================modify end======================
 
diff --git a/configs/yolov5/yolov5_m_1xb16_300e_coco.py b/configs/yolov5/yolov5_m_1xb16_300e_coco.py
index 053f76fe..85ebfbe8 100644
--- a/configs/yolov5/yolov5_m_1xb16_300e_coco.py
+++ b/configs/yolov5/yolov5_m_1xb16_300e_coco.py
@@ -23,7 +23,7 @@
 
 # training
 lr = 0.01
-epochs = 300  # Maximum training epochs
+max_epochs = 300  # Maximum training epochs
 
 # ======================modify end======================
 
diff --git a/configs/yolov5/yolov5_n_1xb16_300e_coco.py b/configs/yolov5/yolov5_n_1xb16_300e_coco.py
index 02f48097..64666080 100644
--- a/configs/yolov5/yolov5_n_1xb16_300e_coco.py
+++ b/configs/yolov5/yolov5_n_1xb16_300e_coco.py
@@ -23,7 +23,7 @@
 
 # training
 lr = 0.01
-epochs = 300  # Maximum training epochs
+max_epochs = 300  # Maximum training epochs
 
 # ======================modify end======================
 
diff --git a/configs/yolov5/yolov5_s_1xb16_300e_coco.py b/configs/yolov5/yolov5_s_1xb16_300e_coco.py
index 4cf5652f..167db708 100644
--- a/configs/yolov5/yolov5_s_1xb16_300e_coco.py
+++ b/configs/yolov5/yolov5_s_1xb16_300e_coco.py
@@ -23,7 +23,7 @@
 
 # training
 lr = 0.01
-epochs = 300  # Maximum training epochs
+max_epochs = 300  # Maximum training epochs
 
 # ======================modify end======================
 
diff --git a/configs/yolov5/yolov5_tiny_1xb16_300e_coco.py b/configs/yolov5/yolov5_tiny_1xb16_300e_coco.py
index ae04e464..387e61c1 100644
---
a/configs/yolov5/yolov5_tiny_1xb16_300e_coco.py
+++ b/configs/yolov5/yolov5_tiny_1xb16_300e_coco.py
@@ -23,7 +23,7 @@
 
 # training
 lr = 0.01
-epochs = 300  # Maximum training epochs
+max_epochs = 300  # Maximum training epochs
 
 # ======================modify end======================
 
diff --git a/configs/yolox/base_arch.py b/configs/yolox/base_arch.py
index 7a39436d..e2f5d21c 100644
--- a/configs/yolox/base_arch.py
+++ b/configs/yolox/base_arch.py
@@ -29,7 +29,7 @@
 # -----train val related-----
 # Base learning rate for optim_wrapper. Corresponding to 8xb16=128 bs
 base_lr = 0.01
 max_epochs = 300  # Maximum training epochs
 
 model_test_cfg = dict(
     # The config of multi-label for multi-class prediction.
diff --git a/configs/yolox/yolox_tiny_1xb16_300e_coco.py b/configs/yolox/yolox_tiny_1xb16_300e_coco.py
index f3336ddd..d1a64b3d 100644
--- a/configs/yolox/yolox_tiny_1xb16_300e_coco.py
+++ b/configs/yolox/yolox_tiny_1xb16_300e_coco.py
@@ -17,7 +17,7 @@
 
 # training
 lr = 0.01
-epochs = 300  # Maximum training epochs
+max_epochs = 300  # Maximum training epochs
 
 # ======================modify end======================
 
diff --git a/docs/edgeimpulse/ei_ml_blocks.md b/docs/edgeimpulse/ei_ml_blocks.md
index 19011d6f..787debcd 100644
--- a/docs/edgeimpulse/ei_ml_blocks.md
+++ b/docs/edgeimpulse/ei_ml_blocks.md
@@ -61,7 +61,7 @@ You run this pipeline via Docker. This encapsulates all dependencies and package
 11. Run the container to test the script (you don't need to rebuild the container if you make changes).
 
     ```sh
     docker run --shm-size=1024m --rm -v $PWD:/scripts edgelab-fomo --data-directory data/ --epochs 30 --learning-rate 0.00001 --out-directory out/
     ```
 
 12. This creates a `.tflite` file in the `out` directory.
diff --git a/docs/tutorials/config.md b/docs/tutorials/config.md
index 9cbfe76a..aa5af2c1 100644
--- a/docs/tutorials/config.md
+++ b/docs/tutorials/config.md
@@ -109,7 +109,7 @@
 height=96 # Input image height
 width=96 # Input image width
 batch_size=16 # Batch size of a single GPU during validation
 workers=4 # Worker to pre-fetch data for each single GPU during validation
 epoches=300 # Maximum training epochs: 300 epochs
 lr=0.001 # Learn rate
 ```
 
@@ -220,14 +220,14 @@
 log_config=dict( # Config to register logger hook
     hooks=[
         dict(type='TextLoggerHook', ndigits=4), # TXT logger
         dict(type='TensorboardLoggerHook', ndigits=4) # Tensorboard logger
     ]) # The logger used to record the training process
-epochs=300
+max_epochs=300
 runner=dict(type='EpochBasedRunner', # Type of runner to use (i.e. IterBasedRunner or EpochBasedRunner)
-           max_epochs=epochs) # Runner that runs the workflow in total max_epochs. For IterBasedRunner use `max_iters`
+           max_epochs=max_epochs) # Runner that runs the workflow in total max_epochs. For IterBasedRunner use `max_iters`
 dist_params=dict(backend='nccl') # Parameters to setup distributed training, the port can also be set
 log_level = 'INFO' # The level of logging
 load_from = None # Load models as a pre-trained model from a given path, this will not resume training
 resume_from = None # Resume checkpoints from a given path, the training will be resumed from the epoch when the checkpoint's is saved
 workflow = [('train', 1)] # Workflow for runner.
[('train', 1)] means there is only one workflow and the workflow named 'train' is executed once. The workflow trains the model by 300 epochs according to the total_epochs
 opencv_num_threads = 1 # Disable OpenCV multi-threads to save memory
 work_dir = './work_dirs' # Directory to save the model checkpoints and logs for the current experiments
 ```
diff --git a/docs/tutorials/export/overview.md b/docs/tutorials/export/overview.md
index bc24dc78..4dd839fc 100644
--- a/docs/tutorials/export/overview.md
+++ b/docs/tutorials/export/overview.md
@@ -49,7 +49,7 @@ python3 tools/export.py --help
 #   --algorithm {l2,kl}   TFLite: conversion algorithm
 #   --backend {qnnpack,fbgemm}
 #                         TFLite: conveter backend
 #   --calibration_epochs CALIBRATION_EPOCHS, --calibration-epochs CALIBRATION_EPOCHS
 #                         TFLite: max epoches for quantization calibration
 #   --mean MEAN [MEAN ...]
 #                         TFLite: mean for model input (quantization), range: [0, 1], applied to all channels, using the average if multiple values are provided
diff --git a/docs/tutorials/training/fomo.md b/docs/tutorials/training/fomo.md
index 0f54dfbf..9dc47ae7 100644
--- a/docs/tutorials/training/fomo.md
+++ b/docs/tutorials/training/fomo.md
@@ -20,7 +20,7 @@ We will choose a appropriate configuration file depending on the type of trainin
 
 For the FOMO model example, we use `fomo_mobnetv2_0.35_x8_abl_coco.py` as the configuration file, which is located in the folder under the EdgeLab root directory `configs/fomo` and its additionally inherits the `default_runtime_det.py` configuration file.
 
-For beginners, we recommend to pay attention to the `data_root` and `epochs` parameters in this configuration file at first.
+For beginners, we recommend to pay attention to the `data_root` and `max_epochs` parameters in this configuration file at first.
 
 ::: details `fomo_mobnetv2_0.35_x8_abl_coco.py`
 
@@ -96,7 +96,7 @@
 test_dataloader=val_dataloader
 
 # optimizer
 lr=0.001
-epochs=300
+max_epochs=300
 find_unused_parameters=True
 
 optim_wrapper=dict(optimizer=dict(type='Adam', lr=lr, weight_decay=5e-4,eps=1e-7))
@@ -131,7 +131,7 @@ python3 tools/train.py \
     configs/fomo/fomo_mobnetv2_0.35_x8_abl_coco.py \
     --cfg-options \
         data_root='datasets/mask' \
-        epochs=50
+        max_epochs=50
 ```
 
 During training, the model weights and related log information are saved to the path `work_dirs/fomo_mobnetv2_0.35_x8_abl_coco` by default, and you can use tools such as [TensorBoard](https://www.tensorflow.org/tensorboard/get_started) to monitor for training.
diff --git a/docs/tutorials/training/pfld.md b/docs/tutorials/training/pfld.md
index b1291569..73da28bc 100644
--- a/docs/tutorials/training/pfld.md
+++ b/docs/tutorials/training/pfld.md
@@ -18,7 +18,7 @@ We will choose a appropriate configuration file depending on the type of trainin
 
 For the meter PFLD model example, we use `pfld_mv2n_112.py` as the configuration file, which is located in the folder under the EdgeLab root directory `configs/pfld` and its additionally inherits the `default_runtime_pose.py` configuration file.
 
-For beginners, we recommend to pay attention to the `data_root` and `epochs` parameters in this configuration file at first.
+For beginners, we recommend to pay attention to the `data_root` and `max_epochs` parameters in this configuration file at first.
 
 ::: details `pfld_mv2n_112.py`
 
@@ -89,7 +89,7 @@ val_dataloader=dict(
 test_dataloader=val_dataloader
 
 lr=0.0001
-epochs=300
+max_epochs=300
 evaluation=dict(save_best='loss')
 optim_wrapper=dict(
     optimizer=dict(type='Adam', lr=lr, betas=(0.9, 0.99), weight_decay=1e-6))
@@ -126,7 +126,7 @@ python3 tools/train.py \
     configs/pfld/pfld_mv2n_112.py \
     --cfg-options \
         data_root='datasets/meter' \
-        epochs=50
+        max_epochs=50
 ```
 
 During training, the model weights and related log information are saved to the path `work_dirs/pfld_mv2n_112` by default, and you can use tools such as [TensorBoard](https://www.tensorflow.org/tensorboard/get_started) to monitor for training.
diff --git a/docs/tutorials/training/yolov5.md b/docs/tutorials/training/yolov5.md
index 5720f4d2..8a517631 100644
--- a/docs/tutorials/training/yolov5.md
+++ b/docs/tutorials/training/yolov5.md
@@ -18,7 +18,7 @@ We will choose a appropriate configuration file depending on the type of trainin
 
 For the yolov5 model example, we use `yolov5_tiny_1xb16_300e_coco.py` as the configuration file, which is located in the folder under the EdgeLab root directory `configs/yolov5` and its additionally inherits the `base_arch.py` configuration file.
 
-For beginners, we recommend to pay attention to the `data_root` and `epochs` parameters in this configuration file at first.
+For beginners, we recommend to pay attention to the `data_root` and `max_epochs` parameters in this configuration file at first.
 
 ::: details `yolov5_tiny_1xb16_300e_coco.py`
 
@@ -73,7 +73,7 @@ python3 tools/train.py \
     configs/yolov5/yolov5_tiny_1xb16_300e_coco.py \
     --cfg-options \
         data_root='datasets/digital_meter' \
-        epochs=50
+        max_epochs=50
 ```
 
 During training, the model weights and related log information are saved to the path `work_dirs/yolov5_tiny_1xb16_300e_coco` by default, and you can use tools such as [TensorBoard](https://www.tensorflow.org/tensorboard/get_started) to monitor for training.
diff --git a/docs/zh_cn/edgeimpulse/ei_ml_blocks.md b/docs/zh_cn/edgeimpulse/ei_ml_blocks.md
index eaaa08f6..90d00c5e 100644
--- a/docs/zh_cn/edgeimpulse/ei_ml_blocks.md
+++ b/docs/zh_cn/edgeimpulse/ei_ml_blocks.md
@@ -62,7 +62,7 @@ EdgeLab 中的模型支持在 Edge Impulse 上运行，具体信息见 GitHub
 11. 运行容器来测试脚本 (如果你做了修改，你不需要重建容器)。
 
     ```sh
     docker run --shm-size=1024m --rm -v $PWD:/scripts edgelab-fomo --data-directory data/ --epochs 30 --learning-rate 0.00001 --out-directory out/.
    ```
 
 12.
这将在 `out` 目录下创建一个 `.tflite` 文件。
diff --git a/docs/zh_cn/tutorials/config.md b/docs/zh_cn/tutorials/config.md
index cb9725b8..df96b91b 100644
--- a/docs/zh_cn/tutorials/config.md
+++ b/docs/zh_cn/tutorials/config.md
@@ -220,9 +220,9 @@
 log_config=dict( # 配置注册记录器 Hook
         dict(type='TextLoggerHook', ndigits=4), # TXT 文本日志
         dict(type='TensorboardLoggerHook', ndigits=4) # Tensorboard 日志
     ]) # 记录训练过程的日志
-epochs=300
+max_epochs=300
 runner=dict(type='EpochBasedRunner', # 使用的 runner 类型 (例如 IterBasedRunner 或者 EpochBasedRunner)
-           max_epochs=epochs) # runner 运行 max_epochs 次工作流，对于 IterBasedRunner 使用 max_iters
+           max_epochs=max_epochs) # runner 运行 max_epochs 次工作流，对于 IterBasedRunner 使用 max_iters
 dist_params=dict(backend='nccl') # 设置分布式训练的参数，也可以设置端口
 log_level='INFO' # 日志等级
 load_from=None # 从给定路径加载模型作为预训练模型，不会恢复训练
diff --git a/docs/zh_cn/tutorials/export/overview.md b/docs/zh_cn/tutorials/export/overview.md
index 6f9c5b70..62f952bf 100644
--- a/docs/zh_cn/tutorials/export/overview.md
+++ b/docs/zh_cn/tutorials/export/overview.md
@@ -49,7 +49,7 @@ python3 tools/export.py --help
 #   --algorithm {l2,kl}   TFLite: conversion algorithm
 #   --backend {qnnpack,fbgemm}
 #                         TFLite: conveter backend
 #   --calibration_epochs CALIBRATION_EPOCHS, --calibration-epochs CALIBRATION_EPOCHS
 #                         TFLite: max epoches for quantization calibration
 #   --mean MEAN [MEAN ...]
 #                         TFLite: mean for model input (quantization), range: [0, 1], applied to all channels, using the average if multiple values are provided
diff --git a/docs/zh_cn/tutorials/training/fomo.md b/docs/zh_cn/tutorials/training/fomo.md
index 292fee15..4a1cfd18 100644
--- a/docs/zh_cn/tutorials/training/fomo.md
+++ b/docs/zh_cn/tutorials/training/fomo.md
@@ -20,7 +20,7 @@
 
 对于 FOMO 模型示例，我们使用 `fomo_mobnetv2_0.35_x8_abl_coco.py` 作为配置文件，它位于 EdgeLab 主目录路径 `configs/fomo` 下的文件夹中，并额外继承了 `default_runtime_det.py` 配置文件。
 
-配置文件内容如下，对于初学者，我们建议首先注意该配置文件中 `data_root` 和 `epochs` 这两个参数。
+配置文件内容如下，对于初学者，我们建议首先注意该配置文件中 `data_root` 和 `max_epochs` 这两个参数。
 
 ::: details `fomo_mobnetv2_0.35_x8_abl_coco.py`
 
@@ -96,7 +96,7 @@
 test_dataloader=val_dataloader
 
 # optimizer
 lr=0.001
-epochs=300
+max_epochs=300
 find_unused_parameters=True
 
 optim_wrapper=dict(optimizer=dict(type='Adam', lr=lr, weight_decay=5e-4,eps=1e-7))
@@ -131,7 +131,7 @@ python3 tools/train.py \
     configs/fomo/fomo_mobnetv2_0.35_x8_abl_coco.py \
     --cfg-options \
         data_root='datasets/mask' \
-        epochs=50
+        max_epochs=50
 ```
 
 在训练期间，训练得到的模型权重和相关的日志信息会默认保存至路径 `work_dirs/fomo_mobnetv2_0.35_x8_abl_coco` 下，您可以使用 [TensorBoard](https://www.tensorflow.org/tensorboard/get_started) 等工具事实监测训练情况。
diff --git a/docs/zh_cn/tutorials/training/pfld.md b/docs/zh_cn/tutorials/training/pfld.md
index 8bb68d03..7149d443 100644
--- a/docs/zh_cn/tutorials/training/pfld.md
+++ b/docs/zh_cn/tutorials/training/pfld.md
@@ -18,7 +18,7 @@
 
 对于表计 PFLD 模型示例，我们使用 `pfld_mv2n_112.py` 作为配置文件，它位于 EdgeLab 主目录路径 `configs/pfld` 下的文件夹中，并额外继承了 `default_runtime_pose.py` 配置文件。
 
-配置文件内容如下，对于初学者，我们建议首先注意该配置文件中 `data_root` 和 `epochs` 这两个参数。
+配置文件内容如下，对于初学者，我们建议首先注意该配置文件中 `data_root` 和 `max_epochs` 这两个参数。
 
 ::: details `pfld_mv2n_112.py`
 
@@ -89,7 +89,7 @@ val_dataloader=dict(
 test_dataloader=val_dataloader
 
 lr=0.0001
-epochs=300
+max_epochs=300
 evaluation=dict(save_best='loss')
 optim_wrapper=dict(
     optimizer=dict(type='Adam', lr=lr, betas=(0.9, 0.99), weight_decay=1e-6))
@@ -126,7 +126,7 @@ python3 tools/train.py \
     configs/pfld/pfld_mv2n_112.py \
     --cfg-options \
         data_root='datasets/meter' \
-        epochs=50
+        max_epochs=50
 ```
在训练期间，训练得到的模型权重和相关的日志信息会默认保存至路径 `work_dirs/pfld_mv2n_112` 下，您可以使用 [TensorBoard](https://www.tensorflow.org/tensorboard/get_started) 等工具事实监测训练情况。
diff --git a/docs/zh_cn/tutorials/training/yolov5.md b/docs/zh_cn/tutorials/training/yolov5.md
index e108f948..cf006c5f 100644
--- a/docs/zh_cn/tutorials/training/yolov5.md
+++ b/docs/zh_cn/tutorials/training/yolov5.md
@@ -19,7 +19,7 @@
 
 对于 YOLOv5 模型的例子，我们使用 `yolov5_tiny_1xb16_300e_coco.py` 作为配置文件，它位于EdgeLab根目录 `configs/yolov5` 下的文件夹中，其另外继承了 `base_arch.py` 配置文件。
 
-对于初学者，我们建议首先注意这个配置文件中的 `data_root` 和 `epochs` 参数。
+对于初学者，我们建议首先注意这个配置文件中的 `data_root` 和 `max_epochs` 参数。
 
 ::: details `yolov5_tiny_1xb16_300e_coco.py`
 
@@ -74,7 +74,7 @@ python3 tools/train.py \
     configs/yolov5/yolov5_tiny_1xb16_300e_coco.py \
     --cfg-options \
         data_root='datasets/digital_meter' \
-        epochs=50
+        max_epochs=50
 ```
 
 在训练过程中，模型权重和相关日志信息默认保存在 `work_dirs/yolov5_tiny_1xb16_300e_coco` 路径下，你可以使用 [TensorBoard](https://www.tensorflow.org/tensorboard/get_started) 等工具来监测训练情况。
diff --git a/edgelab/datasets/yolodataset.py b/edgelab/datasets/yolodataset.py
index 28c37077..1dbacb94 100644
--- a/edgelab/datasets/yolodataset.py
+++ b/edgelab/datasets/yolodataset.py
@@ -109,6 +109,7 @@ def __init__(
         if metainfo is None and not self.METAINFO['classes']:
             if not osp.isabs(ann_file) and ann_file:
                 self.ann_file = osp.join(data_root, ann_file)
+            print("=" * 30, self.ann_file)
             with open(self.ann_file, 'r') as f:
                 data = json.load(f)
             if filter_supercat:
diff --git a/notebooks/Google-Colab-PFLD-Grove-Example.ipynb b/notebooks/Google-Colab-PFLD-Grove-Example.ipynb
index 0338c0a3..163c01ae 100644
--- a/notebooks/Google-Colab-PFLD-Grove-Example.ipynb
+++ b/notebooks/Google-Colab-PFLD-Grove-Example.ipynb
@@ -259,7 +259,7 @@
    "\n",
    "- `data_root` - the datasets path, which located at path `datasets/meter`\n",
    "\n",
-    "- `epochs`- the train epochs, we use `50` to reduce the training time\n",
+    "- `max_epochs`- the train epochs, we use `50` to reduce the training time\n",
    "\n",
    "- `load_from` the pre-train weights, make training faster\n",
    "\n",
@@ -279,13 +279,12 @@
    "outputs": [],
    "source": [
     "!${PYTHON_EXEC} tools/train.py \\\n",
-    "    pose \\\n",
     "    configs/pfld/pfld_mbv2n_112.py \\\n",
     "    --cfg-options \\\n",
-    "        epochs=50 \\\n",
+    "        max_epochs=50 \\\n",
     "        num_classes=1 \\\n",
     "        data_root='datasets/meter' \\\n",
-    "        load_from=pre-train/pfld_mv2n_112.pth "
+    "        load_from='pre-train/pfld_mv2n_112.pth'"
   ]
  },
 {
@@ -362,8 +361,7 @@
   },
   "outputs": [],
   "source": [
-    "!${PYTHON_EXEC} tools/test.py \\\n",
-    "    pose \\\n",
+    "!${PYTHON_EXEC} tools/inference.py \\\n",
     "    configs/pfld/pfld_mbv2n_112.py \\\n",
     "    \"$(cat work_dirs/pfld_mbv2n_112/last_checkpoint)\" \\\n",
     "    --dump work_dirs/pfld_mbv2n_112/last_checkpoint.pkl \\\n",
@@ -425,10 +423,10 @@
   },
   "outputs": [],
   "source": [
-    "!${PYTHON_EXEC} tools/torch2tflite.py \\\n",
+    "!${PYTHON_EXEC} tools/export.py \\\n",
     "    configs/pfld/pfld_mbv2n_112.py \\\n",
-    "    --checkpoint $(cat work_dirs/pfld_mbv2n_112/last_checkpoint) \\\n",
-    "    --type int8 \\\n",
+    "    $(cat work_dirs/pfld_mbv2n_112/last_checkpoint) \\\n",
+    "    tflite \\\n",
     "    --cfg-options \\\n",
     "        data_root='datasets/meter'"
   ]
@@ -650,7 +648,7 @@
    "!cd example/grove && \\\n",
    "    ${PYTHON_EXEC} tools/ufconv/uf2conv.py \\\n",
    "    -t 1 \\\n",
-    "    -c \"$(cat ../../work_dirs/pfld_mbv2n_112/last_checkpoint)_int8.tflite\" \\\n",
+    "    -c \"$(cat ../../work_dirs/pfld_mbv2n_112/last_checkpoint | sed -e 's/.pth/_int8.tflite/g')\" \\\n",
    "    -o model.uf2"
   ]
  },
diff --git a/notebooks/Google-Colab-YOLOv5-A1101-Example.ipynb
b/notebooks/Google-Colab-YOLOv5-A1101-Example.ipynb
index 3fd9a9b7..4c4d8f2e 100644
--- a/notebooks/Google-Colab-YOLOv5-A1101-Example.ipynb
+++ b/notebooks/Google-Colab-YOLOv5-A1101-Example.ipynb
@@ -204,7 +204,7 @@
    "\n",
    "- `data_root` - the datasets path, which located at path `datasets/digital_meter`\n",
    "\n",
     "- `max_epochs`- the train epochs, we use `50` to reduce the training time\n",
    "\n",
    "- `num_classes` - the calsses number of datasets, we use `11` here ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'N']\n",
    "\n",
@@ -220,7 +220,6 @@
    "outputs": [],
    "source": [
     "!${PYTHON_EXEC} tools/train.py \\\n",
-    "    det \\\n",
     "    configs/yolov5/yolov5_tiny_1xb16_300e_coco.py \\\n",
     "    --cfg-options \\\n",
     "        max_epochs=50 \\\n",
@@ -290,8 +289,7 @@
   },
   "outputs": [],
   "source": [
-    "!${PYTHON_EXEC} tools/test.py \\\n",
-    "    det \\\n",
+    "!${PYTHON_EXEC} tools/inference.py \\\n",
     "    configs/yolov5/yolov5_tiny_1xb16_300e_coco.py \\\n",
     "    \"$(cat work_dirs/yolov5_tiny_1xb16_300e_coco/last_checkpoint)\" \\\n",
     "    --dump work_dirs/yolov5_tiny_1xb16_300e_coco/last_checkpoint.pkl \\\n",
@@ -345,10 +343,10 @@
   },
   "outputs": [],
   "source": [
-    "!${PYTHON_EXEC} tools/torch2tflite.py \\\n",
+    "!${PYTHON_EXEC} tools/export.py \\\n",
     "    configs/yolov5/yolov5_tiny_1xb16_300e_coco.py \\\n",
-    "    --checkpoint $(cat work_dirs/yolov5_tiny_1xb16_300e_coco/last_checkpoint) \\\n",
-    "    --type int8 \\\n",
+    "    $(cat work_dirs/yolov5_tiny_1xb16_300e_coco/last_checkpoint) \\\n",
+    "    tflite \\\n",
     "    --cfg-options \\\n",
     "        data_root='datasets/digital_meter/' \\\n",
     "        num_classes=11"
@@ -540,7 +538,7 @@
    "!cd example/grove && \\\n",
    "    ${PYTHON_EXEC} tools/ufconv/uf2conv.py \\\n",
    "    -t 18 \\\n",
-    "    -c \"$(cat ../../work_dirs/yolov5_tiny_1xb16_300e_coco/last_checkpoint)_int8.tflite\" \\\n",
+    "    -c \"$(cat ../../work_dirs/yolov5_tiny_1xb16_300e_coco/last_checkpoint | sed -e 's/.pth/_int8.tflite/g')\" \\\n",
    "    -o model.uf2"
   ]
  },
diff --git a/scripts/test_functional.sh b/scripts/test_functional.sh
index 3eb195e2..d89f5307 100644
--- a/scripts/test_functional.sh
+++ b/scripts/test_functional.sh
@@ -47,7 +47,7 @@ functional_test_core()
     CONFIG_FILE_NAME="$(basename -- ${CONFIG_FILE})"
 
     DATASETS_PATH="datasets/$(basename -- ${DATASETS_URL})"
-    DATASETS_DIR="${DATASETS_PATH%.*}"
+    DATASETS_DIR="${DATASETS_PATH%.*}/"
     LAST_CHECKPOINT="work_dirs/${CONFIG_FILE_NAME%.*}/last_checkpoint"
 
     echo -e "CONFIG_FILE=${CONFIG_FILE}"
@@ -76,7 +76,7 @@ functional_test_core()
         "${CONFIG_FILE}" \
         "$(cat ${LAST_CHECKPOINT})" \
         tflite onnx \
-        --calibration_epochs 1 \
+        --calibration-epochs 1 \
        --cfg-options \
             data_root="${DATASETS_DIR}"
     return $?
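
For readers applying this series, the net effect of PATCH 3 on a training config boils down to a minimal sketch like the one below. It is illustrative only: the file name is hypothetical, while the `max_epochs` and `train_cfg` lines mirror the hunks above.

```python
# configs/example_config.py -- hypothetical config after this series.
# The epoch budget is now named `max_epochs` everywhere; `epochs` is gone.
lr = 0.001
max_epochs = 300  # maximum number of training epochs

# The MMEngine-style training loop reads its budget from `max_epochs`:
train_cfg = dict(by_epoch=True, max_epochs=max_epochs)
```

As the updated tutorials and notebooks show, the value can still be overridden at launch time, e.g. `python3 tools/train.py <config> --cfg-options max_epochs=50`.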