diff --git a/scripts/classification/imagenet/train_imagenet.py b/scripts/classification/imagenet/train_imagenet.py
index d9effe9ea4..40f7adb9e5 100644
--- a/scripts/classification/imagenet/train_imagenet.py
+++ b/scripts/classification/imagenet/train_imagenet.py
@@ -108,6 +108,8 @@ def parse_args():
                         help='whether to use group norm.')
     parser.add_argument('--amp', action='store_true',
                         help='Use MXNet AMP for mixed precision training.')
+    parser.add_argument('--auto-layout', action='store_true',
+                        help='Add layout optimization to AMP. Must be used in addition to `--amp`.')
     opt = parser.parse_args()
     return opt
 
@@ -125,8 +127,10 @@ def main():
 
     logger.info(opt)
 
+    assert not opt.auto_layout or opt.amp, "--auto-layout needs to be used with --amp"
+
     if opt.amp:
-        amp.init()
+        amp.init(layout_optimization=opt.auto_layout)
 
     batch_size = opt.batch_size
     classes = 1000
diff --git a/scripts/detection/faster_rcnn/train_faster_rcnn.py b/scripts/detection/faster_rcnn/train_faster_rcnn.py
index c0146b176e..cee450173f 100644
--- a/scripts/detection/faster_rcnn/train_faster_rcnn.py
+++ b/scripts/detection/faster_rcnn/train_faster_rcnn.py
@@ -121,6 +121,8 @@ def parse_args():
                         help='Whether to use static memory allocation. Memory usage will increase.')
     parser.add_argument('--amp', action='store_true',
                         help='Use MXNet AMP for mixed precision training.')
+    parser.add_argument('--auto-layout', action='store_true',
+                        help='Add layout optimization to AMP. Must be used in addition to `--amp`.')
     parser.add_argument('--horovod', action='store_true',
                         help='Use MXNet Horovod for distributed training. Must be run with OpenMPI. '
                              '--gpus is ignored when using --horovod.')
@@ -622,8 +624,10 @@ def train(net, train_data, val_data, eval_metric, batch_size, ctx, args):
     # fix seed for mxnet, numpy and python builtin random generator.
     gutils.random.seed(args.seed)
 
+    assert not args.auto_layout or args.amp, "--auto-layout needs to be used with --amp"
+
     if args.amp:
-        amp.init()
+        amp.init(layout_optimization=args.auto_layout)
 
     # training contexts
     if args.horovod:
diff --git a/scripts/detection/ssd/train_ssd.py b/scripts/detection/ssd/train_ssd.py
index 849d0ee47e..c017d86f21 100644
--- a/scripts/detection/ssd/train_ssd.py
+++ b/scripts/detection/ssd/train_ssd.py
@@ -89,6 +89,8 @@ def parse_args():
                              'Currently supports only COCO.')
     parser.add_argument('--amp', action='store_true',
                         help='Use MXNet AMP for mixed precision training.')
+    parser.add_argument('--auto-layout', action='store_true',
+                        help='Add layout optimization to AMP. Must be used in addition to `--amp`.')
     parser.add_argument('--horovod', action='store_true',
                         help='Use MXNet Horovod for distributed training. Must be run with OpenMPI. '
                              '--gpus is ignored when using --horovod.')
@@ -360,8 +362,10 @@ def train(net, train_data, val_data, eval_metric, ctx, args):
 if __name__ == '__main__':
     args = parse_args()
 
+    assert not args.auto_layout or args.amp, "--auto-layout needs to be used with --amp"
+
     if args.amp:
-        amp.init()
+        amp.init(layout_optimization=args.auto_layout)
 
     if args.horovod:
         hvd.init()
diff --git a/scripts/detection/yolo/train_yolo3.py b/scripts/detection/yolo/train_yolo3.py
index 95684eaa83..fae863bd8e 100644
--- a/scripts/detection/yolo/train_yolo3.py
+++ b/scripts/detection/yolo/train_yolo3.py
@@ -97,6 +97,8 @@ def parse_args():
     parser.add_argument('--label-smooth', action='store_true', help='Use label smoothing.')
     parser.add_argument('--amp', action='store_true',
                         help='Use MXNet AMP for mixed precision training.')
+    parser.add_argument('--auto-layout', action='store_true',
+                        help='Add layout optimization to AMP. Must be used in addition to `--amp`.')
     parser.add_argument('--horovod', action='store_true',
                         help='Use MXNet Horovod for distributed training. Must be run with OpenMPI. '
                              '--gpus is ignored when using --horovod.')
@@ -325,8 +327,10 @@ def train(net, train_data, val_data, eval_metric, ctx, args):
 if __name__ == '__main__':
     args = parse_args()
 
+    assert not args.auto_layout or args.amp, "--auto-layout needs to be used with --amp"
+
     if args.amp:
-        amp.init()
+        amp.init(layout_optimization=args.auto_layout)
 
     if args.horovod:
         if hvd is None:
diff --git a/scripts/instance/mask_rcnn/train_mask_rcnn.py b/scripts/instance/mask_rcnn/train_mask_rcnn.py
index 2ab4f96e13..f9a3e7d267 100644
--- a/scripts/instance/mask_rcnn/train_mask_rcnn.py
+++ b/scripts/instance/mask_rcnn/train_mask_rcnn.py
@@ -124,6 +124,8 @@ def parse_args():
                         help='Whether to use static memory allocation. Memory usage will increase.')
     parser.add_argument('--amp', action='store_true',
                         help='Use MXNet AMP for mixed precision training.')
+    parser.add_argument('--auto-layout', action='store_true',
+                        help='Add layout optimization to AMP. Must be used in addition to `--amp`.')
     parser.add_argument('--horovod', action='store_true',
                         help='Use MXNet Horovod for distributed training. Must be run with OpenMPI. '
                              '--gpus is ignored when using --horovod.')
@@ -700,8 +702,10 @@ def train(net, train_data, val_data, eval_metric, batch_size, ctx, logger, args)
     # fix seed for mxnet, numpy and python builtin random generator.
     gutils.random.seed(args.seed)
 
+    assert not args.auto_layout or args.amp, "--auto-layout needs to be used with --amp"
+
     if args.amp:
-        amp.init()
+        amp.init(layout_optimization=args.auto_layout)
 
     # training contexts
     if args.horovod:
diff --git a/scripts/segmentation/train.py b/scripts/segmentation/train.py
index c18ea96917..d884a1d8f4 100644
--- a/scripts/segmentation/train.py
+++ b/scripts/segmentation/train.py
@@ -8,6 +8,7 @@
 import mxnet as mx
 from mxnet import gluon, autograd
 from mxnet.gluon.data.vision import transforms
+from mxnet.contrib import amp
 
 import gluoncv
 gluoncv.utils.check_version('0.6.0')
@@ -95,6 +96,11 @@ def parse_args():
     # synchronized Batch Normalization
     parser.add_argument('--syncbn', action='store_true', default=False,
                         help='using Synchronized Cross-GPU BatchNorm')
+    # performance related
+    parser.add_argument('--amp', action='store_true',
+                        help='Use MXNet AMP for mixed precision training.')
+    parser.add_argument('--auto-layout', action='store_true',
+                        help='Add layout optimization to AMP. Must be used in addition to `--amp`.')
     # the parser
     args = parser.parse_args()
 
@@ -210,7 +216,12 @@ def __init__(self, args, logger):
                 v.wd_mult = 0.0
 
         self.optimizer = gluon.Trainer(self.net.module.collect_params(), args.optimizer,
-                                       optimizer_params, kvstore=kv)
+                                       optimizer_params, update_on_kvstore=(False if args.amp else None))
+
+
+        if args.amp:
+            amp.init_trainer(self.optimizer)
+
         # evaluation metrics
         self.metric = gluoncv.utils.metrics.SegmentationMetric(trainset.num_class)
 
@@ -222,7 +233,11 @@ def training(self, epoch):
                 outputs = self.net(data.astype(args.dtype, copy=False))
                 losses = self.criterion(outputs, target)
                 mx.nd.waitall()
-                autograd.backward(losses)
+                if args.amp:
+                    with amp.scale_loss(losses, self.optimizer) as scaled_losses:
+                        autograd.backward(scaled_losses)
+                else:
+                    autograd.backward(losses)
             self.optimizer.step(self.args.batch_size)
             for loss in losses:
                 train_loss += np.mean(loss.asnumpy()) / len(losses)
@@ -262,7 +277,10 @@ def save_checkpoint(net, args, epoch, mIoU, is_best=False):
 
 if __name__ == "__main__":
     args = parse_args()
 
+    assert not args.auto_layout or args.amp, "--auto-layout needs to be used with --amp"
+    if args.amp:
+        amp.init(layout_optimization=args.auto_layout)
     # build logger
     filehandler = logging.FileHandler(os.path.join(args.save_dir, args.logging_file))
     streamhandler = logging.StreamHandler()
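For reference, the AMP flow that these patches wire into each script reduces to the short sketch below. It is only a sketch, not code from the PR: it assumes an MXNet 1.x build whose mxnet.contrib.amp accepts the layout_optimization flag used above, and the network, data, context and hyperparameters are stand-in placeholders.

    # Sketch of the AMP + layout-optimization training flow (placeholders, not from the PR).
    import mxnet as mx
    from mxnet import autograd, gluon
    from mxnet.contrib import amp

    # Call once, before building the network (mirrors the scripts, which init
    # right after argument parsing). layout_optimization corresponds to --auto-layout.
    amp.init(layout_optimization=True)

    ctx = mx.gpu(0)  # AMP targets GPU contexts
    net = gluon.model_zoo.vision.resnet18_v1(pretrained=False)
    net.initialize(ctx=ctx)

    # AMP needs local updates, hence update_on_kvstore=False, then init_trainer.
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': 0.1}, update_on_kvstore=False)
    amp.init_trainer(trainer)

    loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
    data = mx.nd.random.uniform(shape=(8, 3, 224, 224), ctx=ctx)   # dummy batch
    label = mx.nd.zeros((8,), ctx=ctx)                             # dummy labels

    with autograd.record():
        out = net(data)
        loss = loss_fn(out, label)
        # Scale the loss to avoid fp16 gradient underflow, then backprop.
        with amp.scale_loss(loss, trainer) as scaled_loss:
            autograd.backward(scaled_loss)
    trainer.step(data.shape[0])

The per-script assert simply enforces the same contract the help text states: --auto-layout is a modifier on top of --amp and does nothing on its own.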