Commit e76d4cd

Clean the code. Add ResNeXt. Add new progress bar.

bearpaw committed Jun 7, 2017
1 parent c869f55 commit e76d4cd

Showing 23 changed files with 329 additions and 1,832 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -3,6 +3,8 @@ checkpoint
data
cifar-debug.py
test.eps
dev
monitor.py

# Byte-compiled / optimized / DLL files
__pycache__/
3 changes: 3 additions & 0 deletions .gitmodules
@@ -0,0 +1,3 @@
[submodule "utils/progress"]
path = utils/progress
url = https://github.com/verigak/progress.git
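Since the new progress bar lives in a git submodule, a fresh checkout needs the submodule fetched before `from utils import Bar` will resolve. The standard git commands (not specific to this repo) are:

```sh
# initialize and fetch utils/progress after cloning
git submodule update --init --recursive
```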
8 changes: 8 additions & 0 deletions TRAINING.md
@@ -0,0 +1,8 @@
# Training recipes

## CIFAR-10

ResNet-110
```sh
CUDA_VISIBLE_DEVICES=0,1 python cifar.py -d cifar10 -a resnet110
```
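The commit also registers a `checkpoints/resnext-8x64d` log and new `--depth`, `--cardinality`, and `--widen-factor` flags. A matching ResNeXt-29 (8x64d) command is not recorded in this file, but a plausible sketch assembled from the argparse defaults added below (widen factor 4 gives the 64-channel base width per the help string) would be:

```sh
CUDA_VISIBLE_DEVICES=0,1 python cifar.py -d cifar10 -a resnext --depth 29 --cardinality 8 \
    --widen-factor 4 --schedule 150 225 --gamma 0.1 --checkpoint checkpoints/resnext-8x64d
```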
1 change: 1 addition & 0 deletions checkpoints/resnext-8x64d/log.txt
@@ -0,0 +1 @@
LR Train Loss Valid Loss Train Acc. Valid Acc.
175 changes: 124 additions & 51 deletions cifar.py
@@ -36,26 +36,22 @@
import torch.utils.data as data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
# import torchvision.models as models
import models

from utils import *
from utils import Bar, Logger, AverageMeter, accuracy, mkdir_p


model_names = sorted(name for name in models.__dict__
if name.islower() and not name.startswith("__")
and callable(models.__dict__[name]))

parser = argparse.ArgumentParser(description='PyTorch CIFAR10/100 Training')
parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet20',
choices=model_names,
help='model architecture: ' +
' | '.join(model_names) +
' (default: resnet18)')
# Datasets
parser.add_argument('-d', '--dataset', default='cifar10', type=str)
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
help='number of data loading workers (default: 4)')
parser.add_argument('--epochs', default=164, type=int, metavar='N',
# Optimization options
parser.add_argument('--epochs', default=300, type=int, metavar='N',
help='number of total epochs to run')
parser.add_argument('--start_epoch', default=0, type=int, metavar='N',
help='manual epoch number (useful on restarts)')
@@ -65,19 +61,34 @@
help='test batchsize')
parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
metavar='LR', help='initial learning rate')
parser.add_argument('--schedule', type=int, nargs='+', default=[150, 225],
help='Decrease learning rate at these epochs.')
parser.add_argument('--gamma', type=float, default=0.1, help='LR is multiplied by gamma on schedule.')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
help='momentum')
parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float,
parser.add_argument('--weight-decay', '--wd', default=5e-4, type=float,
metavar='W', help='weight decay (default: 5e-4)')
# Checkpoints
parser.add_argument('-c', '--checkpoint', default='checkpoint', type=str, metavar='PATH',
help='path to save checkpoint (default: checkpoint)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
help='path to latest checkpoint (default: none)')
# Architecture
parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet20',
choices=model_names,
help='model architecture: ' +
' | '.join(model_names) +
' (default: resnet20)')
parser.add_argument('--depth', type=int, default=29, help='Model depth.')
parser.add_argument('--cardinality', type=int, default=8, help='Model cardinality (group).')
parser.add_argument('--widen-factor', type=int, default=4, help='Widen factor. 4 -> 64, 8 -> 128, ...')
# Miscs
parser.add_argument('--manualSeed', type=int, help='manual seed')
parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
help='evaluate model on validation set')
parser.add_argument('--manualSeed', type=int, help='manual seed')

args = parser.parse_args()
state = {k: v for k, v in args._get_kwargs()}

# Validate dataset
assert args.dataset == 'cifar10' or args.dataset == 'cifar100', 'Dataset can only be cifar10 or cifar100.'
@@ -133,7 +144,15 @@ def main():

# Model
print("=> creating model '{}'".format(args.arch))
model = models.__dict__[args.arch](num_classes=num_classes)
if args.arch == 'resnext':
model = models.__dict__[args.arch](
cardinality=args.cardinality,
num_classes=num_classes,
depth=args.depth,
widen_factor=args.widen_factor
)
else:
model = models.__dict__[args.arch](num_classes=num_classes)
if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
model.features = torch.nn.DataParallel(model.features)
model.cuda()
@@ -159,7 +178,7 @@ def main():
logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
else:
logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
logger.set_names(['Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'])
logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'])


if args.evaluate:
@@ -170,18 +189,15 @@

# Train and val
for epoch in range(start_epoch, args.epochs):
lr = adjust_learning_rate(optimizer, epoch)
adjust_learning_rate(optimizer, epoch)

print('\nEpoch: [%d | %d] LR: %f' % (epoch, args.epochs, lr))
print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

train_loss, train_acc = train(trainloader, model, criterion, optimizer, epoch, use_cuda)
test_loss, test_acc = test(testloader, model, criterion, epoch, use_cuda)

print(' Train Loss: %.8f, Train Acc: %.2f' % (train_loss, train_acc*100))
print(' Test Loss: %.8f, Test Acc: %.2f' % (test_loss, test_acc*100))

# append logger file
logger.append([train_loss, test_loss, train_acc, test_acc])
logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc])

# save model
is_best = test_acc > best_acc
@@ -202,50 +218,111 @@ def main():
print(best_acc)

def train(trainloader, model, criterion, optimizer, epoch, use_cuda):
# switch to train mode
model.train()
train_loss = 0
correct = 0
total = 0

batch_time = AverageMeter()
data_time = AverageMeter()
losses = AverageMeter()
top1 = AverageMeter()
top5 = AverageMeter()
end = time.time()

bar = Bar('Processing', max=len(trainloader))
for batch_idx, (inputs, targets) in enumerate(trainloader):
# measure data loading time
data_time.update(time.time() - end)

if use_cuda:
inputs, targets = inputs.cuda(), targets.cuda()
optimizer.zero_grad()
inputs, targets = inputs.cuda(), targets.cuda(async=True)
inputs, targets = torch.autograd.Variable(inputs), torch.autograd.Variable(targets)

# compute output
outputs = model(inputs)
loss = criterion(outputs, targets)

# measure accuracy and record loss
prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
losses.update(loss.data[0], inputs.size(0))
top1.update(prec1[0], inputs.size(0))
top5.update(prec5[0], inputs.size(0))

# compute gradient and do SGD step
optimizer.zero_grad()
loss.backward()
optimizer.step()

train_loss += loss.data[0]
_, predicted = torch.max(outputs.data, 1)
total += targets.size(0)
correct += predicted.eq(targets.data).cpu().sum()

progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
% (train_loss/(batch_idx+1), 100.*correct/total, correct, total))
return (train_loss/total, correct*1.0/total)
# measure elapsed time
batch_time.update(time.time() - end)
end = time.time()

# plot progress
bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
batch=batch_idx + 1,
size=len(trainloader),
data=data_time.avg,
bt=batch_time.avg,
total=bar.elapsed_td,
eta=bar.eta_td,
loss=losses.avg,
top1=top1.avg,
top5=top5.avg,
)
bar.next()
bar.finish()
return (losses.avg, top1.avg)

def test(testloader, model, criterion, epoch, use_cuda):
global best_acc

batch_time = AverageMeter()
data_time = AverageMeter()
losses = AverageMeter()
top1 = AverageMeter()
top5 = AverageMeter()

# switch to evaluate mode
model.eval()
test_loss = 0
correct = 0
total = 0

end = time.time()
bar = Bar('Processing', max=len(testloader))
for batch_idx, (inputs, targets) in enumerate(testloader):
# measure data loading time
data_time.update(time.time() - end)

if use_cuda:
inputs, targets = inputs.cuda(), targets.cuda()
inputs, targets = torch.autograd.Variable(inputs, volatile=True), torch.autograd.Variable(targets)

# compute output
outputs = model(inputs)
loss = criterion(outputs, targets)

test_loss += loss.data[0]
_, predicted = torch.max(outputs.data, 1)
total += targets.size(0)
correct += predicted.eq(targets.data).cpu().sum()

progress_bar(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
% (test_loss/(batch_idx+1), 100.*correct/total, correct, total))
return (test_loss/total, correct*1.0/total)
# measure accuracy and record loss
prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
losses.update(loss.data[0], inputs.size(0))
top1.update(prec1[0], inputs.size(0))
top5.update(prec5[0], inputs.size(0))

# measure elapsed time
batch_time.update(time.time() - end)
end = time.time()

# plot progress
bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
batch=batch_idx + 1,
size=len(testloader),
data=data_time.avg,
bt=batch_time.avg,
total=bar.elapsed_td,
eta=bar.eta_td,
loss=losses.avg,
top1=top1.avg,
top5=top5.avg,
)
bar.next()
bar.finish()
return (losses.avg, top1.avg)

def save_checkpoint(state, is_best, checkpoint='checkpoint', filename='checkpoint.pth.tar'):
filepath = os.path.join(checkpoint, filename)
@@ -254,15 +331,11 @@ def save_checkpoint(state, is_best, checkpoint='checkpoint', filename='checkpoin
shutil.copyfile(filepath, os.path.join(checkpoint, 'model_best.pth.tar'))

def adjust_learning_rate(optimizer, epoch):
deday = 0
if epoch >= 122:
deday = 2
elif epoch >= 81:
deday = 1
lr = args.lr * (0.1 ** deday)
for param_group in optimizer.param_groups:
param_group['lr'] = lr
return lr
global state
if epoch in args.schedule:
state['lr'] *= args.gamma
for param_group in optimizer.param_groups:
param_group['lr'] = state['lr']

if __name__ == '__main__':
main()
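The rewritten `adjust_learning_rate` replaces the hard-coded epoch-81/122 steps with a user-configurable milestone schedule. A minimal sketch of the arithmetic under the new defaults (`--lr 0.1 --schedule 150 225 --gamma 0.1`), separate from the code above:

```python
# Step-decay arithmetic with the new defaults (illustration only, not cifar.py code).
lr, gamma, schedule = 0.1, 0.1, [150, 225]
for epoch in range(300):
    if epoch in schedule:   # multiply once when a milestone epoch is reached
        lr *= gamma
    # epochs   0-149: lr = 0.1
    # epochs 150-224: lr = 0.01
    # epochs 225-299: lr = 0.001
```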
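The new progress bar is the `Bar` class from the verigak/progress submodule, re-exported here through `utils`. A self-contained sketch of the display pattern `train` and `test` use, assuming only the submodule's documented API (`suffix`, `next()`, `finish()`, `elapsed_td`, `eta_td`):

```python
import time
from progress.bar import Bar  # cifar.py gets this via `from utils import Bar`

bar = Bar('Processing', max=100)
for i in range(100):
    time.sleep(0.01)  # stand-in for one training batch
    # suffix is re-rendered after every next(); elapsed_td/eta_td are timedeltas
    bar.suffix = '({batch}/{size}) Total: {total:} | ETA: {eta:}'.format(
        batch=i + 1, size=100, total=bar.elapsed_td, eta=bar.eta_td)
    bar.next()
bar.finish()
```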
13 changes: 13 additions & 0 deletions exp/cifar_train_softatt_residual_false.sh
@@ -0,0 +1,13 @@
GPUID=$1
DATASET=$2
NET=$3

cd ..

CUDA_VISIBLE_DEVICES=$GPUID python cifar.py -d $DATASET -a ${NET}20 --checkpoint checkpoint/$DATASET/ResSoftAttNet_res_false/${NET}20 --manualSeed 1234
CUDA_VISIBLE_DEVICES=$GPUID python cifar.py -d $DATASET -a ${NET}32 --checkpoint checkpoint/$DATASET/ResSoftAttNet_res_false/${NET}32 --manualSeed 1234
CUDA_VISIBLE_DEVICES=$GPUID python cifar.py -d $DATASET -a ${NET}44 --checkpoint checkpoint/$DATASET/ResSoftAttNet_res_false/${NET}44 --manualSeed 1234
CUDA_VISIBLE_DEVICES=$GPUID python cifar.py -d $DATASET -a ${NET}56 --checkpoint checkpoint/$DATASET/ResSoftAttNet_res_false/${NET}56 --manualSeed 1234
CUDA_VISIBLE_DEVICES=$GPUID python cifar.py -d $DATASET -a ${NET}110 --checkpoint checkpoint/$DATASET/ResSoftAttNet_res_false/${NET}110 --manualSeed 1234

cd -
11 changes: 6 additions & 5 deletions models/__init__.py
@@ -62,11 +62,12 @@
from .alexnet import *
from .vgg import *
from .resnet import *
from .preresnet import *
from .hourglass import *
from .resattnet import *
from .ressoftattnet import *
from .resadvnet import *
from .resnext import *
# from .preresnet import *
# from .hourglass import *
# from .resattnet import *
# from .ressoftattnet import *
# from .resadvnet import *
# from .squeezenet import *
# from .inception import *
# from .densenet import *
17 changes: 3 additions & 14 deletions models/alexnet.py
@@ -3,20 +3,14 @@
(c) YANG, Wei
'''
import torch.nn as nn
import torch.utils.model_zoo as model_zoo


__all__ = ['AlexNet', 'alexnet']


model_urls = {
'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
}
__all__ = ['alexnet']


class AlexNet(nn.Module):

def __init__(self, num_classes=1000):
def __init__(self, num_classes=10):
super(AlexNet, self).__init__()
self.features = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=5),
@@ -42,14 +36,9 @@ def forward(self, x):
return x


def alexnet(pretrained=False, **kwargs):
def alexnet(**kwargs):
r"""AlexNet model architecture from the
`"One weird trick..." <https://arxiv.org/abs/1404.5997>`_ paper.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = AlexNet(**kwargs)
if pretrained:
model.load_state_dict(model_zoo.load_url(model_urls['alexnet']))
return model