Changes from all commits (25 commits)
803ce40
fix depth of resnet/preresnet on cifar10/cifar100
Jan 30, 2018
0195db9
PyTorch 0.4.x / 1.x compatibility
sytelus Jan 21, 2019
b1f27c5
added grad ratio scheduler
sytelus Jan 21, 2019
136498b
enable geo lr
sytelus Jan 21, 2019
17579a3
fix decay_factor typo
sytelus Jan 21, 2019
934921c
changed user in clone
sytelus Jan 21, 2019
26ff1c4
added step call
sytelus Jan 21, 2019
3a7aaae
Merge branch 'master' of https://github.com/sytelus/pytorch-classific…
sytelus Jan 21, 2019
f5c752d
Change scheme from grad ratio mul to adj sum
sytelus Jan 21, 2019
33b480d
new scheme, negative-sign bug fixed
sytelus Jan 21, 2019
ce4bd9a
back to ratios with clipping
sytelus Jan 21, 2019
d5528c7
params for LR 0.005
sytelus Jan 21, 2019
e2669b4
add no_grad for scheduler, remove memory pin for imagenet, fix bug fo…
sytelus Jan 22, 2019
257ee80
use epoch in step, exp disc method
sytelus Jan 22, 2019
052e262
blended lr
sytelus Jan 22, 2019
f93d75f
change blending to first 20 epochs
sytelus Jan 22, 2019
324bda0
Actually enable blended rates
sytelus Jan 22, 2019
e84d00c
added shake it lr, analysis notebook
sytelus Jan 23, 2019
98e430f
set graph title from network and dataset
sytelus Jan 23, 2019
726bb87
added two more results
sytelus Jan 23, 2019
dc2151b
switch to geo lr without decay
sytelus Jan 23, 2019
a0098be
densenet alexnet baselines, all results plotted, added line styles
sytelus Jan 23, 2019
7d47d8e
added print_model
sytelus Jan 23, 2019
f1acd5c
Fix base path for notebook, add more results from today
sytelus Jan 24, 2019
4fb598d
New results for WRN, imagenet, LR0.005 baseline
sytelus Jan 30, 2019
5 changes: 5 additions & 0 deletions .gitignore
@@ -97,3 +97,8 @@ ENV/

# Rope project settings
.ropeproject

#checkpoint files
*.eps
*.tar
.vs/pytorch-classification/v15/.suo
Binary file added .vs/pytorch-classification/v15/.suo
2 changes: 1 addition & 1 deletion README.md
@@ -11,7 +11,7 @@ Classification on CIFAR-10/100 and ImageNet with PyTorch.
* Install [PyTorch](http://pytorch.org/)
* Clone recursively
```
git clone --recursive https://github.com/bearpaw/pytorch-classification.git
git clone --recursive https://github.com/sytelus/pytorch-classification.git
```

## Training
102 changes: 64 additions & 38 deletions cifar.py
@@ -9,6 +9,7 @@
import shutil
import time
import random
import numpy as np

import torch
import torch.nn as nn
@@ -20,8 +21,7 @@
import torchvision.datasets as datasets
import models.cifar as models

from utils import Bar, Logger, AverageMeter, accuracy, mkdir_p, savefig

from utils import Bar, Logger, AverageMeter, accuracy, mkdir_p, savefig, GradientRatioScheduler, print_model

model_names = sorted(name for name in models.__dict__
if name.islower() and not name.startswith("__")
@@ -64,14 +64,22 @@
' | '.join(model_names) +
' (default: resnet18)')
parser.add_argument('--depth', type=int, default=29, help='Model depth.')
parser.add_argument('--block-name', type=str, default='BasicBlock',
help='the building block for Resnet and Preresnet: BasicBlock, Bottleneck (default: Basicblock for cifar10/cifar100)')
parser.add_argument('--cardinality', type=int, default=8, help='Model cardinality (group).')
parser.add_argument('--widen-factor', type=int, default=4, help='Widen factor. 4 -> 64, 8 -> 128, ...')
parser.add_argument('--growthRate', type=int, default=12, help='Growth rate for DenseNet.')
parser.add_argument('--compressionRate', type=int, default=2, help='Compression Rate (theta) for DenseNet.')
# Miscs
parser.add_argument('--manualSeed', type=int, help='manual seed')
parser.add_argument('--manualSeed', type=int, help='manual seed', default=42)
parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
help='evaluate model on validation set')
parser.add_argument('-det', '--deterministic', dest='deterministic', action='store_true',
help='Set deterministic flag for CUDA')
parser.add_argument('--geo-lr', default=1, type=int,
help='Enable Geometric learning rate on every ith batch')
parser.add_argument('--save-checkpoint-model', dest='save_checkpoint_model', action='store_true',
help='Save model on checkpoint')
#Device options
parser.add_argument('--gpu-id', default='0', type=str,
help='id(s) for CUDA_VISIBLE_DEVICES')
@@ -87,12 +95,17 @@
use_cuda = torch.cuda.is_available()

# Random seed
if args.deterministic:
torch.backends.cudnn.deterministic = True
if args.manualSeed is None:
args.manualSeed = random.randint(1, 10000)
random.seed(args.manualSeed)
np.random.seed(args.manualSeed)
torch.manual_seed(args.manualSeed)
if use_cuda:
torch.cuda.manual_seed_all(args.manualSeed)
else:
print("WARNING: CUDA is not available")

best_acc = 0 # best test accuracy

@@ -103,8 +116,6 @@ def main():
if not os.path.isdir(args.checkpoint):
mkdir_p(args.checkpoint)



# Data
print('==> Preparing dataset %s' % args.dataset)
transform_train = transforms.Compose([
@@ -125,12 +136,13 @@ def main():
dataloader = datasets.CIFAR100
num_classes = 100


trainset = dataloader(root='./data', train=True, download=True, transform=transform_train)
trainloader = data.DataLoader(trainset, batch_size=args.train_batch, shuffle=True, num_workers=args.workers)
trainloader = data.DataLoader(trainset, batch_size=args.train_batch, shuffle=True,
num_workers=args.workers)

testset = dataloader(root='./data', train=False, download=False, transform=transform_test)
testloader = data.DataLoader(testset, batch_size=args.test_batch, shuffle=False, num_workers=args.workers)
testloader = data.DataLoader(testset, batch_size=args.test_batch, shuffle=False,
num_workers=args.workers)

# Model
print("==> creating model '{}'".format(args.arch))
@@ -161,16 +173,24 @@ def main():
model = models.__dict__[args.arch](
num_classes=num_classes,
depth=args.depth,
block_name=args.block_name,
)
else:
model = models.__dict__[args.arch](num_classes=num_classes)

print("Geometric LR: {}".format(args.geo_lr))

model = torch.nn.DataParallel(model).cuda()
cudnn.benchmark = True
print(' Total params: %.2fM' % (sum(p.numel() for p in model.parameters())/1000000.0))

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

param_lr = GradientRatioScheduler.get_params_base_lr(model, args.lr)
optimizer = optim.SGD(param_lr, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
scheduler = GradientRatioScheduler(optimizer)

print_model(model)
input("Cont?")

# Resume
title = 'cifar-10-' + args.arch
@@ -187,7 +207,7 @@ def main():
logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
else:
logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'])
logger.set_names(['Epoch', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.', 'Time', 'Learning Rate'])


if args.evaluate:
@@ -198,26 +218,28 @@

# Train and val
for epoch in range(start_epoch, args.epochs):
adjust_learning_rate(optimizer, epoch)
adjust_learning_rate(scheduler, epoch)

print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))
print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, max(scheduler.get_lr())))

train_loss, train_acc = train(trainloader, model, criterion, optimizer, epoch, use_cuda)
st = time.time()
train_loss, train_acc = train(trainloader, model, criterion, optimizer, scheduler, epoch, use_cuda)
test_loss, test_acc = test(testloader, model, criterion, epoch, use_cuda)

# append logger file
logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc])
logger.append([epoch, train_loss, test_loss, train_acc, test_acc, time.time()-st, scheduler.get_lr()])

# save model
is_best = test_acc > best_acc
best_acc = max(test_acc, best_acc)
save_checkpoint({
'epoch': epoch + 1,
'state_dict': model.state_dict(),
'acc': test_acc,
'best_acc': best_acc,
'optimizer' : optimizer.state_dict(),
}, is_best, checkpoint=args.checkpoint)
if args.save_checkpoint_model:
save_checkpoint({
'epoch': epoch + 1,
'state_dict': model.state_dict(),
'acc': test_acc,
'best_acc': best_acc,
'optimizer' : optimizer.state_dict(),
}, is_best, checkpoint=args.checkpoint)

logger.close()
logger.plot()
@@ -226,7 +248,7 @@ def main():
print('Best acc:')
print(best_acc)

def train(trainloader, model, criterion, optimizer, epoch, use_cuda):
def train(trainloader, model, criterion, optimizer, scheduler,epoch, use_cuda):
# switch to train mode
model.train()

@@ -252,15 +274,20 @@ def train(trainloader, model, criterion, optimizer, epoch, use_cuda):

# measure accuracy and record loss
prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
losses.update(loss.data[0], inputs.size(0))
top1.update(prec1[0], inputs.size(0))
top5.update(prec5[0], inputs.size(0))
losses.update(loss.item(), inputs.size(0))
top1.update(prec1.item(), inputs.size(0))
top5.update(prec5.item(), inputs.size(0))

# compute gradient and do SGD step
optimizer.zero_grad()
loss.backward()
optimizer.step()

if args.geo_lr > 0 and batch_idx % args.geo_lr == 0:
with torch.no_grad():
scheduler.on_after_batch()
scheduler.step(epoch)

# measure elapsed time
batch_time.update(time.time() - end)
end = time.time()
@@ -301,17 +328,18 @@ def test(testloader, model, criterion, epoch, use_cuda):

if use_cuda:
inputs, targets = inputs.cuda(), targets.cuda()
inputs, targets = torch.autograd.Variable(inputs, volatile=True), torch.autograd.Variable(targets)
inputs, targets = torch.autograd.Variable(inputs), torch.autograd.Variable(targets)

# compute output
outputs = model(inputs)
loss = criterion(outputs, targets)
with torch.no_grad():
outputs = model(inputs)
loss = criterion(outputs, targets)

# measure accuracy and record loss
prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
losses.update(loss.data[0], inputs.size(0))
top1.update(prec1[0], inputs.size(0))
top5.update(prec5[0], inputs.size(0))
# measure accuracy and record loss
prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
losses.update(loss.item(), inputs.size(0))
top1.update(prec1.item(), inputs.size(0))
top5.update(prec5.item(), inputs.size(0))

# measure elapsed time
batch_time.update(time.time() - end)
@@ -339,12 +367,10 @@ def save_checkpoint(state, is_best, checkpoint='checkpoint', filename='checkpoin
if is_best:
shutil.copyfile(filepath, os.path.join(checkpoint, 'model_best.pth.tar'))

def adjust_learning_rate(optimizer, epoch):
def adjust_learning_rate(scheduler, epoch):
global state
if epoch in args.schedule:
state['lr'] *= args.gamma
for param_group in optimizer.param_groups:
param_group['lr'] = state['lr']
scheduler.set_decay_factor(scheduler.get_decay_factor() * args.gamma)

if __name__ == '__main__':
main()
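
Note on the helpers used above: cifar.py now imports GradientRatioScheduler and print_model from utils, but the corresponding utils changes are not included in the hunks shown here. As orientation only, below is a minimal sketch of the interface the training loop appears to rely on. The method names (get_params_base_lr, on_after_batch, step, set_decay_factor, get_decay_factor, get_lr) are taken from the calls above; the per-parameter update rule (a clipped gradient-to-weight norm ratio, suggested by the "back to ratios with clipping" commit) is an assumption, not the author's implementation.

```
# Hedged sketch only: the real utils.GradientRatioScheduler is not shown in
# this diff. Method names mirror the calls in cifar.py above; the scaling
# rule itself is an illustrative guess.
import torch


class GradientRatioScheduler:
    def __init__(self, optimizer, clip=(0.1, 10.0)):
        self.optimizer = optimizer
        self.decay_factor = 1.0
        self.clip = clip

    @staticmethod
    def get_params_base_lr(model, base_lr):
        # One optimizer param group per parameter so each can get its own LR.
        return [{'params': [p], 'lr': base_lr, 'initial_lr': base_lr}
                for p in model.parameters() if p.requires_grad]

    def on_after_batch(self):
        # Assumed rule: rescale each group's base LR by a clipped
        # |grad| / |weight| ratio from the latest backward pass.
        for group in self.optimizer.param_groups:
            p = group['params'][0]
            if p.grad is None:
                continue
            ratio = p.grad.detach().norm() / (p.detach().norm() + 1e-12)
            ratio = float(torch.clamp(ratio, *self.clip))
            group['lr'] = group['initial_lr'] * ratio * self.decay_factor

    def step(self, epoch=None):
        # Kept for compatibility with the step(epoch) call in train();
        # on_after_batch() already wrote the new LRs into the optimizer.
        pass

    def set_decay_factor(self, factor):
        self.decay_factor = factor

    def get_decay_factor(self):
        return self.decay_factor

    def get_lr(self):
        return [group['lr'] for group in self.optimizer.param_groups]


def print_model(model):
    # Simple stand-in: list parameter names and shapes.
    for name, p in model.named_parameters():
        print(name, tuple(p.shape))
```

Under these assumptions, every parameter sits in its own optimizer group so the scheduler can rescale learning rates per parameter after every --geo-lr-th batch, while adjust_learning_rate only shrinks the shared decay factor at the scheduled epochs.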