Exported source
import torch,random
import fastcore.all as fc
from torch import nn
from torch.nn import init
from fastAIcourse.datasets import *
from fastAIcourse.conv import *
from fastAIcourse.learner import *
from fastAIcourse.activations import *
from fastAIcourse.init import *
from fastAIcourse.sgd import *
from fastAIcourse.resnet import *
Exported source
import pickle,gzip,math,os,time,shutil
import matplotlib as mpl,numpy as np,matplotlib.pyplot as plt
from collections.abc import Mapping
from pathlib import Path
from operator import attrgetter,itemgetter
from functools import partial
from copy import copy
from contextlib import contextmanager
import torchvision.transforms.functional as TF,torch.nn.functional as F
from torch import tensor,optim
from torch.utils.data import DataLoader,default_collate
from torch.optim import lr_scheduler
from torcheval.metrics import MulticlassAccuracy
from datasets import load_dataset,load_dataset_builder
from fastcore.test import test_close
from torch import distributions
torch.set_printoptions(precision=2, linewidth=140, sci_mode=False)
torch.manual_seed(1)
mpl.rcParams['image.cmap'] = 'gray_r'
import logging
logging.disable(logging.WARNING)
set_seed(42)
if fc.defaults.cpus>8: fc.defaults.cpus=8
xl,yl = 'image','label'
name = "fashion_mnist"
bs = 1024
xmean,xstd = 0.28, 0.35
@inplace
def transformi(b): b[xl] = [(TF.to_tensor(o)-xmean)/xstd for o in b[xl]]
dsd = load_dataset(name)
tds = dsd.with_transform(transformi)
dls = DataLoaders.from_dd(tds, bs, num_workers=fc.defaults.cpus)
metrics = MetricsCB(accuracy=MulticlassAccuracy())
astats = ActivationStats(fc.risinstance(GeneralRelu))
cbs = [DeviceCB(), metrics, ProgressCB(plot=True), astats]
act_gr = partial(GeneralRelu, leak=0.1, sub=0.4)
iw = partial(init_weights, leaky=0.1)
set_seed(42)
lr,epochs = 6e-2,5
Going wider
Exported source
def get_model(act=nn.ReLU, nfs=(16,32,64,128,256,512), norm=nn.BatchNorm2d):
    layers = [ResBlock(1, 16, ks=5, stride=1, act=act, norm=norm)]
    layers += [ResBlock(nfs[i], nfs[i+1], act=act, norm=norm, stride=2) for i in range(len(nfs)-1)]
    layers += [nn.Flatten(), nn.Linear(nfs[-1], 10, bias=False), nn.BatchNorm1d(10)]
    return nn.Sequential(*layers)
lr = 1e-2
tmax = epochs * len(dls.train)
sched = partial(lr_scheduler.OneCycleLR, max_lr=lr, total_steps=tmax)
xtra = [BatchSchedCB(sched)]
model = get_model(act_gr, norm=nn.BatchNorm2d).apply(iw)
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=optim.AdamW)
learn.fit(epochs)
accuracy | loss | epoch | train
0.824 | 0.704 | 0 | train
0.859 | 0.563 | 0 | eval
0.898 | 0.381 | 1 | train
0.872 | 0.422 | 1 | eval
0.922 | 0.265 | 2 | train
0.907 | 0.294 | 2 | eval
0.941 | 0.196 | 3 | train
0.928 | 0.240 | 3 | eval
0.963 | 0.139 | 4 | train
0.933 | 0.222 | 4 | eval
Pooling
Exported source
class GlobalAvgPool(nn.Module):
    def forward(self, x): return x.mean((-2,-1))
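GlobalAvgPool simply averages each channel over the spatial dimensions, so the head no longer depends on the feature-map size. A quick check (a sketch, not part of the exported code) that it matches PyTorch's AdaptiveAvgPool2d(1) followed by a flatten:
x = torch.randn(8, 256, 2, 2)                       # (batch, channels, h, w)
builtin = nn.Sequential(nn.AdaptiveAvgPool2d(1), nn.Flatten())
test_close(GlobalAvgPool()(x), builtin(x))          # both produce shape (8, 256)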
Exported source
def get_model2(act=nn.ReLU, nfs=(16,32,64,128,256), norm=nn.BatchNorm2d):
    layers = [ResBlock(1, 16, ks=5, stride=1, act=act, norm=norm)]
    layers += [ResBlock(nfs[i], nfs[i+1], act=act, norm=norm, stride=2) for i in range(len(nfs)-1)]
    layers += [ResBlock(256, 512, act=act, norm=norm), GlobalAvgPool()]
    layers += [nn.Linear(512, 10, bias=False), nn.BatchNorm1d(10)]
    return nn.Sequential(*layers)
TrainLearner(get_model2(), dls, F.cross_entropy, lr=lr, cbs=[DeviceCB()]).summary()
Tot params: 4907588; MFLOPS: 33.0
Module | Input | Output | Num params | MFLOPS
ResBlock | (1024, 1, 28, 28) | (1024, 16, 28, 28) | 6928 | 5.3
ResBlock | (1024, 16, 28, 28) | (1024, 32, 14, 14) | 14560 | 2.8
ResBlock | (1024, 32, 14, 14) | (1024, 64, 7, 7) | 57792 | 2.8
ResBlock | (1024, 64, 7, 7) | (1024, 128, 4, 4) | 230272 | 3.7
ResBlock | (1024, 128, 4, 4) | (1024, 256, 2, 2) | 919296 | 3.7
ResBlock | (1024, 256, 2, 2) | (1024, 512, 2, 2) | 3673600 | 14.7
GlobalAvgPool | (1024, 512, 2, 2) | (1024, 512) | 0 | 0.0
Linear | (1024, 512) | (1024, 10) | 5120 | 0.0
BatchNorm1d | (1024, 10) | (1024, 10) | 20 | 0.0
set_seed(42)
model = get_model2(act_gr, norm=nn.BatchNorm2d).apply(iw)
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=optim.AdamW)
learn.fit(epochs)
accuracy | loss | epoch | train
0.822 | 0.715 | 0 | train
0.857 | 0.518 | 0 | eval
0.898 | 0.384 | 1 | train
0.881 | 0.389 | 1 | eval
0.921 | 0.267 | 2 | train
0.906 | 0.286 | 2 | eval
0.941 | 0.199 | 3 | train
0.925 | 0.244 | 3 | eval
0.962 | 0.141 | 4 | train
0.929 | 0.227 | 4 | eval
Exported source
def get_model3(act=nn.ReLU, nfs=(16,32,64,128,256), norm=nn.BatchNorm2d):
    layers = [ResBlock(1, 16, ks=5, stride=1, act=act, norm=norm)]
    layers += [ResBlock(nfs[i], nfs[i+1], act=act, norm=norm, stride=2) for i in range(len(nfs)-1)]
    layers += [GlobalAvgPool(), nn.Linear(256, 10, bias=False), nn.BatchNorm1d(10)]
    return nn.Sequential(*layers)
TrainLearner(get_model3(), dls, F.cross_entropy, lr=lr, cbs=[DeviceCB()]).summary()
Tot params: 1231428; MFLOPS: 18.3
Module | Input | Output | Num params | MFLOPS
ResBlock | (1024, 1, 28, 28) | (1024, 16, 28, 28) | 6928 | 5.3
ResBlock | (1024, 16, 28, 28) | (1024, 32, 14, 14) | 14560 | 2.8
ResBlock | (1024, 32, 14, 14) | (1024, 64, 7, 7) | 57792 | 2.8
ResBlock | (1024, 64, 7, 7) | (1024, 128, 4, 4) | 230272 | 3.7
ResBlock | (1024, 128, 4, 4) | (1024, 256, 2, 2) | 919296 | 3.7
GlobalAvgPool | (1024, 256, 2, 2) | (1024, 256) | 0 | 0.0
Linear | (1024, 256) | (1024, 10) | 2560 | 0.0
BatchNorm1d | (1024, 10) | (1024, 10) | 20 | 0.0
[o.shape for o in get_model3()[0].parameters()]
set_seed(42)
model = get_model3(act_gr, norm=nn.BatchNorm2d).apply(iw)
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=optim.AdamW)
learn.fit(epochs)
accuracy | loss | epoch | train
0.810 | 0.758 | 0 | train
0.871 | 0.450 | 0 | eval
0.895 | 0.401 | 1 | train
0.897 | 0.339 | 1 | eval
0.919 | 0.276 | 2 | train
0.895 | 0.319 | 2 | eval
0.939 | 0.207 | 3 | train
0.927 | 0.246 | 3 | eval
0.960 | 0.152 | 4 | train
0.929 | 0.230 | 4 | eval
Exported source
def get_model4(act=nn.ReLU, nfs=(16,32,64,128,256), norm=nn.BatchNorm2d):
    layers = [conv(1, 16, ks=5, stride=1, act=act, norm=norm)]
    layers += [ResBlock(nfs[i], nfs[i+1], act=act, norm=norm, stride=2) for i in range(len(nfs)-1)]
    layers += [GlobalAvgPool(), nn.Linear(256, 10, bias=False), nn.BatchNorm1d(10)]
    return nn.Sequential(*layers)
[o.shape for o in get_model4()[0].parameters()]
[torch.Size([16, 1, 5, 5]),
torch.Size([16]),
torch.Size([16]),
torch.Size([16])]
TrainLearner(get_model4(), dls, F.cross_entropy, lr=lr, cbs=[DeviceCB()]).summary()
Tot params: 1224948; MFLOPS: 13.3
Module | Input | Output | Num params | MFLOPS
Sequential | (1024, 1, 28, 28) | (1024, 16, 28, 28) | 448 | 0.3
ResBlock | (1024, 16, 28, 28) | (1024, 32, 14, 14) | 14560 | 2.8
ResBlock | (1024, 32, 14, 14) | (1024, 64, 7, 7) | 57792 | 2.8
ResBlock | (1024, 64, 7, 7) | (1024, 128, 4, 4) | 230272 | 3.7
ResBlock | (1024, 128, 4, 4) | (1024, 256, 2, 2) | 919296 | 3.7
GlobalAvgPool | (1024, 256, 2, 2) | (1024, 256) | 0 | 0.0
Linear | (1024, 256) | (1024, 10) | 2560 | 0.0
BatchNorm1d | (1024, 10) | (1024, 10) | 20 | 0.0
set_seed(42)
model = get_model4(act_gr, norm=nn.BatchNorm2d).apply(iw)
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=optim.AdamW)
learn.fit(epochs)
accuracy | loss | epoch | train
0.804 | 0.782 | 0 | train
0.869 | 0.474 | 0 | eval
0.898 | 0.393 | 1 | train
0.892 | 0.366 | 1 | eval
0.918 | 0.277 | 2 | train
0.896 | 0.340 | 2 | eval
0.940 | 0.202 | 3 | train
0.923 | 0.244 | 3 | eval
0.961 | 0.148 | 4 | train
0.925 | 0.238 | 4 | eval
Data augmentation
After 20 epochs without augmentation:
{'accuracy': '0.999', 'loss': '0.012', 'epoch': 19, 'train': True}
{'accuracy': '0.924', 'loss': '0.284', 'epoch': 19, 'train': False}
With batchnorm, weight decay doesn't really regularize: a BatchNorm layer renormalizes whatever comes into it, so shrinking the weights that feed it barely changes what the network computes.
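A quick sketch of that scale invariance (not part of the exported code): scaling the weights feeding a BatchNorm2d leaves its output essentially unchanged, so an L2 penalty on those weights doesn't constrain the function being learned.
m = nn.Sequential(nn.Conv2d(1, 16, 3, bias=False), nn.BatchNorm2d(16))
x = torch.randn(64, 1, 28, 28)
out1 = m(x)
with torch.no_grad(): m[0].weight *= 0.5   # "decay" the conv weights
test_close(out1, m(x), eps=1e-3)           # output is (numerically almost) identical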
Exported source
from torchvision import transforms
def tfm_batch(b, tfm_x=fc.noop, tfm_y = fc.noop): return tfm_x(b[0]),tfm_y(b[1])
tfms = nn.Sequential(transforms.RandomCrop(28, padding=4),
transforms.RandomHorizontalFlip())
augcb = BatchTransformCB(partial(tfm_batch, tfm_x=tfms), on_val=False)
model = get_model()
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=[SingleBatchCB(), augcb])
xb,yb = learn.batch
show_images(xb[:16], imsize=1.5)
Exported source
@fc.patch
@fc.delegates(show_images)
def show_image_batch(self:Learner, max_n=9, cbs=None, **kwargs):
    self.fit(1, cbs=[SingleBatchCB()]+fc.L(cbs))
    show_images(self.batch[0][:max_n], **kwargs)
learn.show_image_batch(max_n=16, imsize=(1.5))
tfms = nn.Sequential(transforms.RandomCrop(28, padding=1),
transforms.RandomHorizontalFlip())
augcb = BatchTransformCB(partial(tfm_batch, tfm_x=tfms), on_val=False)
set_seed(42)
epochs = 20
lr = 1e-2
tmax = epochs * len(dls.train)
sched = partial(lr_scheduler.OneCycleLR, max_lr=lr, total_steps=tmax)
xtra = [BatchSchedCB(sched), augcb]
model = get_model(act_gr, norm=nn.BatchNorm2d).apply(iw)
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=optim.AdamW)
learn.fit(epochs)
accuracy | loss | epoch | train
0.764 | 0.879 | 0 | train
0.824 | 0.604 | 0 | eval
0.858 | 0.597 | 1 | train
0.869 | 0.495 | 1 | eval
0.877 | 0.477 | 2 | train
0.823 | 0.520 | 2 | eval
0.887 | 0.392 | 3 | train
0.874 | 0.378 | 3 | eval
0.894 | 0.336 | 4 | train
0.877 | 0.394 | 4 | eval
0.906 | 0.288 | 5 | train
0.904 | 0.281 | 5 | eval
0.914 | 0.258 | 6 | train
0.858 | 0.400 | 6 | eval
0.919 | 0.236 | 7 | train
0.914 | 0.252 | 7 | eval
0.923 | 0.223 | 8 | train
0.919 | 0.234 | 8 | eval
0.930 | 0.198 | 9 | train
0.922 | 0.222 | 9 | eval
0.934 | 0.189 | 10 | train
0.922 | 0.222 | 10 | eval
0.940 | 0.173 | 11 | train
0.930 | 0.205 | 11 | eval
0.943 | 0.164 | 12 | train
0.927 | 0.207 | 12 | eval
0.949 | 0.148 | 13 | train
0.932 | 0.193 | 13 | eval
0.952 | 0.139 | 14 | train
0.937 | 0.185 | 14 | eval
0.959 | 0.121 | 15 | train
0.939 | 0.180 | 15 | eval
0.962 | 0.111 | 16 | train
0.939 | 0.181 | 16 | eval
0.966 | 0.102 | 17 | train
0.941 | 0.180 | 17 | eval
0.970 | 0.093 | 18 | train
0.943 | 0.175 | 18 | eval
0.971 | 0.090 | 19 | train
0.944 | 0.174 | 19 | eval
A custom collation function could let you do per-item transformations.
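For example, something along these lines (a sketch, not the notebook's code; per_item_tfm stands in for any per-item transform) would apply the transform to each example before default_collate assembles the batch:
def collate_with_tfm(items, per_item_tfm):
    # transform each example's image individually, then collate as usual
    return default_collate([{xl: per_item_tfm(o[xl]), yl: o[yl]} for o in items])

# hypothetical usage with a per-item random crop:
# dl = DataLoader(tds['train'], batch_size=bs,
#                 collate_fn=partial(collate_with_tfm, per_item_tfm=transforms.RandomCrop(28, padding=1)))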
mdl_path = Path('models')
mdl_path.mkdir(exist_ok=True)
torch.save(learn.model, mdl_path/'data_aug.pkl')
Test time augmentation (TTA)
Exported source
class CapturePreds(Callback):
    def before_fit(self, learn): self.all_inps,self.all_preds,self.all_targs = [],[],[]
    def after_batch(self, learn):
        self.all_inps.append(to_cpu(learn.batch[0]))
        self.all_preds.append(to_cpu(learn.preds))
        self.all_targs.append(to_cpu(learn.batch[1]))
    def after_fit(self, learn):
        self.all_preds,self.all_targs,self.all_inps = map(torch.cat, [self.all_preds,self.all_targs,self.all_inps])
Exported source
@fc.patch
def capture_preds(self: Learner, cbs=None, inps=False):
    cp = CapturePreds()
    self.fit(1, train=False, cbs=[cp]+fc.L(cbs))
    res = cp.all_preds,cp.all_targs
    if inps: res = res+(cp.all_inps,)
    return res
ap1, at = learn.capture_preds()
ttacb = BatchTransformCB(partial(tfm_batch, tfm_x=TF.hflip), on_val=True)
ap2, at = learn.capture_preds(cbs=[ttacb])
ap1.shape,ap2.shape,at.shape
(torch.Size([10000, 10]), torch.Size([10000, 10]), torch.Size([10000]))
ap = torch.stack([ap1,ap2]).mean(0).argmax(1)
round((ap==at).float().mean().item(), 3)
Random erase
xb,_ = next(iter(dls.train))
xbt = xb[:16]
xm,xs = xbt.mean(),xbt.std()
xbt.min(),xbt.max()
(tensor(-0.80), tensor(2.06))
pct = 0.2
szx = int(pct*xbt.shape[-2])
szy = int(pct*xbt.shape[-1])
stx = int(random.random()*(1-pct)*xbt.shape[-2])
sty = int(random.random()*(1-pct)*xbt.shape[-1])
stx,sty,szx,szy
init.normal_(xbt[:,:,stx:stx+szx,sty:sty+szy], mean=xm, std=xs);
show_images(xbt, imsize=1.5)
xbt.min(),xbt.max()
(tensor(-3.36), tensor(2.56))
The noise that was written in now extends well beyond the original pixel range, which is why _rand_erase1 below clamps the result back to the batch's original min and max.
Exported source
def _rand_erase1(x, pct, xm, xs, mn, mx):
    szx = int(pct*x.shape[-2])
    szy = int(pct*x.shape[-1])
    stx = int(random.random()*(1-pct)*x.shape[-2])
    sty = int(random.random()*(1-pct)*x.shape[-1])
    init.normal_(x[:,:,stx:stx+szx,sty:sty+szy], mean=xm, std=xs)
    x.clamp_(mn, mx)
xb,_ = next(iter(dls.train))
xbt = xb[:16]
_rand_erase1(xbt, 0.2, xbt.mean(), xbt.std(), xbt.min(), xbt.max())
show_images(xbt, imsize=1.5)
xbt.mean(),xbt.std(),xbt.min(), xbt.max()
(tensor(0.09), tensor(1.04), tensor(-0.80), tensor(2.06))
Exported source
def rand_erase(x, pct=0.2, max_num=4):
    xm,xs,mn,mx = x.mean(),x.std(),x.min(),x.max()
    num = random.randint(0, max_num)
    for i in range(num): _rand_erase1(x, pct, xm, xs, mn, mx)
    return x
xb,_ = next(iter(dls.train))
xbt = xb[:16]
rand_erase(xbt, 0.2, 4)
show_images(xbt, imsize=1.5)
Exported source
class RandErase(nn.Module):
    def __init__(self, pct=0.2, max_num=4):
        super().__init__()
        self.pct,self.max_num = pct,max_num
    def forward(self, x): return rand_erase(x, self.pct, self.max_num)
tfms = nn.Sequential(transforms.RandomCrop(28, padding=1),
transforms.RandomHorizontalFlip(),
RandErase())
augcb = BatchTransformCB(partial(tfm_batch, tfm_x=tfms), on_val=False)
model = get_model()
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=[DeviceCB(), SingleBatchCB(), augcb])
learn.fit(1)
xb,yb = learn.batch
show_images(xb[:16], imsize=1.5)
epochs = 20
lr = 2e-2
tmax = epochs * len(dls.train)
sched = partial(lr_scheduler.OneCycleLR, max_lr=lr, total_steps=tmax)
xtra = [BatchSchedCB(sched), augcb]
model = get_model(act_gr, norm=nn.BatchNorm2d).apply(iw)
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=optim.AdamW)
learn.fit(epochs)
accuracy | loss | epoch | train
0.760 | 0.871 | 0 | train
0.813 | 0.607 | 0 | eval
0.842 | 0.596 | 1 | train
0.845 | 0.472 | 1 | eval
0.856 | 0.480 | 2 | train
0.856 | 0.427 | 2 | eval
0.866 | 0.405 | 3 | train
0.856 | 0.421 | 3 | eval
0.872 | 0.374 | 4 | train
0.822 | 0.491 | 4 | eval
0.885 | 0.323 | 5 | train
0.880 | 0.363 | 5 | eval
0.895 | 0.295 | 6 | train
0.852 | 0.456 | 6 | eval
0.899 | 0.278 | 7 | train
0.869 | 0.368 | 7 | eval
0.907 | 0.257 | 8 | train
0.901 | 0.301 | 8 | eval
0.912 | 0.244 | 9 | train
0.910 | 0.260 | 9 | eval
0.917 | 0.231 | 10 | train
0.916 | 0.229 | 10 | eval
0.922 | 0.215 | 11 | train
0.921 | 0.220 | 11 | eval
0.926 | 0.206 | 12 | train
0.928 | 0.201 | 12 | eval
0.930 | 0.191 | 13 | train
0.924 | 0.208 | 13 | eval
0.933 | 0.185 | 14 | train
0.921 | 0.219 | 14 | eval
0.938 | 0.172 | 15 | train
0.929 | 0.198 | 15 | eval
0.941 | 0.163 | 16 | train
0.936 | 0.178 | 16 | eval
0.944 | 0.153 | 17 | train
0.939 | 0.172 | 17 | eval
0.947 | 0.146 | 18 | train
0.940 | 0.169 | 18 | eval
0.949 | 0.142 | 19 | train
0.939 | 0.172 | 19 | eval
Random copy
xb,_ = next(iter(dls.train))
xbt = xb[:16]
pct = 0.2
szx = int(pct*xbt.shape[-2])
szy = int(pct*xbt.shape[-1])
stx1 = int(random.random()*(1-pct)*xbt.shape[-2])
sty1 = int(random.random()*(1-pct)*xbt.shape[-1])
stx2 = int(random.random()*(1-pct)*xbt.shape[-2])
sty2 = int(random.random()*(1-pct)*xbt.shape[-1])
stx1,sty1,stx2,sty2,szx,szy
xbt[:,:,stx1:stx1+szx,sty1:sty1+szy] = xbt[:,:,stx2:stx2+szx,sty2:sty2+szy]
show_images(xbt, imsize=1.5)
Exported source
def _rand_copy1(x, pct):
    szx = int(pct*x.shape[-2])
    szy = int(pct*x.shape[-1])
    stx1 = int(random.random()*(1-pct)*x.shape[-2])
    sty1 = int(random.random()*(1-pct)*x.shape[-1])
    stx2 = int(random.random()*(1-pct)*x.shape[-2])
    sty2 = int(random.random()*(1-pct)*x.shape[-1])
    x[:,:,stx1:stx1+szx,sty1:sty1+szy] = x[:,:,stx2:stx2+szx,sty2:sty2+szy]
xb,_ = next(iter(dls.train))
xbt = xb[:16]
_rand_copy1(xbt, 0.2)
show_images(xbt, imsize=1.5)
Exported source
def rand_copy(x, pct=0.2, max_num=4):
    num = random.randint(0, max_num)
    for i in range(num): _rand_copy1(x, pct)
    return x
xb,_ = next(iter(dls.train))
xbt = xb[:16]
rand_copy(xbt, 0.2, 4)
show_images(xbt, imsize=1.5)
Exported source
class RandCopy(nn.Module):
    def __init__(self, pct=0.2, max_num=4):
        super().__init__()
        self.pct,self.max_num = pct,max_num
    def forward(self, x): return rand_copy(x, self.pct, self.max_num)
tfms = nn.Sequential(transforms.RandomCrop(28, padding=1),
transforms.RandomHorizontalFlip(),
RandCopy())
augcb = BatchTransformCB(partial(tfm_batch, tfm_x=tfms), on_val=False)
model = get_model()
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=[DeviceCB(), SingleBatchCB(), augcb])
learn.fit(1)
xb,yb = learn.batch
show_images(xb[:16], imsize=1.5)
set_seed(1)
epochs = 20
lr = 1e-2
tmax = epochs * len(dls.train)
sched = partial(lr_scheduler.OneCycleLR, max_lr=lr, total_steps=tmax)
xtra = [BatchSchedCB(sched), augcb]
model = get_model(act_gr, norm=nn.BatchNorm2d).apply(iw)
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=optim.AdamW)
learn.fit(epochs)
accuracy | loss | epoch | train
0.739 | 0.940 | 0 | train
0.810 | 0.599 | 0 | eval
0.832 | 0.656 | 1 | train
0.842 | 0.534 | 1 | eval
0.849 | 0.558 | 2 | train
0.838 | 0.487 | 2 | eval
0.858 | 0.467 | 3 | train
0.827 | 0.528 | 3 | eval
0.873 | 0.394 | 4 | train
0.860 | 0.418 | 4 | eval
0.885 | 0.344 | 5 | train
0.868 | 0.391 | 5 | eval
0.891 | 0.321 | 6 | train
0.885 | 0.334 | 6 | eval
0.899 | 0.293 | 7 | train
0.906 | 0.261 | 7 | eval
0.910 | 0.258 | 8 | train
0.913 | 0.242 | 8 | eval
0.913 | 0.249 | 9 | train
0.897 | 0.294 | 9 | eval
0.914 | 0.242 | 10 | train
0.921 | 0.229 | 10 | eval
0.922 | 0.221 | 11 | train
0.923 | 0.215 | 11 | eval
0.925 | 0.212 | 12 | train
0.927 | 0.206 | 12 | eval
0.929 | 0.200 | 13 | train
0.925 | 0.209 | 13 | eval
0.934 | 0.189 | 14 | train
0.918 | 0.226 | 14 | eval
0.937 | 0.177 | 15 | train
0.933 | 0.187 | 15 | eval
0.942 | 0.167 | 16 | train
0.937 | 0.178 | 16 | eval
0.944 | 0.159 | 17 | train
0.939 | 0.171 | 17 | eval
0.946 | 0.152 | 18 | train
0.939 | 0.170 | 18 | eval
0.951 | 0.142 | 19 | train
0.940 | 0.171 | 19 | eval
model2 = get_model(act_gr, norm=nn.BatchNorm2d).apply(iw)
learn2 = TrainLearner(model2, dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=optim.AdamW)
learn2.fit(epochs)
accuracy | loss | epoch | train
0.745 | 0.935 | 0 | train
0.823 | 0.573 | 0 | eval
0.838 | 0.648 | 1 | train
0.863 | 0.455 | 1 | eval
0.853 | 0.542 | 2 | train
0.812 | 0.598 | 2 | eval
0.860 | 0.471 | 3 | train
0.864 | 0.408 | 3 | eval
0.871 | 0.398 | 4 | train
0.858 | 0.438 | 4 | eval
0.884 | 0.348 | 5 | train
0.898 | 0.295 | 5 | eval
0.896 | 0.308 | 6 | train
0.883 | 0.345 | 6 | eval
0.901 | 0.284 | 7 | train
0.891 | 0.298 | 7 | eval
0.899 | 0.290 | 8 | train
0.903 | 0.284 | 8 | eval
0.916 | 0.243 | 9 | train
0.905 | 0.271 | 9 | eval
0.914 | 0.245 | 10 | train
0.916 | 0.243 | 10 | eval
0.919 | 0.227 | 11 | train
0.922 | 0.227 | 11 | eval
0.925 | 0.211 | 12 | train
0.923 | 0.220 | 12 | eval
0.930 | 0.197 | 13 | train
0.932 | 0.198 | 13 | eval
0.934 | 0.186 | 14 | train
0.930 | 0.201 | 14 | eval
0.938 | 0.173 | 15 | train
0.934 | 0.194 | 15 | eval
0.943 | 0.163 | 16 | train
0.929 | 0.205 | 16 | eval
0.943 | 0.160 | 17 | train
0.938 | 0.183 | 17 | eval
0.946 | 0.152 | 18 | train
0.938 | 0.183 | 18 | eval
0.947 | 0.150 | 19 | train
0.937 | 0.185 | 19 | eval
mdl_path = Path('models')
torch.save(learn.model, mdl_path/'randcopy1.pkl')
torch.save(learn2.model, mdl_path/'randcopy2.pkl')
cp1 = CapturePreds()
learn.fit(1, train=False, cbs=cp1)
cp2 = CapturePreds()
learn2.fit(1, train=False, cbs=cp2)
ap = torch.stack([cp1.all_preds,cp2.all_preds]).mean(0).argmax(1)
round((ap==cp1.all_targs).float().mean().item(), 3)
Dropout
p = 0.1
dist = distributions.binomial.Binomial(probs=1-p)
dist.sample((10,))
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
Exported source
class Dropout(nn.Module):
    def __init__(self, p=0.1):
        super().__init__()
        self.p = p
    def forward(self, x):
        if not self.training: return x
        dist = distributions.binomial.Binomial(tensor(1.0).to(x.device), probs=1-self.p)
        return x * dist.sample(x.size()) * 1/(1-self.p)
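A quick sanity check (a sketch, not part of the exported code) that the 1/(1-p) rescaling keeps the expected activation unchanged in training mode, and that eval mode is a no-op:
x = torch.ones(1000, 1000)
do = Dropout(0.1)
print(x.mean(), do(x).mean())   # both ≈ 1.0: ~10% of values zeroed, the rest scaled by 1/0.9
do.eval()
test_close(do(x), x)            # identity when not training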
Exported source
def get_dropmodel(act=nn.ReLU, nfs=(16,32,64,128,256,512), norm=nn.BatchNorm2d, drop=0.0):
    layers = [ResBlock(1, 16, ks=5, stride=1, act=act, norm=norm), nn.Dropout2d(drop)]
    layers += [ResBlock(nfs[i], nfs[i+1], act=act, norm=norm, stride=2) for i in range(len(nfs)-1)]
    layers += [nn.Flatten(), Dropout(drop), nn.Linear(nfs[-1], 10, bias=False), nn.BatchNorm1d(10)]
    return nn.Sequential(*layers)
set_seed(42)
epochs=5
lr = 1e-2
tmax = epochs * len(dls.train)
sched = partial(lr_scheduler.OneCycleLR, max_lr=lr, total_steps=tmax)
xtra = [BatchSchedCB(sched)]
model = get_dropmodel(act_gr, norm=nn.BatchNorm2d, drop=0.1).apply(iw)
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=optim.AdamW)
learn.fit(epochs)
accuracy | loss | epoch | train
0.809 | 0.745 | 0 | train
0.854 | 0.482 | 0 | eval
0.894 | 0.392 | 1 | train
0.892 | 0.340 | 1 | eval
0.917 | 0.277 | 2 | train
0.910 | 0.278 | 2 | eval
0.937 | 0.208 | 3 | train
0.927 | 0.234 | 3 | eval
0.956 | 0.155 | 4 | train
0.930 | 0.225 | 4 | eval
Exported source
class TTD_CB(Callback):
    def before_epoch(self, learn):
        learn.model.apply(lambda m: m.train() if isinstance(m, (nn.Dropout,nn.Dropout2d)) else None)
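TTD_CB switches any dropout layers back into training mode at the start of each epoch, so predictions stay stochastic during evaluation (Monte Carlo dropout). A sketch of how it could be combined with capture_preds to average a few stochastic passes over the validation set:
preds = [learn.capture_preds(cbs=[TTD_CB()])[0] for _ in range(4)]
targs = learn.capture_preds()[1]
avg = torch.stack(preds).mean(0).argmax(1)
round((avg==targs).float().mean().item(), 3)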
Augment 2
Exported source
@inplace
def transformi(b): b[xl] = [(TF.to_tensor(o)*2-1) for o in b[xl]]
tds = dsd.with_transform(transformi)
dls = DataLoaders.from_dd(tds, bs, num_workers=fc.defaults.cpus)
set_seed(42)
epochs = 20
lr = 1e-2
tmax = epochs * len(dls.train)
sched = partial(lr_scheduler.OneCycleLR, max_lr=lr, total_steps=tmax)
xtra = [BatchSchedCB(sched), augcb]
model = get_model(act_gr, norm=nn.BatchNorm2d).apply(iw)
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=optim.AdamW)
learn.fit(epochs)
accuracy | loss | epoch | train
0.742 | 0.932 | 0 | train
0.798 | 0.623 | 0 | eval
0.832 | 0.663 | 1 | train
0.785 | 0.712 | 1 | eval
0.855 | 0.535 | 2 | train
0.848 | 0.496 | 2 | eval
0.854 | 0.486 | 3 | train
0.790 | 0.620 | 3 | eval
0.867 | 0.412 | 4 | train
0.865 | 0.390 | 4 | eval
0.885 | 0.344 | 5 | train
0.880 | 0.353 | 5 | eval
0.887 | 0.335 | 6 | train
0.873 | 0.377 | 6 | eval
0.899 | 0.292 | 7 | train
0.908 | 0.260 | 7 | eval
0.904 | 0.274 | 8 | train
0.897 | 0.286 | 8 | eval
0.905 | 0.266 | 9 | train
0.909 | 0.258 | 9 | eval
0.915 | 0.240 | 10 | train
0.917 | 0.232 | 10 | eval
0.920 | 0.227 | 11 | train
0.913 | 0.243 | 11 | eval
0.924 | 0.214 | 12 | train
0.922 | 0.216 | 12 | eval
0.929 | 0.202 | 13 | train
0.930 | 0.201 | 13 | eval
0.934 | 0.185 | 14 | train
0.933 | 0.191 | 14 | eval
0.934 | 0.183 | 15 | train
0.936 | 0.182 | 15 | eval
0.941 | 0.166 | 16 | train
0.938 | 0.179 | 16 | eval
0.943 | 0.163 | 17 | train
0.940 | 0.177 | 17 | eval
0.945 | 0.158 | 18 | train
0.938 | 0.180 | 18 | eval
0.947 | 0.152 | 19 | train
0.940 | 0.177 | 19 | eval
torch.save(learn.model, 'models/data_aug2.pkl')