Exported source
import torch,random
import fastcore.all as fc
from torch import nn
from torch.nn import init
from fastAIcourse.datasets import *
from fastAIcourse.conv import *
from fastAIcourse.learner import *
from fastAIcourse.activations import *
from fastAIcourse.init import *
from fastAIcourse.sgd import *
from fastAIcourse.resnet import *
 
 
Exported source
import pickle,gzip,math,os,time,shutil
import matplotlib as mpl,numpy as np,matplotlib.pyplot as plt
from collections.abc import Mapping
from pathlib import Path
from operator import attrgetter,itemgetter
from functools import partial
from copy import copy
from contextlib import contextmanager
import torchvision.transforms.functional as TF,torch.nn.functional as F
from torch import tensor,optim
from torch.utils.data import DataLoader,default_collate
from torch.optim import lr_scheduler
from torcheval.metrics import MulticlassAccuracy
from datasets import load_dataset,load_dataset_builder
from fastcore.test import test_close
from torch import distributions
 
 
torch.set_printoptions(precision=2, linewidth=140, sci_mode=False)
torch.manual_seed(1)
mpl.rcParams['image.cmap'] = 'gray_r'
import logging
logging.disable(logging.WARNING)
set_seed(42)
if fc.defaults.cpus>8: fc.defaults.cpus=8
 
xl,yl = 'image','label'
name = "fashion_mnist"
bs = 1024
xmean,xstd = 0.28, 0.35
@inplace
def transformi(b): b[xl] = [(TF.to_tensor(o)-xmean)/xstd for o in b[xl]]
dsd = load_dataset(name)
tds = dsd.with_transform(transformi)
dls = DataLoaders.from_dd(tds, bs, num_workers=fc.defaults.cpus)
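
As a quick sanity check on the normalization constants (my sketch, not in the original), one batch should come out with mean ≈ 0 and std ≈ 1:
xb = next(iter(dls.train))[0]
xb.mean(),xb.std()   # roughly (tensor(0.), tensor(1.)) if xmean,xstd are right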
 
metrics = MetricsCB(accuracy=MulticlassAccuracy())
astats = ActivationStats(fc.risinstance(GeneralRelu))
cbs = [DeviceCB(), metrics, ProgressCB(plot=True), astats]
act_gr = partial(GeneralRelu, leak=0.1, sub=0.4)
iw = partial(init_weights, leaky=0.1)
 
set_seed(42)
lr,epochs = 6e-2,5
 
Going wider
A simple way to add capacity: make the network wider, doubling the number of filters in each successive ResBlock up to 512.
Exported source
def get_model(act=nn.ReLU, nfs=(16,32,64,128,256,512), norm=nn.BatchNorm2d):
    layers = [ResBlock(1, 16, ks=5, stride=1, act=act, norm=norm)]
    layers += [ResBlock(nfs[i], nfs[i+1], act=act, norm=norm, stride=2) for i in range(len(nfs)-1)]
    layers += [nn.Flatten(), nn.Linear(nfs[-1], 10, bias=False), nn.BatchNorm1d(10)]
    return nn.Sequential(*layers)
 
 
lr = 1e-2
tmax = epochs * len(dls.train)
sched = partial(lr_scheduler.OneCycleLR, max_lr=lr, total_steps=tmax)
xtra = [BatchSchedCB(sched)]
model = get_model(act_gr, norm=nn.BatchNorm2d).apply(iw)
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=optim.AdamW)
learn.fit(epochs)

| accuracy | loss | epoch | train | 
| 0.824 | 0.704 | 0 | train | 
| 0.859 | 0.563 | 0 | eval | 
| 0.898 | 0.381 | 1 | train | 
| 0.872 | 0.422 | 1 | eval | 
| 0.922 | 0.265 | 2 | train | 
| 0.907 | 0.294 | 2 | eval | 
| 0.941 | 0.196 | 3 | train | 
| 0.928 | 0.240 | 3 | eval | 
| 0.963 | 0.139 | 4 | train | 
| 0.933 | 0.222 | 4 | eval | 
 
 
Pooling
Rather than downsampling all the way to 1×1 and flattening, keep a 2×2 feature map and average it: a global average pooling layer reduces each channel to a single value, so the linear head needs far fewer weights.
Exported source
class GlobalAvgPool(nn.Module):
    def forward(self, x): return x.mean((-2,-1))
 
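GlobalAvgPool just averages each feature map over its two spatial dimensions; a quick check (my sketch, with a random input) that it matches PyTorch's built-in adaptive pooling:
x = torch.randn(8, 512, 2, 2)
test_close(GlobalAvgPool()(x), nn.AdaptiveAvgPool2d(1)(x).flatten(1))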
 
Exported source
def get_model2(act=nn.ReLU, nfs=(16,32,64,128,256), norm=nn.BatchNorm2d):
    layers = [ResBlock(1, 16, ks=5, stride=1, act=act, norm=norm)]
    layers += [ResBlock(nfs[i], nfs[i+1], act=act, norm=norm, stride=2) for i in range(len(nfs)-1)]
    layers += [ResBlock(256, 512, act=act, norm=norm), GlobalAvgPool()]
    layers += [nn.Linear(512, 10, bias=False), nn.BatchNorm1d(10)]
    return nn.Sequential(*layers)
 
 
TrainLearner(get_model2(), dls, F.cross_entropy, lr=lr, cbs=[DeviceCB()]).summary()
Tot params: 4907588; MFLOPS: 33.0
 
| Module | Input | Output | Num params | MFLOPS | 
| ResBlock | (1024, 1, 28, 28) | (1024, 16, 28, 28) | 6928 | 5.3 | 
| ResBlock | (1024, 16, 28, 28) | (1024, 32, 14, 14) | 14560 | 2.8 | 
| ResBlock | (1024, 32, 14, 14) | (1024, 64, 7, 7) | 57792 | 2.8 | 
| ResBlock | (1024, 64, 7, 7) | (1024, 128, 4, 4) | 230272 | 3.7 | 
| ResBlock | (1024, 128, 4, 4) | (1024, 256, 2, 2) | 919296 | 3.7 | 
| ResBlock | (1024, 256, 2, 2) | (1024, 512, 2, 2) | 3673600 | 14.7 | 
| GlobalAvgPool | (1024, 512, 2, 2) | (1024, 512) | 0 | 0.0 | 
| Linear | (1024, 512) | (1024, 10) | 5120 | 0.0 | 
| BatchNorm1d | (1024, 10) | (1024, 10) | 20 | 0.0 | 
 
 
set_seed(42)
model = get_model2(act_gr, norm=nn.BatchNorm2d).apply(iw)
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=optim.AdamW)
learn.fit(epochs)
| accuracy | loss | epoch | train | 
| 0.822 | 0.715 | 0 | train | 
| 0.857 | 0.518 | 0 | eval | 
| 0.898 | 0.384 | 1 | train | 
| 0.881 | 0.389 | 1 | eval | 
| 0.921 | 0.267 | 2 | train | 
| 0.906 | 0.286 | 2 | eval | 
| 0.941 | 0.199 | 3 | train | 
| 0.925 | 0.244 | 3 | eval | 
| 0.962 | 0.141 | 4 | train | 
| 0.929 | 0.227 | 4 | eval | 
 
 
Exported source
def get_model3(act=nn.ReLU, nfs=(16,32,64,128,256), norm=nn.BatchNorm2d):
    layers = [ResBlock(1, 16, ks=5, stride=1, act=act, norm=norm)]
    layers += [ResBlock(nfs[i], nfs[i+1], act=act, norm=norm, stride=2) for i in range(len(nfs)-1)]
    layers += [GlobalAvgPool(), nn.Linear(256, 10, bias=False), nn.BatchNorm1d(10)]
    return nn.Sequential(*layers)
 
 
TrainLearner(get_model3(), dls, F.cross_entropy, lr=lr, cbs=[DeviceCB()]).summary()
Tot params: 1231428; MFLOPS: 18.3
 
| Module | Input | Output | Num params | MFLOPS | 
| ResBlock | (1024, 1, 28, 28) | (1024, 16, 28, 28) | 6928 | 5.3 | 
| ResBlock | (1024, 16, 28, 28) | (1024, 32, 14, 14) | 14560 | 2.8 | 
| ResBlock | (1024, 32, 14, 14) | (1024, 64, 7, 7) | 57792 | 2.8 | 
| ResBlock | (1024, 64, 7, 7) | (1024, 128, 4, 4) | 230272 | 3.7 | 
| ResBlock | (1024, 128, 4, 4) | (1024, 256, 2, 2) | 919296 | 3.7 | 
| GlobalAvgPool | (1024, 256, 2, 2) | (1024, 256) | 0 | 0.0 | 
| Linear | (1024, 256) | (1024, 10) | 2560 | 0.0 | 
| BatchNorm1d | (1024, 10) | (1024, 10) | 20 | 0.0 | 
 
 
[o.shape for o in get_model3()[0].parameters()]
 
set_seed(42)
model = get_model3(act_gr, norm=nn.BatchNorm2d).apply(iw)
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=optim.AdamW)
learn.fit(epochs)
| accuracy | loss | epoch | train | 
| 0.810 | 0.758 | 0 | train | 
| 0.871 | 0.450 | 0 | eval | 
| 0.895 | 0.401 | 1 | train | 
| 0.897 | 0.339 | 1 | eval | 
| 0.919 | 0.276 | 2 | train | 
| 0.895 | 0.319 | 2 | eval | 
| 0.939 | 0.207 | 3 | train | 
| 0.927 | 0.246 | 3 | eval | 
| 0.960 | 0.152 | 4 | train | 
| 0.929 | 0.230 | 4 | eval | 
 
 
Exported source
def get_model4(act=nn.ReLU, nfs=(16,32,64,128,256), norm=nn.BatchNorm2d):
    layers = [conv(1, 16, ks=5, stride=1, act=act, norm=norm)]
    layers += [ResBlock(nfs[i], nfs[i+1], act=act, norm=norm, stride=2) for i in range(len(nfs)-1)]
    layers += [GlobalAvgPool(), nn.Linear(256, 10, bias=False), nn.BatchNorm1d(10)]
    return nn.Sequential(*layers)
 
 
[o.shape for o in get_model4()[0].parameters()]
[torch.Size([16, 1, 5, 5]),
 torch.Size([16]),
 torch.Size([16]),
 torch.Size([16])]
 
 
TrainLearner(get_model4(), dls, F.cross_entropy, lr=lr, cbs=[DeviceCB()]).summary()
Tot params: 1224948; MFLOPS: 13.3
 
| Module | Input | Output | Num params | MFLOPS | 
| Sequential | (1024, 1, 28, 28) | (1024, 16, 28, 28) | 448 | 0.3 | 
| ResBlock | (1024, 16, 28, 28) | (1024, 32, 14, 14) | 14560 | 2.8 | 
| ResBlock | (1024, 32, 14, 14) | (1024, 64, 7, 7) | 57792 | 2.8 | 
| ResBlock | (1024, 64, 7, 7) | (1024, 128, 4, 4) | 230272 | 3.7 | 
| ResBlock | (1024, 128, 4, 4) | (1024, 256, 2, 2) | 919296 | 3.7 | 
| GlobalAvgPool | (1024, 256, 2, 2) | (1024, 256) | 0 | 0.0 | 
| Linear | (1024, 256) | (1024, 10) | 2560 | 0.0 | 
| BatchNorm1d | (1024, 10) | (1024, 10) | 20 | 0.0 | 
 
 
set_seed(42)
model = get_model4(act_gr, norm=nn.BatchNorm2d).apply(iw)
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=optim.AdamW)
learn.fit(epochs)
| accuracy | loss | epoch | train | 
| 0.804 | 0.782 | 0 | train | 
| 0.869 | 0.474 | 0 | eval | 
| 0.898 | 0.393 | 1 | train | 
| 0.892 | 0.366 | 1 | eval | 
| 0.918 | 0.277 | 2 | train | 
| 0.896 | 0.340 | 2 | eval | 
| 0.940 | 0.202 | 3 | train | 
| 0.923 | 0.244 | 3 | eval | 
| 0.961 | 0.148 | 4 | train | 
| 0.925 | 0.238 | 4 | eval | 
 
 
Data augmentation
After 20 epochs without augmentation, the model has badly overfit:
{'accuracy': '0.999', 'loss': '0.012', 'epoch': 19, 'train': True}
{'accuracy': '0.924', 'loss': '0.284', 'epoch': 19, 'train': False}
With batchnorm, weight decay doesn't really regularize, so we turn to data augmentation instead.
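Here's a minimal sketch (mine, not from the lesson) of why: scaling a conv layer's weights and bias by any positive constant leaves the following batchnorm's output unchanged in training mode, so the shrinkage that weight decay applies doesn't constrain the function being computed.
conv1 = nn.Conv2d(1, 16, 3, padding=1)
bn = nn.BatchNorm2d(16)
x = torch.randn(64, 1, 28, 28)
out1 = bn(conv1(x))
with torch.no_grad(): conv1.weight *= 100; conv1.bias *= 100
out2 = bn(conv1(x))   # batchnorm re-normalizes, so the x100 scale washes out
test_close(out1, out2, eps=1e-3)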
Exported source
from torchvision import transforms
 
 
def tfm_batch(b, tfm_x=fc.noop, tfm_y=fc.noop): return tfm_x(b[0]),tfm_y(b[1])
tfms = nn.Sequential(transforms.RandomCrop(28, padding=4),
                     transforms.RandomHorizontalFlip())
augcb = BatchTransformCB(partial(tfm_batch, tfm_x=tfms), on_val=False)
model = get_model()
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=[SingleBatchCB(), augcb])
learn.fit(1)

xb,yb = learn.batch
show_images(xb[:16], imsize=1.5)
 
Exported source
@fc.patch
@fc.delegates(show_images)
def show_image_batch(self:Learner, max_n=9, cbs=None, **kwargs):
    self.fit(1, cbs=[SingleBatchCB()]+fc.L(cbs))
    show_images(self.batch[0][:max_n], **kwargs)
 
 
learn.show_image_batch(max_n=16, imsize=1.5)
 
tfms = nn.Sequential(transforms.RandomCrop(28, padding=1),
                     transforms.RandomHorizontalFlip())
augcb = BatchTransformCB(partial(tfm_batch, tfm_x=tfms), on_val=False)
 
set_seed(42)
epochs = 20
lr = 1e-2
tmax = epochs * len(dls.train)
sched = partial(lr_scheduler.OneCycleLR, max_lr=lr, total_steps=tmax)
xtra = [BatchSchedCB(sched), augcb]
model = get_model(act_gr, norm=nn.BatchNorm2d).apply(iw)
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=optim.AdamW)
learn.fit(epochs)
| accuracy | loss | epoch | train | 
| 0.764 | 0.879 | 0 | train | 
| 0.824 | 0.604 | 0 | eval | 
| 0.858 | 0.597 | 1 | train | 
| 0.869 | 0.495 | 1 | eval | 
| 0.877 | 0.477 | 2 | train | 
| 0.823 | 0.520 | 2 | eval | 
| 0.887 | 0.392 | 3 | train | 
| 0.874 | 0.378 | 3 | eval | 
| 0.894 | 0.336 | 4 | train | 
| 0.877 | 0.394 | 4 | eval | 
| 0.906 | 0.288 | 5 | train | 
| 0.904 | 0.281 | 5 | eval | 
| 0.914 | 0.258 | 6 | train | 
| 0.858 | 0.400 | 6 | eval | 
| 0.919 | 0.236 | 7 | train | 
| 0.914 | 0.252 | 7 | eval | 
| 0.923 | 0.223 | 8 | train | 
| 0.919 | 0.234 | 8 | eval | 
| 0.930 | 0.198 | 9 | train | 
| 0.922 | 0.222 | 9 | eval | 
| 0.934 | 0.189 | 10 | train | 
| 0.922 | 0.222 | 10 | eval | 
| 0.940 | 0.173 | 11 | train | 
| 0.930 | 0.205 | 11 | eval | 
| 0.943 | 0.164 | 12 | train | 
| 0.927 | 0.207 | 12 | eval | 
| 0.949 | 0.148 | 13 | train | 
| 0.932 | 0.193 | 13 | eval | 
| 0.952 | 0.139 | 14 | train | 
| 0.937 | 0.185 | 14 | eval | 
| 0.959 | 0.121 | 15 | train | 
| 0.939 | 0.180 | 15 | eval | 
| 0.962 | 0.111 | 16 | train | 
| 0.939 | 0.181 | 16 | eval | 
| 0.966 | 0.102 | 17 | train | 
| 0.941 | 0.180 | 17 | eval | 
| 0.970 | 0.093 | 18 | train | 
| 0.943 | 0.175 | 18 | eval | 
| 0.971 | 0.090 | 19 | train | 
| 0.944 | 0.174 | 19 | eval | 
 
 
A custom collation function could let you do per-item transformations.
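For instance (a hypothetical sketch, not used in this notebook, assuming items arrive as (image, label) tuples), something like this could be passed as a DataLoader's collate_fn:
def collate_aug(items, tfm=transforms.RandomCrop(28, padding=4)):
    # transform each image individually, then stack into a batch
    return default_collate([(tfm(x), y) for x,y in items])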
mdl_path = Path('models')
mdl_path.mkdir(exist_ok=True)
torch.save(learn.model, mdl_path/'data_aug.pkl')
 
Test time augmentation (TTA)
Test time augmentation runs inference on several augmented views of each input and averages the predictions. First, a callback to capture predictions:
Exported source
class CapturePreds(Callback):
    def before_fit(self, learn): self.all_inps,self.all_preds,self.all_targs = [],[],[]
    def after_batch(self, learn):
        self.all_inps.append(to_cpu(learn.batch[0]))
        self.all_preds.append(to_cpu(learn.preds))
        self.all_targs.append(to_cpu(learn.batch[1]))
    def after_fit(self, learn):
        self.all_preds,self.all_targs,self.all_inps = map(torch.cat, [self.all_preds,self.all_targs,self.all_inps])
 
 
Exported source
@fc.patch
def capture_preds(self: Learner, cbs=None, inps=False):
    cp = CapturePreds()
    self.fit(1, train=False, cbs=[cp]+fc.L(cbs))
    res = cp.all_preds,cp.all_targs
    if inps: res = res+(cp.all_inps,)
    return res
 
 
ap1, at = learn.capture_preds()
 
ttacb = BatchTransformCB(partial(tfm_batch, tfm_x=TF.hflip), on_val=True)
ap2, at = learn.capture_preds(cbs=[ttacb])
 
ap1.shape,ap2.shape,at.shape
(torch.Size([10000, 10]), torch.Size([10000, 10]), torch.Size([10000]))
 
 
ap = torch.stack([ap1,ap2]).mean(0).argmax(1)
 
round((ap==at).float().mean().item(), 3)
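The same recipe extends to any number of test-time views; a sketch (mine, assuming the helpers above), averaging over a list of deterministic transforms:
tta_tfms = [fc.noop, TF.hflip]
preds = [learn.capture_preds(cbs=[BatchTransformCB(partial(tfm_batch, tfm_x=t), on_val=True)])[0]
         for t in tta_tfms]
ap = torch.stack(preds).mean(0).argmax(1)  # average predictions across views, then pick the winner
round((ap==at).float().mean().item(), 3)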
 
Random erase
Random erase replaces a random patch of each image with gaussian noise matching the data statistics. Start by grabbing a batch and its stats:
xb,_ = next(iter(dls.train))
xbt = xb[:16]
 
xm,xs = xbt.mean(),xbt.std()

xbt.min(),xbt.max()
(tensor(-0.80), tensor(2.06))
 
 
pct = 0.2
szx = int(pct*xbt.shape[-2])
szy = int(pct*xbt.shape[-1])
stx = int(random.random()*(1-pct)*xbt.shape[-2])
sty = int(random.random()*(1-pct)*xbt.shape[-1])
stx,sty,szx,szy
 
init.normal_(xbt[:,:,stx:stx+szx,sty:sty+szy], mean=xm, std=xs);
 
show_images(xbt, imsize=1.5)

xbt.min(),xbt.max()
(tensor(-3.36), tensor(2.56))
The noise isn't clamped, so the erased patch now falls outside the original data range; the final version below clamps to fix that.
 
 
Exported source
def _rand_erase1(x, pct, xm, xs, mn, mx):
    szx = int(pct*x.shape[-2])                      # patch height
    szy = int(pct*x.shape[-1])                      # patch width
    stx = int(random.random()*(1-pct)*x.shape[-2])  # random top-left corner
    sty = int(random.random()*(1-pct)*x.shape[-1])
    init.normal_(x[:,:,stx:stx+szx,sty:sty+szy], mean=xm, std=xs)  # fill the patch with noise matching the batch stats
    x.clamp_(mn, mx)  # keep the noise within the original data range
 
 
xb,_ = next(iter(dls.train))
xbt = xb[:16]
_rand_erase1(xbt, 0.2, xbt.mean(), xbt.std(), xbt.min(), xbt.max())
show_images(xbt, imsize=1.5)
 
xbt.mean(),xbt.std(),xbt.min(), xbt.max()
(tensor(0.09), tensor(1.04), tensor(-0.80), tensor(2.06))
 
 
Exported source
def rand_erase(x, pct=0.2, max_num=4):
    xm,xs,mn,mx = x.mean(),x.std(),x.min(),x.max()
    num = random.randint(0, max_num)  # erase a random number of patches, possibly none
    for i in range(num): _rand_erase1(x, pct, xm, xs, mn, mx)
    return x
 
 
xb,_ = next(iter(dls.train))
xbt = xb[:16]
rand_erase(xbt, 0.2, 4)
show_images(xbt, imsize=1.5)
 
Exported source
class RandErase(nn.Module):
    def __init__(self, pct=0.2, max_num=4):
        super().__init__()
        self.pct,self.max_num = pct,max_num
    def forward(self, x): return rand_erase(x, self.pct, self.max_num)
 
 
tfms = nn.Sequential(transforms.RandomCrop(28, padding=1),
                     transforms.RandomHorizontalFlip(),
                     RandErase())
augcb = BatchTransformCB(partial(tfm_batch, tfm_x=tfms), on_val=False)
 
model = get_model()
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=[DeviceCB(), SingleBatchCB(), augcb])
learn.fit(1)
xb,yb = learn.batch
show_images(xb[:16], imsize=1.5)
 
epochs = 20
lr = 2e-2
tmax = epochs * len(dls.train)
sched = partial(lr_scheduler.OneCycleLR, max_lr=lr, total_steps=tmax)
xtra = [BatchSchedCB(sched), augcb]
model = get_model(act_gr, norm=nn.BatchNorm2d).apply(iw)
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=optim.AdamW)
learn.fit(epochs)
| accuracy | loss | epoch | train | 
| 0.760 | 0.871 | 0 | train | 
| 0.813 | 0.607 | 0 | eval | 
| 0.842 | 0.596 | 1 | train | 
| 0.845 | 0.472 | 1 | eval | 
| 0.856 | 0.480 | 2 | train | 
| 0.856 | 0.427 | 2 | eval | 
| 0.866 | 0.405 | 3 | train | 
| 0.856 | 0.421 | 3 | eval | 
| 0.872 | 0.374 | 4 | train | 
| 0.822 | 0.491 | 4 | eval | 
| 0.885 | 0.323 | 5 | train | 
| 0.880 | 0.363 | 5 | eval | 
| 0.895 | 0.295 | 6 | train | 
| 0.852 | 0.456 | 6 | eval | 
| 0.899 | 0.278 | 7 | train | 
| 0.869 | 0.368 | 7 | eval | 
| 0.907 | 0.257 | 8 | train | 
| 0.901 | 0.301 | 8 | eval | 
| 0.912 | 0.244 | 9 | train | 
| 0.910 | 0.260 | 9 | eval | 
| 0.917 | 0.231 | 10 | train | 
| 0.916 | 0.229 | 10 | eval | 
| 0.922 | 0.215 | 11 | train | 
| 0.921 | 0.220 | 11 | eval | 
| 0.926 | 0.206 | 12 | train | 
| 0.928 | 0.201 | 12 | eval | 
| 0.930 | 0.191 | 13 | train | 
| 0.924 | 0.208 | 13 | eval | 
| 0.933 | 0.185 | 14 | train | 
| 0.921 | 0.219 | 14 | eval | 
| 0.938 | 0.172 | 15 | train | 
| 0.929 | 0.198 | 15 | eval | 
| 0.941 | 0.163 | 16 | train | 
| 0.936 | 0.178 | 16 | eval | 
| 0.944 | 0.153 | 17 | train | 
| 0.939 | 0.172 | 17 | eval | 
| 0.947 | 0.146 | 18 | train | 
| 0.940 | 0.169 | 18 | eval | 
| 0.949 | 0.142 | 19 | train | 
| 0.939 | 0.172 | 19 | eval | 
 
 
Random copy
Random copy pastes a randomly chosen patch of each image over another random location, so every pixel stays within the data distribution:
xb,_ = next(iter(dls.train))
xbt = xb[:16]
 
pct = 0.2
szx = int(pct*xbt.shape[-2])
szy = int(pct*xbt.shape[-1])
stx1 = int(random.random()*(1-pct)*xbt.shape[-2])
sty1 = int(random.random()*(1-pct)*xbt.shape[-1])
stx2 = int(random.random()*(1-pct)*xbt.shape[-2])
sty2 = int(random.random()*(1-pct)*xbt.shape[-1])
stx1,sty1,stx2,sty2,szx,szy
 
xbt[:,:,stx1:stx1+szx,sty1:sty1+szy] = xbt[:,:,stx2:stx2+szx,sty2:sty2+szy]
 
show_images(xbt, imsize=1.5)
 
Exported source
def _rand_copy1(x, pct):
    szx = int(pct*x.shape[-2])                       # patch height
    szy = int(pct*x.shape[-1])                       # patch width
    stx1 = int(random.random()*(1-pct)*x.shape[-2])  # destination corner
    sty1 = int(random.random()*(1-pct)*x.shape[-1])
    stx2 = int(random.random()*(1-pct)*x.shape[-2])  # source corner
    sty2 = int(random.random()*(1-pct)*x.shape[-1])
    x[:,:,stx1:stx1+szx,sty1:sty1+szy] = x[:,:,stx2:stx2+szx,sty2:sty2+szy]  # paste the source patch over the destination
 
 
xb,_ = next(iter(dls.train))
xbt = xb[:16]
_rand_copy1(xbt, 0.2)
show_images(xbt, imsize=1.5)
 
Exported source
def rand_copy(x, pct=0.2, max_num=4):
    num = random.randint(0, max_num)  # copy a random number of patches, possibly none
    for i in range(num): _rand_copy1(x, pct)
    return x
 
 
xb,_ = next(iter(dls.train))
xbt = xb[:16]
rand_copy(xbt, 0.2, 4)
show_images(xbt, imsize=1.5)
 
Exported source
class RandCopy(nn.Module):
    def __init__(self, pct=0.2, max_num=4):
        super().__init__()
        self.pct,self.max_num = pct,max_num
    def forward(self, x): return rand_copy(x, self.pct, self.max_num)
 
 
tfms = nn.Sequential(transforms.RandomCrop(28, padding=1),
                     transforms.RandomHorizontalFlip(),
                     RandCopy())
augcb = BatchTransformCB(partial(tfm_batch, tfm_x=tfms), on_val=False)
 
model = get_model()
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=[DeviceCB(), SingleBatchCB(), augcb])
learn.fit(1)
xb,yb = learn.batch
show_images(xb[:16], imsize=1.5)
 
set_seed(1)
epochs = 20
lr = 1e-2
tmax = epochs * len(dls.train)
sched = partial(lr_scheduler.OneCycleLR, max_lr=lr, total_steps=tmax)
xtra = [BatchSchedCB(sched), augcb]
model = get_model(act_gr, norm=nn.BatchNorm2d).apply(iw)
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=optim.AdamW)
learn.fit(epochs)
| accuracy | loss | epoch | train | 
| 0.739 | 0.940 | 0 | train | 
| 0.810 | 0.599 | 0 | eval | 
| 0.832 | 0.656 | 1 | train | 
| 0.842 | 0.534 | 1 | eval | 
| 0.849 | 0.558 | 2 | train | 
| 0.838 | 0.487 | 2 | eval | 
| 0.858 | 0.467 | 3 | train | 
| 0.827 | 0.528 | 3 | eval | 
| 0.873 | 0.394 | 4 | train | 
| 0.860 | 0.418 | 4 | eval | 
| 0.885 | 0.344 | 5 | train | 
| 0.868 | 0.391 | 5 | eval | 
| 0.891 | 0.321 | 6 | train | 
| 0.885 | 0.334 | 6 | eval | 
| 0.899 | 0.293 | 7 | train | 
| 0.906 | 0.261 | 7 | eval | 
| 0.910 | 0.258 | 8 | train | 
| 0.913 | 0.242 | 8 | eval | 
| 0.913 | 0.249 | 9 | train | 
| 0.897 | 0.294 | 9 | eval | 
| 0.914 | 0.242 | 10 | train | 
| 0.921 | 0.229 | 10 | eval | 
| 0.922 | 0.221 | 11 | train | 
| 0.923 | 0.215 | 11 | eval | 
| 0.925 | 0.212 | 12 | train | 
| 0.927 | 0.206 | 12 | eval | 
| 0.929 | 0.200 | 13 | train | 
| 0.925 | 0.209 | 13 | eval | 
| 0.934 | 0.189 | 14 | train | 
| 0.918 | 0.226 | 14 | eval | 
| 0.937 | 0.177 | 15 | train | 
| 0.933 | 0.187 | 15 | eval | 
| 0.942 | 0.167 | 16 | train | 
| 0.937 | 0.178 | 16 | eval | 
| 0.944 | 0.159 | 17 | train | 
| 0.939 | 0.171 | 17 | eval | 
| 0.946 | 0.152 | 18 | train | 
| 0.939 | 0.170 | 18 | eval | 
| 0.951 | 0.142 | 19 | train | 
| 0.940 | 0.171 | 19 | eval | 
 
 
Train a second copy of the model (the seed isn't reset, so it trains differently) so the two can be ensembled:
model2 = get_model(act_gr, norm=nn.BatchNorm2d).apply(iw)
learn2 = TrainLearner(model2, dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=optim.AdamW)
learn2.fit(epochs)
| accuracy | loss | epoch | train | 
| 0.745 | 0.935 | 0 | train | 
| 0.823 | 0.573 | 0 | eval | 
| 0.838 | 0.648 | 1 | train | 
| 0.863 | 0.455 | 1 | eval | 
| 0.853 | 0.542 | 2 | train | 
| 0.812 | 0.598 | 2 | eval | 
| 0.860 | 0.471 | 3 | train | 
| 0.864 | 0.408 | 3 | eval | 
| 0.871 | 0.398 | 4 | train | 
| 0.858 | 0.438 | 4 | eval | 
| 0.884 | 0.348 | 5 | train | 
| 0.898 | 0.295 | 5 | eval | 
| 0.896 | 0.308 | 6 | train | 
| 0.883 | 0.345 | 6 | eval | 
| 0.901 | 0.284 | 7 | train | 
| 0.891 | 0.298 | 7 | eval | 
| 0.899 | 0.290 | 8 | train | 
| 0.903 | 0.284 | 8 | eval | 
| 0.916 | 0.243 | 9 | train | 
| 0.905 | 0.271 | 9 | eval | 
| 0.914 | 0.245 | 10 | train | 
| 0.916 | 0.243 | 10 | eval | 
| 0.919 | 0.227 | 11 | train | 
| 0.922 | 0.227 | 11 | eval | 
| 0.925 | 0.211 | 12 | train | 
| 0.923 | 0.220 | 12 | eval | 
| 0.930 | 0.197 | 13 | train | 
| 0.932 | 0.198 | 13 | eval | 
| 0.934 | 0.186 | 14 | train | 
| 0.930 | 0.201 | 14 | eval | 
| 0.938 | 0.173 | 15 | train | 
| 0.934 | 0.194 | 15 | eval | 
| 0.943 | 0.163 | 16 | train | 
| 0.929 | 0.205 | 16 | eval | 
| 0.943 | 0.160 | 17 | train | 
| 0.938 | 0.183 | 17 | eval | 
| 0.946 | 0.152 | 18 | train | 
| 0.938 | 0.183 | 18 | eval | 
| 0.947 | 0.150 | 19 | train | 
| 0.937 | 0.185 | 19 | eval | 
 
 
mdl_path = Path('models')
torch.save(learn.model,  mdl_path/'randcopy1.pkl')
torch.save(learn2.model, mdl_path/'randcopy2.pkl')
 
Capture each model's predictions on the validation set, then average them:
cp1 = CapturePreds()
learn.fit(1, train=False, cbs=cp1)
 
cp2 = CapturePreds()
learn2.fit(1, train=False, cbs=cp2)
 
ap = torch.stack([cp1.all_preds,cp2.all_preds]).mean(0).argmax(1)
 
round((ap==cp1.all_targs).float().mean().item(), 3)
 
Dropout
Dropout zeroes each activation with probability p during training. A Bernoulli mask (a Binomial with a single trial) keeps each unit with probability 1-p:
p = 0.1
dist = distributions.binomial.Binomial(probs=1-p)
dist.sample((10,))
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
 
 
Exported source
class Dropout(nn.Module):
    def __init__(self, p=0.1):
        super().__init__()
        self.p = p
    def forward(self, x):
        if not self.training: return x
        dist = distributions.binomial.Binomial(tensor(1.0).to(x.device), probs=1-self.p)
        return x * dist.sample(x.size()) * 1/(1-self.p)
 
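A quick check of the scaling (my sketch, assuming the Dropout class above): in training mode the surviving activations are scaled by 1/(1-p), so the output mean matches the input's, and in eval mode the layer is the identity.
do = Dropout(0.5)
x = torch.ones(10000)
print(do(x).mean())        # ≈ 1.0: the zeros are balanced by the 1/(1-p) rescaling
do.eval()
print((do(x) == x).all())  # identity at evaluation time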
 
Exported source
def get_dropmodel(act=nn.ReLU, nfs=(16,32,64,128,256,512), norm=nn.BatchNorm2d, drop=0.0):
    layers = [ResBlock(1, 16, ks=5, stride=1, act=act, norm=norm), nn.Dropout2d(drop)]
    layers += [ResBlock(nfs[i], nfs[i+1], act=act, norm=norm, stride=2) for i in range(len(nfs)-1)]
    layers += [nn.Flatten(), Dropout(drop), nn.Linear(nfs[-1], 10, bias=False), nn.BatchNorm1d(10)]
    return nn.Sequential(*layers)
 
 
set_seed(42)
epochs=5
lr = 1e-2
tmax = epochs * len(dls.train)
sched = partial(lr_scheduler.OneCycleLR, max_lr=lr, total_steps=tmax)
xtra = [BatchSchedCB(sched)]
model = get_dropmodel(act_gr, norm=nn.BatchNorm2d, drop=0.1).apply(iw)
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=optim.AdamW)
learn.fit(epochs)

| accuracy | loss | epoch | train | 
| 0.809 | 0.745 | 0 | train | 
| 0.854 | 0.482 | 0 | eval | 
| 0.894 | 0.392 | 1 | train | 
| 0.892 | 0.340 | 1 | eval | 
| 0.917 | 0.277 | 2 | train | 
| 0.910 | 0.278 | 2 | eval | 
| 0.937 | 0.208 | 3 | train | 
| 0.927 | 0.234 | 3 | eval | 
| 0.956 | 0.155 | 4 | train | 
| 0.930 | 0.225 | 4 | eval | 
 
 
Exported source
class TTD_CB(Callback):
    def before_epoch(self, learn):
        # test-time dropout: put just the dropout layers back in training mode so they stay stochastic during evaluation
        learn.model.apply(lambda m: m.train() if isinstance(m, (nn.Dropout,nn.Dropout2d)) else None)
 
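A sketch of how this might be used (hypothetical, assuming a learner trained with the drop-model above): leave dropout active at inference, capture several stochastic sets of predictions, and average them like TTA.
preds = [learn.capture_preds(cbs=[TTD_CB()])[0] for _ in range(4)]
ap = torch.stack(preds).mean(0).argmax(1)  # average over stochastic forward passes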
 
Augment 2
This time, scale the pixels to the range (-1,1) instead of normalizing with the dataset statistics:
Exported source
@inplace
def transformi(b): b[xl] = [(TF.to_tensor(o)*2-1) for o in b[xl]]
 
 
tds = dsd.with_transform(transformi)
dls = DataLoaders.from_dd(tds, bs, num_workers=fc.defaults.cpus)
 
set_seed(42)
epochs = 20
lr = 1e-2
tmax = epochs * len(dls.train)
sched = partial(lr_scheduler.OneCycleLR, max_lr=lr, total_steps=tmax)
xtra = [BatchSchedCB(sched), augcb]
model = get_model(act_gr, norm=nn.BatchNorm2d).apply(iw)
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=optim.AdamW)
learn.fit(epochs)
| accuracy | loss | epoch | train | 
| 0.742 | 0.932 | 0 | train | 
| 0.798 | 0.623 | 0 | eval | 
| 0.832 | 0.663 | 1 | train | 
| 0.785 | 0.712 | 1 | eval | 
| 0.855 | 0.535 | 2 | train | 
| 0.848 | 0.496 | 2 | eval | 
| 0.854 | 0.486 | 3 | train | 
| 0.790 | 0.620 | 3 | eval | 
| 0.867 | 0.412 | 4 | train | 
| 0.865 | 0.390 | 4 | eval | 
| 0.885 | 0.344 | 5 | train | 
| 0.880 | 0.353 | 5 | eval | 
| 0.887 | 0.335 | 6 | train | 
| 0.873 | 0.377 | 6 | eval | 
| 0.899 | 0.292 | 7 | train | 
| 0.908 | 0.260 | 7 | eval | 
| 0.904 | 0.274 | 8 | train | 
| 0.897 | 0.286 | 8 | eval | 
| 0.905 | 0.266 | 9 | train | 
| 0.909 | 0.258 | 9 | eval | 
| 0.915 | 0.240 | 10 | train | 
| 0.917 | 0.232 | 10 | eval | 
| 0.920 | 0.227 | 11 | train | 
| 0.913 | 0.243 | 11 | eval | 
| 0.924 | 0.214 | 12 | train | 
| 0.922 | 0.216 | 12 | eval | 
| 0.929 | 0.202 | 13 | train | 
| 0.930 | 0.201 | 13 | eval | 
| 0.934 | 0.185 | 14 | train | 
| 0.933 | 0.191 | 14 | eval | 
| 0.934 | 0.183 | 15 | train | 
| 0.936 | 0.182 | 15 | eval | 
| 0.941 | 0.166 | 16 | train | 
| 0.938 | 0.179 | 16 | eval | 
| 0.943 | 0.163 | 17 | train | 
| 0.940 | 0.177 | 17 | eval | 
| 0.945 | 0.158 | 18 | train | 
| 0.938 | 0.180 | 18 | eval | 
| 0.947 | 0.152 | 19 | train | 
| 0.940 | 0.177 | 19 | eval | 
 
 
torch.save(learn.model, 'models/data_aug2.pkl')
 