import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision
from torchvision import datasets, models, transforms
from torchvision.transforms import ToPILImage
import matplotlib.pyplot as plt
import numpy as np
import os
import copy
import time
Transfer Learning
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.cuda.empty_cache()
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ]),
}
data_dir = 'Data/hymenoptera_data'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                              shuffle=True, num_workers=0)
               for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(class_names)
['ants', 'bees']
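ImageFolder infers the class labels from the subdirectory names, which is why class_names comes back as ['ants', 'bees']. The layout assumed under Data/hymenoptera_data is the usual one-folder-per-class structure of the hymenoptera dataset (sketched here for orientation; only the folder names matter):

Data/hymenoptera_data/
    train/
        ants/   # *.jpg images labelled 'ants'
        bees/   # *.jpg images labelled 'bees'
    val/
        ants/
        bees/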
def imshow(inp, title):
    """Imshow for Tensor."""
    inp = inp.numpy().transpose((1, 2, 0))
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    plt.title(title)
    plt.show()
# Get a batch of training data
inputs, classes = next(iter(dataloaders['train']))

# Make a grid from the batch
out = torchvision.utils.make_grid(inputs)

imshow(out, title=[class_names[x] for x in classes])
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()   # Set model to training mode
            else:
                model.eval()    # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # forward
                # track history only if in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
import timm
print(timm.list_models('resnet*')[:10])
['resnet10t', 'resnet14t', 'resnet18', 'resnet18d', 'resnet26', 'resnet26d', 'resnet26t', 'resnet32ts', 'resnet33ts', 'resnet34']
# Load ResNet-18 model
model = timm.create_model('resnet18', pretrained=True)
model
ResNet(
(conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act1): ReLU(inplace=True)
(maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(layer1): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(drop_block): Identity()
(act1): ReLU(inplace=True)
(aa): Identity()
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act2): ReLU(inplace=True)
)
(1): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(drop_block): Identity()
(act1): ReLU(inplace=True)
(aa): Identity()
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act2): ReLU(inplace=True)
)
)
(layer2): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(drop_block): Identity()
(act1): ReLU(inplace=True)
(aa): Identity()
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act2): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(drop_block): Identity()
(act1): ReLU(inplace=True)
(aa): Identity()
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act2): ReLU(inplace=True)
)
)
(layer3): Sequential(
(0): BasicBlock(
(conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(drop_block): Identity()
(act1): ReLU(inplace=True)
(aa): Identity()
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act2): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(drop_block): Identity()
(act1): ReLU(inplace=True)
(aa): Identity()
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act2): ReLU(inplace=True)
)
)
(layer4): Sequential(
(0): BasicBlock(
(conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(drop_block): Identity()
(act1): ReLU(inplace=True)
(aa): Identity()
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act2): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(drop_block): Identity()
(act1): ReLU(inplace=True)
(aa): Identity()
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act2): ReLU(inplace=True)
)
)
(global_pool): SelectAdaptivePool2d(pool_type=avg, flatten=Flatten(start_dim=1, end_dim=-1))
(fc): Linear(in_features=512, out_features=1000, bias=True)
)
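As an aside, timm can also size the classifier head at creation time through its num_classes argument, so no manual fc replacement is needed when going that route. A minimal sketch for the two-class ants/bees problem (model_timm is just an illustrative name):

# Sketch: let timm build the 2-class head directly, as an alternative to replacing fc by hand.
model_timm = timm.create_model('resnet18', pretrained=True, num_classes=2)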
#### Finetuning the convnet ####
# Load a pretrained model and reset final fully connected layer.
model = models.resnet18(pretrained=True)
model
/home/ben/mambaforge/envs/pfast/lib/python3.12/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
warnings.warn(
/home/ben/mambaforge/envs/pfast/lib/python3.12/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet18_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet18_Weights.DEFAULT` to get the most up-to-date weights.
warnings.warn(msg)
ResNet(
(conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(layer1): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(1): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(layer2): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(layer3): Sequential(
(0): BasicBlock(
(conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(layer4): Sequential(
(0): BasicBlock(
(conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
(fc): Linear(in_features=512, out_features=1000, bias=True)
)
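The warnings above come from the deprecated pretrained flag. On torchvision 0.13 and later, the equivalent, warning-free call uses a weights enum, as the warning message itself suggests; a minimal sketch:

from torchvision.models import ResNet18_Weights

# Equivalent to pretrained=True, without the deprecation warning.
model = models.resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
# or ResNet18_Weights.DEFAULT for the most up-to-date weights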
num_ftrs = model.fc.in_features
num_ftrs
512
# Here the size of each output sample is set to 2.
# Alternatively, it can be generalized to nn.Linear(num_ftrs, len(class_names)).
model.fc = nn.Linear(model.fc.in_features, 2)
model
ResNet(
(conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(layer1): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(1): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(layer2): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(layer3): Sequential(
(0): BasicBlock(
(conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(layer4): Sequential(
(0): BasicBlock(
(conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
(fc): Linear(in_features=512, out_features=2, bias=True)
)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
# Observe that all parameters are being optimized
optimizer = optim.SGD(model.parameters(), lr=0.001)
# StepLR decays the learning rate of each parameter group by gamma every step_size epochs
# Decay LR by a factor of 0.1 every 7 epochs
# Learning rate scheduling should be applied after the optimizer's update,
# e.g., you should write your code this way:
# for epoch in range(100):
#     train(...)
#     validate(...)
#     scheduler.step()
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
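To see the schedule concretely, a small self-contained sketch (with a hypothetical throwaway parameter, not part of the training code above) prints the learning rate StepLR hands the optimizer at the start of each epoch: 0.001 for epochs 0-6, 0.0001 for epochs 7-13, and so on.

# Sketch: inspect the StepLR decay with a dummy optimizer.
dummy_opt = optim.SGD([torch.zeros(1, requires_grad=True)], lr=0.001)
dummy_sched = lr_scheduler.StepLR(dummy_opt, step_size=7, gamma=0.1)
for epoch in range(15):
    print(epoch, dummy_opt.param_groups[0]['lr'])  # 0.001 for epochs 0-6, then 0.0001, ...
    # ... train(...) and validate(...) would run here ...
    dummy_opt.step()
    dummy_sched.step()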
model = train_model(model, criterion, optimizer, step_lr_scheduler, num_epochs=25)
Epoch 0/24
----------
train Loss: 1.3138 Acc: 0.6270
val Loss: 2.4487 Acc: 0.4837
Epoch 1/24
----------
train Loss: 1.1552 Acc: 0.6721
val Loss: 2.3901 Acc: 0.5033
Epoch 2/24
----------
train Loss: 0.9656 Acc: 0.7254
val Loss: 2.0542 Acc: 0.5229
Epoch 3/24
----------
train Loss: 1.0340 Acc: 0.6762
val Loss: 1.8399 Acc: 0.5425
Epoch 4/24
----------
train Loss: 0.8638 Acc: 0.7049
val Loss: 1.8519 Acc: 0.5686
Epoch 5/24
----------
train Loss: 0.9689 Acc: 0.6598
val Loss: 1.9669 Acc: 0.5817
Epoch 6/24
----------
train Loss: 0.8450 Acc: 0.7008
val Loss: 1.7546 Acc: 0.6209
Epoch 7/24
----------
train Loss: 0.9041 Acc: 0.6598
val Loss: 1.6658 Acc: 0.5948
Epoch 8/24
----------
train Loss: 0.8533 Acc: 0.6926
val Loss: 1.6936 Acc: 0.6209
Epoch 9/24
----------
train Loss: 0.8974 Acc: 0.6393
val Loss: 1.5477 Acc: 0.6013
Epoch 10/24
----------
train Loss: 0.9109 Acc: 0.6516
val Loss: 1.7137 Acc: 0.6078
Epoch 11/24
----------
train Loss: 0.8369 Acc: 0.6475
val Loss: 1.7870 Acc: 0.6078
Epoch 12/24
----------
train Loss: 0.8221 Acc: 0.6844
val Loss: 1.6008 Acc: 0.6078
Epoch 13/24
----------
train Loss: 0.7777 Acc: 0.6926
val Loss: 1.4073 Acc: 0.6340
Epoch 14/24
----------
train Loss: 0.8776 Acc: 0.6352
val Loss: 1.6360 Acc: 0.5948
Epoch 15/24
----------
train Loss: 0.8583 Acc: 0.6639
val Loss: 1.5304 Acc: 0.6405
Epoch 16/24
----------
train Loss: 0.7772 Acc: 0.6926
val Loss: 1.6465 Acc: 0.6275
Epoch 17/24
----------
train Loss: 0.8548 Acc: 0.6762
val Loss: 1.7349 Acc: 0.6340
Epoch 18/24
----------
train Loss: 0.8174 Acc: 0.7008
val Loss: 1.6733 Acc: 0.6209
Epoch 19/24
----------
train Loss: 0.7678 Acc: 0.7172
val Loss: 1.5187 Acc: 0.6209
Epoch 20/24
----------
train Loss: 0.7592 Acc: 0.7295
val Loss: 1.6524 Acc: 0.6209
Epoch 21/24
----------
train Loss: 0.7918 Acc: 0.6926
val Loss: 1.6008 Acc: 0.6013
Epoch 22/24
----------
train Loss: 0.8519 Acc: 0.6721
val Loss: 1.6299 Acc: 0.6013
Epoch 23/24
----------
train Loss: 0.8987 Acc: 0.6680
val Loss: 1.7279 Acc: 0.6013
Epoch 24/24
----------
train Loss: 0.8057 Acc: 0.6680
val Loss: 1.6766 Acc: 0.6536
Training complete in 1m 22s
Best val Acc: 0.653595
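With the fine-tuned model in hand, a minimal inference sketch (a hypothetical helper, reusing the device, dataloaders, and class_names defined above) shows how to turn logits into class-name predictions for one validation batch:

def predict_batch(model, loader):
    """Hypothetical helper: predict class names for a single batch."""
    model.eval()
    inputs, labels = next(iter(loader))
    with torch.no_grad():
        outputs = model(inputs.to(device))
        _, preds = torch.max(outputs, 1)
    return [class_names[p] for p in preds.cpu()]

# e.g. predict_batch(model, dataloaders['val'])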
#### ConvNet as fixed feature extractor ####
# Here, we need to freeze all the network except the final layer.
# We need to set requires_grad = False to freeze the parameters so that the gradients are not computed in backward().
model_conv = models.resnet18(pretrained=True)
for param in model_conv.parameters():
    param.requires_grad = False

# Parameters of newly constructed modules have requires_grad=True by default
num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(num_ftrs, 2)

model_conv = model_conv.to(device)
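A quick check (a sketch using standard parameter inspection, not part of the original notebook) confirms that only the freshly created fc layer is left trainable:

total = sum(p.numel() for p in model_conv.parameters())
trainable = sum(p.numel() for p in model_conv.parameters() if p.requires_grad)
print(f'trainable parameters: {trainable} / {total}')  # only fc: 512*2 weights + 2 biases = 1026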
criterion = nn.CrossEntropyLoss()
# Observe that only the parameters of the final layer are being optimized, as
# opposed to before.
optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)
model_conv = train_model(model_conv, criterion, optimizer_conv,
                         exp_lr_scheduler, num_epochs=25)
Epoch 0/24
----------
train Loss: 0.6815 Acc: 0.6393
val Loss: 0.2546 Acc: 0.8889
Epoch 1/24
----------
train Loss: 0.5108 Acc: 0.7541
val Loss: 0.1813 Acc: 0.9477
Epoch 2/24
----------
train Loss: 0.4495 Acc: 0.8156
val Loss: 0.4027 Acc: 0.8235
Epoch 3/24
----------
train Loss: 0.4571 Acc: 0.7951
val Loss: 0.2098 Acc: 0.9085
Epoch 4/24
----------
train Loss: 0.4004 Acc: 0.8320
val Loss: 0.2219 Acc: 0.9085
Epoch 5/24
----------
train Loss: 0.4751 Acc: 0.8115
val Loss: 0.1881 Acc: 0.9477
Epoch 6/24
----------
train Loss: 0.4426 Acc: 0.8279
val Loss: 0.2063 Acc: 0.9216
Epoch 7/24
----------
train Loss: 0.4091 Acc: 0.8238
val Loss: 0.2144 Acc: 0.9216
Epoch 8/24
----------
train Loss: 0.3693 Acc: 0.8320
val Loss: 0.1896 Acc: 0.9216
Epoch 9/24
----------
train Loss: 0.3681 Acc: 0.8279
val Loss: 0.1911 Acc: 0.9281
Epoch 10/24
----------
train Loss: 0.2974 Acc: 0.8689
val Loss: 0.1962 Acc: 0.9216
Epoch 11/24
----------
train Loss: 0.3128 Acc: 0.8525
val Loss: 0.1886 Acc: 0.9346
Epoch 12/24
----------
train Loss: 0.3634 Acc: 0.8361
val Loss: 0.1868 Acc: 0.9412
Epoch 13/24
----------
train Loss: 0.3299 Acc: 0.8484
val Loss: 0.1979 Acc: 0.9216
Epoch 14/24
----------
train Loss: 0.3036 Acc: 0.8893
val Loss: 0.2028 Acc: 0.9216
Epoch 15/24
----------
train Loss: 0.3533 Acc: 0.8361
val Loss: 0.1694 Acc: 0.9477
Epoch 16/24
----------
train Loss: 0.3248 Acc: 0.8525
val Loss: 0.1838 Acc: 0.9281
Epoch 17/24
----------
train Loss: 0.3293 Acc: 0.8648
val Loss: 0.1941 Acc: 0.9216
Epoch 18/24
----------
train Loss: 0.2718 Acc: 0.8484
val Loss: 0.1880 Acc: 0.9346
Epoch 19/24
----------
train Loss: 0.3811 Acc: 0.8074
val Loss: 0.2232 Acc: 0.9150
Epoch 20/24
----------
train Loss: 0.3523 Acc: 0.8402
val Loss: 0.1787 Acc: 0.9346
Epoch 21/24
----------
train Loss: 0.2430 Acc: 0.8893
val Loss: 0.2104 Acc: 0.9281
Epoch 22/24
----------
train Loss: 0.2858 Acc: 0.8730
val Loss: 0.1836 Acc: 0.9346
Epoch 23/24
----------
train Loss: 0.3786 Acc: 0.8443
val Loss: 0.1786 Acc: 0.9412
Epoch 24/24
----------
train Loss: 0.2935 Acc: 0.8770
val Loss: 0.1835 Acc: 0.9346
Training complete in 1m 5s
Best val Acc: 0.947712
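Since the frozen-backbone run reaches the better validation accuracy here, it is worth persisting its weights. A minimal sketch using the standard state_dict route (the file name is arbitrary):

# Sketch: save and later restore the best model returned by train_model.
torch.save(model_conv.state_dict(), 'hymenoptera_resnet18_feature_extractor.pt')
# restore later with: model_conv.load_state_dict(torch.load('hymenoptera_resnet18_feature_extractor.pt'))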