import torch
Model Training Loop
x = torch.randn(3, requires_grad=True)
print(x)
tensor([-1.2561, -3.5029, -1.4459], requires_grad=True)
y = x + 2
y.retain_grad()

z = y * y * 2
z.retain_grad()

z = z.mean()
print(z)
tensor(2.0794, grad_fn=<MeanBackward0>)
z.backward(retain_graph=True)
x, y, z
(tensor([-1.2561, -3.5029, -1.4459], requires_grad=True),
tensor([ 0.7439, -1.5029, 0.5541], grad_fn=<AddBackward0>),
tensor(2.0794, grad_fn=<MeanBackward0>))
x.grad, y.grad
(tensor([ 0.9919, -2.0039, 0.7388]), tensor([ 0.9919, -2.0039, 0.7388]))
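As a sanity check, the gradient can be worked out by hand: here \(z = \frac{1}{3}\sum_i 2(x_i + 2)^2\), so \(\frac{\partial z}{\partial x_i} = \frac{4}{3}(x_i + 2)\), and since \(y = x + 2\) only shifts by a constant, y.grad is identical. A minimal sketch of that check, reusing the tensors above:

# hedged sanity check: dz/dx_i = 4*(x_i + 2)/3 should equal x.grad
manual_grad = (4 * (x + 2) / 3).detach()
print(torch.allclose(x.grad, manual_grad))   # expected: True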
For multiple z values
import torch

x = torch.randn(3, requires_grad=True)
print(x)
tensor([-1.3521, -0.5026, -0.7557], requires_grad=True)
y = x + 2
y.retain_grad()

z = y * y * 2
z.retain_grad()
# z = z.mean()
print(z)
tensor([0.8396, 4.4845, 3.0964], grad_fn=<MulBackward0>)
v = torch.tensor([0.1, 1.0, 0.001], dtype=torch.float32)
z.backward(v, retain_graph=True)
x, y, z
(tensor([-1.3521, -0.5026, -0.7557], requires_grad=True),
tensor([0.6479, 1.4974, 1.2443], grad_fn=<AddBackward0>),
tensor([0.8396, 4.4845, 3.0964], grad_fn=<MulBackward0>))
x.grad, y.grad
(tensor([2.5917e-01, 5.9897e+00, 4.9771e-03]),
tensor([2.5917e-01, 5.9897e+00, 4.9771e-03]))
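Because z is a vector here, backward needs a vector argument v and computes the vector-Jacobian product \(J^T v\) rather than a full Jacobian. With \(z_i = 2 y_i^2\), each entry of y.grad should be \(v_i \cdot 4 y_i\); a minimal sketch of that check, reusing v from above:

# hedged check of the vector-Jacobian product: y.grad_i = v_i * 4 * y_i
print(torch.allclose(y.grad, v * 4 * y.detach()))   # expected: True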
Stopping gradient tracking
x.requires_grad_(False)
print(x)
tensor([-1.3521, -0.5026, -0.7557])
y = x.detach()
print(y)
tensor([-1.3521, -0.5026, -0.7557])
with torch.no_grad():
    print(x)
tensor([-1.3521, -0.5026, -0.7557])
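All three approaches (requires_grad_(False), detach(), torch.no_grad()) stop operations from being recorded in the graph. A quick way to see the effect is to check whether a result carries a grad_fn; a minimal sketch on a fresh tensor:

# hedged sketch: tracked vs untracked operations
a = torch.randn(3, requires_grad=True)
print((a + 2).grad_fn)            # AddBackward0 - operation is tracked
with torch.no_grad():
    print((a + 2).grad_fn)        # None - not tracked inside no_grad()
print(a.detach().requires_grad)   # False - detached copy has no history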
Zeroing Gradients
weights = torch.ones(4, requires_grad=True)

for epoch in range(5):
    model_output = (weights * 3).sum()
    model_output.backward()
    print(weights.grad)
tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
tensor([9., 9., 9., 9.])
tensor([12., 12., 12., 12.])
tensor([15., 15., 15., 15.])
weights = torch.ones(4, requires_grad=True)

for epoch in range(5):
    model_output = (weights * 3).sum()
    model_output.backward()
    print(weights.grad)
    weights.grad.zero_()
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
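In practice the per-tensor .grad.zero_() call is usually replaced by an optimizer's zero_grad(), which clears the gradients of every parameter it manages. A minimal sketch of the same loop with SGD (the learning rate is an arbitrary illustrative value):

# hedged sketch: letting an optimizer clear the accumulated gradients
weights = torch.ones(4, requires_grad=True)
optimizer = torch.optim.SGD([weights], lr=0.01)

for epoch in range(5):
    model_output = (weights * 3).sum()
    model_output.backward()
    print(weights.grad)     # stays tensor([3., 3., 3., 3.]) every epoch
    optimizer.step()        # update the weights
    optimizer.zero_grad()   # clear the gradients for the next iteration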
Backpropagation
weights = torch.ones(4, requires_grad=True)
pip list | grep nbdevAuto
nbdevAuto 0.0.130 /home/ben/BENEDICT_Only/Benedict_Projects/Benedict_ML/nbdevAuto
Note: you may need to restart the kernel to use updated packages.
from graphviz import Digraph
from nbdevAuto import functions

dot = functions.graph()

# Add nodes with different shapes and formatting
dot.node('x', 'x')
dot.node('a', 'a(x)', shape='circle')
dot.node('y', 'y')
dot.node('b', 'b(y)', shape='circle')
dot.node('z', 'z')

# Add edges with custom labels and formatting
dot.edge('x', 'a')
dot.edge('a', 'y')
dot.edge('y', 'b')
dot.edge('b', 'z')

# Render the graph
dot
Chain rule
\(\dfrac{\partial z}{\partial x} = \dfrac{\partial z}{\partial y} \cdot \dfrac{\partial y}{\partial x}\)
Computational Graph
dot = functions.graph()

# Add nodes with different shapes and formatting
dot.node('x', 'x')
dot.node('*', 'f=x*y', shape='circle')
dot.node('y', 'y')
dot.node('z', 'z')

# Add edges with custom labels and formatting
dot.edge('x', '*')
dot.edge('y', '*')
dot.edge('*', 'z')

# Render the graph
dot
\(\dfrac{\partial z}{\partial x} = \dfrac{\partial (xy)}{\partial x} = y\)
\(\dfrac{\partial z}{\partial y} = \dfrac{\partial (xy)}{\partial y} = x\)
- Forward pass: Compute loss
- Compute local gradients
- Backward pass: Compute dLoss/dWeights using the Chain Rule
dot = functions.graph()

# Add nodes with different shapes and formatting
dot.node('x', 'x')
dot.node('w', 'w')
dot.node('*', '*\ny1=w*x', shape='circle')
dot.node('y', 'y')
dot.node('-', '-\ns= y1-y')
dot.node('^2', '^2\n(y1-y)^2')
dot.node('Loss', 'Loss')

# Add edges with custom labels and formatting
dot.edge('x', '*')
dot.edge('w', '*')
dot.edge('*', '-', label='y1')
dot.edge('y', '-')
dot.edge('-', '^2', label='s')
dot.edge('^2', 'Loss')

# Render the graph
dot
\(Loss = (\hat{y} - y)^2\)
\(\dfrac{\partial loss}{\partial s} = \dfrac{\partial s^2}{\partial s} = 2s\)
\(\dfrac{\partial s}{\partial \hat{y}} = \dfrac{\partial (\hat{y} - y)}{\partial \hat{y}} = 1\)
\(\dfrac{\partial \hat{y}}{\partial w} = \dfrac{\partial (wx)}{\partial w} = x\)
\(\therefore \dfrac{\partial loss}{\partial w} = \dfrac{\partial loss}{\partial s} \cdot \dfrac{\partial s}{\partial \hat{y}} \cdot \dfrac{\partial \hat{y}}{\partial w} = 2s \cdot 1 \cdot x = 2 \cdot (-1) \cdot 1 = -2\)
x = 1
y = 2
w = 1

y1 = x * w
s = y1 - y
loss = s**2
print(f'x:{x} w:{w} y1:{y1} y:{y} s:{s} loss:{loss}')
x:1 w:1 y1:1 y:2 s:-1 loss:1
import torch

x = torch.tensor(1.0)
y = torch.tensor(2.0)
w = torch.tensor(1.0, requires_grad=True)

lr = 0.005

# forward pass and compute the loss
y1 = w * x
loss = (y1 - y)**2

print(y1)
print(loss)
tensor(1., grad_fn=<MulBackward0>)
tensor(1., grad_fn=<PowBackward0>)
# backward pass
loss.backward()
print(w.grad)
w.grad.zero_()
tensor(-2.)
tensor(0.)
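The lr defined above is not used in this cell; a minimal sketch of one manual gradient-descent step with it would look like this (the forward pass is re-run because the first graph was freed by backward()):

# hedged sketch: one manual update step using the lr defined above
y1 = w * x                # re-run the forward pass
loss = (y1 - y)**2
loss.backward()           # w.grad is -2 again
with torch.no_grad():
    w -= lr * w.grad      # w: 1.0 -> 1.0 - 0.005 * (-2) = 1.01
w.grad.zero_()
print(w)                  # tensor(1.0100, requires_grad=True)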
Gradient Descent
Numpy
Prediction: Manually
Gradients Computation: Manually
Loss Computation: Manually
Parameter updates: Manually
import numpy as np

x = np.array([1, 2, 3, 4], dtype=np.float32)
y = np.array([2, 4, 6, 8], dtype=np.float32)

w = 0.0

# model
def forward(x):
    return w * x

def loss(y, y_predicted):
    return ((y_predicted - y)**2).mean()

# gradient
# MSE = 1/N * (w*x - y)**2
# dJ/dw = 1/N * 2x * (w*x - y)
def gradient(x, y, y_predicted):
    return np.dot(2 * x, y_predicted - y).mean()
print(f'Prediction before training: f(5) = {forward(5):.3f}')
Prediction before training: f(5) = 0.000
learning_rate = 0.01
n_iters = 15

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(x)

    # loss
    l = loss(y, y_pred)

    # gradients
    dw = gradient(x, y, y_pred)

    # update weights
    w -= learning_rate * dw

    if epoch % 1 == 0:
        print(f'epoc:{epoch} w = {w:.3f} , y_pred={forward(5)}, y = {10}, loss = {l:.8f}, dw = {dw}')
print(f'Prediction after training: {forward(5):.3f}, y = {10}')
epoc:0 w = 1.200 , y_pred=6.0, y = 10, loss = 30.00000000, dw = -120.0
epoc:1 w = 1.680 , y_pred=8.399999809265136, y = 10, loss = 4.79999924, dw = -47.999996185302734
epoc:2 w = 1.872 , y_pred=9.35999994277954, y = 10, loss = 0.76800019, dw = -19.200002670288086
epoc:3 w = 1.949 , y_pred=9.743999934196472, y = 10, loss = 0.12288000, dw = -7.679999828338623
epoc:4 w = 1.980 , y_pred=9.897600066661834, y = 10, loss = 0.01966083, dw = -3.072002649307251
epoc:5 w = 1.992 , y_pred=9.95904014110565, y = 10, loss = 0.00314574, dw = -1.2288014888763428
epoc:6 w = 1.997 , y_pred=9.983615934848784, y = 10, loss = 0.00050331, dw = -0.4915158748626709
epoc:7 w = 1.999 , y_pred=9.993446409702301, y = 10, loss = 0.00008053, dw = -0.1966094970703125
epoc:8 w = 1.999 , y_pred=9.997378492355345, y = 10, loss = 0.00001288, dw = -0.07864165306091309
epoc:9 w = 2.000 , y_pred=9.998951268196105, y = 10, loss = 0.00000206, dw = -0.03145551681518555
epoc:10 w = 2.000 , y_pred=9.999580299854276, y = 10, loss = 0.00000033, dw = -0.012580633163452148
epoc:11 w = 2.000 , y_pred=9.999832069873808, y = 10, loss = 0.00000005, dw = -0.005035400390625
epoc:12 w = 2.000 , y_pred=9.999932992458342, y = 10, loss = 0.00000001, dw = -0.002018451690673828
epoc:13 w = 2.000 , y_pred=9.999973046779632, y = 10, loss = 0.00000000, dw = -0.00080108642578125
epoc:14 w = 2.000 , y_pred=9.999989175796507, y = 10, loss = 0.00000000, dw = -0.00032258033752441406
Prediction after training: 10.000, y = 10
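Note that the comment dJ/dw = 1/N * 2x * (w*x - y) describes an averaged gradient, but np.dot already returns the summed product, so the trailing .mean() has no effect and the gradient used above is the sum (dw = -120 at epoch 0 rather than -30). That larger effective step is why this version reaches w ≈ 2 in 15 iterations, while the autograd version below, which averages over the samples, needs about 50. A hedged sketch of a gradient that actually averages, as an alternative:

# hedged alternative: average the per-sample gradients (matches the 1/N comment)
def gradient_mean(x, y, y_predicted):
    return (2 * x * (y_predicted - y)).mean()

print(gradient_mean(x, y, 0.0 * x))   # -30.0: the averaged gradient at w = 0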
Torch
Prediction: Manually
Gradients Computation: Autograd
Loss Computation: Manually
Parameter updates: Manually
x = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)

w = torch.tensor([0.0], dtype=torch.float32, requires_grad=True)

# model
def forward(x):
    return w * x

def loss(y, y_predicted):
    return ((y_predicted - y)**2).mean()

# gradient
# MSE = 1/N * (w*x - y)**2
# dJ/dw = 1/N * 2x * (w*x - y)
print(f'Prediction before training: f(5) = {forward(5)}')
Prediction before training: f(5) = tensor([0.], grad_fn=<MulBackward0>)
learning_rate = 0.01
n_iters = 50

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(x)

    # loss
    l = loss(y, y_pred)

    # gradients
    l.backward()

    # update weights
    with torch.no_grad():
        w -= learning_rate * w.grad

    if epoch % 2 == 0:
        print(f'epoc:{epoch} w = {w.item():.3f}, y_pred={forward(5).item():.3f}, y = {10}, loss = {l.item():.7f}, dw = {w.grad.item():.7f}')

    w.grad.zero_()
print(f'Prediction after training: {forward(5)}, y = {10}')
epoc:0 w = 0.300, y_pred=1.500, y = 10, loss = 30.0000000, dw = -30.0000000
epoc:2 w = 0.772, y_pred=3.859, y = 10, loss = 15.6601877, dw = -21.6749992
epoc:4 w = 1.113, y_pred=5.563, y = 10, loss = 8.1747169, dw = -15.6601877
epoc:6 w = 1.359, y_pred=6.794, y = 10, loss = 4.2672529, dw = -11.3144855
epoc:8 w = 1.537, y_pred=7.684, y = 10, loss = 2.2275321, dw = -8.1747150
epoc:10 w = 1.665, y_pred=8.327, y = 10, loss = 1.1627856, dw = -5.9062314
epoc:12 w = 1.758, y_pred=8.791, y = 10, loss = 0.6069812, dw = -4.2672515
epoc:14 w = 1.825, y_pred=9.126, y = 10, loss = 0.3168478, dw = -3.0830884
epoc:16 w = 1.874, y_pred=9.369, y = 10, loss = 0.1653965, dw = -2.2275314
epoc:18 w = 1.909, y_pred=9.544, y = 10, loss = 0.0863381, dw = -1.6093917
epoc:20 w = 1.934, y_pred=9.671, y = 10, loss = 0.0450689, dw = -1.1627841
epoc:22 w = 1.952, y_pred=9.762, y = 10, loss = 0.0235263, dw = -0.8401127
epoc:24 w = 1.966, y_pred=9.828, y = 10, loss = 0.0122808, dw = -0.6069803
epoc:26 w = 1.975, y_pred=9.876, y = 10, loss = 0.0064107, dw = -0.4385428
epoc:28 w = 1.982, y_pred=9.910, y = 10, loss = 0.0033464, dw = -0.3168479
epoc:30 w = 1.987, y_pred=9.935, y = 10, loss = 0.0017469, dw = -0.2289228
epoc:32 w = 1.991, y_pred=9.953, y = 10, loss = 0.0009119, dw = -0.1653977
epoc:34 w = 1.993, y_pred=9.966, y = 10, loss = 0.0004760, dw = -0.1194997
epoc:36 w = 1.995, y_pred=9.976, y = 10, loss = 0.0002485, dw = -0.0863385
epoc:38 w = 1.996, y_pred=9.982, y = 10, loss = 0.0001297, dw = -0.0623794
epoc:40 w = 1.997, y_pred=9.987, y = 10, loss = 0.0000677, dw = -0.0450683
epoc:42 w = 1.998, y_pred=9.991, y = 10, loss = 0.0000353, dw = -0.0325624
epoc:44 w = 1.999, y_pred=9.993, y = 10, loss = 0.0000184, dw = -0.0235248
epoc:46 w = 1.999, y_pred=9.995, y = 10, loss = 0.0000096, dw = -0.0169984
epoc:48 w = 1.999, y_pred=9.997, y = 10, loss = 0.0000050, dw = -0.0122809
Prediction after training: tensor([9.9970], grad_fn=<MulBackward0>), y = 10
Pytorch Loss and Pytorch Optimizer
Prediction: Manually
Gradients Computation: Autograd
Loss Computation: Pytorch Loss
Parameter updates: Pytorch Optimizer
- Design Model = (input, output, size, forward pass)
- Construct loss and optimizer
- Training loop
- forward pass: compute prediction
- backward pass: gradients
- update weights
import torch
import torch.nn as nn

x = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)

w = torch.tensor([0.0], dtype=torch.float32, requires_grad=True)

# model
def forward(x):
    return w * x
print(f'Prediction before training: f(5) = {forward(5)}')
Prediction before training: f(5) = tensor([0.], grad_fn=<MulBackward0>)
learning_rate = 0.01
n_iters = 50

loss = nn.MSELoss()
optimizer = torch.optim.SGD([w], lr=learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(x)

    # loss
    l = loss(y, y_pred)

    # gradients
    l.backward()

    # update weights
    optimizer.step()

    if epoch % 2 == 0:
        print(f'epoc:{epoch} w = {w.item():.3f}, y_pred={forward(5).item():.3f}, y = {10}, loss = {l.item():.7f}, dw = {w.grad.item():.7f}')

    optimizer.zero_grad()
print(f'Prediction after training: {forward(5)}, y = {10}')
epoc:0 w = 0.300, y_pred=1.500, y = 10, loss = 30.0000000, dw = -30.0000000
epoc:2 w = 0.772, y_pred=3.859, y = 10, loss = 15.6601877, dw = -21.6749992
epoc:4 w = 1.113, y_pred=5.563, y = 10, loss = 8.1747169, dw = -15.6601877
epoc:6 w = 1.359, y_pred=6.794, y = 10, loss = 4.2672529, dw = -11.3144855
epoc:8 w = 1.537, y_pred=7.684, y = 10, loss = 2.2275321, dw = -8.1747150
epoc:10 w = 1.665, y_pred=8.327, y = 10, loss = 1.1627856, dw = -5.9062314
epoc:12 w = 1.758, y_pred=8.791, y = 10, loss = 0.6069812, dw = -4.2672515
epoc:14 w = 1.825, y_pred=9.126, y = 10, loss = 0.3168478, dw = -3.0830884
epoc:16 w = 1.874, y_pred=9.369, y = 10, loss = 0.1653965, dw = -2.2275314
epoc:18 w = 1.909, y_pred=9.544, y = 10, loss = 0.0863381, dw = -1.6093917
epoc:20 w = 1.934, y_pred=9.671, y = 10, loss = 0.0450689, dw = -1.1627841
epoc:22 w = 1.952, y_pred=9.762, y = 10, loss = 0.0235263, dw = -0.8401127
epoc:24 w = 1.966, y_pred=9.828, y = 10, loss = 0.0122808, dw = -0.6069803
epoc:26 w = 1.975, y_pred=9.876, y = 10, loss = 0.0064107, dw = -0.4385428
epoc:28 w = 1.982, y_pred=9.910, y = 10, loss = 0.0033464, dw = -0.3168479
epoc:30 w = 1.987, y_pred=9.935, y = 10, loss = 0.0017469, dw = -0.2289228
epoc:32 w = 1.991, y_pred=9.953, y = 10, loss = 0.0009119, dw = -0.1653977
epoc:34 w = 1.993, y_pred=9.966, y = 10, loss = 0.0004760, dw = -0.1194997
epoc:36 w = 1.995, y_pred=9.976, y = 10, loss = 0.0002485, dw = -0.0863385
epoc:38 w = 1.996, y_pred=9.982, y = 10, loss = 0.0001297, dw = -0.0623794
epoc:40 w = 1.997, y_pred=9.987, y = 10, loss = 0.0000677, dw = -0.0450683
epoc:42 w = 1.998, y_pred=9.991, y = 10, loss = 0.0000353, dw = -0.0325624
epoc:44 w = 1.999, y_pred=9.993, y = 10, loss = 0.0000184, dw = -0.0235248
epoc:46 w = 1.999, y_pred=9.995, y = 10, loss = 0.0000096, dw = -0.0169984
epoc:48 w = 1.999, y_pred=9.997, y = 10, loss = 0.0000050, dw = -0.0122809
Prediction after training: tensor([9.9970], grad_fn=<MulBackward0>), y = 10
Pytorch Automate
Prediction: Pytorch Model
Gradients Computation: Autograd
Loss Computation: Pytorch Loss
Parameter updates: Pytorch Optimizer
import torch
import torch.nn as nn
x = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)

x_test = torch.tensor([5], dtype=torch.float32)

n_samples, n_features = x.shape
n_samples, n_features
(4, 1)
model = nn.Linear(in_features=n_features, out_features=1)
model
Linear(in_features=1, out_features=1, bias=True)
[w, b] = model.parameters()
w[0].item()
-0.8376840353012085
model.state_dict()['weight']
tensor([[-0.8377]])
print(f'Prediction before training: f(5) = {model(x_test)}')
Prediction before training: f(5) = tensor([-3.8722], grad_fn=<ViewBackward0>)
learning_rate = 0.1
n_iters = 500

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = model(x)

    # loss
    l = loss(y, y_pred)

    # gradients
    l.backward()

    optimizer.step()

    if epoch % 20 == 0:
        [w, b] = model.parameters()
        print(f'epoc:{epoch} w = {w[0].item():.3f} {b[0].item():.3f}, y_pred={model(x_test).item():.3f}, y = {10}, loss = {l.item():.7f}, dw = {w.grad.item():.7f}')

    optimizer.zero_grad()
print(f'Prediction after training: {model(x_test)}, y = {10}')
epoc:0 w = 3.261 1.672, y_pred=17.975, y = 10, loss = 56.0067291, dw = -40.9841690
epoc:20 w = 1.794 0.607, y_pred=9.578, y = 10, loss = 0.0653165, dw = -0.0772833
epoc:40 w = 1.888 0.330, y_pred=9.769, y = 10, loss = 0.0193617, dw = -0.0346756
epoc:60 w = 1.939 0.180, y_pred=9.874, y = 10, loss = 0.0057399, dw = -0.0188781
epoc:80 w = 1.967 0.098, y_pred=9.931, y = 10, loss = 0.0017016, dw = -0.0102807
epoc:100 w = 1.982 0.053, y_pred=9.963, y = 10, loss = 0.0005045, dw = -0.0055964
epoc:120 w = 1.990 0.029, y_pred=9.980, y = 10, loss = 0.0001496, dw = -0.0030484
epoc:140 w = 1.995 0.016, y_pred=9.989, y = 10, loss = 0.0000443, dw = -0.0016569
epoc:160 w = 1.997 0.009, y_pred=9.994, y = 10, loss = 0.0000131, dw = -0.0009021
epoc:180 w = 1.998 0.005, y_pred=9.997, y = 10, loss = 0.0000039, dw = -0.0004910
epoc:200 w = 1.999 0.003, y_pred=9.998, y = 10, loss = 0.0000012, dw = -0.0002694
epoc:220 w = 2.000 0.001, y_pred=9.999, y = 10, loss = 0.0000003, dw = -0.0001463
epoc:240 w = 2.000 0.001, y_pred=9.999, y = 10, loss = 0.0000001, dw = -0.0000764
epoc:260 w = 2.000 0.000, y_pred=10.000, y = 10, loss = 0.0000000, dw = -0.0000430
epoc:280 w = 2.000 0.000, y_pred=10.000, y = 10, loss = 0.0000000, dw = -0.0000240
epoc:300 w = 2.000 0.000, y_pred=10.000, y = 10, loss = 0.0000000, dw = -0.0000120
epoc:320 w = 2.000 0.000, y_pred=10.000, y = 10, loss = 0.0000000, dw = -0.0000055
epoc:340 w = 2.000 0.000, y_pred=10.000, y = 10, loss = 0.0000000, dw = -0.0000049
epoc:360 w = 2.000 0.000, y_pred=10.000, y = 10, loss = 0.0000000, dw = -0.0000038
epoc:380 w = 2.000 0.000, y_pred=10.000, y = 10, loss = 0.0000000, dw = -0.0000001
epoc:400 w = 2.000 0.000, y_pred=10.000, y = 10, loss = 0.0000000, dw = -0.0000017
epoc:420 w = 2.000 0.000, y_pred=10.000, y = 10, loss = 0.0000000, dw = -0.0000015
epoc:440 w = 2.000 0.000, y_pred=10.000, y = 10, loss = 0.0000000, dw = 0.0000001
epoc:460 w = 2.000 0.000, y_pred=10.000, y = 10, loss = 0.0000000, dw = -0.0000001
epoc:480 w = 2.000 0.000, y_pred=10.000, y = 10, loss = 0.0000000, dw = 0.0000007
Prediction after training: tensor([10.], grad_fn=<ViewBackward0>), y = 10
Pytorch Model
Prediction: Pytorch Model
Gradients Computation: Autograd
Loss Computation: Pytorch Loss
Parameter updates: Pytorch Optimizer
- Design Model = (input, output, size, forward pass)
- Construct loss and optimizer
- Training loop
- forward pass: compute prediction
- backward pass: gradients
- update weights
import torch
import torch.nn as nn
x = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)

x_test = torch.tensor([5], dtype=torch.float32)

n_samples, n_features = x.shape
n_samples, n_features
(4, 1)
model = nn.Linear(in_features=n_features, out_features=1)
model
Linear(in_features=1, out_features=1, bias=True)
class LinearRegression(nn.Module):
    def __init__(self, in_features, out_features):
        super(LinearRegression, self).__init__()
        self.lin = nn.Linear(in_features, out_features)

    def forward(self, x):
        return self.lin(x)

model = LinearRegression(in_features=n_features, out_features=1)
model
LinearRegression(
(lin): Linear(in_features=1, out_features=1, bias=True)
)
[w, b] = model.parameters()
w[0].item()
-0.08443880081176758
model.state_dict()['lin.weight']
tensor([[-0.0844]])
print(f'Prediction before training: f(5) = {model(x_test)}')
Prediction before training: f(5) = tensor([-0.1386], grad_fn=<ViewBackward0>)
learning_rate = 0.1
n_iters = 500

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = model(x)

    # loss
    l = loss(y, y_pred)

    # gradients
    l.backward()

    optimizer.step()

    if epoch % 20 == 0:
        [w, b] = model.parameters()
        print(f'epoc:{epoch} w = {w[0].item():.3f} {b[0].item():.3f}, y_pred={model(x_test).item():.3f}, y = {10}, loss = {l.item():.7f}, dw = {w.grad.item():.7f}')

    optimizer.zero_grad()
print(f'Prediction after training: {model(x_test)}, y = {10}')
epoc:0 w = 2.900 1.269, y_pred=15.771, y = 10, loss = 29.7110996, dw = -29.8484650
epoc:20 w = 1.841 0.470, y_pred=9.673, y = 10, loss = 0.0391924, dw = -0.0592351
epoc:40 w = 1.913 0.256, y_pred=9.821, y = 10, loss = 0.0116179, dw = -0.0268617
epoc:60 w = 1.953 0.139, y_pred=9.902, y = 10, loss = 0.0034442, dw = -0.0146208
epoc:80 w = 1.974 0.076, y_pred=9.947, y = 10, loss = 0.0010210, dw = -0.0079615
epoc:100 w = 1.986 0.041, y_pred=9.971, y = 10, loss = 0.0003027, dw = -0.0043370
epoc:120 w = 1.992 0.022, y_pred=9.984, y = 10, loss = 0.0000897, dw = -0.0023587
epoc:140 w = 1.996 0.012, y_pred=9.991, y = 10, loss = 0.0000266, dw = -0.0012866
epoc:160 w = 1.998 0.007, y_pred=9.995, y = 10, loss = 0.0000079, dw = -0.0007011
epoc:180 w = 1.999 0.004, y_pred=9.997, y = 10, loss = 0.0000023, dw = -0.0003816
epoc:200 w = 1.999 0.002, y_pred=9.999, y = 10, loss = 0.0000007, dw = -0.0002074
epoc:220 w = 2.000 0.001, y_pred=9.999, y = 10, loss = 0.0000002, dw = -0.0001137
epoc:240 w = 2.000 0.001, y_pred=10.000, y = 10, loss = 0.0000001, dw = -0.0000589
epoc:260 w = 2.000 0.000, y_pred=10.000, y = 10, loss = 0.0000000, dw = -0.0000327
epoc:280 w = 2.000 0.000, y_pred=10.000, y = 10, loss = 0.0000000, dw = -0.0000174
epoc:300 w = 2.000 0.000, y_pred=10.000, y = 10, loss = 0.0000000, dw = -0.0000103
epoc:320 w = 2.000 0.000, y_pred=10.000, y = 10, loss = 0.0000000, dw = -0.0000072
epoc:340 w = 2.000 0.000, y_pred=10.000, y = 10, loss = 0.0000000, dw = -0.0000017
epoc:360 w = 2.000 0.000, y_pred=10.000, y = 10, loss = 0.0000000, dw = -0.0000023
epoc:380 w = 2.000 0.000, y_pred=10.000, y = 10, loss = 0.0000000, dw = -0.0000023
epoc:400 w = 2.000 0.000, y_pred=10.000, y = 10, loss = 0.0000000, dw = -0.0000010
epoc:420 w = 2.000 0.000, y_pred=10.000, y = 10, loss = 0.0000000, dw = -0.0000023
epoc:440 w = 2.000 0.000, y_pred=10.000, y = 10, loss = 0.0000000, dw = 0.0000000
epoc:460 w = 2.000 0.000, y_pred=10.000, y = 10, loss = 0.0000000, dw = -0.0000007
epoc:480 w = 2.000 0.000, y_pred=10.000, y = 10, loss = 0.0000000, dw = -0.0000015
Prediction after training: tensor([10.0000], grad_fn=<ViewBackward0>), y = 10
Linear Regression
import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt
- Prepare data
- model
- loss and optimizer
- training loop
x_numpy, y_numpy = datasets.make_regression(n_samples=100, n_features=1, noise=20, random_state=1)

x = torch.from_numpy(x_numpy.astype(np.float32))
y = torch.from_numpy(y_numpy.astype(np.float32))
x[:5], y[:5]
(tensor([[-0.6118],
[-0.2494],
[ 0.4885],
[ 0.7620],
[ 1.5198]]),
tensor([-55.5386, -10.6620, 22.7574, 101.0961, 144.3376]))
x_test = x[4]
y_test = y[4]
x_test, y_test
(tensor([1.5198]), tensor(144.3376))
y = y.view(y.shape[0], 1)
x[:5], y[:5]
(tensor([[-0.6118],
[-0.2494],
[ 0.4885],
[ 0.7620],
[ 1.5198]]),
tensor([[-55.5386],
[-10.6620],
[ 22.7574],
[101.0961],
[144.3376]]))
n_samples, n_features = x.shape
n_samples, n_features
(100, 1)
# 1. model
input_size = n_features
output_size = 1
model = nn.Linear(input_size, output_size)
model
Linear(in_features=1, out_features=1, bias=True)
[a, b] = model.parameters()
a, b
(Parameter containing:
tensor([[-0.3357]], requires_grad=True),
Parameter containing:
tensor([0.3514], requires_grad=True))
# 2. loss and optimizer
learning_rate = 0.01
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
optimizer
SGD (
Parameter Group 0
dampening: 0
differentiable: False
foreach: None
lr: 0.01
maximize: False
momentum: 0
nesterov: False
weight_decay: 0
)
# 3. training loop
num_epochs = 1000

for epoch in range(num_epochs):
    # forward pass and loss
    y_predicted = model(x)
    loss = criterion(y_predicted, y)

    # backward pass
    loss.backward()

    # update
    optimizer.step()

    if (epoch + 1) % 50 == 0:
        [w, b] = model.parameters()
        print(f'[epoc:{epoch}] (y = {w[0].item():.3f}x + {b[0].item():.3f}) y_pred:{model(x_test).item():.3f}, y:{y_test}, loss :{loss.item():.7f}, dw:{w.grad.item():.7f} db:{b.grad.item():.7f}')

    optimizer.zero_grad()
[epoc:49] (y = 45.054x + 4.782) y_pred:73.255, y:144.33755493164062, loss :1468.2329102, dw:-59.7857857 db:-3.2169607
[epoc:99] (y = 65.507x + 5.259) y_pred:104.818, y:144.33755493164062, loss :565.6588745, dw:-27.0066071 db:0.3266662
[epoc:149] (y = 74.762x + 4.920) y_pred:118.545, y:144.33755493164062, loss :380.9410095, dw:-12.2441845 db:0.7970295
[epoc:199] (y = 78.964x + 4.564) y_pred:124.575, y:144.33755493164062, loss :342.6767883, dw:-5.5675182 db:0.5981486
[epoc:249] (y = 80.877x + 4.328) y_pred:127.246, y:144.33755493164062, loss :334.6894531, dw:-2.5375218 db:0.3580039
[epoc:299] (y = 81.749x + 4.195) y_pred:128.438, y:144.33755493164062, loss :333.0141296, dw:-1.1586771 db:0.1943260
[epoc:349] (y = 82.148x + 4.124) y_pred:128.973, y:144.33755493164062, loss :332.6617126, dw:-0.5298302 db:0.0999891
[epoc:399] (y = 82.330x + 4.088) y_pred:129.215, y:144.33755493164062, loss :332.5874329, dw:-0.2425606 db:0.0497854
[epoc:449] (y = 82.414x + 4.070) y_pred:129.324, y:144.33755493164062, loss :332.5717468, dw:-0.1111394 db:0.0242567
[epoc:499] (y = 82.452x + 4.062) y_pred:129.374, y:144.33755493164062, loss :332.5684509, dw:-0.0509445 db:0.0116392
[epoc:549] (y = 82.470x + 4.058) y_pred:129.396, y:144.33755493164062, loss :332.5677490, dw:-0.0233506 db:0.0055273
[epoc:599] (y = 82.478x + 4.056) y_pred:129.407, y:144.33755493164062, loss :332.5675659, dw:-0.0107345 db:0.0026025
[epoc:649] (y = 82.481x + 4.055) y_pred:129.411, y:144.33755493164062, loss :332.5675659, dw:-0.0049275 db:0.0012180
[epoc:699] (y = 82.483x + 4.054) y_pred:129.414, y:144.33755493164062, loss :332.5675659, dw:-0.0022745 db:0.0005686
[epoc:749] (y = 82.484x + 4.054) y_pred:129.415, y:144.33755493164062, loss :332.5675659, dw:-0.0010500 db:0.0002624
[epoc:799] (y = 82.484x + 4.054) y_pred:129.415, y:144.33755493164062, loss :332.5675659, dw:-0.0004619 db:0.0001247
[epoc:849] (y = 82.484x + 4.054) y_pred:129.415, y:144.33755493164062, loss :332.5675659, dw:-0.0003704 db:0.0000531
[epoc:899] (y = 82.484x + 4.054) y_pred:129.415, y:144.33755493164062, loss :332.5675659, dw:-0.0003721 db:0.0000235
[epoc:949] (y = 82.484x + 4.054) y_pred:129.415, y:144.33755493164062, loss :332.5675659, dw:-0.0003721 db:0.0000235
[epoc:999] (y = 82.484x + 4.054) y_pred:129.415, y:144.33755493164062, loss :332.5675659, dw:-0.0003721 db:0.0000235
predicted = model(x).detach().numpy()

plt.plot(x_numpy, y_numpy, 'ro')
plt.plot(x_numpy, predicted, 'b')
plt.show()
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.io import curdoc

# apply theme to current document
curdoc().theme = "dark_minimal"

# create a new plot with a title and axis labels
p = figure(title="Real data vs Model",
           x_axis_label='x',
           y_axis_label='y',
           sizing_mode="stretch_width",
           max_width=1000,
           height=500,)

# add a line renderer with legend and line thickness to the plot
p.circle(x_numpy.flatten(), y_numpy.flatten(), legend_label="Original", line_width=2, color="red", radius=0.02)
p.line(x_numpy.flatten(), predicted.flatten(), legend_label="Predicted", line_width=2)

p.legend.location = "top_left"
p.legend.click_policy = "mute"

# show the results
show(p)
Logistic Regression
import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
bc = datasets.load_breast_cancer()
bc.keys()
dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])
x, y = bc.data, bc.target
n_samples, n_features = x.shape
n_samples, n_features
(569, 30)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1234)
x_train.shape, x_test.shape, y_train.shape, y_test.shape
((455, 30), (114, 30), (455,), (114,))
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)
x_train = torch.from_numpy(x_train.astype(np.float32))
x_test = torch.from_numpy(x_test.astype(np.float32))
y_train = torch.from_numpy(y_train.astype(np.float32))
y_test = torch.from_numpy(y_test.astype(np.float32))

y_train = y_train.view(y_train.shape[0], 1)
# 1. model
class LogisticRegression(nn.Module):
    def __init__(self, n_input):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(n_input, 1)

    def forward(self, x):
        y_pred = torch.sigmoid(self.linear(x))
        return y_pred

model = LogisticRegression(n_features)
# 2. loss and optimizer
learning_rate = 0.01
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# 3. train loop
num_epochs = 1000

for epoch in range(num_epochs):
    # forward pass and loss
    y_predicted = model(x_train)
    loss = criterion(y_predicted, y_train)

    # backward pass
    loss.backward()

    # update
    optimizer.step()

    if (epoch + 1) % 50 == 0:
        with torch.no_grad():
            [w, b] = model.parameters()
            y_predicted = model(x_test)
            y_predicted_cls = y_predicted.round().flatten()
            acc = (y_predicted_cls == y_test).float().mean() * 100
            error = (100 - acc)
            print(f'[epoc:{epoch + 1}] (y = {w.mean().item():.3f}x + {b.mean().item():.3f}) \
loss:{loss.item():.5f}, accuracy: {acc:.2f}%, error: {error:.2f}%, \
dw:{w.grad.mean().item():.5f} db:{b.grad.mean().item():.5f}')

    optimizer.zero_grad()
[epoc:50] (y = -0.171x + 0.301) loss:0.11539, accuracy: 93.86%, error: 6.14%, dw:0.01317 db:-0.03279
[epoc:100] (y = -0.222x + 0.488) loss:0.07955, accuracy: 94.74%, error: 5.26%, dw:0.00604 db:-0.01464
[epoc:150] (y = -0.257x + 0.602) loss:0.06487, accuracy: 95.61%, error: 4.39%, dw:0.00404 db:-0.00820
[epoc:200] (y = -0.286x + 0.681) loss:0.05622, accuracy: 96.49%, error: 3.51%, dw:0.00301 db:-0.00513
[epoc:250] (y = -0.311x + 0.739) loss:0.05038, accuracy: 95.61%, error: 4.39%, dw:0.00238 db:-0.00343
[epoc:300] (y = -0.334x + 0.783) loss:0.04613, accuracy: 95.61%, error: 4.39%, dw:0.00197 db:-0.00240
[epoc:350] (y = -0.355x + 0.817) loss:0.04288, accuracy: 95.61%, error: 4.39%, dw:0.00167 db:-0.00173
[epoc:400] (y = -0.375x + 0.844) loss:0.04031, accuracy: 95.61%, error: 4.39%, dw:0.00144 db:-0.00128
[epoc:450] (y = -0.394x + 0.866) loss:0.03821, accuracy: 95.61%, error: 4.39%, dw:0.00126 db:-0.00096
[epoc:500] (y = -0.412x + 0.883) loss:0.03648, accuracy: 95.61%, error: 4.39%, dw:0.00112 db:-0.00073
[epoc:550] (y = -0.429x + 0.898) loss:0.03501, accuracy: 95.61%, error: 4.39%, dw:0.00101 db:-0.00056
[epoc:600] (y = -0.445x + 0.909) loss:0.03374, accuracy: 95.61%, error: 4.39%, dw:0.00091 db:-0.00042
[epoc:650] (y = -0.460x + 0.918) loss:0.03264, accuracy: 95.61%, error: 4.39%, dw:0.00083 db:-0.00032
[epoc:700] (y = -0.475x + 0.925) loss:0.03167, accuracy: 95.61%, error: 4.39%, dw:0.00076 db:-0.00024
[epoc:750] (y = -0.489x + 0.931) loss:0.03080, accuracy: 95.61%, error: 4.39%, dw:0.00070 db:-0.00017
[epoc:800] (y = -0.503x + 0.935) loss:0.03003, accuracy: 95.61%, error: 4.39%, dw:0.00065 db:-0.00012
[epoc:850] (y = -0.516x + 0.938) loss:0.02932, accuracy: 95.61%, error: 4.39%, dw:0.00060 db:-0.00008
[epoc:900] (y = -0.529x + 0.940) loss:0.02868, accuracy: 95.61%, error: 4.39%, dw:0.00056 db:-0.00004
[epoc:950] (y = -0.542x + 0.941) loss:0.02809, accuracy: 95.61%, error: 4.39%, dw:0.00052 db:-0.00002
[epoc:1000] (y = -0.554x + 0.942) loss:0.02754, accuracy: 95.61%, error: 4.39%, dw:0.00049 db:0.00001
with torch.no_grad():
    [w, b] = model.parameters()
    y_predicted = model(x_test)
    y_predicted_cls = y_predicted.round().flatten()
    acc = (y_predicted_cls == y_test).float().mean() * 100
    error = (100 - acc)
    print(f'[epoc:{epoch + 1}] (y = {w.mean().item():.3f}x + {b.mean().item():.3f}) \
loss:{loss.item():.5f}, accuracy: {acc:.2f}%, error: {error:.2f}%')
[epoc:1000] (y = -0.554x + 0.942) loss:0.02754, accuracy: 95.61%, error: 4.39%
Softmax and Cross-Entropy
Softmax
\(S(y_i) = \dfrac{e^{y_i}}{\sum_j e^{y_j}}\)
\(Linear = [2.0, 1.0, 0.1]\)
\(Softmax = [0.7, 0.2, 0.1]\)
The outputs sum to 1, so they can be read as probabilities.
def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), axis=0)

x = np.array([2.0, 1.0, 0.1])
outputs = softmax(x)
outputs
array([0.65900114, 0.24243297, 0.09856589])
x = torch.from_numpy(x)
x
tensor([2.0000, 1.0000, 0.1000], dtype=torch.float64)
outputs = torch.softmax(x, dim=0)
outputs
tensor([0.6590, 0.2424, 0.0986], dtype=torch.float64)
Cross Entropy
\(D(\hat{Y}, Y) = -\dfrac{1}{N} \cdot \displaystyle\sum_{i=1}^{N} Y_i \cdot \log{\hat{Y_i}}\)
\(Y = [1, 0, 0]\)
\(\hat{Y} = [0.7, 0.2, 0.1] \rightarrow D(\hat{Y}, Y) = 0.35\)
\(Y = [1, 0, 0]\)
\(\hat{Y} = [0.1, 0.3, 0.6] \rightarrow D(\hat{Y}, Y) = 2.30\)
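Since Y is one-hot, only the term for the true class survives, so the value is just the negative log of the probability assigned to class 0:
\(D([0.7, 0.2, 0.1], Y) = -\log(0.7) \approx 0.357\)
\(D([0.1, 0.3, 0.6], Y) = -\log(0.1) \approx 2.303\)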
def cross_entropy(actual, predicted):
    loss = -np.sum(actual * np.log(predicted))
    return loss

Y_actual = np.array([1, 0, 0])

Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])

l1 = cross_entropy(Y_actual, Y_pred_good)
l2 = cross_entropy(Y_actual, Y_pred_bad)
print(f'good pred:{l1:4f}, bad pred:{l2:.4f}')
good pred:0.356675, bad pred:2.3026
nn.CrossEntropyLoss()
applies nn.LogSoftmax + nn.NLLLoss (negative log likelihood loss)
Y has class labels, not one-hot vectors
Y_pred has raw scores (logits); do not apply softmax first
loss = nn.CrossEntropyLoss()
Y = torch.tensor([0])

Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)
print(f'good pred:{l1:4f}, bad pred:{l2:.4f}')
good pred:0.417030, bad pred:1.8406
_, predictions1 = torch.max(Y_pred_good, 1)
_, predictions2 = torch.max(Y_pred_bad, 1)
print(f'good pred:{predictions1}, bad pred:{predictions2}')
good pred:tensor([0]), bad pred:tensor([1])
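nn.CrossEntropyLoss handles a batch of several samples the same way: each row of the prediction tensor holds the raw scores for one sample and Y holds one class index per sample. A minimal sketch (the numbers are illustrative, reusing the loss object from above):

# hedged sketch: a batch of 3 samples over 3 classes
Y_batch = torch.tensor([2, 0, 1])                  # one class index per sample
Y_pred_batch = torch.tensor([[0.1, 1.0, 2.1],      # raw scores (logits)
                             [2.0, 1.0, 0.1],
                             [0.1, 3.0, 0.1]])
print(loss(Y_pred_batch, Y_batch))                 # mean loss over the batch
_, predictions = torch.max(Y_pred_batch, 1)
print(predictions)                                 # tensor([2, 0, 1])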
# Multiclass Problem
class NeuralNet2(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet2, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        return out

model = NeuralNet2(input_size=28 * 28, hidden_size=5, num_classes=3)
criterion = nn.CrossEntropyLoss()
Activation Functions
Without activation functions, the network is just a stack of linear layers, which collapses into a single linear model.
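This can be verified directly: two nn.Linear layers with no activation in between compose into a single affine map, \(W_2(W_1 x + b_1) + b_2 = (W_2 W_1)x + (W_2 b_1 + b_2)\), so one linear layer can reproduce them exactly. A minimal sketch (layer sizes are arbitrary):

import torch
import torch.nn as nn

# hedged sketch: two linear layers without an activation collapse into one
lin1 = nn.Linear(4, 8)
lin2 = nn.Linear(8, 2)
stacked = nn.Sequential(lin1, lin2)

combined = nn.Linear(4, 2)
with torch.no_grad():
    combined.weight.copy_(lin2.weight @ lin1.weight)
    combined.bias.copy_(lin2.weight @ lin1.bias + lin2.bias)

x = torch.randn(5, 4)
print(torch.allclose(stacked(x), combined(x), atol=1e-5))   # True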
import torch
import torch.nn as nn
import torch.nn.functional as F
# Option 1 (create nn modules)
class NeuralNet2(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet2, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        out = self.sigmoid(out)
        return out
# Option 2 (use activation functions directly in forward pass)
class NeuralNet2(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet2, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = torch.relu(self.linear1(x))
        out = torch.sigmoid(self.linear2(out))
        return out
MLP on MNIST
- MNIST
- DataLoader, Transformation
- Multilayer Neural Net, activation function
- Loss and Optimizer
- Training Loop (batch training)
- Model evaluation
- GPU Support
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torchvision.transforms import ToPILImage
import matplotlib.pyplot as plt
torch.cuda.is_available()
True
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
input_size = 28 * 28
hidden_size = 100
num_classes = 10
num_epochs = 2
batch_size = 100
train_dataset = torchvision.datasets.MNIST(root="./data",
                                           download=True,
                                           train=True,
                                           transform=transforms.ToTensor())

test_dataset = torchvision.datasets.MNIST(root="./data",
                                          download=True,
                                          train=False,
                                          transform=transforms.ToTensor())

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

# note: this loader wraps train_dataset, so the accuracy printed during
# training is measured on the training set, not on the held-out test set
test_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
len(train_dataset), len(test_dataset)
(60000, 10000)
image, label = train_dataset[1]
plt.imshow(transforms.ToPILImage()(image), cmap='gray')
plt.axis('off')
plt.show()
examples = iter(train_loader)
images, labels = next(examples)
images.shape, labels.shape
(torch.Size([100, 1, 28, 28]), torch.Size([100]))
for i in range(6):
    plt.subplot(2, 3, i + 1)
    plt.imshow(images[i][0], cmap='gray')
    plt.axis('off')

plt.show()
# model
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        return out

model = NeuralNet(input_size, hidden_size, num_classes).to(device)
# loss and optimizer
learning_rate = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Train loop
n_total_steps = len(train_loader)
running_loss = 0.0
print_stat = 100

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # 100, 1, 28, 28 --> 100, 28 * 28
        images = images.reshape(-1, 28 * 28).to(device)
        labels = labels.to(device)

        # forward
        outputs = model(images)
        loss = criterion(outputs, labels)

        # backwards
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % print_stat == 0:
            with torch.no_grad():
                n_correct = 0
                n_samples = 0
                for images, labels in test_loader:
                    images = images.reshape(-1, 28 * 28).to(device)
                    labels = labels.to(device)
                    outputs = model(images)

                    _, predictions = torch.max(outputs, 1)
                    n_samples += labels.shape[0]
                    n_correct += (predictions == labels).sum().item()

                acc = 100.0 * n_correct / n_samples

            print(f'[epoch:{epoch+1}/{num_epochs}, [step:{i+1}/{n_total_steps}] loss:{(running_loss/print_stat):.4f} accuracy:{acc}')
            running_loss = 0.0
[epoch:1/2, [step:100/600] loss:0.9543 accuracy:88.51333333333334
[epoch:1/2, [step:200/600] loss:0.3956 accuracy:90.49166666666666
[epoch:1/2, [step:300/600] loss:0.3070 accuracy:91.86666666666666
[epoch:1/2, [step:400/600] loss:0.2970 accuracy:92.62
[epoch:1/2, [step:500/600] loss:0.2614 accuracy:93.28833333333333
[epoch:1/2, [step:600/600] loss:0.2401 accuracy:93.665
[epoch:2/2, [step:100/600] loss:0.2229 accuracy:94.08166666666666
[epoch:2/2, [step:200/600] loss:0.2201 accuracy:94.43333333333334
[epoch:2/2, [step:300/600] loss:0.1986 accuracy:94.75833333333334
[epoch:2/2, [step:400/600] loss:0.1918 accuracy:94.61666666666666
[epoch:2/2, [step:500/600] loss:0.1879 accuracy:95.115
[epoch:2/2, [step:600/600] loss:0.1609 accuracy:95.52166666666666
# Test
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28 * 28).to(device)
        labels = labels.to(device)
        outputs = model(images)

        # value, index
        _, predictions = torch.max(outputs, 1)
        n_samples += labels.shape[0]
        n_correct += (predictions == labels).sum().item()

    acc = 100.0 * n_correct / n_samples
    print(f'accuracy = {acc}')
accuracy = 95.52166666666666
CNN on Cifar-10
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torchvision.transforms import ToPILImage
import matplotlib.pyplot as plt
import numpy as np
torch.cuda.is_available()
True
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_epochs = 4
batch_size = 100

transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
train_dataset = torchvision.datasets.CIFAR10(root="./data",
                                             download=True,
                                             train=True,
                                             transform=transform)

test_dataset = torchvision.datasets.CIFAR10(root="./data",
                                            download=True,
                                            train=False,
                                            transform=transform)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

# note: this loader wraps train_dataset, so the accuracy printed during
# training is measured on the training set, not on the held-out test set
test_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
Files already downloaded and verified
Files already downloaded and verified
len(train_dataset), len(test_dataset)
(50000, 10000)
classes = train_dataset.class_to_idx
classes = list(train_dataset.class_to_idx)
list(classes)
['airplane',
'automobile',
'bird',
'cat',
'deer',
'dog',
'frog',
'horse',
'ship',
'truck']
examples = iter(train_loader)
images, labels = next(examples)
images.shape, labels.shape
(torch.Size([100, 3, 32, 32]), torch.Size([100]))
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        # input size: 3 colour channels
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 40)
        self.fc3 = nn.Linear(40, 10)

    def forward(self, x):
        out = self.pool(F.relu(self.conv1(x)))
        out = self.pool(F.relu(self.conv2(out)))
        out = out.view(-1, 16*5*5)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out

model = ConvNet().to(device)
learning_rate = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# Image Classifier Neural Network
class ImageClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, (3, 3)),
            nn.ReLU(),
            nn.Conv2d(32, 64, (3, 3)),
            nn.ReLU(),
            nn.Conv2d(64, 64, (3, 3)),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(64*(28-2)*(28-2), 10)
        )

    def forward(self, x):
        return self.model(x)

model = ImageClassifier().to(device)
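Note that optimizer above was built from the ConvNet's parameters before model was reassigned, so the ImageClassifier's weights are never updated by the training loop below; that is why the loss stays near 2.30 and the accuracy never improves in the output. A minimal fix, assuming the ImageClassifier is the model to be trained, is to rebuild the optimizer from the new model's parameters:

# hedged fix: point the optimizer at the newly assigned model's parameters
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)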
# Train loop
n_total_steps = len(train_loader)
running_loss = 0.0
print_stat = 100

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # forward
        outputs = model(images)
        loss = criterion(outputs, labels)

        # backwards
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % print_stat == 0:
            with torch.no_grad():
                n_correct = 0
                n_samples = 0
                for images, labels in test_loader:
                    images = images.to(device)
                    labels = labels.to(device)
                    outputs = model(images)

                    _, predictions = torch.max(outputs, 1)
                    n_samples += labels.shape[0]
                    n_correct += (predictions == labels).sum().item()

                acc = 100.0 * n_correct / n_samples

            print(f'[epoch:{epoch+1}/{num_epochs}, [step:{i+1}/{n_total_steps}] loss:{(running_loss/print_stat):.4f} accuracy:{acc}')
            running_loss = 0.0
[epoch:1/4, [step:100/500] loss:2.3045 accuracy:6.866
[epoch:1/4, [step:200/500] loss:2.3044 accuracy:6.866
[epoch:1/4, [step:300/500] loss:2.3047 accuracy:6.866
[epoch:1/4, [step:400/500] loss:2.3047 accuracy:6.866
[epoch:1/4, [step:500/500] loss:2.3048 accuracy:6.866
[epoch:2/4, [step:100/500] loss:2.3046 accuracy:6.866
[epoch:2/4, [step:200/500] loss:2.3045 accuracy:6.866
[epoch:2/4, [step:300/500] loss:2.3046 accuracy:6.866
[epoch:2/4, [step:400/500] loss:2.3046 accuracy:6.866
[epoch:2/4, [step:500/500] loss:2.3048 accuracy:6.866
[epoch:3/4, [step:100/500] loss:2.3044 accuracy:6.866
[epoch:3/4, [step:200/500] loss:2.3045 accuracy:6.866
[epoch:3/4, [step:300/500] loss:2.3047 accuracy:6.866
[epoch:3/4, [step:400/500] loss:2.3051 accuracy:6.866
[epoch:3/4, [step:500/500] loss:2.3044 accuracy:6.866
[epoch:4/4, [step:100/500] loss:2.3046 accuracy:6.866
[epoch:4/4, [step:200/500] loss:2.3046 accuracy:6.866
[epoch:4/4, [step:300/500] loss:2.3048 accuracy:6.866
[epoch:4/4, [step:400/500] loss:2.3048 accuracy:6.866
[epoch:4/4, [step:500/500] loss:2.3044 accuracy:6.866