import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
class WineDataset(Dataset):
    def __init__(self):
        # data loading
        xy = np.loadtxt('Data/wine.csv', delimiter=",", dtype=np.float32, skiprows=1)
        self.xy = xy
        self.x = torch.from_numpy(xy[:, 1:])
        self.y = torch.from_numpy(xy[:, [0]])
        self.n_samples = xy.shape[0]

    def __getitem__(self, index):
        return self.x[index], self.y[index]

    def __len__(self):
        return self.n_samples
Transforms
dataset = WineDataset()
first_data = dataset[0]
first_data
(tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
1.0650e+03]),
tensor([1.]))
features, labels = dataset[0]
features, labels
(tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
1.0650e+03]),
tensor([1.]))
dataloader = DataLoader(dataset=dataset, batch_size=4, shuffle=True, num_workers=2)
dataiter = iter(dataloader)
next(dataiter)
[tensor([[1.2250e+01, 1.7300e+00, 2.1200e+00, 1.9000e+01, 8.0000e+01, 1.6500e+00,
2.0300e+00, 3.7000e-01, 1.6300e+00, 3.4000e+00, 1.0000e+00, 3.1700e+00,
5.1000e+02],
[1.2420e+01, 1.6100e+00, 2.1900e+00, 2.2500e+01, 1.0800e+02, 2.0000e+00,
2.0900e+00, 3.4000e-01, 1.6100e+00, 2.0600e+00, 1.0600e+00, 2.9600e+00,
3.4500e+02],
[1.2370e+01, 1.1300e+00, 2.1600e+00, 1.9000e+01, 8.7000e+01, 3.5000e+00,
3.1000e+00, 1.9000e-01, 1.8700e+00, 4.4500e+00, 1.2200e+00, 2.8700e+00,
4.2000e+02],
[1.3830e+01, 1.6500e+00, 2.6000e+00, 1.7200e+01, 9.4000e+01, 2.4500e+00,
2.9900e+00, 2.2000e-01, 2.2900e+00, 5.6000e+00, 1.2400e+00, 3.3700e+00,
1.2650e+03]]),
tensor([[2.],
[2.],
[2.],
[1.]])]
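Each batch the loader yields is a list [features, labels]: four rows of the 13 wine features stacked together with their four labels. A quick shape check makes this explicit:

features, labels = next(iter(dataloader))
features.shape, labels.shape  # (torch.Size([4, 13]), torch.Size([4, 1]))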
dataiter = iter(dataloader)
data = next(dataiter)
features, labels = data
features, labels
(tensor([[1.3720e+01, 1.4300e+00, 2.5000e+00, 1.6700e+01, 1.0800e+02, 3.4000e+00,
3.6700e+00, 1.9000e-01, 2.0400e+00, 6.8000e+00, 8.9000e-01, 2.8700e+00,
1.2850e+03],
[1.1840e+01, 2.8900e+00, 2.2300e+00, 1.8000e+01, 1.1200e+02, 1.7200e+00,
1.3200e+00, 4.3000e-01, 9.5000e-01, 2.6500e+00, 9.6000e-01, 2.5200e+00,
5.0000e+02],
[1.3860e+01, 1.5100e+00, 2.6700e+00, 2.5000e+01, 8.6000e+01, 2.9500e+00,
2.8600e+00, 2.1000e-01, 1.8700e+00, 3.3800e+00, 1.3600e+00, 3.1600e+00,
4.1000e+02],
[1.4120e+01, 1.4800e+00, 2.3200e+00, 1.6800e+01, 9.5000e+01, 2.2000e+00,
2.4300e+00, 2.6000e-01, 1.5700e+00, 5.0000e+00, 1.1700e+00, 2.8200e+00,
1.2800e+03]]),
tensor([[1.],
[2.],
[2.],
[1.]]))
num_epochs = 2
total_samples = len(dataset)
n_iterations = int(np.ceil(total_samples / 4))
total_samples, n_iterations
(178, 45)
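Note that len(dataloader) already reports the number of batches per epoch, so the manual ceil above is redundant; a quick equivalence check:

# With drop_last=False (the default), len(dataloader) == ceil(len(dataset) / batch_size)
assert len(dataloader) == n_iterations  # 45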
for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(dataloader):
        if (i + 1) % 5 == 0:
            print(f'epoch {epoch + 1}/{num_epochs}, step {i+1}/{n_iterations}, inputs:{inputs[0][:5]} labels:{labels[0]}')
epoch 1/2, step 5/45, inputs:tensor([13.3600, 2.5600, 2.3500, 20.0000, 89.0000]) labels:tensor([3.])
epoch 1/2, step 10/45, inputs:tensor([ 13.2900, 1.9700, 2.6800, 16.8000, 102.0000]) labels:tensor([1.])
epoch 1/2, step 15/45, inputs:tensor([14.1600, 2.5100, 2.4800, 20.0000, 91.0000]) labels:tensor([3.])
epoch 1/2, step 20/45, inputs:tensor([ 13.9400, 1.7300, 2.2700, 17.4000, 108.0000]) labels:tensor([1.])
epoch 1/2, step 25/45, inputs:tensor([12.6000, 1.3400, 1.9000, 18.5000, 88.0000]) labels:tensor([2.])
epoch 1/2, step 30/45, inputs:tensor([ 13.2400, 2.5900, 2.8700, 21.0000, 118.0000]) labels:tensor([1.])
epoch 1/2, step 35/45, inputs:tensor([11.0300, 1.5100, 2.2000, 21.5000, 85.0000]) labels:tensor([2.])
epoch 1/2, step 40/45, inputs:tensor([ 13.4800, 1.8100, 2.4100, 20.5000, 100.0000]) labels:tensor([1.])
epoch 1/2, step 45/45, inputs:tensor([ 12.6400, 1.3600, 2.0200, 16.8000, 100.0000]) labels:tensor([2.])
epoch 2/2, step 5/45, inputs:tensor([14.7500, 1.7300, 2.3900, 11.4000, 91.0000]) labels:tensor([1.])
epoch 2/2, step 10/45, inputs:tensor([12.3700, 1.6300, 2.3000, 24.5000, 88.0000]) labels:tensor([2.])
epoch 2/2, step 15/45, inputs:tensor([ 13.8300, 1.5700, 2.6200, 20.0000, 115.0000]) labels:tensor([1.])
epoch 2/2, step 20/45, inputs:tensor([12.6900, 1.5300, 2.2600, 20.7000, 80.0000]) labels:tensor([2.])
epoch 2/2, step 25/45, inputs:tensor([11.4100, 0.7400, 2.5000, 21.0000, 88.0000]) labels:tensor([2.])
epoch 2/2, step 30/45, inputs:tensor([12.2500, 1.7300, 2.1200, 19.0000, 80.0000]) labels:tensor([2.])
epoch 2/2, step 35/45, inputs:tensor([ 11.5600, 2.0500, 3.2300, 28.5000, 119.0000]) labels:tensor([2.])
epoch 2/2, step 40/45, inputs:tensor([ 14.1000, 2.0200, 2.4000, 18.8000, 103.0000]) labels:tensor([1.])
epoch 2/2, step 45/45, inputs:tensor([ 14.2200, 1.7000, 2.3000, 16.3000, 118.0000]) labels:tensor([1.])
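The loop above is a dummy pass that only prints every fifth batch; in a real run the loop body would hold the forward and backward pass. A minimal sketch, assuming a simple linear model over the 13 features (the model, loss, and optimizer here are illustrative, not from the notebook):

import torch.nn as nn

model = nn.Linear(13, 1)  # illustrative model: 13 wine features -> 1 output
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for epoch in range(num_epochs):
    for inputs, labels in dataloader:
        outputs = model(inputs)          # forward pass on the current batch
        loss = criterion(outputs, labels)
        optimizer.zero_grad()            # clear gradients from the previous step
        loss.backward()                  # backward pass
        optimizer.step()                 # update parameters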
Dataset Transform
Types of Transform:
On Images:
CenterCrop, Grayscale, Pad, RandomAffine, RandomCrop, RandomHorizontalFlip, RandomRotation, Resize, Scale
On Tensors:
LinearTransformation, Normalize, RandomErasing
Conversion:
ToPILImage: from tensor or ndarray
ToTensor: from numpy.ndarray or PIL Image
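Any of these can be chained with transforms.Compose; a small sketch of an image pipeline (the specific transforms and parameters are illustrative):

import torchvision.transforms as T

image_transform = T.Compose([
    T.Resize((224, 224)),           # on images
    T.RandomHorizontalFlip(p=0.5),  # on images
    T.ToTensor(),                   # conversion: PIL Image / ndarray -> tensor
    T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),  # on tensors
])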
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
class WineDataset(Dataset):
    def __init__(self, transform=None):
        # data loading
        xy = np.loadtxt('Data/wine.csv', delimiter=",", dtype=np.float32, skiprows=1)
        self.xy = xy
        self.x = xy[:, 1:]
        self.y = xy[:, [0]]
        self.n_samples = xy.shape[0]
        self.transform = transform

    def __getitem__(self, index):
        sample = self.x[index], self.y[index]
        if self.transform:
            sample = self.transform(sample)
        return sample

    def __len__(self):
        return self.n_samples
class ToTensor:
    def __call__(self, sample):
        inputs, targets = sample
        return torch.from_numpy(inputs), torch.from_numpy(targets)

class MulTransform:
    def __init__(self, factor):
        self.factor = factor

    def __call__(self, sample):
        inputs, target = sample
        inputs *= self.factor
        return inputs, target
composed = torchvision.transforms.Compose([ToTensor(), MulTransform(2)])
dataset = WineDataset(transform=composed)
first_data = dataset[0]
first_data
(tensor([2.8460e+01, 3.4200e+00, 4.8600e+00, 3.1200e+01, 2.5400e+02, 5.6000e+00,
6.1200e+00, 5.6000e-01, 4.5800e+00, 1.1280e+01, 2.0800e+00, 7.8400e+00,
2.1300e+03]),
tensor([1.]))
features, labels = dataset[0]
features, labels
(tensor([5.6920e+01, 6.8400e+00, 9.7200e+00, 6.2400e+01, 5.0800e+02, 1.1200e+01,
1.2240e+01, 1.1200e+00, 9.1600e+00, 2.2560e+01, 4.1600e+00, 1.5680e+01,
4.2600e+03]),
tensor([1.]))
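Note the second access returns four times the raw values rather than two: torch.from_numpy shares memory with the dataset's NumPy array, and inputs *= self.factor multiplies that shared storage in place, so every __getitem__ call scales the stored row again. A side-effect-free variant (one possible fix) multiplies out of place:

class MulTransform:
    def __init__(self, factor):
        self.factor = factor

    def __call__(self, sample):
        inputs, target = sample
        # out-of-place multiply leaves the dataset's underlying array untouched
        return inputs * self.factor, target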
dataloader = DataLoader(dataset=dataset, batch_size=4, shuffle=True, num_workers=2)
dataiter = iter(dataloader)
next(dataiter)
[tensor([[2.7460e+01, 3.0000e+00, 5.4000e+00, 4.5000e+01, 2.0200e+02, 6.0000e+00,
6.5000e+00, 5.8000e-01, 4.7600e+00, 1.1400e+01, 2.3800e+00, 5.4200e+00,
2.5700e+03],
[2.5440e+01, 3.6200e+00, 4.4000e+00, 3.7600e+01, 1.7200e+02, 4.4000e+00,
5.0600e+00, 5.2000e-01, 3.5400e+00, 7.8000e+00, 2.3200e+00, 6.2800e+00,
1.4280e+03],
[2.8200e+01, 4.0400e+00, 4.8000e+00, 3.7600e+01, 2.0600e+02, 5.5000e+00,
5.8400e+00, 6.4000e-01, 4.7600e+00, 1.2400e+01, 2.1400e+00, 5.5000e+00,
2.1200e+03],
[2.6100e+01, 3.5400e+00, 4.2000e+00, 3.4000e+01, 2.1400e+02, 6.0000e+00,
6.0000e+00, 5.6000e-01, 4.0600e+00, 1.0080e+01, 1.7600e+00, 6.7000e+00,
1.7700e+03]]),
tensor([[1.],
[2.],
[1.],
[1.]])]
dataiter = iter(dataloader)
data = next(dataiter)
features, labels = data
features, labels
(tensor([[2.5200e+01, 2.6800e+00, 3.8000e+00, 3.7000e+01, 1.7600e+02, 2.9000e+00,
2.7200e+00, 5.8000e-01, 2.7000e+00, 4.9000e+00, 2.0800e+00, 5.5400e+00,
1.1240e+03],
[2.6460e+01, 6.6000e+00, 4.5600e+00, 3.7000e+01, 1.9600e+02, 3.6000e+00,
1.6600e+00, 1.2200e+00, 3.7400e+00, 2.1040e+01, 1.1200e+00, 3.0200e+00,
1.3500e+03],
[2.4000e+01, 3.0200e+00, 4.8400e+00, 4.4000e+01, 1.7200e+02, 2.9000e+00,
2.5000e+00, 1.0000e+00, 3.2600e+00, 7.2000e+00, 2.1000e+00, 5.3000e+00,
9.0000e+02],
[2.2820e+01, 1.4800e+00, 5.0000e+00, 4.2000e+01, 1.7600e+02, 4.9600e+00,
4.0200e+00, 8.4000e-01, 2.8800e+00, 6.1600e+00, 2.2000e+00, 4.6200e+00,
8.6800e+02]]),
tensor([[2.],
[3.],
[2.],
[2.]]))
num_epochs = 2
total_samples = len(dataset)
n_iterations = int(np.ceil(total_samples / 4))
total_samples, n_iterations
(178, 45)
for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(dataloader):
        if (i + 1) % 5 == 0:
            print(f'epoch {epoch + 1}/{num_epochs}, step {i+1}/{n_iterations}, inputs:{inputs[0][:5]} labels:{labels[0]}')
epoch 1/2, step 5/45, inputs:tensor([ 24.0000, 6.8600, 4.0000, 38.0000, 174.0000]) labels:tensor([2.])
epoch 1/2, step 10/45, inputs:tensor([ 26.9800, 3.3200, 4.4800, 48.0000, 174.0000]) labels:tensor([2.])
epoch 1/2, step 15/45, inputs:tensor([ 25.4000, 7.1000, 4.7200, 43.0000, 212.0000]) labels:tensor([3.])
epoch 1/2, step 20/45, inputs:tensor([ 22.9200, 7.4800, 3.6400, 39.0000, 214.0000]) labels:tensor([2.])
epoch 1/2, step 25/45, inputs:tensor([ 23.2800, 4.1200, 4.9200, 43.2000, 168.0000]) labels:tensor([2.])
epoch 1/2, step 30/45, inputs:tensor([ 27.4400, 2.8600, 5.0000, 33.4000, 216.0000]) labels:tensor([1.])
epoch 1/2, step 35/45, inputs:tensor([ 23.3000, 3.3400, 5.2400, 52.0000, 176.0000]) labels:tensor([2.])
epoch 1/2, step 40/45, inputs:tensor([ 26.3400, 5.1800, 4.7400, 40.0000, 240.0000]) labels:tensor([3.])
epoch 1/2, step 45/45, inputs:tensor([ 29.5000, 3.4600, 4.7800, 22.8000, 182.0000]) labels:tensor([1.])
epoch 2/2, step 5/45, inputs:tensor([ 27.1600, 5.1600, 5.3800, 49.0000, 210.0000]) labels:tensor([3.])
epoch 2/2, step 10/45, inputs:tensor([ 26.1000, 3.5400, 4.2000, 34.0000, 214.0000]) labels:tensor([1.])
epoch 2/2, step 15/45, inputs:tensor([ 24.1400, 4.3200, 4.3400, 42.0000, 170.0000]) labels:tensor([2.])
epoch 2/2, step 20/45, inputs:tensor([ 24.7400, 3.2600, 4.6000, 49.0000, 176.0000]) labels:tensor([2.])
epoch 2/2, step 25/45, inputs:tensor([ 24.5800, 2.8200, 3.9600, 32.0000, 170.0000]) labels:tensor([2.])
epoch 2/2, step 30/45, inputs:tensor([ 24.7400, 2.2600, 4.3200, 38.0000, 174.0000]) labels:tensor([2.])
epoch 2/2, step 35/45, inputs:tensor([ 26.5600, 3.2800, 5.6800, 31.0000, 220.0000]) labels:tensor([1.])
epoch 2/2, step 40/45, inputs:tensor([ 24.7400, 1.8800, 2.7200, 21.2000, 176.0000]) labels:tensor([2.])
epoch 2/2, step 45/45, inputs:tensor([ 26.3400, 10.3800, 4.6400, 44.0000, 186.0000]) labels:tensor([3.])
Images
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, datasets, utils
import torchvision.transforms.functional as TF
from torch.utils.data import DataLoader
from datetime import datetime
from tqdm import tqdm
import matplotlib.pyplot as plt
import timm
import numpy as np
transform_default = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalize with ImageNet mean and std
])
# Download and load the Imagenette training split
train_dataset = datasets.Imagenette(root='Data',
                                    split='train',
                                    # download=True,
                                    transform=transform_default,
                                    )

# Download and load the Imagenette validation split
test_dataset = datasets.Imagenette(root='Data',
                                   split='val',
                                   # download=True,
                                   transform=transform_default,
                                   )
# Define a function to display a single image with its label
def show_image(sample):
    # Unpack the image and its label
    image, label = sample

    # Convert the image tensor to a NumPy array in HWC order
    image_np = image[:3].numpy().transpose((1, 2, 0))

    # Display the image using Matplotlib
    plt.imshow(image_np.clip(0, 1))
    plt.axis('off')
    plt.title(f'{train_dataset.classes[label][0]}')
    plt.show()
# Define a function to display a grid of images with their labels
def show_images(images, labels, **kwargs):
    nrows = int(np.ceil(np.sqrt(len(images))))
    ncols = int(np.ceil(len(images) / nrows))

    fig, axes = plt.subplots(nrows, ncols, figsize=(12, 12), **kwargs)
    # Adjust the spacing between subplots
    plt.subplots_adjust(wspace=0.3, hspace=0.3)
    for ax, image, label in zip(axes.flat, images, labels):
        # Convert image to a NumPy array and adjust pixel values
        img_np = image[:3].numpy().transpose((1, 2, 0))
        # Display image
        ax.imshow(img_np.clip(0, 1))
        ax.axis('off')
        ax.set_title(f'{train_dataset.classes[label][0]}')
    # Hide any unused subplots
    for ax in axes.flat[len(images):]:
        ax.axis('off')
    plt.show()
image, label = train_dataset[2]
type(image)
torch.Tensor
show_image(train_dataset[2])
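show_images can be fed straight from a DataLoader batch; a short sketch (the batch size of 9 is arbitrary):

loader = DataLoader(train_dataset, batch_size=9, shuffle=True)
images, labels = next(iter(loader))
show_images(images, labels)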
Test Transform
import torch
import torchvision.transforms.functional as TF

class AddGrayscaleChannel(object):
    def __init__(self):
        super().__init__()

    def __call__(self, img):
        # Convert the image to grayscale
        gray_img = TF.rgb_to_grayscale(img)

        # Concatenate the grayscale image with the original image along the channel dimension
        img_with_gray_channel = torch.cat((img, gray_img), dim=0)
        return img_with_gray_channel

    def __repr__(self):
        return self.__class__.__name__ + '()'
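Dropped into a pipeline after ToTensor, this produces 4-channel tensors; a sketch of how it might be composed (mirroring transform_default above, parameters illustrative):

transform_gray = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.ToTensor(),
    AddGrayscaleChannel(),  # appends a grayscale channel -> shape [4, 150, 150]
])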
FFT Transform
class ComputeFFT(object):
    def __init__(self):
        super().__init__()

    def __call__(self, image):
        # Convert the color image to grayscale
        grayscale_image = TF.rgb_to_grayscale(image).squeeze()

        # Apply a 2D FFT to the grayscale image
        fft_result = torch.fft.fft2(grayscale_image)

        # Compute the (log-scaled) magnitude spectrum
        magnitude_spectrum = torch.log(torch.abs(fft_result) + 1)

        # Compute the phase spectrum
        phase_spectrum = torch.angle(fft_result)

        # Append both spectra to the image as extra channels
        combined_image = torch.cat((image, magnitude_spectrum.unsqueeze(0), phase_spectrum.unsqueeze(0)), dim=0)
        return combined_image

    def __repr__(self):
        return self.__class__.__name__ + '()'
transform = ComputeFFT()
new_image = transform(image)
transposed_image = torch.transpose(new_image[:3], 0, 2).transpose(0, 1)

# Visualize the original image
plt.imshow(transposed_image, cmap='gray')
plt.title('image')
plt.colorbar()
plt.show()

# Visualize magnitude spectrum
plt.imshow(new_image[3], cmap='gray')
plt.title('Magnitude Spectrum')
plt.colorbar()
plt.show()

# Visualize phase spectrum
plt.imshow(new_image[4], cmap='gray')
plt.title('Phase Spectrum')
plt.colorbar()
plt.show()
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
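As plotted, the low-frequency content of the magnitude spectrum sits in the corners; torch.fft.fftshift moves the zero-frequency component to the centre for the more familiar rendering (an optional tweak):

# Center the zero-frequency component before plotting
plt.imshow(torch.fft.fftshift(new_image[3]), cmap='gray')
plt.title('Magnitude Spectrum (centered)')
plt.colorbar()
plt.show()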
#model_type = "DPT_Large" # MiDaS v3 - Large (highest accuracy, slowest inference speed)
#model_type = "DPT_Hybrid" # MiDaS v3 - Hybrid (medium accuracy, medium inference speed)
= "MiDaS_small" # MiDaS v2.1 - Small (lowest accuracy, highest inference speed)
model_type
= torch.hub.load("intel-isl/MiDaS", model_type)
midas
= torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device
= torch.hub.load("intel-isl/MiDaS", "transforms")
midas_transforms
if model_type == "DPT_Large" or model_type == "DPT_Hybrid":
= midas_transforms.dpt_transform
transform else:
= midas_transforms.small_transform transform
Using cache found in /home/ben/.cache/torch/hub/intel-isl_MiDaS_master
Loading weights: None
Using cache found in /home/ben/.cache/torch/hub/rwightman_gen-efficientnet-pytorch_master
Using cache found in /home/ben/.cache/torch/hub/intel-isl_MiDaS_master
class ComputeDepth(object):
    def __init__(self, model, transform, device="cuda"):
        super().__init__()
        self.device = device
        self.model = model
        self.model.to(self.device)
        self.model.eval()
        self.transform = transform

    def __call__(self, image):
        # Convert the CHW tensor to an HWC NumPy array for the MiDaS transform
        input_image = image.to('cpu').squeeze().numpy().transpose(1, 2, 0)
        input_batch = self.transform(input_image).to(self.device)

        with torch.no_grad():
            prediction = self.model(input_batch)

            # Upsample the prediction back to the input resolution
            prediction = torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=input_image.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze(0)
        prediction = prediction.to('cpu')

        prediction_mean = torch.mean(prediction)
        prediction_std = torch.std(prediction)

        # Calculate the scaling factors for normalization
        scale_factor = 0.225 / prediction_std
        bias = 0.45 - prediction_mean * scale_factor

        # Normalize the tensor to the desired mean and standard deviation
        prediction = prediction * scale_factor + bias

        # Append the depth map to the image as an extra channel
        combined_image = torch.cat((image, prediction), dim=0)
        return combined_image

    def __repr__(self):
        return self.__class__.__name__ + '()'
image, label = train_dataset[2]
type(image)
torch.Tensor
transform_depth = ComputeDepth(midas, transform=transform, device="cpu")
new_image = transform_depth(image)
transposed_image = torch.transpose(new_image[:3], 0, 2).transpose(0, 1)

# Visualize the original image
plt.imshow(transposed_image, cmap='gray')
plt.title('image')
plt.colorbar()
plt.show()

# Visualize the predicted depth map
plt.imshow(new_image[3], cmap='gray')
plt.title('Depth Map')
plt.colorbar()
plt.show()
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).