Datasets and Dataloaders
from torchvision import datasets
datasets.__all__
('LSUN',
'LSUNClass',
'ImageFolder',
'DatasetFolder',
'FakeData',
'CocoCaptions',
'CocoDetection',
'CIFAR10',
'CIFAR100',
'EMNIST',
'FashionMNIST',
'QMNIST',
'MNIST',
'KMNIST',
'StanfordCars',
'STL10',
'SUN397',
'SVHN',
'PhotoTour',
'SEMEION',
'Omniglot',
'SBU',
'Flickr8k',
'Flickr30k',
'Flowers102',
'VOCSegmentation',
'VOCDetection',
'Cityscapes',
'ImageNet',
'Caltech101',
'Caltech256',
'CelebA',
'WIDERFace',
'SBDataset',
'VisionDataset',
'USPS',
'Kinetics',
'HMDB51',
'UCF101',
'Places365',
'Kitti',
'INaturalist',
'LFWPeople',
'LFWPairs',
'KittiFlow',
'Sintel',
'FlyingChairs',
'FlyingThings3D',
'HD1K',
'Food101',
'DTD',
'FER2013',
'GTSRB',
'CLEVRClassification',
'OxfordIIITPet',
'PCAM',
'Country211',
'FGVCAircraft',
'EuroSAT',
'RenderedSST2',
'Kitti2012Stereo',
'Kitti2015Stereo',
'CarlaStereo',
'Middlebury2014Stereo',
'CREStereo',
'FallingThingsStereo',
'SceneFlowStereo',
'SintelStereo',
'InStereo2k',
'ETH3DStereo',
'wrap_dataset_for_transforms_v2',
'Imagenette')
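Every name above is a ready-made Dataset subclass, but the same contract works for custom data: implement __len__ and __getitem__. A minimal sketch, with random tensors standing in for real samples:

import torch
from torch.utils.data import Dataset, DataLoader

class ToyDataset(Dataset):
    """A Dataset only needs __len__ and __getitem__."""
    def __init__(self, n=100):
        self.x = torch.randn(n, 3, 32, 32)   # placeholder images
        self.y = torch.randint(0, 10, (n,))  # placeholder labels

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

toy_loader = DataLoader(ToyDataset(), batch_size=16, shuffle=True)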
MNIST
from torchvision import datasets, transforms
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import matplotlib.pyplot as plt  # needed for the plotting cells below
torch.cuda.is_available()
True
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
path = './Data'

# Define transforms for preprocessing
transform = transforms.Compose([
    transforms.ToTensor(),                 # Convert image to tensor
    transforms.Normalize((0.5,), (0.5,))   # Normalize image pixel values to range [-1, 1]
])

# Define batch size for data loader
batch_size = 64

# Create train and test datasets
train_dataset = datasets.MNIST(root=path, train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root=path, train=False, download=True, transform=transform)

# Create train and test data loaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
len(train_dataset), len(test_dataset)
(60000, 10000)
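If a validation split is needed, the 60k training set can be carved up with random_split; a quick sketch (the 55k/5k sizes are an arbitrary choice):

from torch.utils.data import random_split
train_subset, val_subset = random_split(train_dataset, [55000, 5000])
val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)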
image, label = train_dataset[1]
plt.imshow(transforms.ToPILImage()(image), cmap='gray')
plt.axis('off')
plt.show()
examples = iter(train_loader)
images, labels = next(examples)
images.shape, labels.shape
(torch.Size([64, 1, 28, 28]), torch.Size([64]))
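A quick sanity check (not part of the original pipeline) that Normalize((0.5,), (0.5,)) really mapped the pixels into [-1, 1]:

images.min().item(), images.max().item()  # both values should lie within [-1.0, 1.0]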
import matplotlib.pyplot as plt
import numpy as np
# Define a function to display images
def show_images(images, labels, **kwargs):
    nrows = int(np.ceil(np.sqrt(len(images))))
    ncols = int(np.ceil(len(images) / nrows))

    fig, axes = plt.subplots(nrows, ncols, figsize=(12, 12), **kwargs)
    # Adjust the spacing between subplots
    plt.subplots_adjust(wspace=0.1, hspace=0.3)
    for i, ax in enumerate(axes.flat):
        # Convert image to numpy array and adjust pixel values
        img_np = images[i].numpy().transpose((1, 2, 0))
        img_np = (img_np + 1) / 2  # Adjust pixel values to range [0, 1]

        # Display image
        ax.imshow(img_np, cmap='gray')
        ax.axis('off')
        ax.set_title(f'Label: {labels[i]}')
    plt.show()
show_images(images, labels)
CIFAR10
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
torch.cuda.is_available()
True
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
path = 'Data'

# Define transforms for preprocessing
transform = transforms.Compose([
    transforms.ToTensor(),                                   # Convert image to tensor
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))   # Normalize image pixel values to range [-1, 1]
])

# Create train and test datasets
train_dataset = datasets.CIFAR10(root=path, train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root=path, train=False, download=True, transform=transform)

# Define batch size and create train and test data loaders
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
Files already downloaded and verified
Files already downloaded and verified
len(train_dataset), len(test_dataset)
(50000, 10000)
images, labels = train_dataset[1]
type(labels)
int
classes = train_dataset.class_to_idx
classes = list(train_dataset.class_to_idx)
list(classes)
['airplane',
'automobile',
'bird',
'cat',
'deer',
'dog',
'frog',
'horse',
'ship',
'truck']
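class_to_idx maps name to index; if the reverse lookup is needed, the dict is easy to invert (a small convenience, not from the original cells):

idx_to_class = {v: k for k, v in train_dataset.class_to_idx.items()}
idx_to_class[0]  # 'airplane'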
import matplotlib.pyplot as plt
import numpy as np
# Define a function to display images
def show_images(images, labels, **kwargs):
    nrows = int(np.ceil(np.sqrt(len(images))))
    ncols = int(np.ceil(len(images) / nrows))

    fig, axes = plt.subplots(nrows, ncols, figsize=(12, 12), **kwargs)
    # Adjust the spacing between subplots
    plt.subplots_adjust(wspace=0.3, hspace=0.3)
    for i, ax in enumerate(axes.flat):
        # Convert image to numpy array and adjust pixel values
        img_np = images[i].numpy().transpose((1, 2, 0))
        img_np = (img_np + 1) / 2  # Adjust pixel values to range [0, 1]

        # Display image
        ax.imshow(img_np)
        ax.axis('off')
        ax.set_title(f'{classes[labels[i]]}')
    plt.show()
# Get a batch of images and labels from the data loader
examples = iter(train_loader)
images, labels = next(examples)
type(labels)
torch.Tensor
images.shape, labels.shape
(torch.Size([64, 3, 32, 32]), torch.Size([64]))
# Display the images
show_images(images, labels)
Imagenette
from torchvision.datasets import ImageFolder
from tqdm import tqdm
# Define transformations: resize, center-crop, and convert to tensors
transform = transforms.Compose([
    transforms.Resize(256),      # Resize the shorter side to 256
    transforms.CenterCrop(224),  # Crop the center 224x224 region
    transforms.ToTensor()        # Convert images to PyTorch tensors
])

# Load Imagenette dataset from an ImageFolder directory tree
imagenette_dataset = ImageFolder(root='Data/Imagenette_depth/imagenette2', transform=transform)
imagenette_dataset
Dataset ImageFolder
Number of datapoints: 13394
Root location: Data/Imagenette_depth/imagenette2
StandardTransform
Transform: Compose(
Resize(size=256, interpolation=bilinear, max_size=None, antialias=True)
CenterCrop(size=(224, 224))
ToTensor()
)
# Calculate per-channel mean and standard deviation over the dataset
loader = torch.utils.data.DataLoader(imagenette_dataset, batch_size=128, shuffle=False)
mean_list = []
std_list = []

for image, label in tqdm(loader):
    mean = image.mean(dim=[0, 2, 3])  # Mean across batch, height, and width
    std = image.std(dim=[0, 2, 3])    # Standard deviation across batch, height, and width
    mean_list.append(mean)
    std_list.append(std)
100%|█████████████████████████████████████████████████████████████████████████████████| 105/105 [03:44<00:00, 2.14s/it]
mean_tensor = torch.stack(mean_list)
std_tensor = torch.stack(std_list)
mean_tensor.mean(dim=[0]), std_tensor.mean(dim=[0])
(tensor([0.4654, 0.4544, 0.4252]), tensor([0.2761, 0.2679, 0.2839]))
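These per-channel statistics can then feed a Normalize step; a sketch reusing the values printed above:

normalize = transforms.Normalize(mean=[0.4654, 0.4544, 0.4252],
                                 std=[0.2761, 0.2679, 0.2839])
transform_norm = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])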
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
# Define the transformations to apply to the images
transform = transforms.Compose([
    transforms.Resize((375, 500)),
    transforms.ToTensor(),
])

# Load the Imagenette train split (uncomment download on the first run)
train_dataset = datasets.Imagenette(root='Data',
                                    split='train',
                                    # download=True,
                                    transform=transform,
                                    )

# Load the Imagenette validation split
test_dataset = datasets.Imagenette(root='Data',
                                   split='val',
                                   # download=True,
                                   transform=transform,
                                   )

batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
train_dataset.__dict__.keys()
dict_keys(['root', 'transform', 'target_transform', 'transforms', '_split', '_size', '_url', '_md5', '_size_root', '_image_root', 'wnids', 'wnid_to_idx', 'classes', 'class_to_idx', '_samples'])
train_dataset.classes
[('tench', 'Tinca tinca'),
('English springer', 'English springer spaniel'),
('cassette player',),
('chain saw', 'chainsaw'),
('church', 'church building'),
('French horn', 'horn'),
('garbage truck', 'dustcart'),
('gas pump', 'gasoline pump', 'petrol pump', 'island dispenser'),
('golf ball',),
('parachute', 'chute')]
len(train_dataset), len(test_dataset)
(9469, 3925)
images, labels = train_dataset[10]
images.shape
torch.Size([3, 375, 500])
# Define a function to display a single (image, label) sample
def show_image(sample):
    # Unpack the image and its label
    image, label = sample

    # Convert the image tensor to a NumPy array
    image_np = image.numpy().transpose((1, 2, 0))

    # Display the image using Matplotlib
    plt.imshow(image_np)
    plt.axis('off')
    plt.title(f'{train_dataset.classes[label][0]}')
    plt.show()
# Define a function to display a batch of images
def show_images(images, labels, **kwargs):
    nrows = int(np.ceil(np.sqrt(len(images))))
    ncols = int(np.ceil(len(images) / nrows))

    fig, axes = plt.subplots(nrows, ncols, figsize=(12, 12), **kwargs)
    # Adjust the spacing between subplots
    plt.subplots_adjust(wspace=0.3, hspace=0.3)
    for i, ax in enumerate(axes.flat):
        # Convert image to numpy array
        img_np = images[i].numpy().transpose((1, 2, 0))

        # Display image
        ax.imshow(img_np)
        ax.axis('off')
        ax.set_title(f'{train_dataset.classes[labels[i]][0]}')
    plt.show()
show_image(train_dataset[2])
# Get a batch of images and labels from the data loader
examples = iter(train_loader)
images, labels = next(examples)
show_images(images, labels)
Country211
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms, datasets, utils
from torch.utils.data import DataLoader, Dataset, random_split
import os
from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt
import timm
import numpy as np
from datetime import datetime
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device
device(type='cuda')
from torchvision import transforms
# Define transforms to apply to the images
transform_default = transforms.Compose([
    transforms.Resize((200, 300)),  # Resize images to a fixed size
    transforms.ToTensor(),          # Convert images to PyTorch tensors
    # Add more transformations as needed (e.g., normalization)
])

# Path to the root directory of the dataset
root_dir = 'Data'

# Create datasets
train_dataset = datasets.Country211(root_dir,
                                    split='train',
                                    transform=transform_default,
                                    download=False)
val_dataset = datasets.Country211(root_dir,
                                  split='valid',
                                  transform=transform_default,
                                  download=False)
test_dataset = datasets.Country211(root_dir,
                                   split='test',
                                   transform=transform_default,
                                   download=False)
images, label = train_dataset[1000]
images.shape
torch.Size([3, 200, 300])
len(train_dataset.classes)
211
len(train_dataset), len(val_dataset), len(test_dataset)
(31650, 10550, 21100)
train_dataset
Dataset Country211
Number of datapoints: 31650
Root location: Data
StandardTransform
Transform: Compose(
Resize(size=(200, 300), interpolation=bilinear, max_size=None, antialias=True)
ToTensor()
)
train_dataset.__dict__.keys()
dict_keys(['_split', 'root', '_base_folder', 'transform', 'target_transform', 'transforms', 'loader', 'extensions', 'classes', 'class_to_idx', 'samples', 'targets', 'imgs'])
# Define a function to display a single (image, label) sample
def show_image(sample):
    # Unpack the image and its label
    image, label = sample

    # Convert the image tensor to a NumPy array
    image_np = image.numpy().transpose((1, 2, 0))

    # Display the image using Matplotlib
    plt.imshow(image_np.clip(0, 1))
    plt.axis('off')
    plt.title(f'{train_dataset.classes[label]}')
    plt.show()
# Define a function to display a batch of images
def show_images(images, labels, **kwargs):
    nrows = int(np.ceil(np.sqrt(len(images))))
    ncols = int(np.ceil(len(images) / nrows))

    fig, axes = plt.subplots(nrows, ncols, figsize=(12, 12), **kwargs)
    # Adjust the spacing between subplots
    plt.subplots_adjust(wspace=0.3, hspace=0.3)
    for ax, image, label in zip(axes.flat, images, labels):
        # Convert image to numpy array
        img_np = image.numpy().transpose((1, 2, 0))

        # Display image
        ax.imshow(img_np.clip(0, 1))
        ax.axis('off')
        ax.set_title(f'{train_dataset.classes[label]}')
    # Hide any leftover empty subplots
    for ax in axes.flat[len(images):]:
        ax.axis('off')

    plt.show()
show_image(train_dataset[6])
def loaders(batch_size):
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=8)
    val_loader = DataLoader(val_dataset,
                            batch_size=batch_size,
                            shuffle=True,
                            num_workers=8)
    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size,
                             shuffle=True,
                             num_workers=8)

    # dataloaders = {'train': train_loader, 'val': test_loader}
    # dataset_sizes = {'train': len(train_dataset), 'val': len(test_dataset)}
    return train_loader, val_loader, test_loader

batch_size = 32
train_loader, val_loader, test_loader = loaders(batch_size)
# Get a batch of images and labels from the data loader
examples = iter(test_loader)
images, labels = next(examples)

# ############## TENSORBOARD ########################
# img_grid = utils.make_grid(images)
# writer.add_image('Imagenette', img_grid)
# writer.flush()
# #sys.exit()
# ###################################################
show_images(images, labels)
def find_mean_std(loader):
    mean_list = []
    std_list = []
    for images, label in tqdm(loader):
        mean, std = images.mean([0, 2, 3]), images.std([0, 2, 3])
        mean_list.append(mean)
        std_list.append(std)
    mean_tensor = torch.stack(mean_list)
    std_tensor = torch.stack(std_list)
    return mean_tensor.mean(dim=[0]), std_tensor.mean(dim=[0])

train_norm = find_mean_std(train_loader)
train_norm
100%|█████████████████████████████████████████████████████████████████████████████████| 990/990 [00:32<00:00, 30.55it/s]
(tensor([0.4571, 0.4504, 0.4209]), tensor([0.2706, 0.2646, 0.2857]))
find_mean_std(val_loader)
100%|█████████████████████████████████████████████████████████████████████████████████| 330/330 [00:11<00:00, 29.80it/s]
(tensor([0.4587, 0.4514, 0.4219]), tensor([0.2706, 0.2647, 0.2852]))
find_mean_std(test_loader)
100%|█████████████████████████████████████████████████████████████████████████████████| 660/660 [00:25<00:00, 26.12it/s]
(tensor([0.4578, 0.4512, 0.4218]), tensor([0.2702, 0.2642, 0.2856]))
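The three splits agree closely; in practice only the train-split statistics would be reused for all splits. A sketch extending transform_default with the values computed above:

transform_norm = transforms.Compose([
    transforms.Resize((200, 300)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4571, 0.4504, 0.4209],
                         std=[0.2706, 0.2646, 0.2857]),
])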
Kitti
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, datasets
from torchvision.transforms import ToPILImage, v2
from torch.utils.data import DataLoader
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import cv2
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
# Render Bokeh plots inline in the notebook
output_notebook()
class Kitti_v2(datasets.Kitti):
    def __init__(self, *args, **kwargs):
        super(Kitti_v2, self).__init__(*args, **kwargs)

path = './Data'
batch_size = 16
# Define transforms for the dataset
transform2 = v2.Compose(
    [
        v2.ToImage(),
        # v2.Resize(size=desired_size),  # Resize image
        v2.RandomPhotometricDistort(p=0.2),
        # v2.RandomZoomOut(fill={tv_tensors.Image: (123, 117, 104), "others": 0}),
        # v2.RandomIoUCrop(),
        v2.RandomHorizontalFlip(p=0.4),
        # v2.SanitizeBoundingBoxes(),
        v2.ToDtype(torch.float32, scale=True),
    ]
)

# Load KITTI train dataset (train expects a bool, not the string 'true')
train_dataset = Kitti_v2(root=path, train=True, download=True, transform=transform2)

# Load KITTI test dataset
test_dataset = Kitti_v2(root=path, train=False, download=True, transform=transform2)
sample = train_dataset[1000]
img, target = sample
print(f"{type(img) = }\n{type(target) = }")
type(img) = <class 'torchvision.tv_tensors._image.Image'>
type(target) = <class 'list'>
train_dataset2 = datasets.wrap_dataset_for_transforms_v2(train_dataset, target_keys=("boxes", "labels"))
test_dataset2 = datasets.wrap_dataset_for_transforms_v2(test_dataset, target_keys=("boxes", "labels"))
sample = train_dataset2[1000]
img, target = sample
print(f"{type(img) = }\n{type(target) = }\n{target.keys() = }")
print(f"{type(target['boxes']) = }\n{type(target['labels']) = }")
type(img) = <class 'torchvision.tv_tensors._image.Image'>
type(target) = <class 'dict'>
target.keys() = dict_keys(['boxes', 'labels'])
type(target['boxes']) = <class 'torchvision.tv_tensors._bounding_boxes.BoundingBoxes'>
type(target['labels']) = <class 'torch.Tensor'>
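Because the wrapper returns tv_tensors, v2 geometric transforms now update the boxes together with the image. A minimal sketch (the target size is an arbitrary choice):

img_small, target_small = v2.Resize(size=(188, 620))(img, target)
print(img_small.shape, target_small['boxes'].canvas_size)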
batch_size = 8

# Create DataLoader for train dataset; the collate_fn keeps per-image targets in a tuple
train_loader = DataLoader(train_dataset2,
                          batch_size=batch_size,
                          shuffle=True,
                          collate_fn=lambda batch: tuple(zip(*batch)),
                          num_workers=8)

# Create DataLoader for test dataset
test_loader = DataLoader(test_dataset2,
                         batch_size=batch_size,
                         shuffle=False,
                         collate_fn=lambda batch: tuple(zip(*batch)),
                         num_workers=8)
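The default collate would try to stack the targets into one tensor and fail, since each image carries a different number of boxes; tuple(zip(*batch)) simply transposes the list of (image, target) pairs. A toy illustration with string stand-ins:

demo_batch = [('img1', 'tgt1'), ('img2', 'tgt2')]  # stand-ins for (image, target) pairs
images_demo, targets_demo = tuple(zip(*demo_batch))
images_demo, targets_demo                          # (('img1', 'img2'), ('tgt1', 'tgt2'))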
train_dataset
Dataset Kitti_v2
Number of datapoints: 7481
Root location: ./Data
len(train_dataset)
7481
image, targets = train_dataset[2]
type(targets)
list
image
car_types = ['Car', 'Van', 'DontCare',
             'Cyclist', 'Pedestrian', 'Truck',
             'Tram', 'Misc', 'Person_sitting']
def cv2_show(image_np, label):
    # Convert the RGB float image to a BGR uint8 image for OpenCV
    image_cv2 = cv2.cvtColor((image_np * 255).astype(np.uint8), cv2.COLOR_RGB2BGR)
    if 'scores' in label:
        for bbox, item, score in zip(label['boxes'], label['labels'], label['scores']):
            if score > 0.2:
                # Draw the bounding box
                cv2.rectangle(image_cv2,
                              (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),
                              (0, 255, 0), 2)
                # Display the label
                font = cv2.FONT_HERSHEY_SIMPLEX
                cv2.putText(image_cv2, str(item),
                            (int(bbox[0]), int(bbox[1]) - 10),
                            font, 0.5, (0, 255, 0), 2, cv2.LINE_AA)
    else:
        for bbox, item in zip(label['boxes'], label['labels']):
            # Draw the bounding box
            cv2.rectangle(image_cv2,
                          (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),
                          (0, 255, 0), 2)
            # Display the label
            font = cv2.FONT_HERSHEY_SIMPLEX
            cv2.putText(image_cv2, str(item),
                        (int(bbox[0]), int(bbox[1]) - 10),
                        font, 0.5, (0, 255, 0), 2, cv2.LINE_AA)

    # Convert the image back to RGB format for display with Matplotlib
    image_rgb = cv2.cvtColor(image_cv2, cv2.COLOR_BGR2RGB)
    return image_rgb
def show_image(sample):
    # Unpack an image and its label from the dataset sample
    image, label = sample

    # Convert the image tensor to a NumPy array and draw the annotations
    image_np = image.numpy().transpose((1, 2, 0))
    image_rgb = cv2_show(image_np, label)

    # Display the image using Matplotlib
    plt.imshow(image_rgb)
    plt.axis('off')
    plt.show()
# Define a function to display images
def show_images(images, labels, **kwargs):
    nrows = int(np.sqrt(len(images)))
    ncols = int(np.floor(len(images) / nrows))

    fig, axes = plt.subplots(nrows, ncols, **kwargs)
    # Adjust the spacing between subplots
    plt.subplots_adjust(wspace=0.01, hspace=0.01)

    # Display each image with its boxes
    for ax, image, label in zip(axes.flat, images, labels):
        # Convert image to numpy array and draw the annotations
        image_np = image.numpy().transpose((1, 2, 0))
        image_rgb = cv2_show(image_np, label)

        # Display image
        ax.imshow(image_rgb)
        ax.axis('off')

    # Hide any leftover empty subplots
    for ax in axes.flat[len(images):]:
        ax.axis('off')
    plt.show()
image, label = train_dataset2[18]
show_image(train_dataset2[18])
Iter
# Get a batch of images and labels from the data loader
examples = iter(train_loader)
images, labels = next(examples)
show_images(images, labels, figsize=(15, 5))
COCO
Data download
import fiftyone as fo
import fiftyone.zoo as foz
#
# Only the required images will be downloaded (if necessary).
# By default, only detections are loaded
#
dataset = foz.load_zoo_dataset(
    name="coco-2017",
    dataset_dir="Data/coco",
    splits=["validation", "train"],
    classes=["person", "car"],
    max_samples=50,
)
Downloading split 'validation' to 'Data/coco/validation' if necessary
Found annotations at 'Data/coco/raw/instances_val2017.json'
Sufficient images already downloaded
Existing download of split 'validation' is sufficient
Downloading split 'train' to 'Data/coco/train' if necessary
Found annotations at 'Data/coco/raw/instances_train2017.json'
Sufficient images already downloaded
Existing download of split 'train' is sufficient
Loading 'coco-2017' split 'validation'
100% |███████████████████| 50/50 [304.1ms elapsed, 0s remaining, 164.4 samples/s]
Loading 'coco-2017' split 'train'
100% |███████████████████| 50/50 [291.6ms elapsed, 0s remaining, 171.5 samples/s]
Dataset 'coco-2017-validation-train-50' created
classes = dataset.default_classes

# Visualize the dataset in the FiftyOne App
session = fo.launch_app(dataset)
Welcome to
███████╗██╗███████╗████████╗██╗ ██╗ ██████╗ ███╗ ██╗███████╗
██╔════╝██║██╔════╝╚══██╔══╝╚██╗ ██╔╝██╔═══██╗████╗ ██║██╔════╝
█████╗ ██║█████╗ ██║ ╚████╔╝ ██║ ██║██╔██╗ ██║█████╗
██╔══╝ ██║██╔══╝ ██║ ╚██╔╝ ██║ ██║██║╚██╗██║██╔══╝
██║ ██║██║ ██║ ██║ ╚██████╔╝██║ ╚████║███████╗
╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═════╝ ╚═╝ ╚═══╝╚══════╝ v0.23.8
Dataset and DataLoaders
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, datasets
from torchvision.transforms import ToPILImage, v2
from torch.utils.data import DataLoader
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import cv2
# COCO category ids with gaps; numeric strings mark unused ids
classes = ['0', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
           'truck', 'boat', 'traffic light', 'fire hydrant', '12', 'stop sign',
           'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
           'elephant', 'bear', 'zebra', 'giraffe', '26', 'backpack', 'umbrella', '29',
           '30', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
           'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
           'surfboard', 'tennis racket', 'bottle', '45', 'wine glass', 'cup', 'fork',
           'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
           'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
           'potted plant', 'bed', '66', 'dining table', '68', '69', 'toilet', '71',
           'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
           'oven', 'toaster', 'sink', 'refrigerator', '83', 'book', 'clock', 'vase',
           'scissors', 'teddy bear', 'hair drier', 'toothbrush']
train_path = './Data/coco/train'
val_path = './Data/coco/validation'
# Define transforms for the dataset
transform2 = v2.Compose(
    [
        v2.ToImage(),
        # v2.Resize(size=desired_size),  # Resize image
        # v2.RandomPhotometricDistort(p=0.2),
        # v2.RandomZoomOut(fill={tv_tensors.Image: (123, 117, 104), "others": 0}),
        # v2.RandomIoUCrop(),
        # v2.RandomHorizontalFlip(p=0.4),
        # v2.SanitizeBoundingBoxes(),
        v2.ToDtype(torch.float32, scale=True),
    ]
)
# Load COCO train dataset
train_dataset = datasets.CocoDetection(root=f'{train_path}/data',
                                       annFile=f'{train_path}/labels.json',
                                       transform=transform2)

# Load COCO validation dataset
test_dataset = datasets.CocoDetection(root=f'{val_path}/data',
                                      annFile=f'{val_path}/labels.json',
                                      transform=transform2)
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
sample = train_dataset[49]
img, target = sample
print(f"{type(img) = }\n{type(target) = }")
type(img) = <class 'torchvision.tv_tensors._image.Image'>
type(target) = <class 'list'>
train_dataset2 = datasets.wrap_dataset_for_transforms_v2(train_dataset)
test_dataset2 = datasets.wrap_dataset_for_transforms_v2(test_dataset)
len(train_dataset2), len(test_dataset2)
(50, 50)
batch_size = 2

# Create DataLoader for train dataset
train_loader = DataLoader(train_dataset2,
                          batch_size=batch_size,
                          shuffle=True,
                          collate_fn=lambda batch: tuple(zip(*batch)),
                          num_workers=8)

# Create DataLoader for test dataset
test_loader = DataLoader(test_dataset2,
                         batch_size=batch_size,
                         shuffle=False,
                         collate_fn=lambda batch: tuple(zip(*batch)),
                         num_workers=8)
sample = train_dataset2[10]
img, target = sample
print(f"{type(img) = }\n{type(target) = }\n{target.keys() = }")
print(f"{type(target['boxes']) = }\n{type(target['labels']) = }")
type(img) = <class 'torchvision.tv_tensors._image.Image'>
type(target) = <class 'dict'>
target.keys() = dict_keys(['image_id', 'boxes', 'labels'])
type(target['boxes']) = <class 'torchvision.tv_tensors._bounding_boxes.BoundingBoxes'>
type(target['labels']) = <class 'torch.Tensor'>
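The wrapped boxes carry their coordinate format and canvas size along with the data; a quick inspection using the tv_tensors attributes:

print(target['boxes'].format, target['boxes'].canvas_size)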
def cv2_show(image_np, label):
    # Convert the RGB float image to a BGR uint8 image for OpenCV
    image_cv2 = cv2.cvtColor((image_np * 255).astype(np.uint8), cv2.COLOR_RGB2BGR)
    if 'scores' in label:
        for bbox, item, score in zip(label['boxes'], label['labels'], label['scores']):
            if score > 0.2:
                # Draw the bounding box
                cv2.rectangle(image_cv2,
                              (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),
                              (0, 255, 0), 2)
                # Display the label
                font = cv2.FONT_HERSHEY_SIMPLEX
                cv2.putText(image_cv2, str(item),
                            (int(bbox[0]), int(bbox[1]) - 10),
                            font, 0.5, (0, 255, 0), 2, cv2.LINE_AA)
    else:
        for bbox, item in zip(label['boxes'], label['labels']):
            # Draw the bounding box
            cv2.rectangle(image_cv2,
                          (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),
                          (0, 255, 0), 2)
            # Display the class name
            font = cv2.FONT_HERSHEY_SIMPLEX
            cv2.putText(image_cv2, classes[item],
                        (int(bbox[0]), int(bbox[1]) - 10),
                        font, 0.5, (0, 255, 0), 2, cv2.LINE_AA)

    # Convert the image back to RGB format for display with Matplotlib
    image_rgb = cv2.cvtColor(image_cv2, cv2.COLOR_BGR2RGB)
    return image_rgb
def show_image(sample):
    # Unpack an image and its label from the dataset sample
    image, label = sample

    # Convert the image tensor to a NumPy array and draw the annotations
    image_np = image.numpy().transpose((1, 2, 0))
    image_rgb = cv2_show(image_np, label)

    # Display the image using Matplotlib
    plt.imshow(image_rgb)
    plt.axis('off')
    plt.show()
# Define a function to display images
def show_images(images, labels, **kwargs):
    nrows = int(np.sqrt(len(images)))
    ncols = int(np.floor(len(images) / nrows))

    fig, axes = plt.subplots(nrows, ncols, **kwargs)
    # Adjust the spacing between subplots
    plt.subplots_adjust(wspace=0.01, hspace=0.01)

    # Display each image with its boxes
    for ax, image, label in zip(axes.flat, images, labels):
        # Convert image to numpy array and draw the annotations
        image_np = np.asarray(image).transpose((1, 2, 0))
        image_rgb = cv2_show(image_np, label)

        # Display image
        ax.imshow(image_rgb)
        ax.axis('off')

    # Hide any leftover empty subplots
    for ax in axes.flat[len(images):]:
        ax.axis('off')
    plt.show()
show_image(train_dataset2[18])
Iter
# Get a batch of images and labels from the data loader
examples = iter(train_loader)
images, labels = next(examples)
show_images(images, labels, figsize=(15, 5))