【ML笔记】PyTorch Custom Dataset(自定义数据集)

题头

本文由 notebook 直接转换为 markdown。因本人的 MBP 内存不足,Food101 数据还需要缩小才能继续训练;其他代码已在 macOS 上运行并训练成功。也可以点击这个链接直接在 Colab 里面打开 👉🏻 https://drive.google.com/file/d/1Lee_oohIN99bHRm1kMEi25VgxTSFe1fF/view?usp=sharing
但是Colab给的T4卡只有15G显存,因此Food101还是会爆,需要缩小图片分辨率。

Init

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
!pip install torchmetrics
!pip install torchinfo
import torch
import torch.nn as nn
import torchvision
import time
import os
import random
import numpy as np
import matplotlib.pyplot as plt

from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
from torchmetrics import Accuracy
from timeit import default_timer as timer
from timeit import default_timer as timer
from IPython.display import clear_output
from pathlib import Path
from PIL import Image # Pillow for showing image in python
from torchinfo import summary


# Pick the compute device once: prefer CUDA, then Apple's MPS backend
# (only when this torch build exposes it), and fall back to the CPU.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if getattr(torch.backends, 'mps', None) is not None and torch.backends.mps.is_available()
    else "cpu"
)

Definition

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
def train_time(start: float, end: float, device: "torch.device | str | None" = None):
    """Print and return the elapsed time between two timer readings.

    Args:
        start: Timer reading taken before the work started.
        end: Timer reading taken after the work finished.
        device: Device the work ran on; only used as a label in the message.

    Returns:
        The elapsed time ``end - start`` (seconds when both readings come
        from ``timeit.default_timer``).
    """
    elapsed = end - start
    # The original default was the ``torch.device`` class itself (``=`` used
    # where an annotation was meant); fall back to a readable label instead.
    label = "unknown device" if device is None else device
    print(f"On {label}: {elapsed:.3f} seconds")
    return elapsed

def eval_model(model: torch.nn.Module,
               dl: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               class_num: int = 10):
    """Evaluate ``model`` on every batch of ``dl`` and return averaged metrics.

    Batches are moved to the module-level ``device`` before the forward pass.

    Args:
        model: Network to evaluate (switched to eval mode here).
        dl: DataLoader yielding ``(inputs, labels)`` batches.
        loss_fn: Loss called as ``loss_fn(predictions, labels)``.
        class_num: Number of classes for the multiclass accuracy metric.
            Defaults to 10, the value that used to be hard-coded.

    Returns:
        A dict with ``model_name`` (class name of the model), ``model_loss``
        (mean per-batch loss) and ``model_acc`` (mean per-batch accuracy in
        percent).
    """
    loss, acc = 0, 0
    acc_fn = Accuracy(task="multiclass", num_classes=class_num).to(device)
    model.eval()
    with torch.inference_mode():
        for X, y in dl:
            X, y = X.to(device), y.to(device)
            y_pred = model(X)
            loss += loss_fn(y_pred, y)
            acc += acc_fn(y_pred, y.int()) * 100
        # Average over the number of batches, not the number of samples.
        loss /= len(dl)
        acc /= len(dl)
    return {"model_name": model.__class__.__name__,
            "model_loss": loss.item(),
            "model_acc": acc.item()}

def train_step(model: torch.nn.Module,
               dl: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               class_num,
               showcase):
    """Train ``model`` for one pass over ``dl`` and print the averaged metrics.

    The model and every batch are moved to the module-level ``device``.

    Args:
        model: Network to train.
        dl: DataLoader yielding ``(inputs, labels)`` batches.
        loss_fn: Loss called as ``loss_fn(predictions, labels)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        class_num: Number of classes for the multiclass accuracy metric.
        showcase: A progress line is printed whenever the batch index is a
            multiple of this value.

    Returns:
        ``(train_loss, train_acc)`` as floats: the mean per-batch loss and the
        mean per-batch accuracy in percent.
    """
    train_loss = 0
    train_acc = 0
    model.to(device)
    model.train()  # the mode never changes inside the loop, so set it once
    print("Start Training")
    acc_fn = Accuracy(task="multiclass", num_classes=class_num).to(device)
    for batch, (X, y) in enumerate(dl):
        X, y = X.to(device), y.to(device)
        y_pred = model(X)
        loss = loss_fn(y_pred, y)
        # Accumulate a detached value: summing the live loss tensor would
        # chain every batch's autograd history into the running total.
        train_loss += loss.detach()
        train_acc += acc_fn(y_pred, y.int()) * 100
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch % showcase == 0:
            print(f"Looked at {batch * len(X)}/{len(dl.dataset)} samples.")
    # Average over the number of batches, not the number of samples.
    train_loss /= len(dl)
    train_acc /= len(dl)
    print(f"Train Loss: {train_loss:.5f} | Train Accuracy: {train_acc:.5f}%")
    return float(train_loss), float(train_acc)

def test_step(model: torch.nn.Module,
              dl: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              class_num,
              showcase):
    """Evaluate ``model`` on one pass over ``dl`` and print the averaged metrics.

    The model and every batch are moved to the module-level ``device``; the
    whole pass runs under ``torch.inference_mode()``.

    Args:
        model: Network to evaluate.
        dl: DataLoader yielding ``(inputs, labels)`` batches.
        loss_fn: Loss called as ``loss_fn(predictions, labels)``.
        class_num: Number of classes for the multiclass accuracy metric.
        showcase: A progress line is printed whenever the batch index is a
            multiple of this value.

    Returns:
        ``(test_loss, test_acc)`` as floats: the mean per-batch loss and the
        mean per-batch accuracy in percent.
    """
    test_loss = 0
    test_acc = 0
    acc_fn = Accuracy(task="multiclass", num_classes=class_num).to(device)
    model.to(device)
    model.eval()  # the mode never changes inside the loop, so set it once
    print("Start Testing")
    with torch.inference_mode():
        for batch, (X, y) in enumerate(dl):
            X, y = X.to(device), y.to(device)
            y_pred = model(X)
            test_loss += loss_fn(y_pred, y)
            test_acc += acc_fn(y_pred, y.int()) * 100
            if batch % showcase == 0:
                print(f"Looked at {batch * len(X)}/{len(dl.dataset)} samples.")
        # Average over the number of batches, not the number of samples.
        test_acc /= len(dl)
        test_loss /= len(dl)
    print(f"Test Loss: {test_loss:.5f} | Test Accuracy: {test_acc:.5f}%")
    return float(test_loss), float(test_acc)

def walk_throungh_path(dir_path):
    """Print the number of sub-folders and files of every directory under ``dir_path``.

    Args:
        dir_path: Root directory handed to ``os.walk``.
    """
    # Descriptive names instead of ``dir``/``file`` so the builtin ``dir`` is
    # not shadowed inside the loop.
    for current_dir, subdirs, filenames in os.walk(dir_path):
        print(f"There are {len(subdirs)} folders and {len(filenames)} files in '{current_dir}' ")

def show_img(image_paths: list, transform, n=3):
    """Plot randomly chosen images next to their transformed versions.

    Args:
        image_paths: Image file paths. Each entry needs ``.parent.stem``
            (e.g. ``pathlib.Path``), which is used as the class label in the
            figure title.
        transform: Callable applied to the opened PIL image. Its result is
            permuted with ``(1, 2, 0)`` before plotting, so it is assumed to
            return a channels-first tensor (TODO confirm for custom
            transforms).
        n: Number of images to show. Capped at ``len(image_paths)`` so a short
            list no longer makes ``random.sample`` raise ``ValueError``.
    """
    sample_size = min(n, len(image_paths))
    for image_path in random.sample(image_paths, k=sample_size):
        with Image.open(image_path) as f:
            fig, ax = plt.subplots(nrows=1, ncols=2)

            # Left panel: the original image
            ax[0].imshow(f)
            ax[0].set_title(f"Original\nSize:{f.size}")
            ax[0].axis(False)

            # Right panel: the transformed image, moved to channels-last for matplotlib
            transformed_image = transform(f).permute(1, 2, 0)
            ax[1].imshow(transformed_image)
            ax[1].set_title(f"Transformed\nShape: {transformed_image.shape}")
            ax[1].axis(False)

            fig.suptitle(f"Class: {image_path.parent.stem}", fontsize=16)

def train_model(model: torch.nn.Module,
                train_dl: torch.utils.data.DataLoader,
                test_dl: torch.utils.data.DataLoader,
                epochs, class_num, showcase,
                loss_fn: torch.nn.Module,
                optimizer: torch.optim.Optimizer):
    """Run ``epochs`` rounds of ``train_step`` + ``test_step`` and report the time.

    Args:
        model: Network to train.
        train_dl: DataLoader passed to ``train_step``.
        test_dl: DataLoader passed to ``test_step``.
        epochs: Number of train/test rounds.
        class_num: Number of classes, forwarded to both steps.
        showcase: Progress-print interval in batches, forwarded to both steps.
        loss_fn: Loss function, forwarded to both steps.
        optimizer: Optimizer, forwarded to ``train_step``.
    """
    # Pause before clearing the notebook output -- presumably so the previous
    # epoch's metrics stay readable for a moment.
    pause_seconds = 1
    start_time = timer()
    for epoch in range(epochs):
        time.sleep(pause_seconds)
        clear_output(wait=True)
        print(f"Epoch: {epoch+1}")
        train_step(model=model,
                   dl=train_dl,
                   loss_fn=loss_fn,
                   optimizer=optimizer,
                   class_num=class_num,
                   showcase=showcase)
        test_step(model=model,
                  dl=test_dl,
                  loss_fn=loss_fn,
                  class_num=class_num,
                  showcase=showcase)
    end_time = timer()
    # Exclude the deliberate per-epoch pauses from the reported duration.
    train_time(start=start_time, end=end_time - epochs * pause_seconds, device=device)

class FashionMNISTModel(nn.Module):
    """Small CNN: two conv blocks, each ending in a 2x2 max-pool, plus a linear classifier.

    Args:
        in_ch: Number of channels of the input images.
        out_ft: Number of output features (one logit per class).
        hid: Number of channels produced by every convolution.
        img_size: Input height/width as an int (square images) or a
            ``(height, width)`` pair. Defaults to 28, the size the classifier
            used to be hard-wired for (``hid*7*7``).
    """

    def __init__(self, in_ch, out_ft, hid, img_size=28):
        super().__init__()
        height, width = (img_size, img_size) if isinstance(img_size, int) else img_size
        self.conv_block_1 = nn.Sequential(
            nn.Conv2d(in_channels=in_ch,
                      out_channels=hid,
                      kernel_size=3,  # size of the window each filter looks at
                      stride=1,       # step between neighbouring windows
                      padding=1),     # pixels added around the border; keeps height/width unchanged
            # A second, back-to-back ReLU used to follow here; ReLU is
            # idempotent, so dropping it does not change the output.
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.conv_block_2 = nn.Sequential(
            nn.Conv2d(in_channels=hid,
                      out_channels=hid,
                      kernel_size=3,
                      stride=1,
                      padding=1),     # padding avoids losing the border pixels
            nn.ReLU(),
            nn.Conv2d(in_channels=hid, out_channels=hid, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # The padded 3x3 convolutions keep the spatial size; only the two 2x2
        # max-pools shrink it, each halving (rounding down) height and width.
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=hid * (height // 4) * (width // 4), out_features=out_ft),
        )

    def forward(self, x):
        """Return the classifier output for the image batch ``x``."""
        x = self.conv_block_1(x)
        x = self.conv_block_2(x)
        x = self.classifier(x)
        return x

class FoodModel(nn.Module):
    """CNN with four identical conv blocks followed by a linear classifier.

    Every block is conv3x3 -> BatchNorm -> ReLU -> conv3x3 -> BatchNorm ->
    ReLU -> 2x2 max-pool, with unpadded convolutions.

    Args:
        in_ch: Number of channels of the input images.
        out_ft: Number of output features (one logit per class).
        hid: Number of channels produced by every convolution.
        img_size: Input height/width as an int (square images) or a
            ``(height, width)`` pair. Defaults to 256, the size the classifier
            used to be hard-wired for (``hid*144``).

    Raises:
        ValueError: If ``img_size`` is too small to survive the four blocks.
    """

    _NUM_BLOCKS = 4

    def __init__(self, in_ch, out_ft, hid, img_size=256) -> None:
        super().__init__()
        height, width = (img_size, img_size) if isinstance(img_size, int) else img_size
        # Blocks are built in the original order so parameter registration
        # (and therefore state_dict keys) stays the same.
        self.conv_block_1 = self._conv_block(in_ch, hid)
        self.conv_block_2 = self._conv_block(hid, hid)
        self.conv_block_3 = self._conv_block(hid, hid)
        self.conv_block_4 = self._conv_block(hid, hid)
        for _ in range(self._NUM_BLOCKS):
            height = self._block_output_size(height)
            width = self._block_output_size(width)
            if height < 1 or width < 1:
                raise ValueError(
                    f"img_size={img_size!r} is too small for {self._NUM_BLOCKS} conv blocks"
                )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=hid * height * width, out_features=out_ft),
        )

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Build one conv-BN-ReLU-conv-BN-ReLU-maxpool block."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels,
                      out_channels=out_channels,
                      kernel_size=3,  # size of the window each filter looks at
                      stride=1,       # step between neighbouring windows
                      padding=0),     # no border padding: each conv trims 2 pixels per dimension
            nn.BatchNorm2d(num_features=out_channels),
            nn.ReLU(),
            nn.Conv2d(in_channels=out_channels, out_channels=out_channels,
                      kernel_size=3, stride=1, padding=0),
            nn.BatchNorm2d(num_features=out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),  # stride defaults to the kernel size
        )

    @staticmethod
    def _block_output_size(size):
        """Return the height/width left after one block for an input of ``size``."""
        # Two unpadded 3x3 convolutions remove 4 pixels, then the pool halves (floor).
        return (size - 4) // 2

    def forward(self, x):
        """Return the classifier output for the image batch ``x``."""
        for block in (self.conv_block_1, self.conv_block_2,
                      self.conv_block_3, self.conv_block_4):
            x = block(x)
        return self.classifier(x)

FashionMNIST

Data Preparation

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# Download FashionMNIST (if needed) into ./data and wrap both splits as datasets.
train_data = datasets.FashionMNIST(
    root="data",            # directory the files are downloaded to
    train=True,             # True -> training split, False -> test split
    download=True,
    transform=ToTensor(),   # transform applied to every image
    target_transform=None,  # transform applied to every label
)
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
    target_transform=None,
)

BATCH_SIZE = 512
train_dataloader = DataLoader(dataset=train_data,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              num_workers=4)
# The test loader must wrap the held-out split; it previously wrapped
# train_data, so the "test" metrics were computed on the training images.
test_dataloader = DataLoader(dataset=test_data,
                             batch_size=BATCH_SIZE,
                             shuffle=False,  # a fixed order keeps evaluation reproducible
                             num_workers=4)

Train the model

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# Train the FashionMNIST model with the shared train_model() helper, which
# runs the same per-epoch train_step/test_step loop and prints the total time.
model = FashionMNISTModel(in_ch=1, out_ft=10, hid=10).to(device)
epochs = 10
# Build the loss and the optimizer once; they used to be re-created on every epoch.
train_model(model=model,
            train_dl=train_dataloader,
            test_dl=test_dataloader,
            epochs=epochs,
            class_num=10,
            showcase=40,
            loss_fn=nn.CrossEntropyLoss(),
            optimizer=torch.optim.SGD(params=model.parameters(), lr=0.1))

Food 101

Prepare the datasets

use data/Food_part_small for small datasets

1
2
3
4
# Locate the dataset folder on disk
data_path = Path("data/")
image_path = data_path.joinpath("Food_part_small")
print(image_path.is_dir())  # prints True when the dataset folder exists
1
2
# Data exploration: print how many folders and files each directory under image_path contains
walk_throungh_path(image_path)
1
2
3
4
5
6
7
8
9
10
11
12
13
# Data preparation: locate the two splits and collect the image files
train_dir = image_path.joinpath("train")
test_dir = image_path.joinpath("test")

# Paths of every .jpg, per split and for the whole dataset
train_image_path_list = [*train_dir.glob("*/*.jpg")]
test_image_path_list = [*test_dir.glob("*/*.jpg")]
image_path_list = [*image_path.glob("*/*/*.jpg")]

# Pick one image at random; its parent folder name is printed as the label
random_image_path = random.choice(image_path_list)

img = Image.open(random_image_path)
print(f"Name: {random_image_path.parent.stem}")
img
1
2
3
4
5
6
# Visualize the randomly chosen image with matplotlib
img_as_array = np.asarray(img)  # convert the PIL image to a NumPy array
plt.figure(figsize=(10, 7))
plt.imshow(img_as_array)
# The title shows the parent folder name as the class, plus the array shape
plt.title(f"Class: {random_image_path.parent.stem} | Image shape: {img_as_array.shape} --> HWC")
plt.axis(False)  # hide the axes
1
2
3
4
5
6
7
8
# Build the transform pipeline that turns an image into a tensor:
# resize to 256x256, flip horizontally with probability 0.5, then convert.
data_transform = transforms.Compose([
transforms.Resize(size=(256, 256)),
transforms.RandomHorizontalFlip(p=0.5), # random horizontal flip
transforms.ToTensor()
])
# Show the shape of the transformed sample image
data_transform(img).shape
1
2
# Visualize the transform: plot 3 randomly sampled images next to their transformed versions
show_img(image_paths=image_path_list, transform=data_transform, n=3)
1
2
3
4
5
6
7
8
9
10
# Load the images into datasets with ImageFolder, a built-in torchvision
# dataset class.
train_data = datasets.ImageFolder(root=train_dir,
                                  transform=data_transform,  # transform for the data
                                  target_transform=None)     # transform for the label
# Evaluate on un-augmented images: same resize + tensor conversion as the
# training pipeline, but without the random horizontal flip, so test metrics
# do not vary from run to run.
test_transform = transforms.Compose([
    transforms.Resize(size=(256, 256)),
    transforms.ToTensor(),
])
test_data = datasets.ImageFolder(root=test_dir,
                                 transform=test_transform,
                                 target_transform=None)
train_data, test_data
1
2
3
# Get the class names as a list (the `classes` attribute of the ImageFolder dataset)
class_names = train_data.classes
class_names  # displayed as the notebook cell output
1
2
3
# Get the class-name -> index mapping (the `class_to_idx` attribute of the ImageFolder dataset)
class_dict = train_data.class_to_idx
class_dict  # displayed as the notebook cell output
1
2
3
4
# Fetch the first sample once: every train_data[0] access re-loads the file
# and re-runs the transform, so indexing twice did the work twice.
img, label = train_data[0]
plt.imshow(img.permute(1, 2, 0))  # permute(1, 2, 0) moves the first axis last before plotting
plt.title(class_names[label])
plt.axis(False)
1
2
3
4
5
6
7
8
9
10
# Wrap both datasets in DataLoaders; only the training loader shuffles.
BATCH_SIZE = 128
train_dataloader = DataLoader(
    train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
)
test_dataloader = DataLoader(
    test_data,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)

Train the model!

1
2
3
4
5
# Init the model; the output size follows the number of classes found by
# ImageFolder instead of a hard-coded 3.
model_food = FoodModel(in_ch=3, out_ft=len(class_names), hid=16).to(device)
# Smoke test: push one training batch through the model.
image_batch, label_batch = next(iter(train_dataloader))
model_food(image_batch.to(device))
1
2
# Get info about the model using torchinfo; input_size is given as [1, 3, 256, 256]
summary(model_food, input_size=[1, 3, 256, 256])
1
2
3
4
5
6
# Train the food classifier; the class count follows the dataset instead of a
# hard-coded 3.
# NOTE(review): lr=0.1 is far above Adam's default of 1e-3 -- confirm it is intended.
train_model(model=model_food,
            train_dl=train_dataloader,
            test_dl=test_dataloader,
            loss_fn=nn.CrossEntropyLoss(),
            optimizer=torch.optim.Adam(params=model_food.parameters(), lr=0.1),
            epochs=100, showcase=3, class_num=len(class_names))
1


【ML笔记】PyTorch Custom Dataset(自定义数据集)
https://学习.fun/ml-note/pytorch-custom-dataset/
Author
Stephen Zeng
Posted on
August 7, 2024
Licensed under