[ML Notes] PyTorch Computer Vision

The following is a notebook converted directly to markdown; it runs on macOS.
You can also try running it directly in Google Colab, which should work too 👉🏻 https://drive.google.com/file/d/1CjHEb3ZspR_3qbHjJgenfrGmeJC75nF8/view?usp=sharing
The content below covers beginner-level CNN training and usage, written by a novice, so experts should treat it as light entertainment 🤪

PyTorch Computer Vision

  • torchvision - the base package for PyTorch computer vision
  • torchvision.datasets - datasets and data-loading functions for CV
  • torchvision.models - pre-trained CV models ready to use (see the sketch below)
  • torchvision.transforms - functions for manipulating vision data to
    prepare it for an ML model
  • torch.utils.data.Dataset - the base dataset class
  • torch.utils.data.DataLoader - creates a Python iterable over a dataset
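
For example, torchvision.models can hand you a pre-trained model in one line (a minimal sketch, not used in the rest of this notebook; weights download on first use, and the API shown is the torchvision >= 0.13 style):

```python
from torchvision import models

# load a ResNet-18 with pre-trained ImageNet weights (torchvision >= 0.13 API)
resnet = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
resnet.eval()  # switch to inference mode before making predictions
```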

Init

```python
import torch
import torch.nn as nn
import torchvision

from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import ToTensor

import matplotlib.pyplot as plt

# pick the best available device: CUDA GPU, then Apple Silicon (MPS), then CPU
if torch.cuda.is_available():
    device = "cuda"
elif getattr(torch.backends, 'mps', None) is not None and torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# device = "cpu"
print(torch.__version__)
print(torchvision.__version__)
```

Prepare datasets

```python
# get FashionMNIST as an example
train_data = datasets.FashionMNIST(
    root = "data", # where to download to
    train = True, # which split we want: True --> training set; False --> test set
    download = True,
    transform = ToTensor(), # transform applied to the images
    target_transform = None # transform applied to the labels
)

test_data = datasets.FashionMNIST(
    root = "data",
    train = False,
    download = True,
    transform = ToTensor(),
    target_transform = None
)
```
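
A quick check of what was downloaded (the printed values below follow from FashionMNIST's canonical 60k/10k split):

```python
print(len(train_data), len(test_data)) # 60000 10000
print(train_data.data.shape)           # torch.Size([60000, 28, 28]) - raw images before ToTensor
```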
```python
# To get a dataset's classes as a list, use .classes
# To get a dataset's classes as a dict, use .class_to_idx
image, label = train_data[0]
class_idx = train_data.class_to_idx
class_array = train_data.classes
image.shape
class_array[0]
```
```python
# visualize the tensor as an image
plt.imshow(image.squeeze(), cmap="gray")
```
```python
torch.manual_seed(2233)
fig = plt.figure(figsize = (9, 9))
row, col = 4, 4
for i in range(1, row * col + 1):
    rand = torch.randint(0, len(train_data), size = [1]).item()
    # print(rand)
    image, label = train_data[rand]
    fig.add_subplot(row, col, i)
    plt.imshow(image.squeeze(), cmap = "gray")
    plt.title(train_data.classes[label])
    plt.axis(False)

print(len(train_data.classes))
```
```python
# Load the data and turn it into mini-batches
# train_data, test_data
from torch.utils.data import DataLoader

BATCH_SIZE = 512
train_dataloader = DataLoader(dataset = train_data,
                              batch_size = BATCH_SIZE,
                              shuffle = True,
                              num_workers = 4)
test_dataloader = DataLoader(dataset = test_data, # the test split, not train_data
                             batch_size = BATCH_SIZE,
                             shuffle = False, # no shuffling needed when evaluating the model
                             num_workers = 4)
```
```python
# len(train_dataloader) # total number of batches
train_features_batch, train_labels_batch = next(iter(train_dataloader)) # iter() makes an iterator over train_dataloader, next() steps through it manually from the start
# rand_idx = torch.randint(0, len(train_features_batch), size = [1]).item()
# img, label = train_features_batch[rand_idx], train_labels_batch[rand_idx]
# plt.imshow(img.squeeze(), cmap = "gray")
# plt.title(train_data.classes[label])
fig = plt.figure(figsize = (18, 9))
row, col = 4, 8
for i in range(1, row * col + 1):
    img, label = train_features_batch[i-1], train_labels_batch[i-1]
    fig.add_subplot(row, col, i)
    plt.imshow(img.squeeze(), cmap = "gray")
    plt.title(train_data.classes[label])
    plt.axis(False)
```

A quick recap:

  • train_data - the raw data, type = FashionMNIST
  • train_dataloader - the raw data split into mini-batches, type = torch.utils.data.dataloader.DataLoader
  • train_features_batch, train_labels_batch - the images and labels of one mini-batch, type = tensor, laid out NCHW

To summarize, the commonly used attributes of each type:

Dataset (FashionMNIST, etc.)

  • .classes - the class names as a list
  • .class_to_idx - the class names as a dict mapping to indices
  • len(dataset) - how many samples there are in total

DataLoader

  • iter(dataloader) - make an iterator over the DataLoader, ready to traverse
  • next(iter) - step through it manually, starting from the first batch
  • len(dataloader) - how many mini-batches there are in total
  • .dataset - the underlying dataset (i.e. the Dataset, FashionMNIST)
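
A quick sanity check of these attributes, using the train_data and train_dataloader defined above (the printed values follow from FashionMNIST's fixed class list and split size):

```python
# Dataset attributes
print(train_data.classes[:3])              # ['T-shirt/top', 'Trouser', 'Pullover']
print(train_data.class_to_idx["Trouser"])  # 1
print(len(train_data))                     # 60000 samples

# DataLoader attributes
print(len(train_dataloader))                   # number of mini-batches = ceil(60000 / BATCH_SIZE)
print(train_dataloader.dataset is train_data)  # True - the underlying Dataset
batch_images, batch_labels = next(iter(train_dataloader))
print(batch_images.shape)                      # torch.Size([512, 1, 28, 28]) - NCHW
```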

Build a baseline model

Use a linear model as the baseline.

```python
flatten_model = nn.Flatten() # create a flatten layer
X = train_features_batch[0]
output = flatten_model(X) # flatten the image: collapse the multi-dimensional tensor into a single feature dimension
```
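
The shapes make the effect concrete (values follow from the FashionMNIST batch above):

```python
print(X.shape)      # torch.Size([1, 28, 28]) - C, H, W
print(output.shape) # torch.Size([1, 784])    - nn.Flatten keeps dim 0 and merges the rest
```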
17
```python
# device = "cpu"
class FashionMNISTModelV0(nn.Module):
    def __init__(self, in_ft, out_ft, hid = 10):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features = in_ft, out_features = hid),
            nn.Linear(in_features = hid, out_features = hid),
            nn.Linear(in_features = hid, out_features = hid),
            nn.Linear(in_features = hid, out_features = out_ft)
        )

    def forward(self, x):
        return self.layers(x)

model_0 = FashionMNISTModelV0(in_ft = 784, out_ft = 10).to(device) # 784 = 28 * 28
next(model_0.parameters()).device
```
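
Before training, a quick shape check with a fake batch confirms the model wires up (a minimal sketch; `dummy` and `logits` are illustrative names):

```python
dummy = torch.randn(32, 1, 28, 28).to(device) # a fake batch of 32 grayscale 28x28 images
logits = model_0(dummy)
print(logits.shape) # torch.Size([32, 10]) - one raw score per class, per image
```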

Train it!

  • train on each batch
  • time it
```python
from timeit import default_timer as timer

def train_time(start: float, end: float, device: torch.device = None):
    tim = end - start
    print(f"On {device}: {tim:.3f} seconds")
    return tim
```
```python
# model_0
!pip -q install torchmetrics
from torchmetrics import Accuracy

acc_fn = Accuracy(task = "multiclass", num_classes = 10).to(device)
loss_fn = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(params = model_0.parameters(),
                             lr = 0.1)
epochs = 3
start_time = timer()
for epoch in range(epochs): # loop through all batches, once per epoch
    print(f"\nEpoch: {epoch}")
    train_loss = 0
    for batch, (X, y) in enumerate(train_dataloader):
        model_0.train()
        X, y = X.to(device), y.to(device)
        y_pred = model_0(X)
        loss = loss_fn(y_pred, y)
        train_loss += loss.item() # .item() detaches the value, so the graph isn't kept alive
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch % 40 == 0:
            print(f"Looked at {batch * len(X)}/{len(train_dataloader.dataset)} samples.")
    train_loss /= len(train_dataloader)
    test_acc, test_loss = 0, 0
    print("Start Testing", end = "...")
    model_0.eval()
    with torch.inference_mode():
        for X, y in test_dataloader:
            X, y = X.to(device), y.to(device)
            test_pred = model_0(X)
            test_loss += loss_fn(test_pred, y)
            test_acc += acc_fn(test_pred, y.int()) * 100
        test_loss /= len(test_dataloader)
        test_acc /= len(test_dataloader)
    print("Stop Testing")
    print(f"Train Loss: {train_loss:.5f} | Test Loss: {test_loss:.5f} | Test Accuracy: {test_acc:.5f}")
end_time = timer()
model_0_train_time = train_time(start = start_time, end = end_time, device = device)
```

Evaluate the model, and wrap the evaluation in a function

```python
def eval_model(model: torch.nn.Module,
               dl: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module):
    loss, acc = 0, 0
    acc_fn = Accuracy(task = "multiclass", num_classes = 10).to(device)
    model.eval()
    with torch.inference_mode():
        for X, y in dl:
            X, y = X.to(device), y.to(device)
            y_pred = model(X)
            loss += loss_fn(y_pred, y)
            acc += acc_fn(y_pred, y.int()) * 100
        loss /= len(dl)
        acc /= len(dl)
    return {"model_name": model.__class__.__name__,
            "model_loss": loss.item(),
            "model_acc": acc.item()}

model_0_result = eval_model(model = model_0,
                            dl = train_dataloader,
                            loss_fn = loss_fn)
model_0_result
```

Non-Linear Model

```python
class FashionMNISTModelV1(nn.Module):
    def __init__(self, in_ft, out_ft, hid = 10):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features = in_ft, out_features = hid),
            nn.ReLU(),
            nn.Linear(in_features = hid, out_features = hid),
            nn.ReLU(),
            nn.Linear(in_features = hid, out_features = out_ft), # output layer, so out_ft is actually used
        )

    def forward(self, x):
        return self.layers(x)

model_1 = FashionMNISTModelV1(in_ft = 784, out_ft = 10).to(device)
model_1
```
```python
# functionizing the train and test loops
from torchmetrics import Accuracy

def train_step(model: torch.nn.Module,
               dl: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer):
    train_loss = 0
    train_acc = 0
    model.to(device)
    print("Start Training")
    acc_fn = Accuracy(task = "multiclass", num_classes = 10).to(device)
    for batch, (X, y) in enumerate(dl):
        model.train()
        X, y = X.to(device), y.to(device)
        y_pred = model(X)
        loss = loss_fn(y_pred, y)
        train_loss += loss.item()
        train_acc += acc_fn(y_pred, y.int()) * 100
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch % 40 == 0:
            print(f"Looked at {batch * len(X)}/{len(dl.dataset)} samples.")
    train_loss /= len(dl)
    train_acc /= len(dl)
    print(f"Train Loss: {train_loss:.5f} | Train Accuracy: {train_acc:.5f}%")

def test_step(model: torch.nn.Module,
              dl: torch.utils.data.DataLoader, # a DataLoader, not a torch.nn.Module
              loss_fn: torch.nn.Module):
    test_loss = 0
    test_acc = 0
    acc_fn = Accuracy(task = "multiclass", num_classes = 10).to(device)
    model.to(device)
    print("Start Testing")
    model.eval()
    with torch.inference_mode():
        for batch, (X, y) in enumerate(dl):
            X, y = X.to(device), y.to(device)
            y_pred = model(X)
            test_loss += loss_fn(y_pred, y)
            test_acc += acc_fn(y_pred, y.int()) * 100
            if batch % 40 == 0:
                print(f"Looked at {batch * len(X)}/{len(dl.dataset)} samples.")
        test_acc /= len(dl)
        test_loss /= len(dl)
    print(f"Test Loss: {test_loss:.5f} | Test Accuracy: {test_acc:.5f}%")
```
```python
train_step(model = model_1,
           loss_fn = nn.CrossEntropyLoss(),
           dl = train_dataloader,
           optimizer = torch.optim.Adam(params = model_1.parameters(),
                                        lr = 0.1))
test_step(model = model_1,
          loss_fn = nn.CrossEntropyLoss(),
          dl = test_dataloader)
```
```python
from timeit import default_timer as timer
from IPython.display import clear_output

start_time = timer()
epochs = 3
optimizer = torch.optim.Adam(params = model_1.parameters(),
                             lr = 0.1) # create the optimizer once, so its internal state survives across epochs
for epoch in range(epochs):
    clear_output(wait=True)
    print(f"\nEpoch: {epoch + 1}")
    train_step(model = model_1,
               dl = train_dataloader,
               loss_fn = nn.CrossEntropyLoss(),
               optimizer = optimizer)
    test_step(model = model_1,
              dl = test_dataloader,
              loss_fn = nn.CrossEntropyLoss())
print(eval_model(model = model_1,
                 loss_fn = nn.CrossEntropyLoss(),
                 dl = test_dataloader))
end_time = timer()
train_time(start = start_time, end = end_time, device = device)
```

CNN

Layers:

  • Input Layer - takes in the image
  • Convolutional Layer - extracts features from the image
  • Hidden Layer - non-linear activation, e.g. nn.ReLU()
  • Pooling Layer - condenses the features and makes them more robust
  • Output Layer - produces the class scores

Order (basic):
Input --> Convolutional --> ReLU --> Pooling --> Convolutional --> … -->
Pooling --> Output
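
Each convolution or pooling layer shrinks (or preserves) the spatial size following the standard formula out = floor((in + 2*padding - kernel_size) / stride) + 1. A minimal sketch tracing a FashionMNIST-sized input through one conv and one pool:

```python
import torch
import torch.nn as nn

x = torch.randn(1, 1, 28, 28) # N, C, H, W
conv = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=3, stride=1, padding=1) # (28 + 2*1 - 3)/1 + 1 = 28
pool = nn.MaxPool2d(kernel_size=2) # halves the spatial size: 28 / 2 = 14

print(conv(x).shape)       # torch.Size([1, 10, 28, 28])
print(pool(conv(x)).shape) # torch.Size([1, 10, 14, 14])
```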

```python
class FashionMNISTModelV2(nn.Module):
    def __init__(self, in_ch, out_ft, hid):
        super().__init__()
        self.conv_block_1 = nn.Sequential(
            nn.Conv2d(in_channels=in_ch,
                      out_channels=hid,
                      kernel_size=3, # size of the window the CNN extracts features from
                      stride=1,      # step size as the window moves across the image
                      padding=1),    # pixels padded around the border, so edge features are read in finer detail
            nn.ReLU(),
            nn.Conv2d(in_channels=hid, out_channels=hid, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.conv_block_2 = nn.Sequential(
            nn.Conv2d(in_channels=hid,
                      out_channels=hid,
                      kernel_size=3,
                      stride=1,
                      padding=1), # padding avoids losing the corner and edge pixels of the original image
            nn.ReLU(),
            nn.Conv2d(in_channels=hid, out_channels=hid, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=hid*7*7, out_features=out_ft), # 28 -> 14 -> 7 after two MaxPool2d(2) layers
        )

    def forward(self, x):
        x = self.conv_block_1(x)
        # print(x.shape)
        x = self.conv_block_2(x)
        # print(x.shape)
        x = self.classifier(x)
        return x

model_2 = FashionMNISTModelV2(in_ch=1, out_ft=10, hid=10).to(device)
model_2
```

Getting to know the CNN layers

```python
images = torch.randn(size=(32, 3, 64, 64)) # a fake batch of 32 RGB 64x64 images
test_image = images[0]
test_image.shape
```
```python
conv_layer = nn.Conv2d(in_channels=3, out_channels=10, kernel_size=3, stride=1, padding=0)
conv_max = nn.MaxPool2d(kernel_size=2)
conv_output = conv_max(conv_layer(test_image.unsqueeze(0))) # run the image through the conv layer, then the max pool
conv_output.shape # torch.Size([1, 10, 31, 31]): conv takes 64 -> 62 (no padding), pool takes 62 -> 31
```
```python
# image.shape
y_pred = model_2(image.unsqueeze(0).to(device)) # add a batch dimension and move to the model's device
y_pred
```
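
`y_pred` here is a row of 10 raw logits; softmax and argmax turn it into probabilities and a class label (a minimal sketch):

```python
probs = torch.softmax(y_pred, dim=1)                  # probabilities across the 10 classes, sum to 1
print(probs.argmax(dim=1))                            # predicted class index
print(train_data.classes[probs.argmax(dim=1).item()]) # human-readable label
```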

Train the CNN

```python
epochs = 10
from IPython.display import clear_output
from timeit import default_timer as timer
import time

start_time = timer()
for epoch in range(epochs):
    time.sleep(1) # pause so clear_output doesn't wipe the previous epoch's results instantly
    clear_output(wait=True)
    print(f"Epoch: {epoch+1}")
    train_step(model=model_2,
               dl=train_dataloader,
               loss_fn=nn.CrossEntropyLoss(),
               optimizer=torch.optim.SGD(params=model_2.parameters(),
                                         lr=0.1))
    test_step(model=model_2,
              dl=test_dataloader,
              loss_fn=nn.CrossEntropyLoss())
end_time = timer()
train_time(start=start_time, end=end_time-epochs, device=device) # subtract the epochs * 1 s spent sleeping
```
```python
model_0_Adam_result = eval_model(model=model_0,
                                 dl=test_dataloader,
                                 loss_fn=nn.CrossEntropyLoss())
model_1_Adam_result = eval_model(model=model_1,
                                 dl=test_dataloader,
                                 loss_fn=nn.CrossEntropyLoss())
model_2_Adam_result = eval_model(model=model_2,
                                 dl=test_dataloader,
                                 loss_fn=nn.CrossEntropyLoss())
print(model_0_Adam_result)
print(model_1_Adam_result)
print(model_2_Adam_result)
```
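
To compare the three result dicts side by side, they can be dropped straight into a DataFrame (a sketch; assumes pandas is installed):

```python
import pandas as pd

compare_results = pd.DataFrame([model_0_Adam_result,
                                model_1_Adam_result,
                                model_2_Adam_result])
print(compare_results) # one row per model: model_name, model_loss, model_acc
```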

Make predictions with the best model

```python
def make_predictions(model: torch.nn.Module,
                     data: list,
                     device: torch.device=device):
    pred_probs = []
    model.to(device)
    model.eval()
    with torch.inference_mode():
        for sample in data:
            sample = torch.unsqueeze(sample, dim=0).to(device) # add a batch dimension
            y_logits = model(sample)
            y_prob = torch.softmax(y_logits.squeeze(), dim=0) # logits -> prediction probabilities
            pred_probs.append(y_prob.cpu())
    return torch.stack(pred_probs)
```
```python
test_data
```
```python
import random

test_samples = []
test_labels = []
for sample, label in random.sample(list(test_data), k=9):
    test_samples.append(sample)
    test_labels.append(label)

pred_probs = make_predictions(model=model_2,
                              data=test_samples)
pred_classes = pred_probs.argmax(dim=1) # most probable class per sample

# plot it
plt.figure(figsize=(9, 9))
row, col = 3, 3
for i, sample in enumerate(test_samples):
    plt.subplot(row, col, i+1)
    plt.imshow(sample.squeeze(), cmap="gray")
    pred_label = test_data.classes[pred_classes[i]]
    true_label = test_data.classes[test_labels[i]]
    title_text = f"Pred: {pred_label} | Truth: {true_label}"
    if pred_label == true_label:
        plt.title(title_text, fontsize=10, c="g") # green when correct
    else:
        plt.title(title_text, fontsize=10, c="r") # red when wrong
    plt.axis(False)
```
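
A natural follow-up is a confusion matrix over the whole test set (a sketch using torchmetrics, which was installed above; mlxtend's plot_confusion_matrix is a common choice for plotting the result):

```python
from torchmetrics import ConfusionMatrix

# gather predictions over the entire test set
y_preds, y_trues = [], []
model_2.eval()
with torch.inference_mode():
    for X, y in test_dataloader:
        y_preds.append(model_2(X.to(device)).argmax(dim=1).cpu())
        y_trues.append(y)

confmat = ConfusionMatrix(task="multiclass", num_classes=10)
print(confmat(torch.cat(y_preds), torch.cat(y_trues))) # 10x10 tensor of counts: rows = truth, cols = prediction
```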