[ML Notes] PyTorch Stage 1 Summary

Preface

In the first stage of learning PyTorch I have more or less mastered the basics of loading, training, using, and saving models; below is a stage summary of the functions and methods involved. This post was written entirely in Typora with no code completion, so it is essentially recited from memory, and it has not been tested either.

Initialization

Initialization generally means importing the model, pulling in the most basic libraries, setting the compute device, and so on.

Importing the basic libraries

```python
# The PyTorch heavy hitters
import torch
import torch.nn as nn
import numpy as np
```

Setting the device

```python
if torch.cuda.is_available():
    device = "cuda"  # NVIDIA
elif torch.backends.mps.is_available():
    device = "mps"   # Apple
else:
    device = "cpu"   # AMD and anyone without a GPU

# AMD's ROCm is usable these days too, but I don't know how to use it
# and don't own an AMD card, so AMD falls back to CPU here
```
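A quick sanity check that tensors actually land on the chosen device (a minimal sketch I'm adding here, not from the original notes):

```python
# Move a throwaway tensor to the selected device and confirm where it lives
x = torch.rand(2, 2).to(device)
print(x.device)  # e.g. "cuda:0", "mps:0" or "cpu"
```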

Loading an existing model

```python
from pathlib import Path

MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)
MODEL_0 = MODEL_PATH / "model_0.pth"
...
model_0 = BaseModel().to(device)
model_0.load_state_dict(torch.load(f=MODEL_0))
```
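One caveat: if the checkpoint was saved on one device and loaded on another, `torch.load` can complain about missing CUDA. Its `map_location` parameter remaps the stored tensors while loading; a minimal sketch:

```python
# Remap the stored tensors onto the current device while loading
state_dict = torch.load(f=MODEL_0, map_location=device)
model_0.load_state_dict(state_dict)
```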

Preparing the data

There are two approaches: PyTorch's built-in datasets and custom datasets, illustrated below with FashionMNIST and Food101 respectively. (A diagram reviewing how the various dataset types relate appeared here; the gist is that a Dataset serves individual samples and a DataLoader wraps it to serve shuffled batches.)

FashionMNIST

```python
import torchvision
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader

BATCH_SIZE = 512

train_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,  # fetch the data on first run (added; the original omitted it)
    transform=ToTensor(),
    target_transform=None)
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
    target_transform=None)

train_dataloader = DataLoader(
    dataset=train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4)
test_dataloader = DataLoader(
    dataset=test_data,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4)
```
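To confirm the pipeline works, it helps to pull one batch and inspect the shapes (a quick check added here, not in the original notes). FashionMNIST images are 1×28×28 grayscale, so:

```python
# Grab a single batch from the training DataLoader
X, y = next(iter(train_dataloader))
print(X.shape)  # torch.Size([512, 1, 28, 28]) -> [batch, channels, height, width]
print(y.shape)  # torch.Size([512]) -> one class index per image
```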

Food101

First, set up the folder structure; it looks roughly like this:
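Since the original diagram didn't survive, here is the standard layout `ImageFolder` expects; the class folder names below are hypothetical examples:

```
data/Food_part_small/
├── train/
│   ├── pizza/
│   │   ├── 0001.jpg
│   │   └── ...
│   └── sushi/
│       └── ...
└── test/
    ├── pizza/
    └── sushi/
```

Each subfolder name automatically becomes a class label.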

Then let's get started:

```python
from pathlib import Path
from torchvision import datasets, transforms  # transforms wasn't imported above
from torch.utils.data import DataLoader

BATCH_SIZE = 256
TRAIN = Path("data/Food_part_small/train")
TEST = Path("data/Food_part_small/test")

data_transform = transforms.Compose([
    transforms.Resize(size=(256, 256)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor()
])

train_data = datasets.ImageFolder(
    root=TRAIN,
    transform=data_transform,
    target_transform=None)
test_data = datasets.ImageFolder(
    root=TEST,
    transform=data_transform,
    target_transform=None)

train_dataloader = DataLoader(
    dataset=train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4)
test_dataloader = DataLoader(
    dataset=test_data,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4)
```
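`ImageFolder` derives the class names from the folder names; they can be inspected directly (a small check added here for illustration):

```python
# Class names and the name -> index mapping that ImageFolder builds
print(train_data.classes)        # e.g. ['pizza', 'sushi', ...]
print(train_data.class_to_idx)   # e.g. {'pizza': 0, 'sushi': 1, ...}
print(len(train_data), len(test_data))  # sample counts
```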

Preparing the model

There are many ways to bring in a model: you can write one yourself or import one of PyTorch's built-in models. BaseModel and ResNet serve as the two examples.

BaseModel

```python
class BaseModel(nn.Module):
    def __init__(self, in_planes, num_classes):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_planes, 64, kernel_size=2, padding=1, stride=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=2, padding=1, stride=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(128, 256, kernel_size=2, padding=1, stride=1),
            nn.BatchNorm2d(256)
        )
        self.fc = nn.Sequential(
            # pool to 1x1 so Flatten yields exactly 256 features;
            # added so Linear(256, num_classes) works for any input size
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        return self.fc(self.conv(x))

model_0 = BaseModel(3, 10).to(device)
```
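A quick way to verify the architecture is to push a dummy batch through it; this check is my addition, with an arbitrary 64×64 input size:

```python
# Forward a random batch to confirm the output shape is [batch, num_classes]
dummy = torch.randn(4, 3, 64, 64).to(device)
print(model_0(dummy).shape)  # torch.Size([4, 10])
```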

ResNet

```python
# One line and you're done, though it's more restrictive since num_classes is fixed
resnet_0 = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True).to(device)
```
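The num_classes limitation is softer than it looks: torchvision's ResNet exposes its classifier head as the `fc` attribute, so it can be swapped out. A minimal sketch for 10 classes:

```python
# Replace the 1000-class ImageNet head with a fresh 10-class linear layer
resnet_0.fc = nn.Linear(resnet_0.fc.in_features, 10)
resnet_0 = resnet_0.to(device)
```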

Training the model

To make it easier to train several models in batches, the training procedure usually gets wrapped into functions.

Defining the functions

```python
# Timer helper
def train_time(start: float, end: float, device: torch.device = None):
    tim = end - start
    print(f"On {device}: {tim:.5f}s")

# Training step
# !pip install torchmetrics  # run once in a notebook
from torchmetrics import Accuracy

def train_step(
        model: torch.nn.Module,
        dl: torch.utils.data.DataLoader,
        loss_fn: torch.nn.Module,
        optimizer: torch.optim.Optimizer,
        class_num, showcase):
    train_loss = 0
    train_acc = 0
    model.to(device)
    model.train()
    acc_fn = Accuracy(task="multiclass", num_classes=class_num).to(device)
    print("Start Training")
    for batch, (X, y) in enumerate(dl):
        X, y = X.to(device), y.to(device)
        y_preds = model(X)
        loss = loss_fn(y_preds, y)
        train_loss += loss.item()
        train_acc += acc_fn(y_preds, y.int()).item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch % showcase == 0:
            print(f"Looked at {batch * len(X)} in {len(dl.dataset)} samples")
    train_loss /= len(dl)
    train_acc /= len(dl)
    print(f"Train loss: {train_loss:.5f} | Train acc: {train_acc:.5f}")

# Test step
def test_step(
        model: torch.nn.Module,
        dl: torch.utils.data.DataLoader,
        loss_fn: torch.nn.Module,
        class_num, showcase):
    test_loss = 0
    test_acc = 0
    model.to(device)
    model.eval()
    acc_fn = Accuracy(task="multiclass", num_classes=class_num).to(device)
    with torch.inference_mode():
        for batch, (X, y) in enumerate(dl):
            X, y = X.to(device), y.to(device)
            y_preds = model(X)
            test_loss += loss_fn(y_preds, y).item()
            test_acc += acc_fn(y_preds, y.int()).item()
            if batch % showcase == 0:
                print(f"Looked at {batch * len(X)} in {len(dl.dataset)} samples")
    test_loss /= len(dl)
    test_acc /= len(dl)
    print(f"Test loss: {test_loss:.5f} | Test acc: {test_acc:.5f}")

# Train the model (the two steps combined)
import time
from timeit import default_timer as timer
from IPython.display import clear_output

def train_model(
        model: torch.nn.Module,
        train_dl: torch.utils.data.DataLoader,
        test_dl: torch.utils.data.DataLoader,
        loss_fn: torch.nn.Module,
        optimizer: torch.optim.Optimizer,
        class_num, showcase, epochs):
    start_time = timer()
    for epoch in range(epochs):
        time.sleep(1)
        clear_output(wait=True)
        print(f"Epoch: {epoch+1}")
        train_step(
            model=model,
            dl=train_dl,
            loss_fn=loss_fn,
            optimizer=optimizer,
            class_num=class_num,
            showcase=showcase)
        test_step(
            model=model,
            dl=test_dl,
            loss_fn=loss_fn,
            class_num=class_num,
            showcase=showcase)
    end_time = timer()
    train_time(start=start_time, end=end_time, device=device)

# Evaluate the model (essentially the same as the test step)
def eval_model(
        model: torch.nn.Module,
        dl: torch.utils.data.DataLoader,
        loss_fn: torch.nn.Module,
        class_num, showcase):
    model.to(device)
    model.eval()
    loss = 0
    acc = 0
    acc_fn = Accuracy(task="multiclass", num_classes=class_num).to(device)
    with torch.inference_mode():
        for batch, (X, y) in enumerate(dl):
            X, y = X.to(device), y.to(device)
            y_preds = model(X)
            loss += loss_fn(y_preds, y).item()
            acc += acc_fn(y_preds, y.int()).item()
            if batch % showcase == 0:
                print(f"Looked at {batch * len(X)} in {len(dl.dataset)} samples")
    loss /= len(dl)
    acc /= len(dl)
    return {
        "model_name": model.__class__.__name__,
        "model_loss": loss,
        "model_acc": acc
    }
```
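Since `Accuracy` does most of the metric work above, here is what it does in isolation on a toy batch (a standalone sketch added here, not from the original notes):

```python
# torchmetrics' Accuracy accepts raw logits and integer class targets
acc_fn = Accuracy(task="multiclass", num_classes=3)
logits = torch.tensor([[2.0, 0.1, 0.3],   # predicted class 0
                       [0.2, 1.5, 0.1],   # predicted class 1
                       [0.1, 0.2, 3.0]])  # predicted class 2
target = torch.tensor([0, 1, 1])          # the last prediction is wrong
print(acc_fn(logits, target))             # tensor(0.6667)
```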

Running training & evaluation

It's all quite simple at this point: the functions are defined, so just call them.

```python
train_model(
    model=model_0,
    train_dl=train_dataloader,
    test_dl=test_dataloader,
    loss_fn=nn.CrossEntropyLoss(),
    optimizer=torch.optim.Adam(
        params=model_0.parameters(),
        lr=0.1),  # note: 0.1 is aggressive for Adam; values like 1e-3 are more typical
    class_num=10,
    showcase=10,
    epochs=100)

model_0_results = eval_model(
    model=model_0,
    dl=test_dataloader,
    loss_fn=nn.CrossEntropyLoss(),
    class_num=10,
    showcase=10)
model_0_results
```

Saving the model

Usually saving only the parameters (the state_dict) is enough; otherwise the full model gets far too large to store.

```python
from pathlib import Path

MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)
MODEL_0 = MODEL_PATH / "model_0.pth"
torch.save(obj=model_0.state_dict(), f=MODEL_0)
```

Wrapping up

Stage one, the introductory stage, is done! Next I can start digging into how mature architectures like ResNet are implemented; after all, simply stacking depth the conventional way has already run me into degradation from excessive depth, with accuracy refusing to climb. See you in the next stage.

