[ML Notes] PyTorch Classification

This is the notebook converted straight to markdown; it runs as-is on Google Colab.
You can open it directly here 👉 https://colab.research.google.com/drive/1Xf0wMvc_bG-bMMewme__oDXtE5KmmBCG?usp=sharing
What follows is a classic ML application: classification.

Some hyperparameters (for nn.Linear)

  • Input layer: the number of input features and output features - in_features, out_features
  • Hidden layers: also take in/out feature counts, but both equal the input layer's out_features
  • Output layer: its input count is the hidden layer's output count, and its output count is the number of outputs the task actually needs
  • Loss function: binary and multi-class problems use different ones - Loss_function
  • Optimizer: mostly SGD or Adam - Optimizer (see the sketch after this list)
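A minimal sketch of how these pieces chain together (illustrative layer sizes only; the real models appear later in the notebook):

```python
import torch
from torch import nn

# Input layer: 2 features in, 8 out; hidden layer: in and out both equal the
# previous layer's out_features (8); output layer: 8 in, 1 out (one logit
# for binary classification)
sketch_model = nn.Sequential(
    nn.Linear(in_features = 2, out_features = 8),
    nn.Linear(in_features = 8, out_features = 8),
    nn.Linear(in_features = 8, out_features = 1)
)
sketch_loss_fn = nn.BCEWithLogitsLoss()  # binary; multi-class would use nn.CrossEntropyLoss()
sketch_optimizer = torch.optim.SGD(params = sketch_model.parameters(), lr = 0.01)
```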

Make Data

```python
# Make Data #
import sklearn
from sklearn.datasets import make_circles

# make 1000 samples
# x for the features & y for the labels
n_samples = 1000
x, y = make_circles(n_samples,
                    noise = 0.03,
                    random_state = 42)
print(f"5 Samples of x:\n{x[:5]}")
print(f"5 Samples of y:\n{y[:5]}")
```
```python
# visualize the data
import pandas as pd
import matplotlib.pyplot as plt

circles = pd.DataFrame({"X1": x[:, 0],
                        "X2": x[:, 1],
                        "Label": y})
plt.scatter(x = x[:, 0],
            y = x[:, 1],
            c = y,
            cmap = plt.cm.RdYlBu)

# x holds each dot's coordinates
# y marks whether a dot belongs to the outer or the inner circle
```
```python
# check shape
x_sample = x[0]
y_sample = y[0]
x_sample.shape, y_sample.shape
```
```python
# turn data into tensors
import torch
X = torch.from_numpy(x).type(torch.float)
y = torch.from_numpy(y).type(torch.float)
```
```python
# split data into train and test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size = 0.2,
                                                    random_state = 42)
```

Build a model

  • Set up the training device (GPU)
  • Set up a model using nn.Module
  • Train it
```python
# Setup the device
import torch
from torch import nn

if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

X_train = X_train.to(device)
X_test = X_test.to(device)
y_train = y_train.to(device)
y_test = y_test.to(device)
```
```python
# create a model
class CircleModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer_0 = nn.Linear(in_features = 2, out_features = 8)
        self.layer_1 = nn.Linear(in_features = 8, out_features = 8)
        self.layer_out = nn.Linear(in_features = 8, out_features = 1)

    def forward(self, x):
        return self.layer_out(self.layer_1(self.layer_0(x)))

model_0 = CircleModel().to(device)
```
```python
# a simpler way to create a model
model_1 = nn.Sequential(
    nn.Linear(in_features = 2, out_features = 8),
    nn.Linear(in_features = 8, out_features = 8),
    nn.Linear(in_features = 8, out_features = 1)
).to(device)

with torch.inference_mode():
    untrained_pred = model_1(X_test)
```
```python
# create a loss function and an optimizer, and calc accuracy
# common loss functions for classification are BCELoss and BCEWithLogitsLoss
# common optimizers are Adam and SGD
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(params = model_1.parameters(),
                            lr = 0.01)

def accuracy_fn(y_true, y_pred):
    correct = torch.eq(y_true, y_pred).sum().item()
    acc = (correct / len(y_pred)) * 100
    return acc
```
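A quick aside (not from the original notebook): `nn.BCEWithLogitsLoss` is `nn.BCELoss` with a sigmoid folded in, and it is the more numerically stable choice because it works on raw logits:

```python
import torch
from torch import nn

# Sanity check: BCEWithLogitsLoss(logits) matches BCELoss(sigmoid(logits))
demo_logits = torch.randn(5)
demo_targets = torch.randint(0, 2, (5,)).float()
with_logits = nn.BCEWithLogitsLoss()(demo_logits, demo_targets)
plain_bce = nn.BCELoss()(torch.sigmoid(demo_logits), demo_targets)
print(torch.allclose(with_logits, plain_bce))  # True
```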

Train the model

  • Forward pass
  • Calculate the loss
  • Optimizer zero grad
  • Loss backward
  • Optimizer step (gradient descent)
```python
with torch.inference_mode():
    # .squeeze() drops the trailing dim so the shapes line up with y_test
    y_test_pred = torch.round(torch.sigmoid(model_1(X_test))).squeeze()
torch.eq(y_test_pred, y_test).sum().item()
```
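To make the `sigmoid` → `round` chain concrete, here is a tiny illustration with made-up logits:

```python
import torch

# Illustrative values only: raw logits -> probabilities in [0, 1] -> hard labels {0, 1}
demo_logits = torch.tensor([-2.0, 0.3, 1.5])
demo_probs = torch.sigmoid(demo_logits)  # tensor([0.1192, 0.5744, 0.8176])
demo_labels = torch.round(demo_probs)    # tensor([0., 1., 1.])
```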
```python
epochs = 100
for epoch in range(epochs):
    model_1.train()
    y_logits = model_1(X_train).squeeze()
    y_pred = torch.round(torch.sigmoid(y_logits))
    loss = loss_fn(y_logits, y_train)
    acc = accuracy_fn(y_true = y_train,
                      y_pred = y_pred)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    model_1.eval()
    with torch.inference_mode():
        y_test_logits = model_1(X_test).squeeze()
        y_test_pred = torch.round(torch.sigmoid(y_test_logits))
        loss_test = loss_fn(y_test_logits, y_test)
        acc_test = accuracy_fn(y_true = y_test,
                               y_pred = y_test_pred)
    if (epoch % 10 == 0):  # log every 10 of the 100 epochs
        print(f"Epoch: {epoch} | Loss: {loss:.5f} | Test Loss: {loss_test:.5f} | Acc: {acc}% | Test Acc: {acc_test}%")
```
```python
import requests
from pathlib import Path

# Download helper functions from GitHub
if Path("helper_functions.py").is_file():
    print("Skip Download")
else:
    print("Start Download")
    request = requests.get("https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py")
    with open("helper_functions.py", "wb") as f:
        f.write(request.content)

from helper_functions import plot_predictions, plot_decision_boundary
```
```python
plt.figure(figsize = (12, 6))
plt.subplot(1, 2, 1)
plt.title("Train")
plot_decision_boundary(model_1, X_train, y_train)
plt.subplot(1, 2, 2)
plt.title("Test")
plot_decision_boundary(model_1, X_test, y_test)
```

Improve the model

For the model:

  • add more layers
  • add more hidden units
  • train for more epochs
  • change the activation functions --> e.g. torch.sigmoid()
  • change the learning rate --> the step length
  • change the loss function --> loss_fn

For the data:

  • get more data samples
```python
class CircleModelV1(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer_1 = nn.Linear(in_features = 2, out_features = 10)
        self.layer_2 = nn.Linear(in_features = 10, out_features = 10)
        self.layer_3 = nn.Linear(in_features = 10, out_features = 1)

    def forward(self, x):
        z = self.layer_1(x)
        z = self.layer_2(z)
        z = self.layer_3(z)
        return z

model_1 = CircleModelV1().to(device)
```
```python
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(params = model_1.parameters(),
                            lr = 0.1)
epochs = 1000

X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

for epoch in range(epochs):
    model_1.train()
    y_logits = model_1(X_train).squeeze()
    y_pred = torch.round(torch.sigmoid(y_logits))
    loss = loss_fn(y_logits, y_train)
    acc = accuracy_fn(y_true = y_train,
                      y_pred = y_pred)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    model_1.eval()
    with torch.inference_mode():
        y_test_logits = model_1(X_test).squeeze()
        y_test_pred = torch.round(torch.sigmoid(y_test_logits))
        loss_test = loss_fn(y_test_logits, y_test)
        acc_test = accuracy_fn(y_true = y_test,
                               y_pred = y_test_pred)
    if (epoch % 100 == 0):
        print(f"Epoch: {epoch} | Loss: {loss:.5f} | Test Loss: {loss_test:.5f} | Acc: {acc}% | Test Acc: {acc_test}%")
```
```python
plt.figure(figsize = (12, 6))
plt.subplot(1, 2, 1)
plt.title("Train")
plot_decision_boundary(model_1, X_train, y_train)
plt.subplot(1, 2, 2)
plt.title("Test")
plot_decision_boundary(model_1, X_test, y_test)
```

Non-linearity

Stacked nn.Linear layers with nothing in between compose into a single linear transform, so the models above can only draw a straight-line decision boundary. Inserting a non-linear activation such as ReLU between the layers lets the network bend the boundary around the circles.

```python
class CircleModelV2(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer_1 = nn.Linear(in_features = 2, out_features = 10)
        self.layer_2 = nn.Linear(in_features = 10, out_features = 10)
        self.layer_3 = nn.Linear(in_features = 10, out_features = 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        z = self.layer_1(x)
        z = self.relu(z)
        z = self.layer_2(z)
        z = self.relu(z)
        z = self.layer_3(z)
        return z

model_2 = CircleModelV2().to(device)
```
```python
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(params = model_2.parameters(),
                            lr = 0.1)
epochs = 100

X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

# X_train[:5]

for epoch in range(epochs):
    model_2.train()
    y_logits = model_2(X_train).squeeze()
    y_pred = torch.round(torch.sigmoid(y_logits))
    loss = loss_fn(y_logits, y_train)
    acc = accuracy_fn(y_true = y_train,
                      y_pred = y_pred)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    model_2.eval()
    with torch.inference_mode():
        y_test_logits = model_2(X_test).squeeze()
        y_test_pred = torch.round(torch.sigmoid(y_test_logits))
        acc_test = accuracy_fn(y_true = y_test,
                               y_pred = y_test_pred)
        loss_test = loss_fn(y_test_logits, y_test)
    if (epoch % 10 == 0):  # log every 10 of the 100 epochs
        print(f"Epoch: {epoch} | Loss: {loss:.5f} | Test Loss: {loss_test:.5f} | Acc: {acc}% | Test Acc: {acc_test}%")
```
```python
plt.figure(figsize = (12, 6))
plt.subplot(1, 2, 1)
plt.title("Linear")
plot_decision_boundary(model_1, X_test, y_test)
plt.subplot(1, 2, 2)
plt.title("Non-linear")
plot_decision_boundary(model_2, X_test, y_test)
```

Know more about the activation functions behind the non-linear model

```python
A = torch.arange(-10, 10, 1, dtype = torch.float)
A.dtype
```
```python
A
```
```python
plt.plot(A)
```
```python
plt.plot(torch.relu(A))
```
```python
# a hand-rolled ReLU: element-wise max(0, x)
def relu(x: torch.Tensor):
    return torch.max(torch.tensor(0.0), x)

plt.plot(relu(A))
```
```python
# Sigmoid #
plt.plot(torch.sigmoid(A))
```
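For symmetry with the hand-rolled `relu` above, `sigmoid` can also be written straight from its definition (a sketch, not in the original notebook):

```python
# Hand-rolled sigmoid: 1 / (1 + e^(-x)); should match torch.sigmoid(A)
def sigmoid(x: torch.Tensor):
    return 1 / (1 + torch.exp(-x))

plt.plot(sigmoid(A))
```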

Multi-class classification

```python
import torch
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
import torch.nn as nn

NUM_CLASSES = 4
NUM_FEATURES = 2
RANDOM_SEED = 2333

# create dataset
X_blob, y_blob = make_blobs(n_samples = 1000,
                            n_features = NUM_FEATURES,  # number of features per sample
                            centers = NUM_CLASSES,      # number of classes
                            cluster_std = 1.23,         # how spread out the clusters are
                            random_state = RANDOM_SEED)
X_blob = torch.from_numpy(X_blob).type(torch.float)
y_blob = torch.from_numpy(y_blob).type(torch.LongTensor)

# split dataset
X_blob_train, X_blob_test, y_blob_train, y_blob_test = train_test_split(X_blob,
                                                                        y_blob,
                                                                        test_size = 0.2,
                                                                        random_state = RANDOM_SEED)

# visualize
plt.figure(figsize = (10, 7))
plt.scatter(X_blob[:, 0], X_blob[:, 1],
            c = y_blob, cmap = plt.cm.RdYlBu)

# set device
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
X_blob_train, y_blob_train = X_blob_train.to(device), y_blob_train.to(device)
X_blob_test, y_blob_test = X_blob_test.to(device), y_blob_test.to(device)
```
```python
class BlobModelV0(nn.Module):
    def __init__(self, in_features, out_features, hidden_units = 8):
        super().__init__()
        self.linear_layer = nn.Sequential(
            nn.Linear(in_features = in_features, out_features = hidden_units),
            nn.ReLU(),
            nn.Linear(in_features = hidden_units, out_features = hidden_units),
            nn.ReLU(),
            nn.Linear(in_features = hidden_units, out_features = out_features)
        )

    def forward(self, x):
        return self.linear_layer(x)

model_3 = BlobModelV0(in_features = 2, out_features = 4).to(device)
```
```python
# initial model test
model_3.eval()
with torch.inference_mode():
    y_logits = model_3(X_blob_test)
    y_pred_probs = torch.softmax(y_logits, dim=1)
    y_preds = torch.argmax(y_pred_probs, dim=1)
y_preds[:5], y_blob_test[:5]
```
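Each row of the softmax output is a probability distribution over the 4 classes, and `argmax` picks the most likely one. A tiny illustration with made-up logits:

```python
import torch

# Made-up logits for 2 samples and 4 classes
demo_logits = torch.tensor([[1.0, 2.0, 0.5, -1.0],
                            [0.1, 0.2, 3.0, 0.0]])
demo_probs = torch.softmax(demo_logits, dim = 1)
print(demo_probs.sum(dim = 1))     # tensor([1., 1.]): each row sums to 1
print(demo_probs.argmax(dim = 1))  # tensor([1, 2]): the highest-probability class per sample
```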
```python
loss_fn = nn.CrossEntropyLoss() # mind the dtypes: inputs are float, targets are LongTensor
optimizer = torch.optim.Adam(params = model_3.parameters(),
                             lr = 0.1)
epochs = 2000
for epoch in range(epochs):
    model_3.train()
    y_logits = model_3(X_blob_train)
    y_preds = y_logits.softmax(dim = 1).argmax(dim = 1)
    loss = loss_fn(y_logits, y_blob_train)
    acc = accuracy_fn(y_true = y_blob_train,
                      y_pred = y_preds)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    model_3.eval()
    with torch.inference_mode():
        y_test_logits = model_3(X_blob_test)
        y_test_preds = y_test_logits.softmax(dim = 1).argmax(dim = 1)
        loss_test = loss_fn(y_test_logits, y_blob_test)
        acc_test = accuracy_fn(y_true = y_blob_test,
                               y_pred = y_test_preds)
    if (epoch % 100 == 0):
        print(f"Epoch: {epoch} | Loss: {loss:.5f} | Test Loss: {loss_test:.5f} | Acc: {acc:.5f}% | Test Acc: {acc_test:.5f}%")
```

```python
# make predictions
model_3.eval()
with torch.inference_mode():
    y_logits = model_3(X_blob_test)
    y_preds = y_logits.softmax(dim = 1).argmax(dim = 1)

plt.figure(figsize = (12, 6))
plt.subplot(1, 2, 1)
plt.title("Train")
plot_decision_boundary(model_3.cpu(), X_blob_train, y_blob_train)
plt.subplot(1, 2, 2)
plt.title("Test")
plot_decision_boundary(model_3.cpu(), X_blob_test, y_blob_test)
```
```python
from pathlib import Path

# create model directory
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents = True, exist_ok = True)

# assemble the model save path
MODEL_NAME = "model_3_full.pt"
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

MODEL_SAVE_PATH

# save the whole model (not just the state dict)
torch.save(model_3, MODEL_SAVE_PATH)
```
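Loading it back is the mirror image (a sketch; a full pickled model needs the `BlobModelV0` class definition in scope, and recent PyTorch versions also require `weights_only = False` for non-state-dict checkpoints):

```python
# Load the full model back and put it in eval mode
loaded_model = torch.load(MODEL_SAVE_PATH, weights_only = False)
loaded_model.eval()
```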

More classification metrics for evaluation

  • Accuracy - main one
  • Precision
  • Recall
  • F1-score
  • Confusion matrix
  • Classification report
```python
!pip install torchmetrics
```
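A quick sketch of how torchmetrics plugs in, assuming the multi-class predictions from above are still in memory:

```python
from torchmetrics import Accuracy

# torchmetrics metrics are nn.Modules: instantiate, move to the device, then call
tm_accuracy = Accuracy(task = "multiclass", num_classes = NUM_CLASSES).to(device)
tm_accuracy(y_preds.to(device), y_blob_test.to(device))
```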
