
Image Recognition with PyTorch (Hands-On)

2022-02-18 10:03:36

1. Code Walkthrough

1.1 Importing Libraries

import os.path
from os import listdir
import numpy as np
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn import AdaptiveAvgPool2d
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import Dataset
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split

1.2 Normalization, Transforms, and GPU Setup

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
normalize = transforms.Normalize(
   mean=[0.485, 0.456, 0.406],
   std=[0.229, 0.224, 0.225]
)
transform = transforms.Compose([transforms.ToTensor(), normalize])  # convert to tensor, then normalize
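The mean and std values above are the standard ImageNet normalization statistics. As a quick sanity check (an illustrative snippet of my own, not part of the original code), the transform can be applied to a dummy PIL image to confirm it returns a normalized 3-channel float tensor:

# illustrative sanity check: apply the transform to a dummy RGB image
dummy = Image.fromarray(np.zeros((32, 32, 3), dtype=np.uint8))
t = transform(dummy)
print(t.shape, t.dtype)  # torch.Size([3, 32, 32]) torch.float32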

1.3 Preprocessing the Data

class DogDataset(Dataset):
    # store the image paths, labels and target image size
    def __init__(self, img_paths, img_labels, size_of_images):
        self.img_paths = img_paths
        self.img_labels = img_labels
        self.size_of_images = size_of_images

    # number of images
    def __len__(self):
        return len(self.img_paths)

    # open one image, resize it and turn it into a normalized tensor
    def __getitem__(self, index):
        PIL_IMAGE = Image.open(self.img_paths[index]).resize(self.size_of_images)
        TENSOR_IMAGE = transform(PIL_IMAGE)
        label = self.img_labels[index]
        return TENSOR_IMAGE, label


print(len(listdir(r'C:\Users\AIAXIT\Desktop\DeepLearningProject\Deep_Learning_Data\dog-breed-identification\train')))
print(len(pd.read_csv(r'C:\Users\AIAXIT\Desktop\DeepLearningProject\Deep_Learning_Data\dog-breed-identification\labels.csv')))
print(len(listdir(r'C:\Users\AIAXIT\Desktop\DeepLearningProject\Deep_Learning_Data\dog-breed-identification\test')))

train_paths = []
test_paths = []
labels = []
# training-set image paths
train_paths_lir = r'C:\Users\AIAXIT\Desktop\DeepLearningProject\Deep_Learning_Data\dog-breed-identification\train'
for path in listdir(train_paths_lir):
    train_paths.append(os.path.join(train_paths_lir, path))
# read the labels CSV
labels_data = pd.read_csv(r'C:\Users\AIAXIT\Desktop\DeepLearningProject\Deep_Learning_Data\dog-breed-identification\labels.csv')
labels_data = pd.DataFrame(labels_data)
# Encode the string labels as integers. The data contains 120 dog breeds; passing raw string
# labels to the model later raises an error ("too many string labels"), so breeds are numbered 0-119.
size_mapping = dict(labels_data['breed'].value_counts())
value = 0
for key in size_mapping:
    size_mapping[key] = value
    value += 1
# print(size_mapping)
labels = labels_data['breed'].map(size_mapping)
labels = list(labels)
# print(labels)
print(len(labels))
# split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(train_paths, labels, test_size=0.2)

train_set = DogDataset(X_train, y_train, (32, 32))
test_set = DogDataset(X_test, y_test, (32, 32))

train_loader = torch.utils.data.DataLoader(train_set, batch_size=64)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=64)
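With the loaders in place, a quick check (illustrative only, not in the original) confirms that each batch has the shape the model below expects: 64 RGB images of 32x32 pixels with integer class labels.

# illustrative check: pull one batch and inspect its shapes
images, targets = next(iter(train_loader))
print(images.shape)                  # torch.Size([64, 3, 32, 32])
print(targets.shape, targets.dtype)  # torch.Size([64]) torch.int64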

1.4 Building the Model

class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()

        self.features = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5),   # 32x32 -> 28x28
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=2, stride=2),                     # 28x28 -> 14x14
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),  # 14x14 -> 10x10
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=2, stride=2)                      # 10x10 -> 5x5
        )
        self.classifier = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 120)  # 120 output classes, one per breed
        )

    def forward(self, x):
        batch_size = x.shape[0]
        x = self.features(x)
        x = x.view(batch_size, -1)  # flatten to (batch_size, 16 * 5 * 5)
        x = self.classifier(x)
        return x


model = LeNet().to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters())
TRAIN_LOSS = []  # per-epoch training loss history
TRAIN_ACCURACY = []  # per-epoch training accuracy history
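The 16 * 5 * 5 input size of the first linear layer follows from the feature extractor: a 32x32 input becomes 28x28 after the first 5x5 convolution, 14x14 after pooling, 10x10 after the second convolution, and 5x5 after the final pooling, with 16 channels. A short sketch (illustrative only) verifies this with a dummy batch:

# illustrative check: confirm the flattened feature size for a 32x32 input
with torch.no_grad():
    dummy = torch.zeros(1, 3, 32, 32).to(device)
    feats = model.features(dummy)
    print(feats.shape)  # torch.Size([1, 16, 5, 5]) -> 16 * 5 * 5 = 400 features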

1.5 Training the Model

def train(epoch):
    model.train()
    epoch_loss = 0.0  # accumulated loss
    correct = 0       # number of correct predictions
    for batch_index, (Data, Label) in enumerate(train_loader):
        # move the batch to the GPU (or CPU)
        Data = Data.to(device)
        Label = Label.to(device)
        output_train = model(Data)
        # compute the loss
        loss_train = criterion(output_train, Label)
        epoch_loss = epoch_loss + loss_train.item()
        # count correct predictions
        pred = torch.max(output_train, 1)[1]
        train_correct = (pred == Label).sum()
        correct = correct + train_correct.item()
        # zero gradients, backpropagate, update parameters
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()
    print('Epoch: ', epoch, 'Train_loss: ', epoch_loss / len(train_set), 'Train correct: ', correct / len(train_set))
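Note that the TRAIN_LOSS and TRAIN_ACCURACY lists declared earlier are never filled by this function. If you want a per-epoch history for plotting, a small addition of my own (not in the original) is to append the averages at the end of train():

    # optional addition at the end of train(): record per-epoch averages
    TRAIN_LOSS.append(epoch_loss / len(train_set))
    TRAIN_ACCURACY.append(correct / len(train_set))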

1.6 Testing the Model

It is largely the same as the training loop.

def test():
    model.eval()
    correct = 0.0
    test_loss = 0.0
    with torch.no_grad():
        for Data, Label in test_loader:
            Data = Data.to(device)
            Label = Label.to(device)
            test_output = model(Data)
            loss = criterion(test_output, Label)
            pred = torch.max(test_output, 1)[1]
            test_correct = (pred == Label).sum()
            correct = correct + test_correct.item()
            test_loss = test_loss + loss.item()
    print('Test_loss: ', test_loss / len(test_set), 'Test correct: ', correct / len(test_set))

1.7 Results

epoch = 10
for n_epoch in range(epoch):
    train(n_epoch)
test()
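Here test() runs once after all ten epochs. A small variation of my own (not from the original) is to evaluate after every epoch, which makes it easier to spot overfitting:

# variation: evaluate after every epoch
epoch = 10
for n_epoch in range(epoch):
    train(n_epoch)
    test()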

That wraps up this article on hands-on image recognition with PyTorch. For more on image recognition with PyTorch, search it145.com's earlier articles or browse the related articles below. We hope you will continue to support it145.com!
