AI

CNN을 이용한 CIFAR-10 영상 분류

✿(๑❛ڡ❛๑)✿ 2022. 12. 8. 13:33
728x90
SMALL
import os
from os.path import join
import cv2
from glob import glob
from PIL import Image
from tqdm import tqdm




# Backbone to fine-tune: one of "ResNet", "VGG" or "GoogLeNet".
method = 'GoogLeNet'

import random

import numpy as np  # np.random.seed is called below but numpy was never imported
import torch

# Seed every RNG this script touches so repeated runs are comparable.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
# Disable cuDNN autotuning and request deterministic kernels.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

device = 'cuda' if torch.cuda.is_available() else 'cpu'

train_mother_path = '/kaggle/input/2022-ai-w10p2/images/images/'
test_mother_path = '/kaggle/input/2022-ai-w10p2/test_data/test_data'
# glob returns paths in arbitrary order; sorting keeps the dataset order (and
# therefore the seeded shuffling) identical between runs.
train_image_path = sorted(glob(join(train_mother_path, '*', '*')))
test_image_path = glob(join(test_mother_path, '*'))
# Test files are addressed as "<index>.png" so predictions come out in index
# order (presumably the row order of the submission file; verify against it).
test_paths = [
    join(test_mother_path, f'{i}.png') for i in range(len(test_image_path))
]

#test_paths=[]
#for i in range(10000):
#    test_paths.append(test_mother_path+'/'+str(i)+'.png')
# Build the backbone selected by `method` (ResNet | VGG | GoogLeNet).
# pretrained=True loads torchvision's pretrained weights; only the final
# layer is replaced so the network emits 10 class scores.
from torchvision import models  # this section runs before the file's later torchvision import

# String values must be compared with ==; `is` tests object identity and only
# works by accident of interning (and raises a SyntaxWarning on Python 3.8+).
if method == "ResNet":
    model = models.resnet18(pretrained=True)
    # Replace the output layer with a freshly initialised 10-way classifier.
    model_classifier = torch.nn.Linear(512, 10)
    torch.nn.init.xavier_uniform_(model_classifier.weight)
    model.fc = model_classifier
elif method == "VGG":
    model = models.vgg16(pretrained=True)
    # Replace the last classifier layer with a 10-way classifier.
    model_classifier = torch.nn.Linear(4096, 10)
    torch.nn.init.xavier_uniform_(model_classifier.weight)
    model.classifier[6] = model_classifier
elif method == "GoogLeNet":
    model = models.googlenet(pretrained=True)
    # Replace the output layer with a freshly initialised 10-way classifier.
    model_classifier = torch.nn.Linear(1024, 10)
    torch.nn.init.xavier_uniform_(model_classifier.weight)
    model.fc = model_classifier
else:
    # Fail here instead of with a NameError on `model` further down.
    raise ValueError(
        f"Unknown method {method!r}; expected 'ResNet', 'VGG' or 'GoogLeNet'"
    )
model.to(device)
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision import models

# Training objective and optimiser: cross-entropy on the class scores,
# minimised with momentum SGD over every parameter of the model.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    params=model.parameters(),
    momentum=0.9,
    lr=0.001,
)
class W10_DateLoader(torch.utils.data.Dataset):
    """Dataset that loads RGB images from a list of file paths.

    Args:
        data_path: Sequence of image file paths.
        split: "train" yields ``(image, label)`` pairs, where the label is the
            integer name of the image's parent directory; "test" yields the
            image only.
        transform: Optional callable applied to each loaded PIL image.

    Raises:
        ValueError: From ``__getitem__`` when ``split`` is neither "train"
            nor "test" (previously ``None`` was returned silently).
    """

    def __init__(self, data_path, split='train', transform=None):
        self.transform = transform
        self.data = data_path
        self.split = split

    def __len__(self):
        """Return the number of image paths in the dataset."""
        return len(self.data)

    def __getitem__(self, index):
        """Load the image at ``index`` and, for the train split, its label."""
        path = self.data[index]
        # The context manager closes the file handle; convert() decodes the
        # pixels into a new image that stays valid after the file is closed.
        with Image.open(path, mode='r') as raw:
            image = raw.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        if self.split == "test":
            return image
        if self.split == "train":
            # Layout is <root>/<label>/<file>: the parent directory is the label.
            label = int(os.path.basename(os.path.dirname(path)))
            return image, label
        raise ValueError(
            f"Unknown split {self.split!r}; expected 'train' or 'test'"
        )

    
# Preprocessing shared by both splits: resize to 224x224, convert to a
# tensor, then normalise every channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# One dataset per split; only the train split yields labels.
trainData = W10_DateLoader(train_image_path, split='train', transform=transform)
testData = W10_DateLoader(test_paths, split='test', transform=transform)

# Training batches are shuffled; test batches keep the path order so the
# predictions line up with the order of `test_paths`.
trainLoader = torch.utils.data.DataLoader(
    trainData,
    batch_size=32,
    shuffle=True,
    num_workers=2,
)
testLoader = torch.utils.data.DataLoader(
    testData,
    batch_size=32,
    shuffle=False,
    num_workers=2,
)
# Fine-tune the model for 5 epochs and print the mean batch loss per epoch.
from tqdm import tqdm

model.to(device)
model.train()
size = len(trainLoader)  # number of batches per epoch
for i in tqdm(range(5)):
    avg_cost = 0.0
    for X, Y in trainLoader:
        X = X.to(device)
        Y = Y.to(device)
        H = model(X)
        # The original called an undefined `loss`; the loss function created
        # earlier in this script is `criterion`.
        cost = criterion(H, Y)
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        # .item() extracts a plain float, so the running average does not
        # keep accumulating tensors on the device.
        avg_cost += cost.item() / size
    print(avg_cost)
# Predict a label for every test image and write the submission file.
import numpy as np  # used below but never imported in the original
import pandas as pd  # used below but never imported in the original

submit = pd.read_csv('/kaggle/input/2022-ai-w10p2/format.csv')

model.eval()
preds = []
# Inference needs no gradients; no_grad avoids building the autograd graph.
with torch.no_grad():
    for batch in testLoader:
        batch = batch.to(device)
        scores = model(batch)
        pred = torch.argmax(scores, dim=1)
        preds.extend(pred.cpu().numpy().tolist())
preds = np.asarray(preds)
submit['label'] = preds
submit.to_csv("pred.csv", index=False)

detach()는 연산 기록(계산 그래프)으로부터 분리된 tensor를 반환한다. 따라서 반환된 tensor에 대한 이후 연산은 gradient 추적 대상이 되지 않는다.

 

 

--------------------------------------------------------------------------------------------------------------------------------------------------

Solution code

 

환경변수 기본 세팅

import os
from os.path import join
import cv2
import torch
import torchvision
import torchvision.transforms as transforms
from glob import glob
from PIL import Image
from torchvision import models
from tqdm import tqdm
import random

import numpy as np  # np.random.seed is called below but numpy was never imported

# Training hyper-parameters.
batch_size = 32
total_epoch = 5
# Backbone to fine-tune: one of "ResNet", "VGG" or "GoogLeNet".
method = 'GoogLeNet'

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    print("CUDA is available")
    device = 'cuda'
else:
    print("CUDA is NOT available")
    device = 'cpu'

# Seed every RNG this script touches so repeated runs are comparable.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
# Disable cuDNN autotuning and request deterministic kernels.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

데이터 로더

train_mother_path = '/kaggle/input/2022-ai-w10p2/images/images/'
test_mother_path = '/kaggle/input/2022-ai-w10p2/test_data/test_data'
# glob returns paths in arbitrary order. Sorting gives the seeded
# random.shuffle in training the same starting order on every run, which is
# required for the fixed seed to actually make runs reproducible.
train_image_path = sorted(glob(join(train_mother_path, '*', '*')))
test_image_path = sorted(glob(join(test_mother_path, '*')))

사전학습 모델 사용

# Build the backbone selected by `method` (ResNet | VGG | GoogLeNet).
# pretrained=True loads torchvision's pretrained weights; only the final
# layer is replaced so the network emits 10 class scores.
#
# String values must be compared with ==; `is` tests object identity and only
# works by accident of interning (and raises a SyntaxWarning on Python 3.8+).
if method == "ResNet":
    model = models.resnet18(pretrained=True)
    # Replace the output layer with a freshly initialised 10-way classifier.
    model_classifier = torch.nn.Linear(512, 10)
    torch.nn.init.xavier_uniform_(model_classifier.weight)
    model.fc = model_classifier
elif method == "VGG":
    model = models.vgg16(pretrained=True)
    # Replace the last classifier layer with a 10-way classifier.
    model_classifier = torch.nn.Linear(4096, 10)
    torch.nn.init.xavier_uniform_(model_classifier.weight)
    model.classifier[6] = model_classifier
elif method == "GoogLeNet":
    model = models.googlenet(pretrained=True)
    # Replace the output layer with a freshly initialised 10-way classifier.
    model_classifier = torch.nn.Linear(1024, 10)
    torch.nn.init.xavier_uniform_(model_classifier.weight)
    model.fc = model_classifier
else:
    # Fail here instead of with a NameError on `model` further down.
    raise ValueError(
        f"Unknown method {method!r}; expected 'ResNet', 'VGG' or 'GoogLeNet'"
    )
model.to(device)
import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import time
import torch.nn.functional as F
import torch.nn as nn
import matplotlib.pyplot as plt
from torchvision import models

# Training objective and optimiser: cross-entropy on the class scores,
# minimised with momentum SGD over every parameter of the model.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    params=model.parameters(),
    momentum=0.9,
    lr=0.001,
)

모델 학습

# training function
import os
import random


def train(model, train_image_path, chunk_size=3200):
    """Train ``model`` for one epoch over the images in ``train_image_path``.

    Images are decoded ``chunk_size`` at a time so the whole training set
    never has to be held in memory as tensors. Each chunk is wrapped in a
    shuffled DataLoader that uses the module-level ``batch_size``.

    Args:
        model: Network to optimise; it is switched to training mode.
        train_image_path: Sequence of image paths laid out as
            ``<root>/<label>/<file>``, where ``<label>`` is an integer.
        chunk_size: Number of images decoded per chunk (default 3200, the
            value that was previously hard-coded).

    Returns:
        Tuple ``(train_loss, train_accuracy)``: the mean per-sample loss and
        the accuracy in percent over all images.

    Raises:
        ValueError: If ``train_image_path`` is empty or ``chunk_size`` is not
            positive.
    """
    if not train_image_path:
        raise ValueError("train_image_path is empty; nothing to train on")
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    model.train()
    train_running_loss = 0.0
    train_running_correct = 0

    # Shuffle a copy so the caller's list is not reordered as a side effect.
    paths = list(train_image_path)
    random.shuffle(paths)

    transform = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    # Step through every path, including the final partial chunk that the
    # original floor-division loop silently skipped.
    for chunk_start in range(0, len(paths), chunk_size):
        x_train = []
        y_train = []
        for path_i in paths[chunk_start:chunk_start + chunk_size]:
            # Close the file handle as soon as the pixels are decoded.
            with Image.open(path_i, mode='r') as raw:
                image = raw.convert('RGB')
            x_train.append(transform(image))
            # The parent directory name is the integer class label.
            y_train.append(int(os.path.basename(os.path.dirname(path_i))))

        trainset = torch.utils.data.TensorDataset(
            torch.stack(x_train), torch.tensor(y_train))
        trainloader = torch.utils.data.DataLoader(
            trainset, batch_size=batch_size, shuffle=True)
        del x_train  # the stacked tensor now owns the data

        for data, target in tqdm(trainloader):
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad()
            output = model(data)

            # The loss already lives on `device`; the former .cuda() call
            # crashed on CPU-only machines.
            loss = criterion(output, target)

            # criterion (CrossEntropyLoss with default reduction) returns the
            # batch mean, so weight it by the batch size before summing; the
            # division by the sample count below then yields a true mean.
            train_running_loss += loss.item() * target.size(0)
            preds = output.argmax(dim=1)
            train_running_correct += (preds == target).sum().item()
            loss.backward()
            optimizer.step()

        # Free the chunk before the next one is decoded.
        del trainloader
        del trainset

    train_loss = train_running_loss/len(paths)
    train_accuracy = 100. * train_running_correct/len(paths)
    print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}')

    return train_loss, train_accuracy
# Per-epoch training history. No validation pass is run in this script, so
# the val_* lists stay empty; they are kept so the names remain defined.
train_loss, train_accuracy = [], []
val_loss, val_accuracy = [], []
start = time.time()
for epoch in range(total_epoch):
    train_epoch_loss, train_epoch_accuracy = train(model, train_image_path)
    train_loss.append(train_epoch_loss)
    train_accuracy.append(train_epoch_accuracy)
    # Report the epoch that just finished; the original printed the constant
    # total_epoch on every iteration.
    print(epoch + 1, train_accuracy, val_accuracy)

end = time.time()

print((end-start)/60, 'minutes')

평가

# evaluation function
def test(model, test_image_path, num_images=10000):
    """Predict a class index for each test image ``0.png`` .. ``N-1.png``.

    Args:
        model: Trained network; it is switched to evaluation mode.
        test_image_path: Directory containing the test images named
            ``<index>.png``. A trailing slash is optional.
        num_images: Number of images to read (default 10000, the value that
            was previously hard-coded).

    Returns:
        List of predicted class indices (ints) in image-index order.
    """
    import os

    model.eval()
    preds = []
    transform = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    # Inference needs no gradients; no_grad avoids building the autograd graph.
    with torch.no_grad():
        for idx in tqdm(range(num_images)):
            image_file = os.path.join(test_image_path, f'{idx}.png')
            # Close the file handle as soon as the pixels are decoded.
            with Image.open(image_file, mode='r') as raw:
                image = raw.convert('RGB')
            # Add the batch dimension the model expects: (1, C, H, W).
            data = transform(image).unsqueeze(0).to(device)
            output = model(data)
            preds.extend(output.argmax(dim=1).cpu().tolist())

    return preds
# Fill the submission template with the predicted labels and save it under a
# file name that records which backbone produced it.
import pandas as pd  # used below but never imported in the original

sample_submit_csv = pd.read_csv('/kaggle/input/2022-ai-w10p2/format.csv')
preds = test(model, "/kaggle/input/2022-ai-w10p2/test_data/test_data/")
sample_submit_csv['label'] = preds
sample_submit_csv.to_csv("{}_pred.csv".format(method), index=False)
728x90
LIST