Using Mixup lowers accuracy on the training data

The script below trains a network (mnist_Net) on FashionMNIST and, when the mix flag is set, prepends Mixup-augmented samples to the training set, so the training and test curves with and without Mixup can be compared.

Python

# Load the required libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import models
from torchvision.models.feature_extraction import create_feature_extractor
#from memory_profiler import profile
#from pytorch_memlab import profile
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from IPython.display import display
import time
import pickle
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

# Load the dataset
train_data = torchvision.datasets.FashionMNIST('./fashion-mnist', train=True, download=True, transform=torchvision.transforms.ToTensor())
test_data = torchvision.datasets.FashionMNIST('./fashion-mnist', train=False, download=True, transform=torchvision.transforms.ToTensor())
random_state = 0   # seed used when splitting
np.random.seed(5)  # seed used for Mixup
mix = True         # whether to apply Mixup

train_data.data, train_data.targets = shuffle(train_data.data, train_data.targets)
x_train, y_train = train_data.data, train_data.targets
x_test, y_test = test_data.data, test_data.targets

if torch.cuda.is_available():
    model = mnist_Net().to('cuda')
else:
    model = mnist_Net()

# One-hot encode the labels so that Mixup can be applied to them
y_train_enc = []
for i in range(len(y_train)):
    y_train_enc.append([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    y_train_enc[i][y_train[i]] = 1
y_train_enc = torch.tensor(y_train_enc)

mixup_rate = 0.5  # ratio of Mixup samples to original training samples
mixup_size = int(len(x_train) * mixup_rate)
mixup_data_x = []
mixup_data_y = []
for i in range(mixup_size):
    index_1 = np.random.randint(0, len(x_train) - 1)  # index of x_1, y_1
    index_2 = np.random.randint(0, len(x_train) - 1)  # index of x_2, y_2
    if index_1 == index_2:  # avoid mixing a sample with itself
        if index_1 == 0:
            index_2 += 1
        else:
            index_2 -= 1
    x_1 = x_train[index_1]
    y_1 = y_train_enc[index_1]
    x_2 = x_train[index_2]
    y_2 = y_train_enc[index_2]
    x, y = mixup(x_1, y_1, x_2, y_2)
    mixup_data_x.append(x)
    mixup_data_y.append(y)
mixup_data_x = torch.stack(mixup_data_x)
mixup_data_y = torch.stack(mixup_data_y)

x_train, y_train_enc = shuffle(x_train, y_train_enc)

# Prepend the Mixup samples to the original training data
x_train_mix = np.append(mixup_data_x, x_train, axis=0)
y_train_mix = np.append(mixup_data_y, y_train_enc, axis=0)

if mix:
    x_train = torch.tensor(x_train_mix)
    y_train_enc = torch.tensor(y_train_mix)

optimizer = torch.optim.Adam(model.parameters())
epoch = 150
batch = 1000
iteration = int(len(x_train) / batch)
loss_train = []
acc_train = []
loss_test = []
acc_test = []
ce = nn.CrossEntropyLoss()  # accepts soft (mixed) targets in PyTorch >= 1.10
for i in range(epoch):
    if i % 10 == 0:
        print('epoch:', i)

    loss_total_train = 0
    rmse_train = 0
    x_train, y_train_enc = shuffle(x_train, y_train_enc)
    y_output = []
    y_output_test = []
    model.train()
    for j in range(iteration):
        x_train_batch = x_train[j*batch:(j+1)*batch].to(torch.float32).to('cuda')
        x_train_batch = torch.unsqueeze(x_train_batch, dim=1)
        y_train_batch = y_train_enc[j*batch:(j+1)*batch].to(torch.float32).to('cuda')

        optimizer.zero_grad()
        y = model(x_train_batch)
        #y_train_batch = y_train_batch.reshape(batch,1)
        loss = ce(y, y_train_batch)
        loss_total_train += loss.detach().to('cpu')  # detach so the graph is not retained across the epoch
        loss.backward()
        optimizer.step()

        y_output.append(torch.argmax(y, dim=1).to('cpu').detach().numpy())
    loss_train.append(loss_total_train.detach().numpy() / len(x_train))
    acc = 0
    y_output = np.stack(y_output).flatten()
    # Training accuracy: compare predictions against the argmax of the (possibly mixed) labels
    for n in range(len(x_train)):
        if y_output[n] == torch.argmax(y_train_enc[n], dim=0):
            acc += 1
    acc_train.append(acc / len(x_train))
    model.eval()
    y = model(torch.unsqueeze(x_test.to(torch.float32).to('cuda'), dim=1))

    loss = ce(y, y_test.to(torch.int64).to('cuda'))
    y_output_test.append(torch.argmax(y, dim=1).to('cpu').detach().numpy())
    loss_test.append(loss.to('cpu').detach().numpy())
    acc = 0
    for n in range(len(x_test)):
        if y_output_test[0][n] == y_test[n]:
            acc += 1
    acc_test.append(acc / len(x_test))
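The listing above calls mixup(x_1, y_1, x_2, y_2), which is not defined in this section. A minimal sketch of such a helper, assuming the standard Mixup formulation (interpolating both the inputs and the one-hot labels with a coefficient drawn from a Beta distribution; the alpha value here is an assumption, not taken from the post):

Python

import numpy as np
import torch

# Hypothetical sketch of the mixup() helper used above; the author's actual
# implementation is not shown in this section.
def mixup(x_1, y_1, x_2, y_2, alpha=0.2):
    # Mixing coefficient drawn from Beta(alpha, alpha); alpha=0.2 is assumed here
    lam = np.random.beta(alpha, alpha)
    # Interpolate both the images and the one-hot labels with the same coefficient
    x = lam * x_1.to(torch.float32) + (1 - lam) * x_2.to(torch.float32)
    y = lam * y_1.to(torch.float32) + (1 - lam) * y_2.to(torch.float32)
    return x, y

With a helper like this, each synthetic sample carries a soft label, and the training-accuracy check in the loop compares the model's prediction against the argmax of that soft label; mixed samples are intrinsically harder to match, which is one plausible contributor to the drop described in the title.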
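mnist_Net is likewise defined elsewhere in the post. As a stand-in, any module mapping a (N, 1, 28, 28) float batch to (N, 10) logits works; the small CNN below is an assumed architecture, not the author's:

Python

import torch.nn as nn

# Hypothetical stand-in for mnist_Net; the author's actual architecture is not
# shown in this section.
class mnist_Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),   # (N, 32, 28, 28)
            nn.ReLU(),
            nn.MaxPool2d(2),                              # (N, 32, 14, 14)
            nn.Conv2d(32, 64, kernel_size=3, padding=1),  # (N, 64, 14, 14)
            nn.ReLU(),
            nn.MaxPool2d(2),                              # (N, 64, 7, 7)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 7 * 7, 128),
            nn.ReLU(),
            nn.Linear(128, 10),  # logits for the 10 FashionMNIST classes
        )

    def forward(self, x):
        return self.classifier(self.features(x))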
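The loop only accumulates the per-epoch metrics into loss_train, acc_train, loss_test and acc_test. A plotting snippet like the following (not part of the original listing, but matplotlib is already imported) can be used to see how the training and test curves behave:

Python

import matplotlib.pyplot as plt

# Visualize the per-epoch metrics collected in the training loop above
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(loss_train, label='train')
ax1.plot(loss_test, label='test')
ax1.set_xlabel('epoch')
ax1.set_ylabel('cross-entropy loss')
ax1.legend()
ax2.plot(acc_train, label='train')
ax2.plot(acc_test, label='test')
ax2.set_xlabel('epoch')
ax2.set_ylabel('accuracy')
ax2.legend()
plt.tight_layout()
plt.show()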
