Binary classification of time-series data (data with a uniform number of rows)

A 3-fold stratified pipeline: a generator randomly crops fixed-length windows from per-file CSVs, three Conv1D branches feed into an LSTM, and each fold is scored with a confusion matrix.

```python
import csv
import math
import numpy as np
from glob import glob
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold
from google.colab import drive
drive.mount('/content/drive')

class DataLoader:
    def __init__(self, split: int, batch_size: int, epochs: int, roll: int):
        self.batch_size = batch_size
        self.epochs = epochs
        self.roll = roll
        self.file, self.y, self.l = list(), list(), list()
        self.category = {"normal": 0, "abnormal": 1}
        for name in self.category.keys():
            for i, file in enumerate(glob(f"/content/drive/MyDrive/data/{name}/*.csv")):
                self.file.append(file)
                self.y.append(self.category[name])
                self.l.append(len(open(file).readlines()) - 1)  # subtract the header row
                assert self.l[-1] > roll, f"roll window too long for file: (roll, rows) = ({roll}, {self.l[-1]}) in {file}"
        self.skf = StratifiedKFold(split, shuffle=True)

    def generator(self, idx, epochs):
        X1, X2, X3, y = list(), list(), list(), list()
        for e in range(epochs):
            np.random.shuffle(idx)
            for i in idx:
                # Crop a random window of `roll` rows from the file.
                start = np.random.randint(0, self.l[i] - self.roll - 1)
                data = pd.read_csv(self.file[i]).values[start: start + self.roll]
                # Standardize the whole window with a single mean/std.
                data = StandardScaler().fit_transform(data.reshape(-1, 1)).reshape(data.shape)
                X1.append(np.concatenate([data[:, 8:26], data[:, 71:83]], axis=-1))
                X2.append(data[:, 107:113])
                X3.append(data[:, 117:120])
                y.append(self.y[i])
                if len(X1) == self.batch_size:
                    yield list(map(np.array, [X1, X2, X3])), np.array(y)  # yield one full batch
                    X1, X2, X3, y = list(), list(), list(), list()
        if len(X1):
            yield list(map(np.array, [X1, X2, X3])), np.array(y)  # yield the final partial batch

    def split(self):
        for train, test in self.skf.split(self.file, self.y):
            self.test_idx = test
            yield (
                self.generator(train, self.epochs),
                self.generator(test, self.epochs),
                math.ceil(len(train) / self.batch_size),
                math.ceil(len(test) / self.batch_size)
            )

import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Dense, Concatenate, Flatten, Dropout
from tensorflow.keras.layers import Conv1D, AveragePooling1D, GlobalAveragePooling1D
from tensorflow.keras.layers import LSTM
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

def build_model2(time_stamp):
    # Window length comes from time_stamp (= roll); the feature counts match
    # the column slices produced by DataLoader.generator (18 + 12, 6, 3).
    inputs1 = Input(shape=(time_stamp, 30))
    inputs2 = Input(shape=(time_stamp, 6))
    inputs3 = Input(shape=(time_stamp, 3))

    x1 = Conv1D(32, 7, activation="swish", kernel_initializer="he_uniform")(inputs1)
    x1 = AveragePooling1D()(x1)
    x2 = Conv1D(32, 7, activation="swish", kernel_initializer="he_uniform")(inputs2)
    x2 = AveragePooling1D()(x2)
    x3 = Conv1D(32, 7, activation="swish", kernel_initializer="he_uniform")(inputs3)
    x3 = AveragePooling1D()(x3)

    combined = Concatenate(axis=-1)([x1, x2, x3])
    x = LSTM(32, dropout=0.2)(combined)
    x = Dense(1, activation="sigmoid")(x)
    return Model(inputs=[inputs1, inputs2, inputs3], outputs=x)

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import balanced_accuracy_score, accuracy_score, precision_score, recall_score, f1_score

epochs = 128
batch_size = 8
time_stamp = 30
generator = DataLoader(split=3, roll=time_stamp, batch_size=batch_size, epochs=epochs)

def reround(number, ndigits=0):
    # Adjust the 0-1 decision threshold: floor(p + 0.6) maps p >= 0.4 to 1.
    shift_amount = 10 ** ndigits
    shifted = number * shift_amount
    return np.floor(shifted + 0.6) / shift_amount

for train_gen, valid_gen, steps_per_epoch, validation_steps in generator.split():
    model = build_model2(time_stamp)  # Be sure to rebuild the model for each fold.
    model.summary()
    model.compile(
        loss="binary_crossentropy",
        optimizer=Adam(),
        metrics=["acc"]
    )
    es = EarlyStopping(
        monitor="val_loss",          # stop if val_loss
        patience=10,                 # fails to reach a new minimum
        mode="min",                  # within 10 epochs,
        restore_best_weights=True,   # and keep the best weights
        verbose=1,
    )
    # Feed the generators to the model and train.
    model.fit(
        train_gen,
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        class_weight={0: 4, 1: 1},
        validation_data=valid_gen,
        validation_steps=validation_steps,
        callbacks=[es],
    )

    y_valid, y_pred = list(), list()
    test_generator = generator.generator(generator.test_idx, 1)
    for (X1, X2, X3), y in test_generator:
        y_pred.extend(reround(model.predict([X1, X2, X3], batch_size=batch_size)))
        y_valid.extend(y)

    # Confusion matrix
    tn, fp, fn, tp = confusion_matrix(y_valid, y_pred).ravel()  # unpack each cell
    print(f"TN {tn} / FP {fp} / FN {fn} / TP {tp}")
```
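The fold loop imports `balanced_accuracy_score`, `accuracy_score`, `precision_score`, `recall_score`, and `f1_score` but only ever prints the raw confusion-matrix counts. A minimal sketch of reporting those scores per fold from the same `y_valid` / `y_pred` lists (the helper name `report_fold_metrics` is mine, not from the original):

```python
# Sketch: summarize one fold with the metrics already imported above.
# y_valid / y_pred are the lists collected in the evaluation loop;
# np.ravel flattens the (n, 1) predictions into a 1-D label vector.
def report_fold_metrics(y_valid, y_pred):
    y_hat = np.ravel(y_pred)
    print(f"accuracy          {accuracy_score(y_valid, y_hat):.3f}")
    print(f"balanced accuracy {balanced_accuracy_score(y_valid, y_hat):.3f}")
    print(f"precision         {precision_score(y_valid, y_hat):.3f}")
    print(f"recall            {recall_score(y_valid, y_hat):.3f}")
    print(f"f1                {f1_score(y_valid, y_hat):.3f}")

report_fold_metrics(y_valid, y_pred)
```

Since the `class_weight = {0: 4, 1: 1}` setting deliberately skews training toward the "normal" class, balanced accuracy is the more informative of the two accuracy figures.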

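`ConfusionMatrixDisplay` is likewise imported but never drawn. A short sketch of plotting the same counts with the class names used by `DataLoader` (the color map and layout are assumptions, not from the original):

```python
# Sketch: visualize the fold's confusion matrix; labels follow
# DataLoader.category ({"normal": 0, "abnormal": 1}).
cm = confusion_matrix(y_valid, np.ravel(y_pred))
disp = ConfusionMatrixDisplay(cm, display_labels=["normal", "abnormal"])
disp.plot(cmap="Blues")
plt.show()
```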