fit does not work in Keras.

```python
import logging
from random import random

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import Huber

GAMMA = 0.9

# Prepare parameters  # added
NUM_EPISODES = 500    # number of episodes
MAX_STEPS = 200       # maximum steps per episode
GAMMA = 0.99          # discount factor
WARMUP = 10           # number of warm-up (no-op) steps

# Exploration parameters
E_START = 1.0         # initial value of epsilon
E_STOP = 0.01         # final value of epsilon
E_DECAY_RATE = 0.001  # decay rate of epsilon

# Memory parameters
MEMORY_SIZE = 10000   # size of the experience memory
BATCH_SIZE = 32       # batch size

# Definition of the action-value function  # added
class QNetwork:
    # initialization
    def __init__(self, state_size, action_size):
        # build the model
        self.model = Sequential()
        self.model.add(Dense(16, activation='relu', input_dim=state_size))
        self.model.add(Dense(16, activation='relu'))
        self.model.add(Dense(16, activation='relu'))
        self.model.add(Dense(action_size, activation='linear'))

        # compile the model
        self.model.compile(optimizer=Adam(learning_rate=0.001), loss=Huber())


class NeuralNetworkPlayer(Player):
    model: QNetwork  # fully connected network
    epsilon: float

    def __init__(
        self,
        pokemons: list[p.Pokemon],
        model: QNetwork,
        epsilon: float,
    ):
        self.pokemons = pokemons
        self.model = model  # set the model
        self.epsilon = epsilon

    def choose_action(self, opponent: Player) -> Action:
        available_pokemons = self.get_available_pokemons_for_change()
        if random() < self.epsilon:  # act randomly with probability epsilon
            if len(available_pokemons) == 0 or random() < 0.5:
                return self.pick_random_move_action()
            else:
                return Action.change_to(self.get_random_living_pokemon_index_to_replace())
        predicts = self.model.predict([[*self.to_array(), *opponent.to_array()]])[0]
        for index, _ in enumerate(predicts):
            if index < 6 and (
                self.pokemons[index].actual_hp <= 0 or index == self.active_pokemon_index
            ):
                predicts[index] = predicts.min() - 1
        logging.info(f"predictions:\n{predicts}")
        return Action(predicts.argmax())

    def choose_action_on_pokemon_dead(self, opponent: Player) -> Action:
        if random() < self.epsilon:
            return Action.change_to(self.get_random_living_pokemon_index_to_replace())
        else:
            predicts_arr = self.model.predict([[*self.to_array(), *opponent.to_array()]])
            predicts = predicts_arr[0][:6]  # [:6] removes skill moves
            for index in range(0, len(self.pokemons)):
                if self.pokemons[index].actual_hp <= 0 or index == self.active_pokemon_index:
                    predicts[index] = predicts.min() - 1
            logging.info(f"predictions:\n{predicts}")
            return Action(predicts.argmax())


class ValueFunctionAgent:
    # model: Pipeline
    model: QNetwork
    battle: Battle
    learner: Player
    opponent: Player
    epsilon: float

    def __init__(
        self,
        model=None,
        epsilon: float = 0.2,
    ):
        if model:
            self.model = model
        else:
            self.model = QNetwork(  # number of states, number of actions
                state_size=5,
                action_size=4,
            )
        self.epsilon = epsilon
        # pokemon(id + is_active + actual_hp + 4 moves) * 8, 2 players
        fake_state = np.array([np.zeros((7 * 6) * 2)])
        fake_estimation = np.array([np.zeros(10)])  # change * 6, moves * 4
        self.model.fit(fake_state, fake_estimation, epochs=1, verbose=0)

    def reset(self, max_episodes: int, episodes: int):
        # self.learner = NeuralNetworkPlayer(
        #     build_random_team(),
        #     self.model,
        #     self.epsilon,
        # )
        # the player that battles
        self.learner = NeuralNetworkPlayer(
            [
                p.Jolteon(
                    [
                        m.BodySlam(),
                        m.DoubleKick(),
                        m.PinMissle(),
                        m.Thunderbolt(),
                    ]
                )
            ],
            self.model,
            self.epsilon,
        )

    def update(self, experiences: list[Experience]):
        states = np.array([e.state for e in experiences])
        next_states = np.array([e.next_state for e in experiences])

        # compute the target values
        y = self.model.predict(states)
        future = self.model.predict(next_states)

        for i, experience in enumerate(experiences):
            reward = experience.reward
            if not experience.done:
                reward += GAMMA * np.max(future[i])
            y[i][experience.action.value] = reward

        self.model.fit(states, y, epochs=1, verbose=0)
```
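In the code above, `fit` and `predict` are called on the `QNetwork` wrapper itself (which only defines `__init__`), and the network is built with `state_size=5` / `action_size=4` while `fake_state` has 84 features and `fake_estimation` has 10 outputs, so one of these mismatches may be what makes `fit` fail. For comparison, here is a minimal, self-contained sketch of the same pattern where `fit` does run; it assumes `tensorflow.keras`, and the forwarding methods and toy shapes are only for illustration, not part of the original project:

```python
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import Huber


class QNetwork:
    def __init__(self, state_size: int, action_size: int):
        # Same architecture as in the question, built and compiled once.
        self.model = Sequential([
            Dense(16, activation="relu", input_dim=state_size),
            Dense(16, activation="relu"),
            Dense(16, activation="relu"),
            Dense(action_size, activation="linear"),
        ])
        self.model.compile(optimizer=Adam(learning_rate=0.001), loss=Huber())

    # Forward fit/predict to the wrapped Keras model so the wrapper can be
    # used directly by code such as ValueFunctionAgent.
    def fit(self, x, y, **kwargs):
        return self.model.fit(x, y, **kwargs)

    def predict(self, x, **kwargs):
        return self.model.predict(x, **kwargs)


# (7 * 6) * 2 = 84 state features and 10 actions, matching the fake_state
# and fake_estimation arrays used in the question.
qn = QNetwork(state_size=84, action_size=10)
fake_state = np.array([np.zeros((7 * 6) * 2)])
fake_estimation = np.array([np.zeros(10)])
qn.fit(fake_state, fake_estimation, epochs=1, verbose=0)
print(qn.predict(fake_state).shape)  # -> (1, 10)
```

With this wiring, calling `fit`/`predict` on the wrapper behaves like calling them on the underlying `Sequential` model, and the input/output sizes match the arrays that `ValueFunctionAgent.__init__` passes to `fit`.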
