```python
from dual_network import DN_INPUT_SHAPE
from math import sqrt
from tensorflow.keras.models import load_model
from pathlib import Path
import numpy as np
from battle import Battle
import pokedex as p
import moves as m

# Parameters
PV_EVALUATE_COUNT = 50  # number of simulations per inference (AlphaZero uses 1600)

# Inference
def predict(model, state):
    # Reshape the input data to the network's input shape
    x = np.array(state)
    x = x.reshape(1, 4, 2)

    # Inference
    y = model.predict(x, batch_size=1)

    # Get the policy (first output head, batch of one)
    policies = list(y[0][0])

    # Get the value (second output head)
    value = y[1][0]

    return policies, value

# Convert a list of nodes to a list of visit counts
def nodes_to_scores(nodes):
    scores = []
    for c in nodes:
        scores.append(c.n)
    return scores

# Get the Monte Carlo tree search scores
def pv_mcts_scores(model, state, temperature, winner=None):  # state holds 8 values
    # The two fixed parties (closure variables so the Node methods can see them)
    player1 = [
        p.Jolteon([m.BodySlam(), m.DoubleKick(), m.PinMissle(), m.Thunderbolt()])
    ]
    player2 = [
        p.Rhydon([m.Earthquake(), m.RockSlide(), m.Surf(), m.BodySlam()])
    ]

    # Definition of a Monte Carlo tree search node
    class Node:
        # Node initialization
        def __init__(self, state, p, winner):
            self.state = state          # state
            self.p = p                  # policy
            self.w = 0                  # cumulative value
            self.n = 0                  # visit count
            self.winner = winner
            self.child_nodes = None     # child nodes
            ((self.p1_is, self.p1_mae_action, self.p1_took_damage, self.p1_nokorihp),
             (self.p2_is, self.p2_mae_action, self.p2_took_damage, self.p2_nokorihp)) = state
            self.turn = 0

        # Compute the value of this position
        def evaluate(self):
            # When the game is over
            if self.winner is not None:
                # Value from the game result
                value = 0 if self.winner == player1 else -1

                # Update cumulative value and visit count
                self.w += value
                self.n += 1
                return value

            # When there are no child nodes
            if not self.child_nodes:
                # Get the policy and value from the neural network
                policies, value = predict(model, self.state)
                print("policies", policies)
                print("value", value)

                # Update cumulative value and visit count
                self.w += value
                self.n += 1

                # Expand child nodes
                self.child_nodes = []
                actions = [6, 7, 8, 9]
                for action, policy in zip(actions, policies):
                    battle = Battle(player1, player2)
                    zyoutai = battle.forward_step(self.p1_nokorihp, self.p2_nokorihp, action)
                    winner = battle.get_winner()
                    self.child_nodes.append(Node(zyoutai, policy, winner))
                return value

            # When child nodes exist
            else:
                # Get the value by evaluating the child with the largest PUCB value
                value = self.next_child_node().evaluate()

                # Update cumulative value and visit count
                self.w += value
                self.n += 1
                return value

        # Get the child node with the largest PUCB value
        def next_child_node(self):
            # Compute the PUCB values
            C_PUCT = 1.0
            t = sum(nodes_to_scores(self.child_nodes))
            pucb_values = []
            for child_node in self.child_nodes:
                pucb_values.append((-child_node.w / child_node.n if child_node.n else 0.0) +
                                   C_PUCT * child_node.p * sqrt(t) / (1 + child_node.n))
                self.turn += 1

            # Return the child node with the largest PUCB value
            print("pucb_values", pucb_values)
            print("turn", self.turn)
            index = int(np.argmax(pucb_values))
            print("index", index)
            return self.child_nodes[index]

    # Create the node for the current position
    root_node = Node(state, 0, winner)

    # Run the evaluation multiple times
    for _ in range(PV_EVALUATE_COUNT):
        root_node.evaluate()

    # Probability distribution over legal moves
    scores = nodes_to_scores(root_node.child_nodes)
    if temperature == 0:  # only the maximum gets 1
        action = np.argmax(scores)
        scores = np.zeros(len(scores))
        scores[action] = 1
    else:  # add variation with a Boltzmann distribution
        scores = boltzman(scores, temperature)
    return scores

# Action selection with Monte Carlo tree search
def pv_mcts_action(model, temperature=0):
    def select_action(state, winner=None):
        scores = pv_mcts_scores(model, state, temperature, winner)
        rng = np.random.default_rng()
        return rng.choice([0, 1, 2, 3], p=scores)
    return select_action

# Boltzmann distribution
def boltzman(xs, temperature):
    xs = [x ** (1 / temperature) for x in xs]
    total = sum(xs)
    return [x / total for x in xs]

# Sanity check
if __name__ == '__main__':
    # Load the model
    path = sorted(Path('./model').glob('*.h5'))[-1]
    model = load_model(str(path))
    winner = None

    # Create the initial state
    player1 = [
        p.Jolteon([m.BodySlam(), m.DoubleKick(), m.PinMissle(), m.Thunderbolt()])
    ]
    player2 = [
        p.Rhydon([m.Earthquake(), m.RockSlide(), m.Surf(), m.BodySlam()])
    ]
    battle = Battle(player1, player2)

    # Create the function that picks actions with Monte Carlo tree search
    action1 = pv_mcts_action(model, 1.0)

    result = None
    next_action = None
    while True:
        if result is None:
            # First turn (no result yet): build the initial state from speed order
            if player1[0].spe > player2[0].spe:
                c1, c2 = 1, 0
                c1hp = player1[0].actual_hp
                c2hp = player2[0].actual_hp
            else:
                c1, c2 = 0, 1
                c1hp = player2[0].actual_hp
                c2hp = player1[0].actual_hp
            result = ((c1, -1, -1, c1hp), (c2, -1, -1, c2hp))
        else:
            result = battle.forward_step(action=next_action)
        next_action = action1(result)

        # When the game is over
        winner = battle.get_winner()
        if winner is not None or battle.turn > 500:
            print("The battle has ended")
            break
```
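
A quick shape check for `predict`: the state is two 4-value tuples of `(is_first, last_action, took_damage, remaining_hp)`, and the code flattens them into the `(1, 4, 2)` batch the network expects. A minimal sketch with hypothetical HP values:

```python
# Sketch of the input reshaping done in predict(); the numbers are made up.
import numpy as np

state = ((1, -1, -1, 323), (0, -1, -1, 413))  # hypothetical 8-value state
x = np.array(state)       # shape (2, 4)
x = x.reshape(1, 4, 2)    # batch of one, as fed to model.predict
print(x.shape)            # (1, 4, 2)
```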
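The PUCB score in `next_child_node` balances the (negated) mean value of a child against an exploration bonus scaled by its prior policy. A minimal sketch of the same formula on made-up numbers, with `C_PUCT = 1.0` as in the code:

```python
# Toy PUCB computation; only the formula matches next_child_node above.
from math import sqrt

C_PUCT = 1.0

def pucb(w, n, p, t):
    """w: cumulative value, n: visits, p: prior policy, t: total sibling visits."""
    exploitation = -w / n if n else 0.0   # negated: the child's value is the opponent's
    exploration = C_PUCT * p * sqrt(t) / (1 + n)
    return exploitation + exploration

# Three hypothetical children as (w, n, p)
children = [(-0.5, 3, 0.2), (0.1, 1, 0.5), (0.0, 0, 0.3)]
t = sum(n for _, n, _ in children)  # t = 4
for w, n, p in children:
    print(round(pucb(w, n, p, t), 3))  # 0.267, 0.4, 0.6
# The unvisited child (n=0) scores purely on its prior times sqrt(t),
# so high-prior unexplored moves get tried early.
```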
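`boltzman` controls how deterministic the final move distribution is: at `temperature = 1.0` the visit counts are used proportionally, while lower temperatures sharpen the distribution toward the argmax. A small demo with hypothetical visit counts:

```python
# Demo of the boltzman() function above; the visit counts are made up.
def boltzman(xs, temperature):
    xs = [x ** (1 / temperature) for x in xs]
    total = sum(xs)
    return [x / total for x in xs]

scores = [30, 10, 5, 5]  # hypothetical visit counts for the four moves
print([round(s, 3) for s in boltzman(scores, 1.0)])  # [0.6, 0.2, 0.1, 0.1], proportional
print([round(s, 3) for s in boltzman(scores, 0.5)])  # sharper: counts are squared first
# As temperature -> 0 this approaches a one-hot argmax, matching the
# temperature == 0 branch in pv_mcts_scores.
```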
