-
Notifications
You must be signed in to change notification settings - Fork 0
/
arena.py
110 lines (93 loc) · 3.16 KB
/
arena.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import time
import numpy as np
import game
from progress.bar import Bar
from progress.misc import AverageMeter
class Arena:
    """Pit two players against each other over one or more games.

    A player is any callable that takes the canonical form of the current
    state (as produced by ``game.get_canonical_form``) and returns a move
    usable as an index into the vector from ``game.get_legal_moves``.
    """

    def __init__(self, player1, player2):
        self.player1 = player1
        self.player2 = player2

    def play_game(self):
        """Run one episode with ``self.player1`` moving first.

        Returns:
            ``current_player * game.get_state_score(state, current_player)``
            evaluated on the terminal state: 1 if the player in the player1
            seat won, -1 if the player in the player2 seat won, and any
            other non-zero value for a draw.

        Raises:
            AssertionError: if a player returns a move that
                ``game.get_legal_moves`` marks with 0.
        """
        # Indexed with ``current_player + 1`` so that -1 -> player2 and
        # 1 -> player1; the middle slot is never used.
        players = [self.player2, None, self.player1]
        current_player = 1
        state = game.get_init_state()

        # A score of 0 means the game is still in progress.
        while game.get_state_score(state, current_player) == 0:
            move = players[current_player + 1](
                game.get_canonical_form(state, current_player)
            )
            # The canonical form is rebuilt instead of reused so that a
            # player mutating its input cannot influence move validation.
            legal_moves = game.get_legal_moves(
                game.get_canonical_form(state, current_player), 1
            )
            if legal_moves[move] == 0:
                # Raised explicitly (not via ``assert``) so the check still
                # runs under ``python -O``; the exception type is unchanged.
                raise AssertionError(
                    "player {} chose illegal move {}".format(current_player, move)
                )
            state, current_player = game.get_next_state(
                state, current_player, move
            )

        return current_player * game.get_state_score(state, current_player)

    def play_games(self, num):
        """Play ``num`` games, alternating which player moves first.

        Player1 starts ``int(num / 2)`` games and player2 starts the same
        number, so an odd ``num`` results in ``num - 1`` games being played.
        The arena's ``player1``/``player2`` attributes are swapped only for
        the duration of the second half and are restored before returning,
        which keeps repeated calls attributing wins to the right player.

        Returns:
            one_won: games won by player1
            two_won: games won by player2
            draws: games won by nobody
        """
        games_per_side = int(num / 2)
        # Size the progress display by the games actually played so it
        # reaches 100% even when ``num`` is odd.
        total_games = 2 * games_per_side

        episode_time = AverageMeter()
        bar = Bar("Arena.play_games", max=total_games)
        end = time.time()
        episode = 0

        one_won = 0
        two_won = 0
        draws = 0

        original_seats = (self.player1, self.player2)
        try:
            for one_moves_first in (True, False):
                if not one_moves_first:
                    self.player1, self.player2 = self.player2, self.player1
                # ``play_game`` reports results by seat; this is the value it
                # returns when the *original* player1 wins in this half.
                one_wins_as = 1 if one_moves_first else -1

                for _ in range(games_per_side):
                    game_result = self.play_game()
                    if game_result == one_wins_as:
                        one_won += 1
                    elif game_result == -one_wins_as:
                        two_won += 1
                    else:
                        draws += 1

                    episode += 1
                    episode_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = "({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}".format(
                        eps=episode,
                        maxeps=total_games,
                        et=episode_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td,
                    )
                    bar.next()
        finally:
            # Undo the seat swap even if a game raised, and close the bar.
            self.player1, self.player2 = original_seats
            bar.finish()

        return one_won, two_won, draws