/
env.py
202 lines (168 loc) · 7.5 KB
/
env.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
import random
import numpy as np
# Action codes for the eight movement directions, numbered clockwise starting
# from TOP in 45-degree steps.  The numbering is significant:
# SoccerEnv.get_actual_action rotates its probability table by the action
# index, so neighbouring codes must be neighbouring directions.
(
    TOP,
    TOP_RIGHT,
    RIGHT,
    BOTTOM_RIGHT,
    BOTTOM,
    BOTTOM_LEFT,
    LEFT,
    TOP_LEFT,
) = range(8)
class SoccerEnv:
    """Two-player grid soccer environment with noisy movement.

    The field is ``width`` x ``height`` cells.  A goal of ``goal_size`` rows,
    vertically centred, sits just outside each side of the field: the left
    agent scores by carrying the ball to ``x == width`` and the right agent
    by carrying it to ``x == -1``.

    The state is the tuple
    ``(agent_left_x, agent_left_y, agent_right_x, agent_right_y,
    ball_possession)`` where ``ball_possession`` is 0 when the left agent
    holds the ball and 1 when the right agent does.  Actions are the eight
    direction codes 0..7.  Call :meth:`reset` before the first :meth:`step`;
    until then the agents have no position and ``ball_possession`` is None.
    """

    def __init__(self, width=5, height=5, goal_size=3, prob_random=0.05):
        """Validate the field dimensions and build the action-noise table.

        Args:
            width: number of columns of the field; must be at least 2.
            height: number of rows of the field.
            goal_size: number of rows covered by each goal; must satisfy
                ``0 < goal_size <= height`` and have the same parity as
                ``height`` so the goal can be centred.
            prob_random: probability that an action is replaced by a
                different direction; must lie in ``[0, 1]``.

        Raises:
            ValueError: if any of the constraints above is violated.
        """
        # check if the dimension is valid
        if width < 2:
            raise ValueError('`width` must be at least 2')
        if not 0 < goal_size <= height:
            raise ValueError('`goal_size` must be greater than 0 and smaller or equal to `height`')
        if (height - goal_size) % 2 != 0:
            raise ValueError('`height` and `goal_size` must both be odd or even')
        # Reject bad probabilities here (this also catches NaN) instead of
        # letting np.random.choice fail later inside step().
        if not 0 <= prob_random <= 1:
            raise ValueError('`prob_random` must be between 0 and 1')

        # set parameter of the environment
        self.env_dim = 5  # (agent_left_x, agent_left_y, agent_right_x, agent_right_y, ball_possession)
        self.act_dim = 8  # split 360 degrees by 8 directions by 45-degree difference

        # set dimension of the field
        self.width = width
        self.height = height
        self.goal_size = goal_size

        # Probability of the executed direction, indexed by its offset from
        # the requested one:
        #   [correct, 45, 90, 135, 180, -135, -90, -45]
        # The requested direction keeps 1 - prob_random; the remaining
        # prob_random is split 4 : 3 : 2 : 1 : 2 : 3 : 4 (total 19) so that
        # small deviations are more likely than large ones.
        deviation_weights = [4, 3, 2, 1, 2, 3, 4]
        self.prob_distribution = [1 - prob_random] + \
            [prob_random / 19 * w for w in deviation_weights]

        # initialize agents and ball possession
        self.agent_left = Agent()
        self.agent_right = Agent()
        self.ball_possession = None

    def _state(self):
        """Return the current state tuple (positions of both agents + possession)."""
        return self.agent_left.get_xy() + self.agent_right.get_xy() + (self.ball_possession,)

    def _in_goal_rows(self, y):
        """Return True if row ``y`` is one of the vertically centred goal rows."""
        return (self.height - self.goal_size) / 2 <= y <= (self.height + self.goal_size) / 2 - 1

    def reset(self):
        """Start a new episode and return the initial state.

        Both agents are placed on the middle row, the left agent in the first
        column and the right agent in the last one.  Ball possession is drawn
        uniformly at random (0 for left, 1 for right).
        """
        middle_row = self.height // 2
        self.agent_left.set_xy(0, middle_row)
        self.agent_right.set_xy(self.width - 1, middle_row)
        # 0 for left possession, 1 for right possession
        self.ball_possession = random.randint(0, 1)
        return self._state()

    def step(self, agent_left_action, agent_right_action):
        """Advance the game by one simultaneous move of both agents.

        Each requested action is first passed through the noise model.  If
        the resulting moves score a goal the episode ends and the agents are
        left where they were.  Otherwise:

        * an agent that would leave the field stays put and loses 1 reward;
        * if the agents would swap cells or land on the same cell, ball
          possession switches, the new holder gains 2, the other loses 2,
          and both agents stay put;
        * in every other case the agents move to their new cells.

        Args:
            agent_left_action: direction code requested by the left agent.
            agent_right_action: direction code requested by the right agent.

        Returns:
            ``(done, reward_l, reward_r, state, actions)`` where ``actions``
            is the pair of actions that were actually executed.
        """
        # add randomness into the environment (left first, then right)
        al_actual_action = self.get_actual_action(agent_left_action)
        ar_actual_action = self.get_actual_action(agent_right_action)
        if al_actual_action != agent_left_action:
            print('env randomness on agent_left')
        if ar_actual_action != agent_right_action:
            print('env randomness on agent_right')

        # check if game is over and the rewards
        done, reward_l, reward_r = self.game_over(al_actual_action, ar_actual_action)

        if not done:
            # underscore (_) after variable means next state
            al_loc_ = self.agent_left.move(al_actual_action)
            ar_loc_ = self.agent_right.move(ar_actual_action)

            # an off-field target keeps the agent in place and is penalised
            if not self.location_valid(al_loc_):
                al_loc_ = self.agent_left.get_xy()
                reward_l -= 1
            if not self.location_valid(ar_loc_):
                ar_loc_ = self.agent_right.get_xy()
                reward_r -= 1

            if self.change_possesion(al_loc_, ar_loc_):
                # switch ball possession
                self.ball_possession = int(not self.ball_possession)
                # reward the agent who stole the ball
                if self.ball_possession == 0:
                    reward_l += 2
                    reward_r -= 2
                elif self.ball_possession == 1:
                    reward_l -= 2
                    reward_r += 2
                # a collision cancels both moves
                al_loc_ = self.agent_left.get_xy()
                ar_loc_ = self.agent_right.get_xy()

            self.agent_left.set_xy(*al_loc_)
            self.agent_right.set_xy(*ar_loc_)

        actions = (al_actual_action, ar_actual_action)
        return done, reward_l, reward_r, self._state(), actions

    def location_valid(self, location):
        """Return True if ``location`` (an ``(x, y)`` pair) lies on the field."""
        x, y = location
        return 0 <= x < self.width and 0 <= y < self.height

    def game_over(self, agent_left_action, agent_right_action):
        """Check whether the given moves score a goal.

        The left agent is checked first.  An agent only scores when it holds
        the ball and its move ends in the opponent-side goal column within
        the goal rows.

        Returns:
            ``(done, reward_left, reward_right)``: ``(True, 10, -10)`` if the
            left agent scores, ``(True, -10, 10)`` if the right agent scores,
            ``(False, 0, 0)`` otherwise.
        """
        # underscore (_) after variable means next state
        al_x_, al_y_ = self.agent_left.move(agent_left_action)
        if self.ball_possession == 0 and al_x_ == self.width and self._in_goal_rows(al_y_):
            # left agent wins
            return True, 10, -10

        ar_x_, ar_y_ = self.agent_right.move(agent_right_action)
        if self.ball_possession == 1 and ar_x_ == -1 and self._in_goal_rows(ar_y_):
            # right agent wins
            return True, -10, 10

        # game not end yet
        return False, 0, 0

    def get_actual_action(self, action):
        """Sample the direction that is really executed for ``action``.

        The noise table is rotated right by ``action`` positions so that
        index ``action`` carries the ``1 - prob_random`` mass and the other
        directions get their share according to how far they deviate.

        Returns:
            The executed direction code as a plain ``int``.
        """
        rotated = self.prob_distribution[-action:] + self.prob_distribution[:-action]
        # np.random.choice(n, p=...) draws from range(n); convert the numpy
        # scalar so callers get an ordinary Python int.
        return int(np.random.choice(len(rotated), p=rotated))

    def change_possesion(self, al_next_loc, ar_next_loc):
        """Return True if the proposed moves make the agents collide.

        A collision is either both agents swapping cells or both agents
        targeting the same cell.
        """
        al_current_loc = self.agent_left.get_xy()
        ar_current_loc = self.agent_right.get_xy()
        swapped = al_current_loc == ar_next_loc and ar_current_loc == al_next_loc
        return swapped or al_next_loc == ar_next_loc

    def show(self):
        """Print the field to stdout, one text row per field row.

        ``+`` marks a goal cell in the columns beside the field, ``.`` an
        empty field cell.  The left agent is drawn as a triangle and the
        right agent as a circle; the filled glyph belongs to the agent
        holding the ball.
        """
        if self.ball_possession == 0:
            left, right = '▲', '○'
        else:
            left, right = '△', '●'

        left_xy = self.agent_left.get_xy()
        right_xy = self.agent_right.get_xy()

        for y in range(self.height):
            # the columns at x == -1 and x == width hold the goals
            edge = '+' if self._in_goal_rows(y) else ' '
            cells = [edge]
            for x in range(self.width):
                if (x, y) == left_xy:
                    cells.append(left)
                elif (x, y) == right_xy:
                    cells.append(right)
                else:
                    cells.append('.')
            cells.append(edge)
            print(''.join(cells))
class Agent:
    """A player on the grid, identified only by its ``(x, y)`` position."""

    def __init__(self, x=None, y=None):
        """Create an agent at ``(x, y)``; both default to None (unplaced)."""
        self.set_xy(x, y)

    def move(self, action):
        """Return the cell reached by taking ``action`` from the current one.

        The agent itself is not moved; callers decide whether to commit the
        result with :meth:`set_xy`.  ``y`` decreases towards the top.  An
        unknown action yields the current position unchanged.
        """
        offsets = {
            TOP: (0, -1),
            TOP_RIGHT: (1, -1),
            RIGHT: (1, 0),
            BOTTOM_RIGHT: (1, 1),
            BOTTOM: (0, 1),
            BOTTOM_LEFT: (-1, 1),
            LEFT: (-1, 0),
            TOP_LEFT: (-1, -1),
        }
        if action not in offsets:
            return (self.x, self.y)
        dx, dy = offsets[action]
        return (self.x + dx, self.y + dy)

    def set_xy(self, x, y):
        """Place the agent at ``(x, y)``."""
        self.x, self.y = x, y

    def get_xy(self):
        """Return the current position as an ``(x, y)`` tuple."""
        return (self.x, self.y)