# pendulum_step_numba.py
import numba
import numba.cuda as numba_driver
import numpy as np
import math
# Default initial state: angle theta (radians) and angular velocity.
DEFAULT_X = np.pi
DEFAULT_Y = 1.0
# Angular-velocity clamp applied after integration (rad/s).
max_speed = 8
# Torque clamp applied to the raw action.
max_torque = 2.0
# Euler integration timestep (s).
dt = 0.05
# Gravitational acceleration (m/s^2).
g = 9.81
# Pendulum mass (kg).
m = 1.0
# Pendulum length (m).
l = 1.0
@numba_driver.jit
def _clip(v, min, max):
    """Clamp v to the closed interval [min, max] (lower bound checked first)."""
    if v < min:
        return min
    return max if v > max else v
@numba_driver.jit
def angle_normalize(x):
    """Wrap the angle x (radians) into the half-open interval [-pi, pi)."""
    two_pi = 2 * np.pi
    return (x + np.pi) % two_pi - np.pi
@numba_driver.jit
def NumbaClassicControlPendulumEnvStep(
        state_arr,
        action_arr,
        done_arr,
        reward_arr,
        observation_arr,
        env_timestep_arr,
        episode_length):
    """Advance every pendulum environment by one Euler step.

    Launched with one CUDA block per environment and one thread per agent
    (exactly one agent is supported). Reads (theta, theta_dot) from
    ``state_arr``, writes the integrated state back in place, fills
    ``observation_arr`` with (cos theta, sin theta, theta_dot), stores the
    negative quadratic cost in ``reward_arr``, and flags ``done_arr`` when
    the episode length is reached.
    """
    env_id = numba_driver.blockIdx.x
    agent_id = numba_driver.threadIdx.x
    assert agent_id == 0, "We only have one agent per environment"

    env_timestep_arr[env_id] += 1
    assert 0 < env_timestep_arr[env_id] <= episode_length

    theta = state_arr[env_id, agent_id, 0]
    theta_dot = state_arr[env_id, agent_id, 1]

    # Clamp the raw action to the torque limits before using it.
    torque = _clip(action_arr[env_id, agent_id, 0], -max_torque, max_torque)

    # Quadratic cost on angle error, angular velocity, and applied torque.
    cost = (angle_normalize(theta) ** 2
            + 0.1 * theta_dot ** 2
            + 0.001 * (torque ** 2))

    # Euler integration of the pendulum dynamics; velocity is clipped
    # before the new angle is computed.
    theta_dot_new = theta_dot + (
        3 * g / (2 * l) * math.sin(theta)
        + 3.0 / (m * l ** 2) * torque) * dt
    theta_dot_new = _clip(theta_dot_new, -max_speed, max_speed)
    theta_new = theta + theta_dot_new * dt

    state_arr[env_id, agent_id, 0] = theta_new
    state_arr[env_id, agent_id, 1] = theta_dot_new

    observation_arr[env_id, agent_id, 0] = math.cos(theta_new)
    observation_arr[env_id, agent_id, 1] = math.sin(theta_new)
    observation_arr[env_id, agent_id, 2] = theta_dot_new

    reward_arr[env_id, agent_id] = -cost

    if env_timestep_arr[env_id] == episode_length:
        done_arr[env_id] = 1