-
Notifications
You must be signed in to change notification settings - Fork 1
/
exp_gym.py
140 lines (99 loc) · 4.24 KB
/
exp_gym.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import argparse
import torch
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
from mushroom_rl.core import Core, Logger
from mushroom_rl.environments import Gym
from mushroom_rl.utils.dataset import compute_J
from metric_rl.metric_rl import MetricRL
from metric_rl.utils import save_parameters
from metric_rl.rl_shared import TwoPhaseEntropProfile, MLP
from experiment_launcher import get_default_params, add_launcher_base_args, run_experiment
def experiment(env_id, n_epochs=1000, n_steps=3000, n_steps_per_fit=3000, n_episodes_test=5, n_clusters=10,
               no_delete=True, temp=1., seed=0, results_dir='logs'):
    """Train and evaluate a MetricRL agent on a Gym environment.

    Args:
        env_id (str): Gym environment id passed to ``Gym``.
        n_epochs (int): number of learn/evaluate iterations.
        n_steps (int): environment steps collected per epoch.
        n_steps_per_fit (int): steps between policy fits inside ``core.learn``.
        n_episodes_test (int): evaluation episodes per epoch.
        n_clusters (int): number of policy clusters (see ``get_parameters``).
        no_delete (bool): when True, cluster deletion is disabled
            (``do_delete = not no_delete``).
        temp (float): policy temperature (see ``get_parameters``).
        seed (int): seed for numpy, torch and the environment.
        results_dir (str): log directory; when falsy, parameters are not saved.
    """
    # Seed every RNG the experiment touches for reproducibility.
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.set_num_threads(1)

    logger = Logger(log_name='MetricRL', results_dir=results_dir, seed=seed)
    logger.info('Running MetricRL experiment')
    logger.strong_line()

    # BUG FIX: `params` was previously built only inside `if results_dir:`,
    # but it is used unconditionally below, so a falsy results_dir raised
    # NameError. Build it unconditionally; only the save stays guarded.
    params = get_parameters(n_clusters, temp)
    if results_dir:
        save_parameters(logger.path, params)

    mdp = Gym(env_id)
    # Set environment seed (old-style gym API: seed on the wrapped env).
    mdp.env.seed(seed)

    # Complete the critic config with the observation shape, which is only
    # known once the environment exists.
    input_shape = mdp.info.observation_space.shape
    params['critic_params'] = dict(input_shape=input_shape,
                                   **params['critic_params'])
    params['do_delete'] = not no_delete

    agent = MetricRL(mdp.info, **params)
    agent.set_logger(logger)
    core = Core(agent, mdp)

    # Initial evaluation before any learning (epoch 0 baseline).
    dataset = core.evaluate(n_episodes=n_episodes_test, render=False)
    J = np.mean(compute_J(dataset, mdp.info.gamma))  # discounted return
    R = np.mean(compute_J(dataset))                  # undiscounted return
    E = agent.policy.entropy()
    logger.log_numpy(J=J, R=R, E=E)
    logger.epoch_info(0, J=J, R=R, E=E)

    # Learning loop: alternate training and evaluation each epoch.
    for it in range(n_epochs):
        core.learn(n_steps=n_steps, n_steps_per_fit=n_steps_per_fit, quiet=True)
        dataset = core.evaluate(n_episodes=n_episodes_test, render=False, quiet=True)
        J = np.mean(compute_J(dataset, mdp.info.gamma))
        R = np.mean(compute_J(dataset))
        E = agent.policy.entropy()
        logger.log_numpy(J=J, R=R, E=E)
        logger.epoch_info(it+1, J=J, R=R, E=E, cweights=agent.policy._regressor._c_weights.data.cpu().numpy())

    # Persist the trained agent after the final epoch.
    logger.log_agent(agent)
def get_parameters(n_clusters, temp):
    """Build the MetricRL hyper-parameter dictionary.

    Args:
        n_clusters (int): number of clusters used by the metric policy.
        temp (float): policy temperature.

    Returns:
        dict: keyword arguments for the ``MetricRL`` constructor. Note that
        ``critic_params`` deliberately omits ``input_shape`` — it is added by
        the caller once the environment's observation space is known.
    """
    return dict(
        policy_params=dict(n_clusters=n_clusters, std_0=1., temp=temp),
        critic_params=dict(
            network=MLP,
            optimizer={'class': optim.Adam,
                       'params': {'lr': 3e-4}},
            loss=F.mse_loss,
            batch_size=64,
            output_shape=(1,),
            size_list=[64, 64],
            n_models=2,           # twin critics
            prediction='min',     # pessimistic value estimate
            quiet=True),
        # Separate learning rates per parameter group of the actor.
        actor_optimizer={'class': optim.Adam,
                         'cw_params': {'lr': .01},
                         'means_params': {'lr': .01},
                         'log_sigma_params': {'lr': .001}},
        n_epochs_per_fit=20,
        batch_size=64,
        entropy_profile={'class': TwoPhaseEntropProfile,
                         'params': {'e_reduc': 0.0075, 'e_thresh_mult': .5}},
        max_kl=.015,
        lam=.95,
        critic_fit_params=dict(n_epochs=10))
def parse_args():
    """Parse command-line arguments for the experiment.

    Defaults come from the signature of ``experiment`` via
    ``get_default_params``, so no explicit ``default=`` is given here.

    Returns:
        dict: parsed arguments as a plain dictionary.
    """
    parser = argparse.ArgumentParser()
    # NOTE(review): with action='store_false', passing --no-delete sets
    # no_delete to False, which *enables* deletion in experiment()
    # (do_delete = not no_delete) — the flag name reads inverted relative
    # to its effect; confirm intent before changing.
    arg_specs = [
        ("--env-id", dict(type=str)),
        ("--n-clusters", dict(type=int)),
        ("--temp", dict(type=float)),
        ("--no-delete", dict(action='store_false')),
        ("--n-epochs", dict(type=int)),
        ("--n-steps", dict(type=int)),
        ("--n-steps-per-fit", dict(type=int)),
        ("--n-episodes-test", dict(type=int)),
    ]
    for flag, kwargs in arg_specs:
        parser.add_argument(flag, **kwargs)

    parser = add_launcher_base_args(parser)
    parser.set_defaults(**get_default_params(experiment))
    return vars(parser.parse_args())
if __name__ == '__main__':
    # Script entry point: parse CLI arguments and hand off to the launcher.
    run_experiment(experiment, parse_args())