-
Notifications
You must be signed in to change notification settings - Fork 1
/
config.py
56 lines (46 loc) · 2.11 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
class Config:
    """Container for experiment settings with built-in defaults.

    Every setting is a plain instance attribute initialised in ``__init__``;
    ``merge_config`` overlays caller-supplied values on top of those defaults.
    """

    # default setting
    def __init__(self):
        """Populate every setting with its default value."""
        # set by command line args
        self.agent_type = None  # 'dan', 'dan_coverage', 'random_policy', 'coverage', 'dan_shared'
        self.random_seed = None

        # env params
        # The episode counts quoted below follow from max_ep_length = 12
        # (e.g. 60000 steps / 12 steps per episode = 5000 episodes).
        self.total_train_steps = 60000  # 5000 episodes
        self.agent_pre_train_steps = 3000  # 250 episodes
        # NOTE(review): the original comment said "total 100", which does not
        # match the value 500 - confirm which one is intended.
        self.test_ep_num = 500
        self.test_interval = 6000  # every 500 episodes
        # NOTE(review): the original comment said "1mil episodes"; the unit of
        # the buffer capacity is not visible here - confirm against the buffer.
        self.buffer_size = 1000000
        self.update_reward = False
        self.agent_update_freq = 1
        self.max_ep_length = 12  # The max allowed length of our episode.
        self.batch_size = 4
        self.trace_length = 8
        self.fc_size1 = 60
        self.fc_size2 = 30
        self.tau = 0.01
        self.gamma = 0.99
        self.nStates = 51
        self.nActions = 10
        self.print_ep_freq = 100

    # add custom setting
    def merge_config(self, custom_config):
        """Overlay custom settings on this configuration, in place.

        Each key of ``custom_config`` becomes (or overwrites) the attribute of
        the same name. Keys that do not match an existing default are still
        set, so new settings can be introduced this way.

        Args:
            custom_config: A mapping of attribute name to value (anything the
                ``dict`` constructor accepts), or ``None`` to leave the
                configuration unchanged.

        Raises:
            TypeError: If a key is not a string (raised by ``setattr``).
        """
        if custom_config is None:
            # No overrides supplied; keep the defaults instead of failing.
            return
        # dict() accepts mappings, objects exposing keys()/__getitem__, and
        # iterables of (key, value) pairs, so existing callers keep working.
        for key, value in dict(custom_config).items():
            setattr(self, key, value)
#### Parameters
# batch_size = 4 # How many experience traces to use for each training step.
# trace_length = 4 # How long each experience trace will be when training
# update_freq = 2 # How often to perform a training step.
# update_target = 20
# y = .99 # Discount factor on the target Q-values
# startE = 1 # Starting chance of random action
# endE = 0.1 # Final chance of random action
# anneling_steps = 50000 # How many steps of training to reduce startE to endE.
# num_episodes = 1000000 # How many episodes of game environment to train network with.
# pre_train_steps = 5000 # How many steps of random actions before training begins.
# load_model = False # Whether to load a saved model.
# path = "./drqn" # The path to save our model to.
# h_size = 512 # The size of the final recurrent layer before splitting it into Advantage and Value streams.
# max_epLength = 16 # The max allowed length of our episode.
# time_per_step = 1 # Length of each step used in gif creation
# summaryLength = 25 # Number of episodes to periodically save for analysis
# tau = 0.001