/
model.py
58 lines (50 loc) · 1.94 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.autograd as autograd
import math, random
# Evaluated once at import time; decides where `Variable` places its result.
USE_CUDA = torch.cuda.is_available()


def Variable(*args, **kwargs):
    """Create an ``autograd.Variable`` and move it to the GPU when CUDA is available.

    All positional and keyword arguments are forwarded unchanged to
    ``autograd.Variable``. When ``USE_CUDA`` is true the result of calling
    ``.cuda()`` on the new variable is returned; otherwise the variable is
    returned as constructed.
    """
    wrapped = autograd.Variable(*args, **kwargs)
    if USE_CUDA:
        return wrapped.cuda()
    return wrapped
class QLearner(nn.Module):
    """Convolutional Q-network with an epsilon-greedy action selector.

    Three convolution + ReLU stages feed a two-layer fully connected head
    that produces one value per action for every observation in the batch.

    Args:
        env: Environment object. Only ``env.observation_space.shape`` and
            ``env.action_space.n`` are read. The first entry of the shape is
            used as the number of input channels of the first convolution,
            so observations are expected in channels-first layout.
    """

    def __init__(self, env):
        super(QLearner, self).__init__()
        self.env = env
        self.input_shape = self.env.observation_space.shape
        self.num_actions = self.env.action_space.n

        self.features = nn.Sequential(
            nn.Conv2d(self.input_shape[0], 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        )
        # The width of the first linear layer depends on the observation
        # size, so it is measured from the conv stack rather than hard-coded.
        self.fc = nn.Sequential(
            nn.Linear(self.feature_size(), 512),
            nn.ReLU(),
            nn.Linear(512, self.num_actions),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the per-action values for a batch of observations.

        Args:
            x: Batch of observations; the leading dimension is the batch.

        Returns:
            Tensor with one row per observation and ``num_actions`` columns.
        """
        x = self.features(x)
        # Collapse everything except the batch dimension for the linear head.
        x = x.view(x.size(0), -1)
        return self.fc(x)

    def feature_size(self) -> int:
        """Return the flattened length of the conv stack output for one observation."""
        # Probe on the same device as the conv weights so this keeps working
        # after the module has been moved (e.g. with ``.cuda()``).
        device = next(self.features.parameters()).device
        # Only the output shape is needed, so skip building an autograd graph.
        with torch.no_grad():
            probe = torch.zeros(1, *self.input_shape, device=device)
            return self.features(probe).view(1, -1).size(1)

    def act(self, state, epsilon: float) -> int:
        """Choose an action epsilon-greedily.

        Args:
            state: A single observation (no batch dimension), convertible to
                a float32 NumPy array.
            epsilon: Exploration threshold. A uniform draw from ``[0, 1)`` is
                compared against it; when the draw is not greater than
                ``epsilon`` a uniformly random action is returned.

        Returns:
            Index of the chosen action as a Python ``int``.
        """
        if random.random() > epsilon:
            # Follow the model's own device instead of a global CUDA flag so
            # the input always lands where the weights are.
            device = next(self.parameters()).device
            obs = torch.tensor(
                np.asarray(state, dtype=np.float32), device=device
            ).unsqueeze(0)
            # Action selection is pure inference: no gradients are required.
            with torch.no_grad():
                # Call the module (not ``forward``) so registered hooks run.
                q_values = self(obs)
            return int(torch.argmax(q_values, dim=1).item())
        return random.randrange(self.num_actions)