/
inspect_camera.py
111 lines (91 loc) · 3.06 KB
/
inspect_camera.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import argparse
import numpy as np
import cv2
import sys
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
import os
class Model(nn.Module):
    """Small CNN classifier for 64x64 RGB images.

    Architecture: conv(3->32) -> ReLU -> conv(32->64) -> ReLU -> maxpool(2)
    -> dropout -> flatten -> fc(128) -> ReLU -> dropout -> fc(num_classes)
    -> log-softmax.
    """

    def __init__(self, num_classes=None):
        """Build the network.

        Args:
            num_classes: number of output classes. Defaults to the
                module-level ``NUM_CLASSES`` global for backward
                compatibility with the original parameterless call.
        """
        super(Model, self).__init__()
        if num_classes is None:
            # Legacy behavior: read the global set in the __main__ block.
            num_classes = NUM_CLASSES
        self.conv1 = nn.Conv2d(3, 32, (3, 3), 1, 1)
        self.conv2 = nn.Conv2d(32, 64, (3, 3), 1, 1)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        # 64x64 input, same-padded convs, one 2x2 max-pool -> 64 maps of 32x32.
        self.fc1 = nn.Linear(64 * 32 * 32, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return per-class log-probabilities for a (N, 3, 64, 64) batch."""
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output
if __name__ == '__main__':
    # Parse command-line options: model checkpoint and label file paths.
    parser = argparse.ArgumentParser(description='pytorch')
    parser.add_argument('-m', '--model', default='./model/janken.pth')
    parser.add_argument('-l', '--labels', default='./model/label.txt')
    args = parser.parse_args()

    # Load class labels, one per line; index i maps to class i.
    labels = []
    with open(args.labels, 'r') as f:
        for line in f:
            labels.append(line.rstrip())
    NUM_CLASSES = len(labels)  # consumed by Model.__init__ via module global
    print(labels)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # NOTE(review): torch.load deserializes a pickle -- only load model
    # files from a trusted source.
    model = torch.load(args.model, map_location=device)
    model.eval()
    print(model)

    cam = cv2.VideoCapture(0)
    count_max = 0  # classify once every (count_max + 1) frames
    count = 0
    while True:
        ret, capture = cam.read()
        if not ret:
            print('error')
            break
        key = cv2.waitKey(1)
        if key == 27:  # ESC terminates the loop
            break
        count += 1
        if count > count_max:
            # Convert the BGR frame to a (1, 3, 64, 64) float tensor.
            # reference site:
            # https://discuss.pytorch.org/t/how-to-classify-single-image-using-loaded-net/1411/29
            image = capture.copy()
            image = cv2.resize(image, (64, 64))
            loader = transforms.Compose([transforms.ToTensor()])
            image = loader(image).float()
            image = image.unsqueeze(0)
            image = image.to(device)

            start = time.time()
            # Inference only: disable autograd bookkeeping for speed/memory.
            with torch.no_grad():
                output = model(image)
            print(output)
            preds = output.argmax(dim=1, keepdim=True)
            print(preds)
            elapsed_time = time.time() - start
            # .item() extracts the plain Python int needed for list indexing.
            pred_label = labels[preds.item()]

            # Overlay inference time (seconds) on the frame.
            speed_info = 'speed: %f' % elapsed_time
            cv2.putText(capture, speed_info, (10, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 1, cv2.LINE_AA)
            # Overlay the predicted class label.
            cv2.putText(capture, pred_label, (10, 100),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 1, cv2.LINE_AA)
            cv2.imshow('pytorchi inspector', capture)
            count = 0

    cam.release()
    cv2.destroyAllWindows()