-
Notifications
You must be signed in to change notification settings - Fork 12
/
webcam.py
128 lines (105 loc) · 3.88 KB
/
webcam.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import cv2
import numpy as np
from model_ML import create_model_pretrain
import time
from data_helper import calculateRGBdiff
###### Parameters setting
dim = (224, 224)      # spatial input size expected by MobileNetV2
n_sequence = 8        # frames per sliding window fed to the model
n_channels = 3        # RGB
n_output = 18         # number of action classes
weights_path = 'weight-513-0.80-0.89.hdf5'  # pretrained weight file
#######

### Build the network and restore the pretrained weights.
model = create_model_pretrain(dim, n_sequence, n_channels, n_output)
model.load_weights(weights_path)

### Empty sliding window of preprocessed frames: (seq, dim0, dim1, channel).
frame_window = np.empty((0, *dim, n_channels))

### State machine used to debounce noisy predictions between actions.
RUN_STATE = 0
WAIT_STATE = 1
SET_NEW_ACTION_STATE = 2
state = RUN_STATE
previous_action = -1          # -1 means "no action"
text_show = 'no action'

### Human-readable labels, index-aligned with the model's output units.
### (Built programmatically; resulting strings are identical to the
### original hand-written '<index> <name>' list.)
_action_names = [
    'Horizontal arm wave', 'High arm wave', 'Two hand wave',
    'Catch Cap', 'High throw', 'Draw X',
    'Draw Tick', 'Toss Paper', 'Forward Kick',
    'Side Kick', 'Take Umbrella', 'Bend',
    'Hand Clap', 'Walk', 'Phone Call',
    'Drink', 'Sit down', 'Stand up',
]
class_text = ['{} {}'.format(i, name)
              for i, name in enumerate(_action_names, start=1)]
### Capture from the default webcam and classify actions in real time.
cap = cv2.VideoCapture(0)
font = cv2.FONT_HERSHEY_SIMPLEX  # hoisted: loop-invariant constant
start_time = time.time()
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:  # guard clause: camera closed or read failed
        break
    # Preprocess: resize to the network's input size and scale to [0, 1].
    new_f = cv2.resize(frame, dim)
    new_f = new_f / 255.0
    new_f_rs = np.reshape(new_f, (1, *new_f.shape))
    frame_window = np.append(frame_window, new_f_rs, axis=0)
    ### If the sliding window is full (n_sequence frames), run recognition.
    if frame_window.shape[0] >= n_sequence:
        frame_window_dif = calculateRGBdiff(frame_window.copy())
        frame_window_new = frame_window_dif.reshape(1, *frame_window_dif.shape)
        ### Predict class probabilities for the current window.
        output = model.predict(frame_window_new)[0]
        predict_ind = np.argmax(output)
        ### Low-confidence predictions are treated as noise (no action).
        if output[predict_ind] < 0.55:
            new_action = -1  # no action (noise)
        else:
            new_action = predict_ind  # action detected
        ### State machine to filter noise between actions (for stability).
        ### RUN_STATE: normal; switch to WAIT_STATE when the action changes.
        if state == RUN_STATE:
            if new_action != previous_action:  # action change
                state = WAIT_STATE
                start_time = time.time()
            else:
                if previous_action == -1:
                    text_show = 'no action'
                else:
                    text_show = "{: <22} {:.2f} ".format(class_text[previous_action],
                                                         output[previous_action])
                print(text_show)
        ### WAIT_STATE: the new action must persist 0.5 s before it is accepted.
        elif state == WAIT_STATE:
            dif_time = time.time() - start_time
            if dif_time > 0.5:  # waited 0.5 second
                state = RUN_STATE
                previous_action = new_action
        ### Overlay the current label on the displayed frame.
        cv2.putText(frame, text_show, (10, 450), font, 0.8, (0, 255, 0), 2, cv2.LINE_AA)
        ### Shift the sliding window one frame forward.
        frame_window = frame_window[1:n_sequence]
    cv2.imshow('Frame', frame)
    # Press Q on the keyboard to exit.
    if cv2.waitKey(25) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()  # fix: close the display window (previously leaked)