-
Notifications
You must be signed in to change notification settings - Fork 3
/
detection.py
171 lines (137 loc) · 5.02 KB
/
detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import os
import argparse
from os.path import join
import cv2
import dlib
import torch
import torch.nn as nn
from PIL import Image as pil_image
from tqdm import tqdm
from helper_codes.transform import transform_xception
def get_boundingbox(face, width, height, scale=1.3, minsize=None):
    """Return a square crop region around a detected face.

    The longer side of the face rectangle is multiplied by ``scale`` to get
    the side length of the square, which is raised to ``minsize`` when one
    is given and the scaled side is smaller.  The square is centred on the
    face; its top-left corner is clamped to the image origin and its side is
    shrunk so that it does not extend past ``width`` / ``height``.

    Args:
        face: Object exposing ``left()``, ``top()``, ``right()`` and
            ``bottom()`` coordinates (e.g. a dlib rectangle).
        width: Frame width in pixels.
        height: Frame height in pixels.
        scale: Multiplier applied to the longer face side.
        minsize: Optional lower bound for the side length; ignored if falsy.

    Returns:
        Tuple ``(x, y, side)``: top-left corner and side length of the crop.
    """
    left, top = face.left(), face.top()
    right, bottom = face.right(), face.bottom()

    side = int(max(right - left, bottom - top) * scale)
    if minsize and side < minsize:
        side = minsize

    mid_x = (left + right) // 2
    mid_y = (top + bottom) // 2

    # Clamp the top-left corner so it never falls outside the image.
    corner_x = max(int(mid_x - side // 2), 0)
    corner_y = max(int(mid_y - side // 2), 0)

    # Shrink the square if it would run past the right or bottom edge.
    side = min(width - corner_x, side)
    side = min(height - corner_y, side)
    return corner_x, corner_y, side
def preprocess_image(image, cuda=False):
    """Turn an OpenCV BGR image into a single-item batch for the network.

    The image is converted from BGR to RGB, wrapped in a PIL image, passed
    through the ``'test'`` entry of ``transform_xception`` and given a
    leading batch dimension via ``unsqueeze(0)``.

    Args:
        image: Image array in BGR channel order, as produced by OpenCV.
        cuda: When true, the result is moved to the GPU with ``.cuda()``.

    Returns:
        The transformed image with a leading batch dimension (presumably a
        ``torch.Tensor``; depends on what the project transform returns).
    """
    # OpenCV stores channels as BGR; the PIL-based transform expects RGB.
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Same preprocessing pipeline the project registers under the 'test' key.
    to_network_input = transform_xception['test']

    # The network expects a batch, so add a leading dimension.
    batch = to_network_input(pil_image.fromarray(rgb)).unsqueeze(0)
    return batch.cuda() if cuda else batch
def predict_with_model(image, model, post_function=nn.Softmax(dim=1),
                       cuda=False):
    """Classify a single image and return the predicted class and scores.

    The image is preprocessed with ``preprocess_image``, run through
    ``model``, and the raw output is passed through ``post_function``.

    Args:
        image: Image array in BGR channel order (OpenCV convention).
        model: Callable network taking the preprocessed batch.
        post_function: Callable applied to the raw model output; defaults to
            a softmax over dimension 1.
        cuda: Forwarded to ``preprocess_image`` to place the input on GPU.

    Returns:
        Tuple ``(prediction, output)`` where ``prediction`` is the index of
        the maximum along dimension 1 as a Python ``int`` and ``output`` is
        the post-processed model output.
    """
    # Preprocess
    preprocessed_image = preprocess_image(image, cuda)

    # Model prediction
    output = post_function(model(preprocessed_image))

    # Index of the highest score.  ``.item()`` extracts the single value
    # directly; the previous ``float(tensor.cpu().numpy())`` relied on
    # converting a 1-element, 1-D ndarray to a scalar, which NumPy >= 1.25
    # deprecates.
    _, prediction = torch.max(output, 1)
    return int(prediction.item()), output
def test_full_image_network(video_path, model_path, output_path, fast,
                            start_frame=0, end_frame=None, cuda=False):
    """Classify the face in each frame of a video and write an annotated copy.

    For every processed frame the first face returned by dlib's frontal face
    detector is cropped (see ``get_boundingbox``), classified with
    ``predict_with_model`` and the frame is annotated with the scores, a
    ``'fake'``/``'real'`` label and a box before being written to
    ``<output_path>/<video name>.avi`` (MJPG).

    Args:
        video_path: Path of the input video; the output file name is derived
            from the part after the last ``'/'`` and before the first ``'.'``.
        model_path: Path of a model saved with ``torch.save``.  When ``None``
            a freshly initialised model is requested from ``model_selection``.
        output_path: Directory for the annotated video (created if missing).
        fast: When true, the frame counter and progress bar advance by 10 per
            frame read instead of 1.  Frames are still read consecutively, so
            ``end_frame`` is reached after roughly a tenth as many reads.
            NOTE(review): confirm whether real frame skipping was intended.
        start_frame: Frames are skipped until the counter reaches this value.
        end_frame: Counter value at which processing stops; defaults to the
            frame count reported by OpenCV.  The frame on which the counter
            reaches ``end_frame`` is annotated but not written.
        cuda: Run the model and inputs on the GPU.

    Returns:
        ``{"score": <percentage of face frames predicted fake>,
        "file": <output file name>}``.  The score is ``0.0`` when no face was
        detected in any processed frame.  Returns ``None`` (after printing a
        message) when no frame was processed at all.

    Raises:
        AssertionError: If ``start_frame`` is not below ``num_frames - 1``.
    """
    print('Starting: {}'.format(video_path))

    # Read and write
    reader = cv2.VideoCapture(video_path)
    video_fn = video_path.split('/')[-1].split('.')[0] + '.avi'
    os.makedirs(output_path, exist_ok=True)
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    fps = reader.get(cv2.CAP_PROP_FPS)
    num_frames = int(reader.get(cv2.CAP_PROP_FRAME_COUNT))
    writer = None
    pbar = None

    # Counters: frames predicted fake / frames in which a face was found.
    fake_frames = 0
    face_frames = 0

    try:
        # Face detector
        face_detector = dlib.get_frontal_face_detector()

        # Load model
        if model_path is not None:
            # SECURITY NOTE: torch.load unpickles the file and can execute
            # arbitrary code; only load checkpoints from trusted sources.
            # Load to CPU first and move to the GPU only when requested, so
            # the model and the inputs always end up on the same device.
            model = torch.load(model_path, map_location='cpu')
            print('Model found in {}'.format(model_path))
        else:
            # NOTE(review): model_selection is not imported in this file, so
            # this fallback raises NameError until the import is added.
            model, *_ = model_selection(modelname='xception',
                                        num_out_classes=2)
            print('No model found, initializing random model.')
        if cuda:
            model = model.cuda()
        # Inference only: disable dropout / batch-norm statistic updates.
        model.eval()

        # Text variables
        font_face = cv2.FONT_HERSHEY_SIMPLEX
        thickness = 2
        font_scale = 1

        # Frame numbers and length of output video
        frame_num = 0
        step = 10 if fast else 1
        assert start_frame < num_frames - 1
        end_frame = end_frame if end_frame else num_frames
        pbar = tqdm(total=end_frame - start_frame)

        while reader.isOpened():
            _, image = reader.read()
            if image is None:
                break

            frame_num += step
            pbar.update(step)
            if frame_num < start_frame:
                continue

            height, width = image.shape[:2]

            # The writer is created lazily because it needs the frame size.
            if writer is None:
                writer = cv2.VideoWriter(join(output_path, video_fn), fourcc,
                                         fps, (width, height))

            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            faces = face_detector(gray, 1)
            if len(faces):
                # Only the first detection is used when several faces exist.
                face = faces[0]
                crop_x, crop_y, size = get_boundingbox(face, width, height)
                cropped_face = image[crop_y:crop_y + size,
                                     crop_x:crop_x + size]

                # Prediction using our model
                prediction, output = predict_with_model(cropped_face, model,
                                                        cuda=cuda)
                if prediction == 1:
                    fake_frames += 1
                face_frames += 1

                # Annotate using the raw detector rectangle, not the crop.
                x = face.left()
                y = face.top()
                w = face.right() - x
                h = face.bottom() - y
                label = 'fake' if prediction == 1 else 'real'
                color = (0, 255, 0) if prediction == 0 else (0, 0, 255)
                output_list = ['{0:.2f}'.format(float(score)) for score in
                               output.detach().cpu().numpy()[0]]
                cv2.putText(image, str(output_list) + '=>' + label,
                            (x, y + h + 30), font_face, font_scale,
                            color, thickness, 2)
                # Draw box over face
                cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)

            if frame_num >= end_frame:
                break

            writer.write(image)
    finally:
        # Release every handle even if detection or inference fails.
        if pbar is not None:
            pbar.close()
        reader.release()
        if writer is not None:
            writer.release()

    if writer is None:
        print('Input video file was empty')
        return None

    # Guard against videos in which no face was ever detected.
    score = fake_frames / float(face_frames) * 100 if face_frames else 0.0
    out = {}
    out["score"] = score
    out["file"] = video_fn
    return out