-
Notifications
You must be signed in to change notification settings - Fork 5
/
test.py
88 lines (67 loc) · 2.77 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/env python3
import numpy as np
import tensorflow as tf
import sys, os
import argparse
from model import Net
############ Parameters ############
# Padding window: pad_dataset adds PATCH_SIZE - 1 positions to each end of a sequence.
PATCH_SIZE = 10

# Sequence length after padding: 206 input positions plus PATCH_SIZE - 1 on each side.
SEQ_LEN = 206 + 2 * (PATCH_SIZE - 1)

# Presumably the size of the one-hot base axis (A, G, T, C) -- not referenced
# in this file; confirm against the model definition.
NUM_CHANNELS = 4

# Number of classes used when labels are one-hot encoded.
NUM_LABELS = 2
############ **************** ############
def pad_dataset(dataset, labels=None, patch_size=None, num_labels=None):
    """Pad sequences along axis 1 with 0.25 and one-hot encode the labels.

    Each sequence grows from ``length`` to ``length + 2 * patch_size - 2``:
    ``patch_size - 1`` positions filled with 0.25 are added at both ends.

    Args:
        dataset: 4-D array; axis 0 indexes sequences and axis 1 is the
            position axis that gets padded.
        labels: Optional 1-D sequence of integer class indices.  ``None`` or
            an empty sequence means "no labels".
        patch_size: Padding window; defaults to the module-level PATCH_SIZE.
        num_labels: Number of classes for the one-hot encoding; defaults to
            the module-level NUM_LABELS.

    Returns:
        ``(padded, labels)`` where ``padded`` is a float32 array and
        ``labels`` is a float32 one-hot array of shape
        ``(len(labels), num_labels)``.  When no labels were supplied the
        empty input is returned unchanged (``[]`` if ``labels`` was None).

    Raises:
        ValueError: if ``patch_size`` is smaller than 1.
    """
    if patch_size is None:
        patch_size = PATCH_SIZE
    if num_labels is None:
        num_labels = NUM_LABELS
    if patch_size < 1:
        raise ValueError('patch_size must be >= 1, got %r' % (patch_size,))

    dataset = np.asarray(dataset)
    margin = patch_size - 1
    count, length, height, channels = dataset.shape

    new_dataset = np.full(
        (count, length + 2 * margin, height, channels), 0.25, dtype=np.float32)
    # Explicit end index: a negative-stop slice would be empty when margin is 0.
    new_dataset[:, margin:margin + length, :, :] = dataset

    if labels is None:
        labels = []
    # Test the length rather than comparing with []: `ndarray != []` is an
    # elementwise comparison that raises or yields an empty (falsy) array.
    if len(labels) > 0:
        class_ids = np.asarray(labels)
        labels = (np.arange(num_labels) == class_ids[:, None]).astype(np.float32)
    return new_dataset, labels
def get_accuracy(predictions, labels):
    """Return the percentage of rows whose arg-max class matches the label.

    The class of a row is the index of its largest entry along axis 1, for
    both ``predictions`` and ``labels``.  The count of matching rows is
    scaled to a percentage of ``predictions.shape[0]``.
    """
    predicted_classes = np.argmax(predictions, axis=1)
    expected_classes = np.argmax(labels, axis=1)
    num_correct = np.sum(predicted_classes == expected_classes)
    num_rows = predictions.shape[0]
    return 100.0 * num_correct / num_rows
def read_file(filename, seq_len=206):
    """Read one sequence per line from a text file and one-hot encode it.

    Each character is compared against the alphabet ``A, G, T, C`` (in that
    channel order); any other character yields an all-zero row.  Surrounding
    whitespace (including a trailing ``\\r``) is stripped and blank lines are
    skipped.

    Args:
        filename: Path to the text file.
        seq_len: Required length of every sequence (default 206).

    Returns:
        float32 array of shape ``(num_sequences, seq_len, 1, 4)``.

    Raises:
        ValueError: if a sequence does not have exactly ``seq_len``
            characters, or if the file contains no sequences.
    """
    alphabet = np.array(['A', 'G', 'T', 'C'])
    data = []
    with open(filename, 'r') as f:
        for line_no, line in enumerate(f, start=1):
            sequence = line.strip()
            if not sequence:
                continue
            # Reject wrong-length lines up front; otherwise the final reshape
            # could silently re-chunk several short lines into one sequence.
            if len(sequence) != seq_len:
                raise ValueError(
                    '%s, line %d: expected %d characters, got %d'
                    % (filename, line_no, seq_len, len(sequence)))
            seq = np.array(list(sequence), dtype='|U1').reshape(-1, 1)
            # Broadcasting (seq_len, 1) against (4,) gives one row per base.
            data.append((seq == alphabet).astype(np.float32))
    if not data:
        raise ValueError('No sequences found in %s' % filename)
    return np.stack(data).reshape([-1, seq_len, 1, 4])
def main():
    """Load a trained model, run it on a data file and report the results.

    Command-line arguments:
        data:  ``.txt`` file with one sequence per line, or ``.npz`` file
               holding ``test_dataset`` and, optionally, ``test_labels``.
        wts:   trained model weights (``.npz`` file).
        --out: optional path; one predicted class per line is written to it.

    Test accuracy is printed only when labels are available.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('data', help='Path to data file, can be .txt file containing sequeces or .npz file containing one-hot encoded sequences')
    parser.add_argument('wts', help='Trained model (.npz file)')
    parser.add_argument('--out', default=None, help='Save predictions to (.txt file)')
    opts = parser.parse_args()

    # Reject unsupported inputs before the model is built; previously this
    # case fell through and failed later with a NameError on `dataset`.
    if not opts.data.endswith(('.npz', '.txt')):
        parser.error('data must be a .npz or .txt file, got %r' % opts.data)

    # Build model
    sess = tf.Session()
    model = Net()

    # Load trained model
    model.load_weights(opts.wts, sess)
    print('\n########################')
    print('Model loaded from %s.'%opts.wts)

    # Load and pad data
    if opts.data.endswith('.npz'):
        # The context manager closes the archive once the arrays are read.
        with np.load(opts.data) as data:
            dataset = data['test_dataset']
            labels = data['test_labels'] if 'test_labels' in data.files else []
    else:
        dataset = read_file(opts.data)
        labels = []
    dataset, labels = pad_dataset(dataset, labels)
    print("Read %d sequences and %d labels from %s."%(len(dataset), len(labels), opts.data))

    predictions = model.get_prediction(sess, dataset, istrain=False)

    # Test the length: `labels != []` on the one-hot array is an elementwise
    # comparison and raises on recent NumPy versions.
    if len(labels) > 0:
        accuracy = get_accuracy(predictions, labels)
        print('\nTest accuracy: %.1f%%'%accuracy)

    if opts.out is not None:
        predicted_classes = np.argmax(predictions, 1)
        # NOTE(review): class index 0 is written as 'T' and any non-zero
        # index as 'F' -- confirm this matches the training label convention.
        symbols = np.where(predicted_classes, 'F', 'T')
        with open(opts.out, 'w') as f:
            f.writelines(symbol + '\n' for symbol in symbols)
        print('\nPredictions wrote to %s.'%opts.out)


if __name__ == '__main__':
    main()