/
extra_functions.py
71 lines (58 loc) · 2.28 KB
/
extra_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# -*- coding: utf-8 -*-
"""
This script contains all the supplementary functions needed to execute the
CoNNear IHC-ANF models in python.
@author: Fotios Drakopoulos, UGent, 12/2020
"""
import numpy as np
import tensorflow as tf
import scipy.signal as sp_sig
import scipy.io.wavfile
from keras.models import model_from_json, Model
from keras.utils import CustomObjectScope
from keras.initializers import glorot_uniform
def load_connear_model(modeldir,json_name="/Gmodel.json",weights_name="/Gmodel.h5",crop=1,name=[]):
# Function to load each CoNNear model using tensorflow and keras
#print ("loading model from " + modeldir )
json_file = open (modeldir + json_name, "r")
loaded_model_json = json_file.read()
json_file.close()
with CustomObjectScope({'GlorotUniform': glorot_uniform()}):
model = model_from_json(loaded_model_json, custom_objects={'tf': tf})
if name:
try:
model.name = name
except: # fix tensorflow 2 compatibility
model._name = name
model.load_weights(modeldir + weights_name)
if not crop: # for connecting the different modules
model=model.layers[1]
if name:
model=Model(model.layers[0].input, model.layers[-2].output,name=name)
else:
model=Model(model.layers[0].input, model.layers[-2].output) # get uncropped output
#else:
# model=Model(model.layers[0].input, model.layers[-1].output) # get cropped output
return model
def rms (x, axis=0):
# compute rms of a matrix
sq = np.mean(np.square(x), axis = axis)
return np.sqrt(sq)
def wavfile_read(wavfile,fs=[]):
# read a wavfile and normalize it
# if fs is given the signal is resampled to the given sampling frequency
fs_signal, speech = scipy.io.wavfile.read(wavfile)
if not fs:
fs=fs_signal
if speech.dtype != 'float32':
if speech.dtype == 'int16':
nb_bits = 16 # -> 16-bit wav files
elif speech.dtype == 'int32':
nb_bits = 32 # -> 32-bit wav files
max_nb_bit = float(2 ** (nb_bits - 1))
speech = speech / (max_nb_bit + 1.0) # scale the signal to [-1.0,1.0]
if fs_signal != fs :
signalr = sp_sig.resample_poly(speech, fs, fs_signal)
else:
signalr = speech
return signalr, fs