/
SpeechToText.py
119 lines (116 loc) · 5.33 KB
/
SpeechToText.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import os
import shutil
from tkinter import Tk
from tkinter.filedialog import askopenfilename

import speech_recognition as sr
from natsort import natsorted
from pydub import AudioSegment
#main menu
# Interactive speech-to-text script. Option 1 transcribes a file picked in a
# dialog (falling back to transcribing it in pieces when the recognition
# request fails); option 2 transcribes every .wav found in Paragraphs/.

SPLIT_DIR = "Split/"            # scratch folder holding the pieces of a split file
PARAGRAPH_DIR = "Paragraphs/"   # folder read by menu option 2
SPLIT_PARTS = 4                 # number of pieces used by the split fallback


def _clear_folder(folder):
    """Remove every entry inside *folder*, creating the folder if it is missing.

    Deletion is best-effort: an entry that cannot be removed is reported on
    stdout and skipped so the remaining entries are still processed.
    """
    os.makedirs(folder, exist_ok=True)
    for name in os.listdir(folder):
        path = os.path.join(folder, name)
        try:
            if os.path.isfile(path) or os.path.islink(path):
                os.unlink(path)
            elif os.path.isdir(path):
                shutil.rmtree(path)
        except OSError as e:
            print('Failed to delete %s. Reason: %s' % (path, e))


def _split_audio(filename, folder, parts):
    """Export *filename* as *parts* consecutive WAV pieces inside *folder*.

    Pieces are written as split1.wav .. split<parts>.wav.
    """
    sound = AudioSegment.from_file(filename)
    step = len(sound) // parts
    for i in range(parts):
        start = i * step
        # The last piece runs to the end of the audio so the remainder left
        # over by the integer division is not silently dropped.
        end = len(sound) if i == parts - 1 else start + step
        piece = sound[start:end]
        piece.export(os.path.join(folder, "split%d.wav" % (i + 1)), format="wav")


def _list_wavs(folder):
    """Return the paths of the .wav files in *folder*, in natural number order.

    Raises FileNotFoundError (from os.listdir) when *folder* does not exist.
    """
    paths = [
        os.path.join(folder, name)
        for name in os.listdir(folder)
        if name.endswith(".wav")
    ]
    #natural number sort
    return natsorted(paths)


def _transcribe_files(recognizer, paths, label):
    """Transcribe each file in *paths* and print one numbered line per file.

    *label* prefixes each successful transcript ("Split" / "Paragraph").
    Recognition failures are reported per file and do not stop the loop.
    """
    for count, audio_path in enumerate(paths, start=1):
        # Re-export the file through pydub before reading it; presumably this
        # normalizes it into a WAV layout sr.AudioFile accepts -- TODO confirm.
        sound = AudioSegment.from_file(audio_path)
        sound.export(audio_path, format="wav")
        with sr.AudioFile(audio_path) as source:
            try:
                audio = recognizer.record(source)  # read the entire audio file
                print(label + " " + str(count) + ": " + recognizer.recognize_google(audio))
            except sr.UnknownValueError:
                print("***Google Speech Recognition could not understand audio in paragraph " + str(count) + "***")
            except sr.RequestError as e:
                print("Could not request results from Google Speech Recognition service; {0}".format(e))


def _transcribe_chosen_file(recognizer):
    """Ask the user for an audio file and print its transcript.

    When the recognition request itself fails, the file is split into
    SPLIT_PARTS pieces under SPLIT_DIR and each piece is transcribed on its
    own. Errors raised during that fallback are left to propagate so they are
    visible rather than silently discarded.
    """
    #choose a file if human script
    Tk().withdraw()  # withdraw the Tk root window before opening the dialog
    filename = askopenfilename()
    if not filename:
        # The dialog returned an empty selection; there is nothing to read.
        print("No file selected.")
        return

    try:
        with sr.AudioFile(filename) as source:
            audio = recognizer.record(source)  # read the entire audio file
        print("Transcript: " + recognizer.recognize_google(audio))
    except sr.UnknownValueError:
        print("*Google Speech Recognition could not understand audio of the file")
    except sr.RequestError as e:
        print("Could not request results from Google Speech Recognition service; {0}".format(e))
        print("Attempting to split into %d audio file as solution: " % (SPLIT_PARTS))
        #delete files
        _clear_folder(SPLIT_DIR)
        #create and split into files
        _split_audio(filename, SPLIT_DIR, SPLIT_PARTS)
        #iterate through sorted list from folder
        _transcribe_files(recognizer, _list_wavs(SPLIT_DIR), "Split")
    except (ValueError, OSError) as e:
        # Report a file that could not be opened or parsed as an audio
        # problem instead of letting it look like a bad menu choice.
        print("Could not read audio file %s: %s" % (filename, e))


def _transcribe_paragraphs(recognizer):
    """Transcribe every .wav file found in PARAGRAPH_DIR, in natural order."""
    try:
        paths = _list_wavs(PARAGRAPH_DIR)
    except FileNotFoundError:
        print("Folder %s was not found." % (PARAGRAPH_DIR))
        return
    _transcribe_files(recognizer, paths, "Paragraph")


while True:
    print("Choose an option to convert speech to text: ")
    print("1. Open File")
    print("2. File generated from TextToSpeech.py")
    # Only the number parsing is guarded, so a ValueError raised later while
    # processing audio is never mistaken for a bad menu entry.
    try:
        choice = int(input())
    except ValueError:
        print("Invalid input.")
        continue

    if choice == 1:
        _transcribe_chosen_file(sr.Recognizer())
        break
    if choice == 2:
        _transcribe_paragraphs(sr.Recognizer())
        break

    # Any other number: explain and show the menu again.
    print("Please choose from only 1 and 2.")
    print()