Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use the HTTPS speech-to-text API instead of HTTP #135

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ dist/
.DS_Store
MANIFEST
*#*
.vscode
.vscode
/.idea
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Autosub is a utility for automatic speech recognition and subtitle generation. I
$ autosub -h
usage: autosub [-h] [-C CONCURRENCY] [-o OUTPUT] [-F FORMAT] [-S SRC_LANGUAGE]
[-D DST_LANGUAGE] [-K API_KEY] [--list-formats]
[--list-languages]
[--list-languages] [-htp]
[source_path]

positional arguments:
Expand All @@ -40,6 +40,8 @@ optional arguments:
subtitle translation)
--list-formats List all available subtitle formats
--list-languages List all available source/destination languages
-htp, --http-speech-to-text-api
Change the speech-to-text api url into the http one
```

### License
Expand Down
48 changes: 36 additions & 12 deletions autosub/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
DEFAULT_CONCURRENCY = 10
DEFAULT_SRC_LANGUAGE = 'en'
DEFAULT_DST_LANGUAGE = 'en'

DEFAULT_API_URL_SCHEME = 'https://'

def percentile(arr, percent):
"""
Expand Down Expand Up @@ -84,16 +84,19 @@ class SpeechRecognizer(object): # pylint: disable=too-few-public-methods
"""
Class for performing speech-to-text for an input FLAC file.
"""
def __init__(self, language="en", rate=44100, retries=3, api_key=GOOGLE_SPEECH_API_KEY):
def __init__(self, api_url, language="en",
rate=44100, retries=3, api_key=GOOGLE_SPEECH_API_KEY):
# pylint: disable=too-many-arguments
self.language = language
self.rate = rate
self.api_url = api_url
self.api_key = api_key
self.retries = retries

def __call__(self, data):
try:
for _ in range(self.retries):
url = GOOGLE_SPEECH_API_URL.format(lang=self.language, key=self.api_key)
url = self.api_url.format(lang=self.language, key=self.api_key)
headers = {"Content-Type": "audio/x-flac; rate=%d" % self.rate}

try:
Expand Down Expand Up @@ -237,6 +240,7 @@ def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments
src_language=DEFAULT_SRC_LANGUAGE,
dst_language=DEFAULT_DST_LANGUAGE,
subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
api_url_scheme=DEFAULT_API_URL_SCHEME,
api_key=None,
):
"""
Expand All @@ -249,6 +253,7 @@ def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments
pool = multiprocessing.Pool(concurrency)
converter = FLACConverter(source_path=audio_filename)
recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
api_url=api_url_scheme + GOOGLE_SPEECH_API_URL,
api_key=GOOGLE_SPEECH_API_KEY)

transcripts = []
Expand Down Expand Up @@ -375,6 +380,9 @@ def main():
action='store_true')
parser.add_argument('--list-languages', help="List all available source/destination languages",
action='store_true')
parser.add_argument('-htp', '--http-speech-to-text-api',
help="Change the speech-to-text api url into the http one",
action='store_true')

args = parser.parse_args()

Expand All @@ -394,16 +402,32 @@ def main():
return 1

try:
subtitle_file_path = generate_subtitles(
source_path=args.source_path,
concurrency=args.concurrency,
src_language=args.src_language,
dst_language=args.dst_language,
api_key=args.api_key,
subtitle_file_format=args.format,
output=args.output,
)
if args.http_speech_to_text_api:
print("Using http url instead of https one. ")
subtitle_file_path = generate_subtitles(
source_path=args.source_path,
concurrency=args.concurrency,
src_language=args.src_language,
dst_language=args.dst_language,
api_url_scheme="http://",
api_key=args.api_key,
subtitle_file_format=args.format,
output=args.output,
)

else:
subtitle_file_path = generate_subtitles(
source_path=args.source_path,
concurrency=args.concurrency,
src_language=args.src_language,
dst_language=args.dst_language,
api_key=args.api_key,
subtitle_file_format=args.format,
output=args.output,
)

print("Subtitles file created at {}".format(subtitle_file_path))

except KeyboardInterrupt:
return 1

Expand Down
2 changes: 1 addition & 1 deletion autosub/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from __future__ import unicode_literals

GOOGLE_SPEECH_API_KEY = "AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw"
GOOGLE_SPEECH_API_URL = "http://www.google.com/speech-api/v2/recognize?client=chromium&lang={lang}&key={key}" # pylint: disable=line-too-long
GOOGLE_SPEECH_API_URL = "www.google.com/speech-api/v2/recognize?client=chromium&lang={lang}&key={key}" # pylint: disable=line-too-long

LANGUAGE_CODES = {
'af': 'Afrikaans',
Expand Down