/
app.py
89 lines (67 loc) · 2.42 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/env python2.7
# encoding=utf-8
from __future__ import unicode_literals
import youtube_dl
import re, os
import requests
import json
from flask import Flask
from flask import request
from flask import render_template
from celery import Celery
# One Flask instance, reachable under both ``app`` and ``application``.
app = Flask(__name__)
application = app

# Celery's broker and result backend both use the same local Redis database.
app.config.update({
    'CELERY_BROKER_URL': 'redis://localhost:6379/0',
    'CELERY_RESULT_BACKEND': 'redis://localhost:6379/0',
})

# Create the Celery app (named after the Flask app) on the configured broker,
# then copy the whole Flask config into Celery's configuration.
celery = Celery(app.name, broker=app.config['CELERY_BROKER_URL'])
celery.conf.update(app.config)
@celery.task
##extract video id from youtube url
def youtube_url_validation(url):
    """Extract the 11-character video id from a YouTube URL.

    The URL must start (scheme and ``www.`` are optional) with a host made of
    ``youtube``, ``youtu`` or ``youtube-nocookie`` followed by ``.com`` or
    ``.be``.  The id may follow ``watch?v=``, ``embed/``, ``v/``, any path
    ending in ``?v=``, or sit directly after the host's slash.

    Args:
        url: The URL text to inspect.  ``None`` or an empty string is treated
            as "not a YouTube URL" instead of raising.

    Returns:
        The video id as a string, or ``None`` when the URL is not recognised.
    """
    if not url:
        return None

    # Every fragment is a raw string so that ``\.`` and ``\?`` reach the regex
    # engine unchanged rather than being parsed as (invalid) string escapes.
    youtube_regex = (
        r'(https?://)?(www\.)?'
        r'(youtube|youtu|youtube-nocookie)\.(com|be)/'
        r'(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})')

    youtube_regex_match = re.match(youtube_regex, url)
    if youtube_regex_match is None:
        return None

    # Group 6 is the final capture: the 11 characters forming the video id.
    return youtube_regex_match.group(6)
@app.route('/')
def my_form():
    """Render the ``input.html`` template for requests to ``/``."""
    page = render_template("input.html")
    return page
@app.route('/', methods=['POST'])
def processyoutube():
    """Download a video's audio, send it to speech recognition, render the result.

    Reads ``video_url`` and ``video_lan`` from the submitted form, lets
    youtube-dl download the audio and convert it to ``temp/<id>.wav``, posts
    that file to the Watson Speech to Text ``recognize`` endpoint using the
    ``<video_lan>_BroadbandModel`` model, and renders ``results.html`` with
    the decoded JSON response and the video id.

    The temporary WAV file is closed and deleted even when the upload fails.

    Returns:
        The rendered ``results.html`` page, or a plain-text 400 response when
        no video id can be determined for the submitted URL.
    """
    video_url = request.form['video_url']
    video_lan = request.form['video_lan']

    # youtube-dl options: fetch the best audio stream and have FFmpeg extract
    # it as WAV, named after the video id inside ``temp/``.
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': 'temp/%(id)s.%(ext)s',
        'audioformat': 'wav',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
            'preferredquality': '192',
        }],
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(video_url, download=True)

    # Extract the id used in the file name.  When the URL form is not
    # recognised by the regex, fall back to the id youtube-dl reports (the
    # output template above names the file after it) instead of crashing
    # while building the path from ``None``.
    video_id = youtube_url_validation(video_url) or info_dict.get('id')
    if not video_id:
        return 'Could not determine a video id for the given URL.', 400

    # File produced by the FFmpegExtractAudio post-processor.
    filename = 'temp/{0}.wav'.format(video_id)

    # Speech recognition request.  The query string is passed via ``params``
    # so the user-supplied language value is URL-encoded, not spliced in raw.
    model = '_'.join([video_lan, 'BroadbandModel'])
    url = 'https://stream.watsonplatform.net/speech-to-text/api/v1/recognize'
    params = {'continuous': 'true', 'model': model}

    # SECURITY: service credentials are hard-coded in source.  They can be
    # overridden with WATSON_USERNAME / WATSON_PASSWORD; the literals remain
    # only as a backward-compatible fallback and should be rotated and removed.
    username = os.environ.get('WATSON_USERNAME',
                              '25ce03dd-7fbe-4b9e-8f3f-29a434ed9fe9')
    password = os.environ.get('WATSON_PASSWORD', 'QxdU3aIayU7X')
    headers = {'Content-Type': 'audio/wav'}

    try:
        # ``with`` guarantees the handle is closed before the file is removed.
        with open(filename, 'rb') as audio:
            r = requests.post(url, params=params, data=audio,
                              headers=headers, auth=(username, password))
    finally:
        # Always clean up the temporary audio; the existence check keeps a
        # failed ``open`` from being masked by an error from ``os.remove``.
        if os.path.exists(filename):
            os.remove(filename)

    data = r.json()
    return render_template("results.html", data=data, video_id=video_id)
if __name__ == '__main__':
    # Executed only when the file is run as a script: switch the Flask app
    # into debug mode first, then start its built-in server.
    application.debug = True
    application.run()