From b5f85f86673615e5ceec6a34ec38ccc91d1d7e40 Mon Sep 17 00:00:00 2001
From: Franklin Nunez
Date: Tue, 22 Sep 2020 16:16:26 -0700
Subject: [PATCH 01/12] I updated the comment on the transcribe_async file to
 reflect time limitations on local files for the long_running_recognize

---
 samples/snippets/transcribe_async.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/samples/snippets/transcribe_async.py b/samples/snippets/transcribe_async.py
index 56c7fca1..f5e656e8 100644
--- a/samples/snippets/transcribe_async.py
+++ b/samples/snippets/transcribe_async.py
@@ -45,6 +45,8 @@ def transcribe_file(speech_file):
 
     config = speech.RecognitionConfig(
         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
+
+        sample_rate_hertz=16000,
         language_code="en-US",
     )
 

From cd19e6b28e5f3fcb41b099c6bb0378876ec2bcdb Mon Sep 17 00:00:00 2001
From: Franklin Nunez
Date: Tue, 22 Sep 2020 16:31:11 -0700
Subject: [PATCH 02/12] I updated the comment on the transcribe_async file to
 reflect time limitations on local files for the long_running_recognize

---
 samples/snippets/transcribe_async.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/snippets/transcribe_async.py b/samples/snippets/transcribe_async.py
index f5e656e8..7bcb3261 100644
--- a/samples/snippets/transcribe_async.py
+++ b/samples/snippets/transcribe_async.py
@@ -38,7 +38,7 @@ def transcribe_file(speech_file):
         content = audio_file.read()
 
     """
-    Note that transcription is limited to 60 seconds audio.
+    Note that transcription is limited to a 60 seconds audio file.
     Use a GCS file for audio longer than 1 minute.
     """
     audio = speech.RecognitionAudio(content=content)

From 96688c1128c562d80cb38c3673162fd31dae8e21 Mon Sep 17 00:00:00 2001
From: Franklin Nunez
Date: Tue, 22 Sep 2020 16:52:43 -0700
Subject: [PATCH 03/12] docs: I updated the comment on the transcribe_async
 file to reflect time limitations on local files for the long_running_recognize

---
 samples/snippets/transcribe_async.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/snippets/transcribe_async.py b/samples/snippets/transcribe_async.py
index 7bcb3261..f5e656e8 100644
--- a/samples/snippets/transcribe_async.py
+++ b/samples/snippets/transcribe_async.py
@@ -38,7 +38,7 @@ def transcribe_file(speech_file):
         content = audio_file.read()
 
     """
-    Note that transcription is limited to a 60 seconds audio file.
+    Note that transcription is limited to 60 seconds audio.
     Use a GCS file for audio longer than 1 minute.
     """
     audio = speech.RecognitionAudio(content=content)

From f018e131862bbb92658b2bdf22ecec343416b5b3 Mon Sep 17 00:00:00 2001
From: Franklin Nunez
Date: Wed, 23 Sep 2020 10:57:19 -0700
Subject: [PATCH 04/12] chore: I updated the comments on the transcribe_async
 file to reflect time limitations on local files for the long_running_recognize

---
 samples/snippets/transcribe_async.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/snippets/transcribe_async.py b/samples/snippets/transcribe_async.py
index f5e656e8..7bcb3261 100644
--- a/samples/snippets/transcribe_async.py
+++ b/samples/snippets/transcribe_async.py
@@ -38,7 +38,7 @@ def transcribe_file(speech_file):
         content = audio_file.read()
 
     """
-    Note that transcription is limited to 60 seconds audio.
+    Note that transcription is limited to a 60 seconds audio file.
     Use a GCS file for audio longer than 1 minute.
     """
     audio = speech.RecognitionAudio(content=content)

From 3213aec16fc45a0c2afe36d221b4eb22847e7f72 Mon Sep 17 00:00:00 2001
From: Franklin Nunez
Date: Sat, 3 Oct 2020 00:03:42 -0700
Subject: [PATCH 05/12] fix: resolved conflicts

pick f510e8f chore: I updated the comments on the transcribe_async file to
reflect time limitations on local files for the long_running_recognize
---
 samples/snippets/beta_snippets.py          | 7 +++++--
 samples/snippets/speech_quickstart_beta.py | 1 -
 samples/snippets/transcribe_async.py       | 5 ++++-
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/samples/snippets/beta_snippets.py b/samples/snippets/beta_snippets.py
index 8d957100..52bf0d60 100644
--- a/samples/snippets/beta_snippets.py
+++ b/samples/snippets/beta_snippets.py
@@ -74,14 +74,17 @@ def transcribe_file_with_metadata():
 
     # Here we construct a recognition metadata object.
     # Most metadata fields are specified as enums that can be found
-    # in speech.enums.RecognitionMetadata
+
+    # in speech.RecognitionMetadata
     metadata = speech.RecognitionMetadata()
     metadata.interaction_type = (
         speech.RecognitionMetadata.InteractionType.DISCUSSION)
     metadata.microphone_distance = (
         speech.RecognitionMetadata.MicrophoneDistance.NEARFIELD)
     metadata.recording_device_type = (
-        speech.RecognitionMetadata.RecordingDeviceType.SMARTPHONE)
+
+        speech.RecognitionMetadata.RecordingDeviceType.SMARTPHONE)
+
     # Some metadata fields are free form strings
     metadata.recording_device_name = "Pixel 2 XL"
     # And some are integers, for instance the 6 digit NAICS code

diff --git a/samples/snippets/speech_quickstart_beta.py b/samples/snippets/speech_quickstart_beta.py
index d40e6d32..a1ad7376 100644
--- a/samples/snippets/speech_quickstart_beta.py
+++ b/samples/snippets/speech_quickstart_beta.py
@@ -27,7 +27,6 @@
 # [START speech_quickstart_beta]
 from google.cloud import speech_v1p1beta1 as speech
 
-
 def sample_recognize(storage_uri):
     """
     Performs synchronous speech recognition on an audio file

diff --git a/samples/snippets/transcribe_async.py b/samples/snippets/transcribe_async.py
index 7bcb3261..82d12d4d 100644
--- a/samples/snippets/transcribe_async.py
+++ b/samples/snippets/transcribe_async.py
@@ -38,7 +38,7 @@ def transcribe_file(speech_file):
         content = audio_file.read()
 
     """
-    Note that transcription is limited to a 60 seconds audio file.
+    Note that transcription is limited to 60 seconds audio.
     Use a GCS file for audio longer than 1 minute.
     """
     audio = speech.RecognitionAudio(content=content)
@@ -46,7 +46,10 @@ def transcribe_file(speech_file):
 
     config = speech.RecognitionConfig(
         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
 
+    audio = speech.RecognitionAudio(content=content)
+    config = speech.RecognitionConfig(
+        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
         sample_rate_hertz=16000,
         language_code="en-US",
     )
""" audio = speech.RecognitionAudio(content=content) @@ -46,7 +46,10 @@ def transcribe_file(speech_file): config = speech.RecognitionConfig( encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16, + audio = speech.RecognitionAudio(content=content) + config = speech.RecognitionConfig( + encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16, sample_rate_hertz=16000, language_code="en-US", ) From 21beb1dfcb66faca28d2f39c55da0300fbc75b64 Mon Sep 17 00:00:00 2001 From: Franklin Nunez Date: Wed, 28 Oct 2020 13:15:58 -0700 Subject: [PATCH 06/12] fix: conflicts --- samples/microphone/transcribe_streaming_infinite.py | 1 + samples/snippets/beta_snippets.py | 3 +-- samples/snippets/speech_quickstart_beta.py | 1 + samples/snippets/transcribe_async.py | 12 ------------ 4 files changed, 3 insertions(+), 14 deletions(-) diff --git a/samples/microphone/transcribe_streaming_infinite.py b/samples/microphone/transcribe_streaming_infinite.py index dedb0d5b..7935c5ce 100644 --- a/samples/microphone/transcribe_streaming_infinite.py +++ b/samples/microphone/transcribe_streaming_infinite.py @@ -242,6 +242,7 @@ def main(): sample_rate_hertz=SAMPLE_RATE, language_code='en-US', max_alternatives=1) + streaming_config = speech.StreamingRecognitionConfig( config=config, interim_results=True) diff --git a/samples/snippets/beta_snippets.py b/samples/snippets/beta_snippets.py index 52bf0d60..9b68f561 100644 --- a/samples/snippets/beta_snippets.py +++ b/samples/snippets/beta_snippets.py @@ -82,8 +82,7 @@ def transcribe_file_with_metadata(): metadata.microphone_distance = ( speech.RecognitionMetadata.MicrophoneDistance.NEARFIELD) metadata.recording_device_type = ( - - speech.RecognitionMetadata.RecordingDeviceType.SMARTPHONE) + speech.RecognitionMetadata.RecordingDeviceType.SMARTPHONE) # Some metadata fields are free form strings metadata.recording_device_name = "Pixel 2 XL" diff --git a/samples/snippets/speech_quickstart_beta.py b/samples/snippets/speech_quickstart_beta.py index a1ad7376..d40e6d32 100644 --- a/samples/snippets/speech_quickstart_beta.py +++ b/samples/snippets/speech_quickstart_beta.py @@ -27,6 +27,7 @@ # [START speech_quickstart_beta] from google.cloud import speech_v1p1beta1 as speech + def sample_recognize(storage_uri): """ Performs synchronous speech recognition on an audio file diff --git a/samples/snippets/transcribe_async.py b/samples/snippets/transcribe_async.py index 82d12d4d..c8e9a86d 100644 --- a/samples/snippets/transcribe_async.py +++ b/samples/snippets/transcribe_async.py @@ -43,11 +43,6 @@ def transcribe_file(speech_file): """ audio = speech.RecognitionAudio(content=content) - config = speech.RecognitionConfig( - encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16, - - audio = speech.RecognitionAudio(content=content) - config = speech.RecognitionConfig( encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16, sample_rate_hertz=16000, @@ -55,9 +50,6 @@ def transcribe_file(speech_file): ) # [START speech_python_migration_async_response - operation = client.long_running_recognize( - request={"config": config, "audio": audio} - ) operation = client.long_running_recognize(config=config, audio=audio) # [END speech_python_migration_async_request] @@ -90,10 +82,6 @@ def transcribe_gcs(gcs_uri): language_code="en-US", ) - operation = client.long_running_recognize( - request={"config": config, "audio": audio} - ) - operation = client.long_running_recognize(config=config, audio=audio) print("Waiting for operation to complete...") From 00b8f3ca3fa9898b1130376669729d630ac7dc86 Mon Sep 17 
00:00:00 2001 From: Franklin Nunez Date: Fri, 2 Oct 2020 15:14:40 -0700 Subject: [PATCH 07/12] fix: migrated to speech 2.0.0 --- samples/microphone/transcribe_streaming_mic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/microphone/transcribe_streaming_mic.py b/samples/microphone/transcribe_streaming_mic.py index 597e1bd7..75f22a01 100644 --- a/samples/microphone/transcribe_streaming_mic.py +++ b/samples/microphone/transcribe_streaming_mic.py @@ -171,6 +171,7 @@ def main(): encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16, sample_rate_hertz=RATE, language_code=language_code) + streaming_config = speech.StreamingRecognitionConfig( config=config, interim_results=True) From b83fa6e1c57654f0151d538630486bf145b3b5b7 Mon Sep 17 00:00:00 2001 From: Franklin Nunez Date: Mon, 5 Oct 2020 16:24:12 -0700 Subject: [PATCH 08/12] fix: fixed lint issues --- samples/microphone/transcribe_streaming_infinite.py | 3 +++ samples/snippets/beta_snippets.py | 5 ++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/samples/microphone/transcribe_streaming_infinite.py b/samples/microphone/transcribe_streaming_infinite.py index 7935c5ce..fb3dca26 100644 --- a/samples/microphone/transcribe_streaming_infinite.py +++ b/samples/microphone/transcribe_streaming_infinite.py @@ -242,7 +242,10 @@ def main(): sample_rate_hertz=SAMPLE_RATE, language_code='en-US', max_alternatives=1) +<<<<<<< HEAD +======= +>>>>>>> c37dbd0 (fix: fixed lint issues) streaming_config = speech.StreamingRecognitionConfig( config=config, interim_results=True) diff --git a/samples/snippets/beta_snippets.py b/samples/snippets/beta_snippets.py index 9b68f561..6ffa084f 100644 --- a/samples/snippets/beta_snippets.py +++ b/samples/snippets/beta_snippets.py @@ -74,8 +74,7 @@ def transcribe_file_with_metadata(): # Here we construct a recognition metadata object. 
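Note on patches 06-07: the conflict cleanup settles on the flattened keyword
call style that the samples use after the migration to google-cloud-speech
2.0.0. Both invocation forms appear in this series, and in the generated 2.x
client they are equivalent:

    # Request-mapping form (removed by patch 06):
    operation = client.long_running_recognize(
        request={"config": config, "audio": audio}
    )

    # Flattened keyword form the samples settle on:
    operation = client.long_running_recognize(config=config, audio=audio)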
From b83fa6e1c57654f0151d538630486bf145b3b5b7 Mon Sep 17 00:00:00 2001
From: Franklin Nunez
Date: Mon, 5 Oct 2020 16:24:12 -0700
Subject: [PATCH 08/12] fix: fixed lint issues

---
 samples/microphone/transcribe_streaming_infinite.py | 3 +++
 samples/snippets/beta_snippets.py                   | 5 ++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/samples/microphone/transcribe_streaming_infinite.py b/samples/microphone/transcribe_streaming_infinite.py
index 7935c5ce..fb3dca26 100644
--- a/samples/microphone/transcribe_streaming_infinite.py
+++ b/samples/microphone/transcribe_streaming_infinite.py
@@ -242,7 +242,10 @@ def main():
         sample_rate_hertz=SAMPLE_RATE,
         language_code='en-US',
         max_alternatives=1)
+<<<<<<< HEAD
 
+=======
+>>>>>>> c37dbd0 (fix: fixed lint issues)
     streaming_config = speech.StreamingRecognitionConfig(
         config=config,
         interim_results=True)

diff --git a/samples/snippets/beta_snippets.py b/samples/snippets/beta_snippets.py
index 9b68f561..6ffa084f 100644
--- a/samples/snippets/beta_snippets.py
+++ b/samples/snippets/beta_snippets.py
@@ -74,8 +74,7 @@ def transcribe_file_with_metadata():
 
     # Here we construct a recognition metadata object.
     # Most metadata fields are specified as enums that can be found
-
-    # in speech.RecognitionMetadata
+    # in speech.enums.RecognitionMetadata
     metadata = speech.RecognitionMetadata()
     metadata.interaction_type = (
         speech.RecognitionMetadata.InteractionType.DISCUSSION)
@@ -83,7 +82,7 @@ def transcribe_file_with_metadata():
         speech.RecognitionMetadata.MicrophoneDistance.NEARFIELD)
     metadata.recording_device_type = (
         speech.RecognitionMetadata.RecordingDeviceType.SMARTPHONE)
-
+
     # Some metadata fields are free form strings
     metadata.recording_device_name = "Pixel 2 XL"
     # And some are integers, for instance the 6 digit NAICS code

From b4b8e7f26c469e0a64f4891ed1ad27b2d195bccc Mon Sep 17 00:00:00 2001
From: Franklin Nunez
Date: Wed, 28 Oct 2020 10:02:33 -0700
Subject: [PATCH 09/12] fix: deleted a duplicate line that calls the recognizer

---
 samples/snippets/transcribe_async.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/samples/snippets/transcribe_async.py b/samples/snippets/transcribe_async.py
index c8e9a86d..d1ce6c7b 100644
--- a/samples/snippets/transcribe_async.py
+++ b/samples/snippets/transcribe_async.py
@@ -49,10 +49,9 @@ def transcribe_file(speech_file):
         language_code="en-US",
     )
 
-    # [START speech_python_migration_async_response
     operation = client.long_running_recognize(config=config, audio=audio)
     # [END speech_python_migration_async_request]
-
+    # [START speech_python_migration_async_response
     print("Waiting for operation to complete...")
     response = operation.result(timeout=90)
 

From dc110ba981c04af637981a49c1dd672a7315ec6c Mon Sep 17 00:00:00 2001
From: Franklin Nunez
Date: Wed, 28 Oct 2020 10:22:27 -0700
Subject: [PATCH 10/12] docs: repaired region tag mismatch

---
 samples/snippets/transcribe_async.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/samples/snippets/transcribe_async.py b/samples/snippets/transcribe_async.py
index d1ce6c7b..8530e4db 100644
--- a/samples/snippets/transcribe_async.py
+++ b/samples/snippets/transcribe_async.py
@@ -48,10 +48,10 @@ def transcribe_file(speech_file):
         sample_rate_hertz=16000,
         language_code="en-US",
     )
-
+    # [START speech_python_migration_async_response
     operation = client.long_running_recognize(config=config, audio=audio)
     # [END speech_python_migration_async_request]
-    # [START speech_python_migration_async_response
+
     print("Waiting for operation to complete...")
     response = operation.result(timeout=90)
 
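Note on patches 09-10: the speech_python_migration_* markers are region tags
consumed by documentation tooling, so each [START x] needs a matching [END x]
and the pair must bracket the code it names. After patch 10, plus the closing
bracket added in patch 12, the call site reads as below; the request region's
opening tag and the response region's closing tag sit elsewhere in the same
function, since the two snippet regions overlap here:

    # [START speech_python_migration_async_response]
    operation = client.long_running_recognize(config=config, audio=audio)
    # [END speech_python_migration_async_request]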
From ea10fd45e590fb61a8ed3802ed687346ec6d45db Mon Sep 17 00:00:00 2001
From: Franklin Nunez
Date: Wed, 28 Oct 2020 16:52:02 -0700
Subject: [PATCH 11/12] chore: formatting

---
 .../transcribe_streaming_infinite.py          |  73 ++++----
 .../microphone/transcribe_streaming_mic.py    |  34 ++--
 .../transcribe_streaming_mic_test.py          |  19 +-
 samples/snippets/beta_snippets.py             | 165 ++++++++++--------
 .../snippets/transcribe_context_classes.py    |   2 +-
 samples/snippets/transcribe_multichannel.py   |   4 +-
 samples/snippets/transcribe_streaming.py      |  14 +-
 .../snippets/transcribe_word_time_offsets.py  |   1 +
 8 files changed, 175 insertions(+), 137 deletions(-)

diff --git a/samples/microphone/transcribe_streaming_infinite.py b/samples/microphone/transcribe_streaming_infinite.py
index fb3dca26..1866022a 100644
--- a/samples/microphone/transcribe_streaming_infinite.py
+++ b/samples/microphone/transcribe_streaming_infinite.py
@@ -41,9 +41,9 @@
 SAMPLE_RATE = 16000
 CHUNK_SIZE = int(SAMPLE_RATE / 10)  # 100ms
 
-RED = '\033[0;31m'
-GREEN = '\033[0;32m'
-YELLOW = '\033[0;33m'
+RED = "\033[0;31m"
+GREEN = "\033[0;32m"
+YELLOW = "\033[0;33m"
 
 
 def get_current_time():
@@ -123,12 +123,14 @@ def generator(self):
                     if self.bridging_offset > self.final_request_end_time:
                         self.bridging_offset = self.final_request_end_time
 
-                    chunks_from_ms = round((self.final_request_end_time -
-                                            self.bridging_offset) / chunk_time)
+                    chunks_from_ms = round(
+                        (self.final_request_end_time - self.bridging_offset)
+                        / chunk_time
+                    )
 
-                    self.bridging_offset = (round((
-                        len(self.last_audio_input) - chunks_from_ms)
-                        * chunk_time))
+                    self.bridging_offset = round(
+                        (len(self.last_audio_input) - chunks_from_ms) * chunk_time
+                    )
 
                     for i in range(chunks_from_ms, len(self.last_audio_input)):
                         data.append(self.last_audio_input[i])
@@ -157,7 +159,7 @@ def generator(self):
             except queue.Empty:
                 break
 
-            yield b''.join(data)
+            yield b"".join(data)
 
 
 def listen_print_loop(responses, stream):
@@ -203,32 +205,35 @@ def listen_print_loop(responses, stream):
         stream.result_end_time = int((result_seconds * 1000) + (result_micros / 1000))
 
-        corrected_time = (stream.result_end_time - stream.bridging_offset
-                          + (STREAMING_LIMIT * stream.restart_counter))
+        corrected_time = (
+            stream.result_end_time
+            - stream.bridging_offset
+            + (STREAMING_LIMIT * stream.restart_counter)
+        )
         # Display interim results, but with a carriage return at the end of the
         # line, so subsequent lines will overwrite them.
 
         if result.is_final:
             sys.stdout.write(GREEN)
-            sys.stdout.write('\033[K')
-            sys.stdout.write(str(corrected_time) + ': ' + transcript + '\n')
+            sys.stdout.write("\033[K")
+            sys.stdout.write(str(corrected_time) + ": " + transcript + "\n")
 
             stream.is_final_end_time = stream.result_end_time
             stream.last_transcript_was_final = True
 
             # Exit recognition if any of the transcribed phrases could be
             # one of our keywords.
-            if re.search(r'\b(exit|quit)\b', transcript, re.I):
+            if re.search(r"\b(exit|quit)\b", transcript, re.I):
                 sys.stdout.write(YELLOW)
-                sys.stdout.write('Exiting...\n')
+                sys.stdout.write("Exiting...\n")
 
                 stream.closed = True
                 break
         else:
             sys.stdout.write(RED)
-            sys.stdout.write('\033[K')
-            sys.stdout.write(str(corrected_time) + ': ' + transcript + '\r')
+            sys.stdout.write("\033[K")
+            sys.stdout.write(str(corrected_time) + ": " + transcript + "\r")
 
             stream.last_transcript_was_final = False
@@ -240,38 +245,38 @@ def main():
     config = speech.RecognitionConfig(
         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
         sample_rate_hertz=SAMPLE_RATE,
-        language_code='en-US',
-        max_alternatives=1)
-<<<<<<< HEAD
+        language_code="en-US",
+        max_alternatives=1,
+    )
 
-=======
->>>>>>> c37dbd0 (fix: fixed lint issues)
     streaming_config = speech.StreamingRecognitionConfig(
-        config=config,
-        interim_results=True)
+        config=config, interim_results=True
+    )
 
     mic_manager = ResumableMicrophoneStream(SAMPLE_RATE, CHUNK_SIZE)
     print(mic_manager.chunk_size)
     sys.stdout.write(YELLOW)
     sys.stdout.write('\nListening, say "Quit" or "Exit" to stop.\n\n')
-    sys.stdout.write('End (ms)       Transcript Results/Status\n')
-    sys.stdout.write('=====================================================\n')
+    sys.stdout.write("End (ms)       Transcript Results/Status\n")
+    sys.stdout.write("=====================================================\n")
 
     with mic_manager as stream:
 
         while not stream.closed:
             sys.stdout.write(YELLOW)
-            sys.stdout.write('\n' + str(
-                STREAMING_LIMIT * stream.restart_counter) + ': NEW REQUEST\n')
+            sys.stdout.write(
+                "\n" + str(STREAMING_LIMIT * stream.restart_counter) + ": NEW REQUEST\n"
+            )
 
             stream.audio_input = []
             audio_generator = stream.generator()
 
-            requests = (speech.StreamingRecognizeRequest(
-                audio_content=content)for content in audio_generator)
+            requests = (
+                speech.StreamingRecognizeRequest(audio_content=content)
+                for content in audio_generator
+            )
 
-            responses = client.streaming_recognize(streaming_config,
-                                                   requests)
+            responses = client.streaming_recognize(streaming_config, requests)
 
             # Now, put the transcription responses to use.
             listen_print_loop(responses, stream)
@@ -285,11 +290,11 @@ def main():
             stream.restart_counter = stream.restart_counter + 1
 
             if not stream.last_transcript_was_final:
-                sys.stdout.write('\n')
+                sys.stdout.write("\n")
 
             stream.new_stream = True
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
 
     main()

diff --git a/samples/microphone/transcribe_streaming_mic.py b/samples/microphone/transcribe_streaming_mic.py
index 75f22a01..5d9776f8 100644
--- a/samples/microphone/transcribe_streaming_mic.py
+++ b/samples/microphone/transcribe_streaming_mic.py
@@ -43,6 +43,7 @@
 
 class MicrophoneStream(object):
     """Opens a recording stream as a generator yielding the audio chunks."""
+
     def __init__(self, rate, chunk):
         self._rate = rate
         self._chunk = chunk
@@ -57,8 +58,10 @@ def __enter__(self):
             format=pyaudio.paInt16,
             # The API currently only supports 1-channel (mono) audio
             # https://goo.gl/z757pE
-            channels=1, rate=self._rate,
-            input=True, frames_per_buffer=self._chunk,
+            channels=1,
+            rate=self._rate,
+            input=True,
+            frames_per_buffer=self._chunk,
             # Run the audio stream asynchronously to fill the buffer object.
             # This is necessary so that the input device's buffer doesn't
             # overflow while the calling thread makes network requests, etc.
@@ -103,7 +106,7 @@ def generator(self):
             except queue.Empty:
                 break
 
-        yield b''.join(data)
+        yield b"".join(data)
 
 
 def listen_print_loop(responses):
@@ -141,10 +144,10 @@ def listen_print_loop(responses):
         #
         # If the previous result was longer than this one, we need to print
         # some extra spaces to overwrite the previous result
-        overwrite_chars = ' ' * (num_chars_printed - len(transcript))
+        overwrite_chars = " " * (num_chars_printed - len(transcript))
 
         if not result.is_final:
-            sys.stdout.write(transcript + overwrite_chars + '\r')
+            sys.stdout.write(transcript + overwrite_chars + "\r")
             sys.stdout.flush()
 
             num_chars_printed = len(transcript)
@@ -154,8 +157,8 @@ def listen_print_loop(responses):
 
             # Exit recognition if any of the transcribed phrases could be
             # one of our keywords.
-            if re.search(r'\b(exit|quit)\b', transcript, re.I):
-                print('Exiting..')
+            if re.search(r"\b(exit|quit)\b", transcript, re.I):
+                print("Exiting..")
                 break
 
             num_chars_printed = 0
@@ -164,22 +167,25 @@ def main():
     # See http://g.co/cloud/speech/docs/languages
     # for a list of supported languages.
-    language_code = 'en-US'  # a BCP-47 language tag
+    language_code = "en-US"  # a BCP-47 language tag
 
     client = speech.SpeechClient()
     config = speech.RecognitionConfig(
         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
         sample_rate_hertz=RATE,
-        language_code=language_code)
+        language_code=language_code,
+    )
 
     streaming_config = speech.StreamingRecognitionConfig(
-        config=config,
-        interim_results=True)
+        config=config, interim_results=True
+    )
 
     with MicrophoneStream(RATE, CHUNK) as stream:
         audio_generator = stream.generator()
-        requests = (speech.StreamingRecognizeRequest(audio_content=content)
-                    for content in audio_generator)
+        requests = (
+            speech.StreamingRecognizeRequest(audio_content=content)
+            for content in audio_generator
+        )
 
         responses = client.streaming_recognize(streaming_config, requests)
 
@@ -188,8 +194,8 @@ def main():
         # Now, put the transcription responses to use.
         listen_print_loop(responses)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
 # [END speech_transcribe_streaming_mic]

diff --git a/samples/microphone/transcribe_streaming_mic_test.py b/samples/microphone/transcribe_streaming_mic_test.py
index dd5e7ea6..f5e08f5d 100644
--- a/samples/microphone/transcribe_streaming_mic_test.py
+++ b/samples/microphone/transcribe_streaming_mic_test.py
@@ -18,7 +18,7 @@
 
 import mock
 
-RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
+RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
 
 
 class MockPyAudio(object):
@@ -32,8 +32,9 @@ def open(self, stream_callback, rate, *args, **kwargs):
         self.rate = rate
         self.closed = threading.Event()
         self.stream_thread = threading.Thread(
-            target=self.stream_audio, args=(
-                self.audio_filename, stream_callback, self.closed))
+            target=self.stream_audio,
+            args=(self.audio_filename, stream_callback, self.closed),
+        )
         self.stream_thread.start()
         return self
 
@@ -47,23 +48,25 @@ def terminate(self):
         pass
 
     def stream_audio(self, audio_filename, callback, closed, num_frames=512):
-        with open(audio_filename, 'rb') as audio_file:
+        with open(audio_filename, "rb") as audio_file:
             while not closed.is_set():
                 # Approximate realtime by sleeping for the appropriate time for
                 # the requested number of frames
                 time.sleep(num_frames / float(self.rate))
                 # audio is 16-bit samples, whereas python byte is 8-bit
                 num_bytes = 2 * num_frames
                 chunk = audio_file.read(num_bytes) or b"\0" * num_bytes
-                chunk = audio_file.read(num_bytes) or b'\0' * num_bytes
                 callback(chunk, None, None, None)
 
 
-@mock.patch.dict('sys.modules', pyaudio=mock.MagicMock(
-    PyAudio=MockPyAudio(os.path.join(RESOURCES, 'quit.raw'))))
+@mock.patch.dict(
+    "sys.modules",
+    pyaudio=mock.MagicMock(PyAudio=MockPyAudio(os.path.join(RESOURCES, "quit.raw"))),
+)
 def test_main(capsys):
     import transcribe_streaming_mic
 
     transcribe_streaming_mic.main()
     out, err = capsys.readouterr()
 
-    assert re.search(r'quit', out, re.DOTALL | re.I)
+    assert re.search(r"quit", out, re.DOTALL | re.I)

diff --git a/samples/snippets/beta_snippets.py b/samples/snippets/beta_snippets.py
index 6ffa084f..45f661cd 100644
--- a/samples/snippets/beta_snippets.py
+++ b/samples/snippets/beta_snippets.py
@@ -35,29 +35,31 @@ def transcribe_file_with_enhanced_model():
     """Transcribe the given audio file using an enhanced model."""
     # [START speech_transcribe_enhanced_model_beta]
     from google.cloud import speech_v1p1beta1 as speech
+
     client = speech.SpeechClient()
 
-    speech_file = 'resources/commercial_mono.wav'
+    speech_file = "resources/commercial_mono.wav"
 
-    with io.open(speech_file, 'rb') as audio_file:
+    with io.open(speech_file, "rb") as audio_file:
         content = audio_file.read()
 
     audio = speech.RecognitionAudio(content=content)
     config = speech.RecognitionConfig(
         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
         sample_rate_hertz=8000,
-        language_code='en-US',
+        language_code="en-US",
         use_enhanced=True,
         # A model must be specified to use enhanced model.
-        model='phone_call')
+        model="phone_call",
+    )
 
     response = client.recognize(config=config, audio=audio)
 
     for i, result in enumerate(response.results):
         alternative = result.alternatives[0]
-        print('-' * 20)
-        print(u'First alternative of result {}'.format(i))
-        print(u'Transcript: {}'.format(alternative.transcript))
+        print("-" * 20)
+        print(u"First alternative of result {}".format(i))
+        print(u"Transcript: {}".format(alternative.transcript))
     # [END speech_transcribe_enhanced_model_beta]
@@ -65,24 +67,26 @@ def transcribe_file_with_metadata():
     """Send a request that includes recognition metadata."""
     # [START speech_transcribe_recognition_metadata_beta]
     from google.cloud import speech_v1p1beta1 as speech
+
     client = speech.SpeechClient()
 
-    speech_file = 'resources/commercial_mono.wav'
+    speech_file = "resources/commercial_mono.wav"
 
-    with io.open(speech_file, 'rb') as audio_file:
+    with io.open(speech_file, "rb") as audio_file:
         content = audio_file.read()
 
     # Here we construct a recognition metadata object.
     # Most metadata fields are specified as enums that can be found
     # in speech.enums.RecognitionMetadata
     metadata = speech.RecognitionMetadata()
-    metadata.interaction_type = (
-        speech.RecognitionMetadata.InteractionType.DISCUSSION)
+    metadata.interaction_type = speech.RecognitionMetadata.InteractionType.DISCUSSION
     metadata.microphone_distance = (
-        speech.RecognitionMetadata.MicrophoneDistance.NEARFIELD)
+        speech.RecognitionMetadata.MicrophoneDistance.NEARFIELD
+    )
     metadata.recording_device_type = (
-        speech.RecognitionMetadata.RecordingDeviceType.SMARTPHONE)
-
+        speech.RecognitionMetadata.RecordingDeviceType.SMARTPHONE
+    )
+
     # Some metadata fields are free form strings
     metadata.recording_device_name = "Pixel 2 XL"
     # And some are integers, for instance the 6 digit NAICS code
@@ -93,17 +97,18 @@ def transcribe_file_with_metadata():
     config = speech.RecognitionConfig(
         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
         sample_rate_hertz=8000,
-        language_code='en-US',
+        language_code="en-US",
         # Add this in the request to send metadata.
-        metadata=metadata)
+        metadata=metadata,
+    )
 
     response = client.recognize(config=config, audio=audio)
 
     for i, result in enumerate(response.results):
         alternative = result.alternatives[0]
-        print('-' * 20)
-        print(u'First alternative of result {}'.format(i))
-        print(u'Transcript: {}'.format(alternative.transcript))
+        print("-" * 20)
+        print(u"First alternative of result {}".format(i))
+        print(u"Transcript: {}".format(alternative.transcript))
     # [END speech_transcribe_recognition_metadata_beta]
@@ -111,28 +116,30 @@ def transcribe_file_with_auto_punctuation():
     """Transcribe the given audio file with auto punctuation enabled."""
     # [START speech_transcribe_auto_punctuation_beta]
     from google.cloud import speech_v1p1beta1 as speech
+
     client = speech.SpeechClient()
 
-    speech_file = 'resources/commercial_mono.wav'
+    speech_file = "resources/commercial_mono.wav"
 
-    with io.open(speech_file, 'rb') as audio_file:
+    with io.open(speech_file, "rb") as audio_file:
         content = audio_file.read()
 
     audio = speech.RecognitionAudio(content=content)
     config = speech.RecognitionConfig(
         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
         sample_rate_hertz=8000,
-        language_code='en-US',
+        language_code="en-US",
         # Enable automatic punctuation
-        enable_automatic_punctuation=True)
+        enable_automatic_punctuation=True,
+    )
 
     response = client.recognize(config=config, audio=audio)
 
     for i, result in enumerate(response.results):
         alternative = result.alternatives[0]
-        print('-' * 20)
-        print(u'First alternative of result {}'.format(i))
-        print(u'Transcript: {}'.format(alternative.transcript))
+        print("-" * 20)
+        print(u"First alternative of result {}".format(i))
+        print(u"Transcript: {}".format(alternative.transcript))
     # [END speech_transcribe_auto_punctuation_beta]
@@ -140,11 +147,12 @@ def transcribe_file_with_diarization():
     """Transcribe the given audio file synchronously with diarization."""
     # [START speech_transcribe_diarization_beta]
     from google.cloud import speech_v1p1beta1 as speech
+
     client = speech.SpeechClient()
 
-    speech_file = 'resources/commercial_mono.wav'
+    speech_file = "resources/commercial_mono.wav"
 
-    with open(speech_file, 'rb') as audio_file:
+    with open(speech_file, "rb") as audio_file:
         content = audio_file.read()
 
     audio = speech.RecognitionAudio(content=content)
@@ -152,11 +160,12 @@ def transcribe_file_with_diarization():
     config = speech.RecognitionConfig(
         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
         sample_rate_hertz=8000,
-        language_code='en-US',
+        language_code="en-US",
         enable_speaker_diarization=True,
-        diarization_speaker_count=2)
+        diarization_speaker_count=2,
+    )
 
-    print('Waiting for operation to complete...')
+    print("Waiting for operation to complete...")
     response = client.recognize(config=config, audio=audio)
 
     # The transcript within each result is separate and sequential per result.
@@ -169,21 +178,23 @@ def transcribe_file_with_diarization():
 
     # Printing out the output:
     for word_info in words_info:
-        print(u"word: '{}', speaker_tag: {}".format(
-            word_info.word, word_info.speaker_tag))
+        print(
+            u"word: '{}', speaker_tag: {}".format(word_info.word, word_info.speaker_tag)
+        )
     # [END speech_transcribe_diarization_beta]
 
 
 def transcribe_file_with_multichannel():
     """Transcribe the given audio file synchronously with
-        multi channel."""
+    multi channel."""
     # [START speech_transcribe_multichannel_beta]
     from google.cloud import speech_v1p1beta1 as speech
+
     client = speech.SpeechClient()
 
-    speech_file = 'resources/Google_Gnome.wav'
+    speech_file = "resources/Google_Gnome.wav"
 
-    with open(speech_file, 'rb') as audio_file:
+    with open(speech_file, "rb") as audio_file:
         content = audio_file.read()
 
     audio = speech.RecognitionAudio(content=content)
@@ -191,33 +202,35 @@ def transcribe_file_with_multichannel():
     config = speech.RecognitionConfig(
         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
         sample_rate_hertz=16000,
-        language_code='en-US',
+        language_code="en-US",
         audio_channel_count=1,
-        enable_separate_recognition_per_channel=True)
+        enable_separate_recognition_per_channel=True,
+    )
 
     response = client.recognize(config=config, audio=audio)
 
     for i, result in enumerate(response.results):
         alternative = result.alternatives[0]
-        print('-' * 20)
-        print('First alternative of result {}'.format(i))
-        print(u'Transcript: {}'.format(alternative.transcript))
-        print(u'Channel Tag: {}'.format(result.channel_tag))
+        print("-" * 20)
+        print("First alternative of result {}".format(i))
+        print(u"Transcript: {}".format(alternative.transcript))
+        print(u"Channel Tag: {}".format(result.channel_tag))
     # [END speech_transcribe_multichannel_beta]
 
 
 def transcribe_file_with_multilanguage():
     """Transcribe the given audio file synchronously with
-        multi language."""
+    multi language."""
     # [START speech_transcribe_multilanguage_beta]
     from google.cloud import speech_v1p1beta1 as speech
+
     client = speech.SpeechClient()
 
-    speech_file = 'resources/multi.wav'
-    first_lang = 'en-US'
-    second_lang = 'es'
+    speech_file = "resources/multi.wav"
+    first_lang = "en-US"
+    second_lang = "es"
 
-    with open(speech_file, 'rb') as audio_file:
+    with open(speech_file, "rb") as audio_file:
         content = audio_file.read()
 
     audio = speech.RecognitionAudio(content=content)
@@ -227,29 +240,31 @@ def transcribe_file_with_multilanguage():
         sample_rate_hertz=44100,
         audio_channel_count=2,
         language_code=first_lang,
-        alternative_language_codes=[second_lang])
+        alternative_language_codes=[second_lang],
+    )
 
-    print('Waiting for operation to complete...')
+    print("Waiting for operation to complete...")
     response = client.recognize(config=config, audio=audio)
 
     for i, result in enumerate(response.results):
         alternative = result.alternatives[0]
-        print('-' * 20)
-        print(u'First alternative of result {}: {}'.format(i, alternative))
-        print(u'Transcript: {}'.format(alternative.transcript))
+        print("-" * 20)
+        print(u"First alternative of result {}: {}".format(i, alternative))
+        print(u"Transcript: {}".format(alternative.transcript))
     # [END speech_transcribe_multilanguage_beta]
 
 
 def transcribe_file_with_word_level_confidence():
     """Transcribe the given audio file synchronously with
-        word level confidence."""
+    word level confidence."""
     # [START speech_transcribe_word_level_confidence_beta]
     from google.cloud import speech_v1p1beta1 as speech
+
     client = speech.SpeechClient()
 
-    speech_file = 'resources/Google_Gnome.wav'
+    speech_file = "resources/Google_Gnome.wav"
 
-    with open(speech_file, 'rb') as audio_file:
+    with open(speech_file, "rb") as audio_file:
         content = audio_file.read()
 
     audio = speech.RecognitionAudio(content=content)
@@ -257,40 +272,44 @@ def transcribe_file_with_word_level_confidence():
     config = speech.RecognitionConfig(
         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
         sample_rate_hertz=16000,
-        language_code='en-US',
-        enable_word_confidence=True)
+        language_code="en-US",
+        enable_word_confidence=True,
+    )
 
     response = client.recognize(config=config, audio=audio)
 
     for i, result in enumerate(response.results):
         alternative = result.alternatives[0]
-        print('-' * 20)
-        print('First alternative of result {}'.format(i))
-        print(u'Transcript: {}'.format(alternative.transcript))
-        print(u'First Word and Confidence: ({}, {})'.format(
-            alternative.words[0].word, alternative.words[0].confidence))
+        print("-" * 20)
+        print("First alternative of result {}".format(i))
+        print(u"Transcript: {}".format(alternative.transcript))
+        print(
+            u"First Word and Confidence: ({}, {})".format(
+                alternative.words[0].word, alternative.words[0].confidence
+            )
+        )
     # [END speech_transcribe_word_level_confidence_beta]
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     parser = argparse.ArgumentParser(
-        description=__doc__,
-        formatter_class=argparse.RawDescriptionHelpFormatter)
-    parser.add_argument('command')
+        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+    parser.add_argument("command")
 
     args = parser.parse_args()
 
-    if args.command == 'enhanced-model':
+    if args.command == "enhanced-model":
         transcribe_file_with_enhanced_model()
-    elif args.command == 'metadata':
+    elif args.command == "metadata":
         transcribe_file_with_metadata()
-    elif args.command == 'punctuation':
+    elif args.command == "punctuation":
         transcribe_file_with_auto_punctuation()
-    elif args.command == 'diarization':
+    elif args.command == "diarization":
         transcribe_file_with_diarization()
-    elif args.command == 'multi-channel':
+    elif args.command == "multi-channel":
         transcribe_file_with_multichannel()
-    elif args.command == 'multi-language':
+    elif args.command == "multi-language":
         transcribe_file_with_multilanguage()
-    elif args.command == 'word-level-conf':
+    elif args.command == "word-level-conf":
         transcribe_file_with_word_level_confidence()

diff --git a/samples/snippets/transcribe_context_classes.py b/samples/snippets/transcribe_context_classes.py
index 72b40507..fc22e184 100644
--- a/samples/snippets/transcribe_context_classes.py
+++ b/samples/snippets/transcribe_context_classes.py
@@ -28,7 +28,7 @@ def transcribe_context_classes(storage_uri):
     # https://cloud.google.com/speech-to-text/docs/reference/rpc/google.cloud.speech.v1#speechcontext
     # Full list of supported phrases (class tokens) here:
     # https://cloud.google.com/speech-to-text/docs/class-tokens
-    speech_context = speech.SpeechContext(phrases=['$TIME'])
+    speech_context = speech.SpeechContext(phrases=["$TIME"])
 
     # RecognitionConfig: to configure your encoding and sample_rate_hertz, see:
     # https://cloud.google.com/speech-to-text/docs/reference/rpc/google.cloud.speech.v1#recognitionconfig

diff --git a/samples/snippets/transcribe_multichannel.py b/samples/snippets/transcribe_multichannel.py
index 68fd013c..24573855 100644
--- a/samples/snippets/transcribe_multichannel.py
+++ b/samples/snippets/transcribe_multichannel.py
@@ -27,7 +27,7 @@
 
 def transcribe_file_with_multichannel(speech_file):
     """Transcribe the given audio file synchronously with
-        multi channel."""
+    multi channel."""
     # [START speech_transcribe_multichannel]
     from google.cloud import speech
 
@@ -59,7 +59,7 @@ def transcribe_file_with_multichannel(speech_file):
 
 def transcribe_gcs_with_multichannel(gcs_uri):
     """Transcribe the given audio file on GCS with
-        multi channel."""
+    multi channel."""
     # [START speech_transcribe_multichannel_gcs]
     from google.cloud import speech

diff --git a/samples/snippets/transcribe_streaming.py b/samples/snippets/transcribe_streaming.py
index 93243171..979478a6 100644
--- a/samples/snippets/transcribe_streaming.py
+++ b/samples/snippets/transcribe_streaming.py
@@ -38,20 +38,24 @@ def transcribe_streaming(stream_file):
     # In practice, stream should be a generator yielding chunks of audio data.
     stream = [content]
 
-    requests = (speech.StreamingRecognizeRequest(audio_content=chunk)
-                for chunk in stream)
+    requests = (
+        speech.StreamingRecognizeRequest(audio_content=chunk) for chunk in stream
+    )
 
     config = speech.RecognitionConfig(
         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
         sample_rate_hertz=16000,
-
-        language_code='en-US')
+        language_code="en-US",
+    )
 
     streaming_config = speech.StreamingRecognitionConfig(config=config)
 
     # streaming_recognize returns a generator.
     # [START speech_python_migration_streaming_response]
-    responses = client.streaming_recognize(config=streaming_config, requests=requests,)
+    responses = client.streaming_recognize(
+        config=streaming_config,
+        requests=requests,
+    )
     # [END speech_python_migration_streaming_request]
 
     for response in responses:

diff --git a/samples/snippets/transcribe_word_time_offsets.py b/samples/snippets/transcribe_word_time_offsets.py
index ced4baec..a0e32c27 100644
--- a/samples/snippets/transcribe_word_time_offsets.py
+++ b/samples/snippets/transcribe_word_time_offsets.py
@@ -95,6 +95,7 @@ def transcribe_gcs_with_word_time_offsets(gcs_uri):
             f"Word: {word}, start_time: {start_time.total_seconds()}, end_time: {end_time.total_seconds()}"
         )
 
+
 # [END speech_transcribe_async_word_time_offsets_gcs]
channel.""" # [START speech_transcribe_multichannel] from google.cloud import speech @@ -59,7 +59,7 @@ def transcribe_file_with_multichannel(speech_file): def transcribe_gcs_with_multichannel(gcs_uri): """Transcribe the given audio file on GCS with - multi channel.""" + multi channel.""" # [START speech_transcribe_multichannel_gcs] from google.cloud import speech diff --git a/samples/snippets/transcribe_streaming.py b/samples/snippets/transcribe_streaming.py index 93243171..979478a6 100644 --- a/samples/snippets/transcribe_streaming.py +++ b/samples/snippets/transcribe_streaming.py @@ -38,20 +38,24 @@ def transcribe_streaming(stream_file): # In practice, stream should be a generator yielding chunks of audio data. stream = [content] - requests = (speech.StreamingRecognizeRequest(audio_content=chunk) - for chunk in stream) + requests = ( + speech.StreamingRecognizeRequest(audio_content=chunk) for chunk in stream + ) config = speech.RecognitionConfig( encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16, sample_rate_hertz=16000, - - language_code='en-US') + language_code="en-US", + ) streaming_config = speech.StreamingRecognitionConfig(config=config) # streaming_recognize returns a generator. # [START speech_python_migration_streaming_response] - responses = client.streaming_recognize(config=streaming_config, requests=requests,) + responses = client.streaming_recognize( + config=streaming_config, + requests=requests, + ) # [END speech_python_migration_streaming_request] for response in responses: diff --git a/samples/snippets/transcribe_word_time_offsets.py b/samples/snippets/transcribe_word_time_offsets.py index ced4baec..a0e32c27 100644 --- a/samples/snippets/transcribe_word_time_offsets.py +++ b/samples/snippets/transcribe_word_time_offsets.py @@ -95,6 +95,7 @@ def transcribe_gcs_with_word_time_offsets(gcs_uri): f"Word: {word}, start_time: {start_time.total_seconds()}, end_time: {end_time.total_seconds()}" ) + # [END speech_transcribe_async_word_time_offsets_gcs] From 3466da02fd70bceb373e1026ced610bf3cf2c6ce Mon Sep 17 00:00:00 2001 From: Franklin Nunez Date: Wed, 28 Oct 2020 17:17:06 -0700 Subject: [PATCH 12/12] chore: added ] --- samples/snippets/transcribe_async.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/transcribe_async.py b/samples/snippets/transcribe_async.py index 8530e4db..51d5a132 100644 --- a/samples/snippets/transcribe_async.py +++ b/samples/snippets/transcribe_async.py @@ -48,7 +48,7 @@ def transcribe_file(speech_file): sample_rate_hertz=16000, language_code="en-US", ) - # [START speech_python_migration_async_response + # [START speech_python_migration_async_response] operation = client.long_running_recognize(config=config, audio=audio) # [END speech_python_migration_async_request]