Skip to content
This repository has been archived by the owner on Nov 29, 2023. It is now read-only.

Commit

Permalink
Browse files Browse the repository at this point in the history
feat: video speech transcription [(#1849)](GoogleCloudPlatform/python-docs-samples#1849)
  • Loading branch information
anguillanneuf authored and danoscarmike committed Sep 30, 2020
1 parent 9354c0f commit 0bb8156
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 0 deletions.
54 changes: 54 additions & 0 deletions samples/analyze/analyze.py
Expand Up @@ -229,6 +229,55 @@ def analyze_shots(path):
# [END video_analyze_shots]


def speech_transcription(path):
    # [START video_speech_transcription]
    """Transcribe speech from a video stored on GCS."""
    from google.cloud import videointelligence

    client = videointelligence.VideoIntelligenceServiceClient()

    # Ask for 'en-US' transcription with automatic punctuation enabled;
    # the config travels to the service inside a VideoContext.
    transcription_config = videointelligence.types.SpeechTranscriptionConfig(
        language_code='en-US',
        enable_automatic_punctuation=True)
    context = videointelligence.types.VideoContext(
        speech_transcription_config=transcription_config)

    pending = client.annotate_video(
        path,
        features=[videointelligence.enums.Feature.SPEECH_TRANSCRIPTION],
        video_context=context)

    print('\nProcessing video for speech transcription.')

    # Wait (up to 600 seconds) for the operation to produce its result.
    response = pending.result(timeout=600)

    # Only one video is processed, so there is only one annotation result
    # to look at.
    first_result = response.annotation_results[0]
    for transcription in first_result.speech_transcriptions:

        # SpeechTranscriptionConfig.max_alternatives limits how many
        # alternatives each transcription carries. Every alternative is a
        # different possible transcription with its own confidence score.
        for candidate in transcription.alternatives:
            print('Alternative level information:')

            print('Transcript: {}'.format(candidate.transcript))
            print('Confidence: {}\n'.format(candidate.confidence))

            print('Word level information:')
            for entry in candidate.words:
                begin = entry.start_time
                finish = entry.end_time
                # Combine the seconds and nanos fields into fractional
                # seconds for display.
                begin_seconds = begin.seconds + begin.nanos * 1e-9
                finish_seconds = finish.seconds + finish.nanos * 1e-9
                print('\t{}s - {}s: {}'.format(
                    begin_seconds, finish_seconds, entry.word))
    # [END video_speech_transcription]


if __name__ == '__main__':
parser = argparse.ArgumentParser(
description=__doc__,
Expand All @@ -246,6 +295,9 @@ def analyze_shots(path):
analyze_shots_parser = subparsers.add_parser(
'shots', help=analyze_shots.__doc__)
analyze_shots_parser.add_argument('path')
transcribe_speech_parser = subparsers.add_parser(
'transcribe', help=speech_transcription.__doc__)
transcribe_speech_parser.add_argument('path')

args = parser.parse_args()

Expand All @@ -257,3 +309,5 @@ def analyze_shots(path):
analyze_shots(args.path)
if args.command == 'explicit_content':
analyze_explicit_content(args.path)
if args.command == 'transcribe':
speech_transcription(args.path)
8 changes: 8 additions & 0 deletions samples/analyze/analyze_test.py
Expand Up @@ -38,3 +38,11 @@ def test_analyze_explicit_content(capsys):
analyze.analyze_explicit_content('gs://demomaker/cat.mp4')
out, _ = capsys.readouterr()
assert 'pornography' in out


@pytest.mark.slow
def test_speech_transcription(capsys):
    # Run the sample against a video on GCS and check that an expected
    # word appears in what it printed to stdout.
    video_uri = 'gs://python-docs-samples-tests/video/googlework_short.mp4'
    analyze.speech_transcription(video_uri)
    stdout, _ = capsys.readouterr()
    assert 'cultural' in stdout

0 comments on commit 0bb8156

Please sign in to comment.