Skip to content
This repository has been archived by the owner on Feb 23, 2024. It is now read-only.

Commit

Permalink
docs: add w/ glossary and model
Browse files Browse the repository at this point in the history
  • Loading branch information
busunkim96 committed Sep 1, 2020
1 parent 6cf9742 commit 1e030d4
Show file tree
Hide file tree
Showing 2 changed files with 209 additions and 0 deletions.
@@ -0,0 +1,137 @@
# -*- coding: utf-8 -*-
#
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# DO NOT EDIT! This is a generated sample ("LongRunningPromise", "translate_v3_batch_translate_text_with_glossary_and_model")

# To install the latest published package dependency, execute the following:
# pip install google-cloud-translate

# sample-metadata
# title: Batch Translate with Glossary and Model
# description: Batch translate text with Glossary using AutoML Translation model
# usage: python3 translate_v3_batch_translate_text_with_glossary_and_model.py [--input_uri "gs://cloud-samples-data/text.txt"] [--output_uri "gs://YOUR_BUCKET_ID/path_to_store_results/"] [--project "[Google Cloud Project ID]"] [--location "us-central1"] [--target_language en] [--source_language de] [--model_id "{your-model-id}"] [--glossary_id "{your-glossary-id}"]

# [START translate_v3_batch_translate_text_with_glossary_and_model]
from google.cloud import translate

def sample_batch_translate_text_with_glossary_and_model(
input_uri,
output_uri,
project_id,
location,
target_language,
source_language,
model_id,
glossary_id
):
"""
Batch translate text with Glossary and Translation model
"""

client = translate.TranslationServiceClient()

# TODO(developer): Uncomment and set the following variables
# input_uri = 'gs://cloud-samples-data/text.txt'
# output_uri = 'gs://YOUR_BUCKET_ID/path_to_store_results/'
# project = '[Google Cloud Project ID]'
# location = 'us-central1'
# target_language = 'en'
# source_language = 'de'
# model_id = '{your-model-id}'
# glossary_id = '[YOUR_GLOSSARY_ID]'
target_language_codes = [target_language]
gcs_source = {"input_uri": input_uri}

# Optional. Can be "text/plain" or "text/html".
mime_type = "text/plain"
input_configs_element = {"gcs_source": gcs_source, "mime_type": mime_type}
input_configs = [input_configs_element]
gcs_destination = {"output_uri_prefix": output_uri}
output_config = {"gcs_destination": gcs_destination}
parent = client.location_path(project_id, location)
model_path = 'projects/{}/locations/{}/models/{}'.format(project_id, 'us-central1', model_id)
models = {target_language: model_path}

glossary_path = client.glossary_path(
project_id,
'us-central1', # The location of the glossary
glossary_id)

glossary_config = translate.types.TranslateTextGlossaryConfig(
glossary=glossary_path)
glossaries = {"ja": glossary_config} #target lang as key

operation = client.batch_translate_text(
parent=parent,
source_language_code=source_language,
target_language_codes=target_language_codes,
input_configs=input_configs,
output_config=output_config,
models=models,
glossaries=glossaries
)

print(u"Waiting for operation to complete...")
response = operation.result()

# Display the translation for each input text provided
print(u"Total Characters: {}".format(response.total_characters))
print(u"Translated Characters: {}".format(response.translated_characters))


# [END translate_v3_batch_translate_text_with_glossary_and_model]


def main():
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
"--input_uri", type=str, default="gs://cloud-samples-data/text.txt"
)
parser.add_argument(
"--output_uri", type=str, default="gs://YOUR_BUCKET_ID/path_to_store_results/"
)
parser.add_argument("--project_id", type=str, default="[Google Cloud Project ID]")
parser.add_argument("--location", type=str, default="us-central1")
parser.add_argument("--target_language", type=str, default="en")
parser.add_argument("--source_language", type=str, default="de")
parser.add_argument(
"--model_id",
type=str,
default="{your-model-id}"
)
parser.add_argument(
"--glossary_id",
type=str,
default="[YOUR_GLOSSARY_ID]",
)
args = parser.parse_args()

sample_batch_translate_text_with_glossary_and_model(
args.input_uri,
args.output_uri,
args.project_id,
args.location,
args.target_language,
args.source_language,
args.model_id,
args.glossary_id
)


if __name__ == "__main__":
main()
@@ -0,0 +1,72 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import pytest
import uuid
import translate_v3_batch_translate_text_with_glossary_and_model
import translate_v3_create_glossary
import translate_v3_delete_glossary
from google.cloud import storage

PROJECT_ID = os.environ['GOOGLE_CLOUD_PROJECT']
GLOSSARY_INPUT_URI = 'gs://cloud-samples-data/translation/glossary_ja.csv'
MODEL_ID = 'TRL3128559826197068699'

@pytest.fixture(scope='session')
def glossary():
"""Get the ID of a glossary available to session (do not mutate/delete)."""
glossary_id = 'must-start-with-letters-' + str(uuid.uuid1())
translate_v3_create_glossary.sample_create_glossary(
PROJECT_ID,
GLOSSARY_INPUT_URI,
glossary_id
)

yield glossary_id

try:
translate_v3_delete_glossary.sample_delete_glossary(PROJECT_ID, glossary_id)
except Exception:
pass

@pytest.fixture(scope='function')
def bucket():
"""Create a temporary bucket to store annotation output."""
bucket_name = "mike-test-delete-" + str(uuid.uuid1())
storage_client = storage.Client()
bucket = storage_client.create_bucket(bucket_name)

yield bucket

bucket.delete(force=True)

def test_batch_translate_text_with_glossary_and_model(capsys, bucket, glossary):
translate_v3_batch_translate_text_with_glossary_and_model.sample_batch_translate_text_with_glossary_and_model(
'gs://cloud-samples-data/translation/text_with_custom_model_and_glossary.txt',
'gs://{}/translation/BATCH_TRANSLATION_OUTPUT/'.format(bucket.name),
PROJECT_ID,
'us-central1',
'ja',
'en',
MODEL_ID,
glossary
)

out, _ = capsys.readouterr()
assert 'Total Characters: 25' in out
#TODO: find a way to make sure it translates correctly
# SHOULD NOT BE - Google NMT model -> それはしません。欺ception"
# literal: "それはそうだ" # custom model
# literal: "欺く" # glossary

0 comments on commit 1e030d4

Please sign in to comment.