From 10cb925fb71772b66c39991b985bf254a7b9e79a Mon Sep 17 00:00:00 2001 From: Morgan Du Date: Mon, 16 Nov 2020 22:18:53 -0800 Subject: [PATCH] feat: add data_labeling samples --- ...ata_labeling_job_active_learning_sample.py | 60 ++++++++++++ ..._data_labeling_job_active_learning_test.py | 93 ++++++++++++++++++ ..._labeling_job_image_segmentation_sample.py | 58 +++++++++++ ...ta_labeling_job_image_segmentation_test.py | 96 ++++++++++++++++++ ...ata_labeling_job_specialist_pool_sample.py | 60 ++++++++++++ ..._data_labeling_job_specialist_pool_test.py | 98 +++++++++++++++++++ 6 files changed, 465 insertions(+) create mode 100644 samples/snippets/create_data_labeling_job_active_learning_sample.py create mode 100644 samples/snippets/create_data_labeling_job_active_learning_test.py create mode 100644 samples/snippets/create_data_labeling_job_image_segmentation_sample.py create mode 100644 samples/snippets/create_data_labeling_job_image_segmentation_test.py create mode 100644 samples/snippets/create_data_labeling_job_specialist_pool_sample.py create mode 100644 samples/snippets/create_data_labeling_job_specialist_pool_test.py diff --git a/samples/snippets/create_data_labeling_job_active_learning_sample.py b/samples/snippets/create_data_labeling_job_active_learning_sample.py new file mode 100644 index 0000000000..86360b7b34 --- /dev/null +++ b/samples/snippets/create_data_labeling_job_active_learning_sample.py @@ -0,0 +1,60 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# [START aiplatform_create_data_labeling_job_active_learning_sample] +from google.cloud import aiplatform +from google.protobuf import json_format +from google.protobuf.struct_pb2 import Value + + +def create_data_labeling_job_active_learning_sample( + project: str, + display_name: str, + dataset: str, + instruction_uri: str, + inputs_schema_uri: str, + annotation_spec: str, + location: str = "us-central1", + api_endpoint: str = "us-central1-aiplatform.googleapis.com", +): + client_options = {"api_endpoint": api_endpoint} + # Initialize client that will be used to create and send requests. + # This client only needs to be created once, and can be reused for multiple requests. + client = aiplatform.gapic.JobServiceClient(client_options=client_options) + inputs_dict = {"annotation_specs": [annotation_spec]} + inputs = json_format.ParseDict(inputs_dict, Value()) + + active_learning_config = {"max_data_item_count": 1} + + data_labeling_job = { + "display_name": display_name, + # Full resource name: projects/{project}/locations/{location}/datasets/{dataset_id} + "datasets": [dataset], + "labeler_count": 1, + "instruction_uri": instruction_uri, + "inputs_schema_uri": inputs_schema_uri, + "inputs": inputs, + "annotation_labels": { + "aiplatform.googleapis.com/annotation_set_name": "data_labeling_job_active_learning" + }, + "active_learning_config": active_learning_config, + } + parent = f"projects/{project}/locations/{location}" + response = client.create_data_labeling_job( + parent=parent, data_labeling_job=data_labeling_job + ) + print("response:", response) + + +# [END aiplatform_create_data_labeling_job_active_learning_sample] diff --git a/samples/snippets/create_data_labeling_job_active_learning_test.py b/samples/snippets/create_data_labeling_job_active_learning_test.py new file mode 100644 index 0000000000..38a7c0c1c0 --- /dev/null +++ 
b/samples/snippets/create_data_labeling_job_active_learning_test.py @@ -0,0 +1,93 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +import os +import uuid +from google.cloud import aiplatform + +import helpers + +import create_data_labeling_job_active_learning_sample + +API_ENDPOINT = os.getenv("DATA_LABELING_API_ENDPOINT") +PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") +LOCATION = "us-central1" +DATASET_ID = "1905673553261363200" +INPUTS_SCHEMA_URI = "gs://google-cloud-aiplatform/schema/datalabelingjob/inputs/image_classification_1.0.0.yaml" +DISPLAY_NAME = f"temp_create_data_labeling_job_active_learning_test_{uuid.uuid4()}" + +INSTRUCTIONS_GCS_URI = ( + "gs://ucaip-sample-resources/images/datalabeling_instructions.pdf" +) +ANNOTATION_SPEC = "rose" + + +@pytest.fixture +def shared_state(): + state = {} + yield state + + +@pytest.fixture +def job_client(): + client_options = {"api_endpoint": API_ENDPOINT} + job_client = aiplatform.gapic.JobServiceClient(client_options=client_options) + yield job_client + + +@pytest.fixture(scope="function", autouse=True) +def teardown(capsys, shared_state, job_client): + yield + + job_client.cancel_data_labeling_job(name=shared_state["data_labeling_job_name"]) + + # Verify Data Labeling Job is cancelled, or time out after 400 seconds + helpers.wait_for_job_state( + get_job_method=job_client.get_data_labeling_job, + name=shared_state["data_labeling_job_name"], + timeout=400, + 
freq=10, + ) + + # Delete the data labeling job + response = job_client.delete_data_labeling_job( + name=shared_state["data_labeling_job_name"] + ) + + print("Delete LRO:", response.operation.name) + delete_data_labeling_job_response = response.result(timeout=300) + print("delete_data_labeling_job_response", delete_data_labeling_job_response) + + out, _ = capsys.readouterr() + assert "delete_data_labeling_job_response" in out + + +# Creating a data labeling job for images +def test_create_data_labeling_job_active_learning_sample(capsys, shared_state): + + create_data_labeling_job_active_learning_sample.create_data_labeling_job_active_learning_sample( + project=PROJECT_ID, + display_name=DISPLAY_NAME, + dataset=f"projects/{PROJECT_ID}/locations/{LOCATION}/datasets/{DATASET_ID}", + instruction_uri=INSTRUCTIONS_GCS_URI, + inputs_schema_uri=INPUTS_SCHEMA_URI, + annotation_spec=ANNOTATION_SPEC, + api_endpoint=API_ENDPOINT, + ) + + out, _ = capsys.readouterr() + + # Save resource name of the newly created data labeling job + shared_state["data_labeling_job_name"] = helpers.get_name(out) diff --git a/samples/snippets/create_data_labeling_job_image_segmentation_sample.py b/samples/snippets/create_data_labeling_job_image_segmentation_sample.py new file mode 100644 index 0000000000..94a80b9dd8 --- /dev/null +++ b/samples/snippets/create_data_labeling_job_image_segmentation_sample.py @@ -0,0 +1,58 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# [START aiplatform_create_data_labeling_job_image_segmentation_sample] +from google.cloud import aiplatform +from google.protobuf import json_format +from google.protobuf.struct_pb2 import Value + + +def create_data_labeling_job_image_segmentation_sample( + project: str, + display_name: str, + dataset: str, + instruction_uri: str, + inputs_schema_uri: str, + annotation_spec: dict, + annotation_set_name: str, + location: str = "us-central1", + api_endpoint: str = "us-central1-aiplatform.googleapis.com", +): + client_options = {"api_endpoint": api_endpoint} + # Initialize client that will be used to create and send requests. + # This client only needs to be created once, and can be reused for multiple requests. + client = aiplatform.gapic.JobServiceClient(client_options=client_options) + inputs_dict = {"annotationSpecColors": [annotation_spec]} + inputs = json_format.ParseDict(inputs_dict, Value()) + + data_labeling_job = { + "display_name": display_name, + # Full resource name: projects/{project}/locations/{location}/datasets/{dataset_id} + "datasets": [dataset], + "labeler_count": 1, + "instruction_uri": instruction_uri, + "inputs_schema_uri": inputs_schema_uri, + "inputs": inputs, + "annotation_labels": { + "aiplatform.googleapis.com/annotation_set_name": annotation_set_name + }, + } + parent = f"projects/{project}/locations/{location}" + response = client.create_data_labeling_job( + parent=parent, data_labeling_job=data_labeling_job + ) + print("response:", response) + + +# [END aiplatform_create_data_labeling_job_image_segmentation_sample] diff --git a/samples/snippets/create_data_labeling_job_image_segmentation_test.py b/samples/snippets/create_data_labeling_job_image_segmentation_test.py new file mode 100644 index 0000000000..79f40d949c --- /dev/null +++ b/samples/snippets/create_data_labeling_job_image_segmentation_test.py @@ -0,0 +1,96 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use 
this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +import os +import uuid +from google.cloud import aiplatform + +import helpers + +import create_data_labeling_job_image_segmentation_sample + +API_ENDPOINT = os.getenv("DATA_LABELING_API_ENDPOINT") +PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") +LOCATION = "us-central1" +DATASET_ID = "5111009432972558336" +INPUTS_SCHEMA_URI = "gs://google-cloud-aiplatform/schema/datalabelingjob/inputs/image_segmentation_1.0.0.yaml" +DISPLAY_NAME = f"temp_create_data_labeling_job_image_segmentation_test_{uuid.uuid4()}" + +INSTRUCTIONS_GCS_URI = ( + "gs://ucaip-sample-resources/images/datalabeling_instructions.pdf" +) +ANNOTATION_SPEC = {"color": {"red": 1.0}, "displayName": "rose"} +ANNOTATION_SET_NAME = f"temp_image_segmentation_{uuid.uuid4()}" + +@pytest.fixture +def shared_state(): + state = {} + yield state + + +@pytest.fixture +def job_client(): + client_options = {"api_endpoint": API_ENDPOINT} + job_client = aiplatform.gapic.JobServiceClient(client_options=client_options) + yield job_client + + +@pytest.fixture(scope="function", autouse=True) +def teardown(capsys, shared_state, job_client): + yield + + job_client.cancel_data_labeling_job(name=shared_state["data_labeling_job_name"]) + + # Verify Data Labeling Job is cancelled, or time out after 400 seconds + helpers.wait_for_job_state( + get_job_method=job_client.get_data_labeling_job, + name=shared_state["data_labeling_job_name"], + timeout=400, + freq=10, + ) + + # Delete the data labeling job + response = job_client.delete_data_labeling_job( 
+ name=shared_state["data_labeling_job_name"] + ) + + print("Delete LRO:", response.operation.name) + delete_data_labeling_job_response = response.result(timeout=300) + print("delete_data_labeling_job_response", delete_data_labeling_job_response) + + out, _ = capsys.readouterr() + assert "delete_data_labeling_job_response" in out + + +# Creating a data labeling job for images +def test_create_data_labeling_job_image_segmentation_sample(capsys, shared_state): + + dataset = f"projects/{PROJECT_ID}/locations/{LOCATION}/datasets/{DATASET_ID}" + + create_data_labeling_job_image_segmentation_sample.create_data_labeling_job_image_segmentation_sample( + project=PROJECT_ID, + display_name=DISPLAY_NAME, + dataset=dataset, + instruction_uri=INSTRUCTIONS_GCS_URI, + inputs_schema_uri=INPUTS_SCHEMA_URI, + annotation_spec=ANNOTATION_SPEC, + annotation_set_name=ANNOTATION_SET_NAME, + api_endpoint=API_ENDPOINT, + ) + + out, _ = capsys.readouterr() + + # Save resource name of the newly created data labeling job + shared_state["data_labeling_job_name"] = helpers.get_name(out) diff --git a/samples/snippets/create_data_labeling_job_specialist_pool_sample.py b/samples/snippets/create_data_labeling_job_specialist_pool_sample.py new file mode 100644 index 0000000000..5cbded1fea --- /dev/null +++ b/samples/snippets/create_data_labeling_job_specialist_pool_sample.py @@ -0,0 +1,60 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# [START aiplatform_create_data_labeling_job_specialist_pool_sample] +from google.cloud import aiplatform +from google.protobuf import json_format +from google.protobuf.struct_pb2 import Value + + +def create_data_labeling_job_specialist_pool_sample( + project: str, + display_name: str, + dataset: str, + specialist_pool: str, + instruction_uri: str, + inputs_schema_uri: str, + annotation_spec: str, + location: str = "us-central1", + api_endpoint: str = "us-central1-aiplatform.googleapis.com", +): + client_options = {"api_endpoint": api_endpoint} + # Initialize client that will be used to create and send requests. + # This client only needs to be created once, and can be reused for multiple requests. + client = aiplatform.gapic.JobServiceClient(client_options=client_options) + inputs_dict = {"annotation_specs": [annotation_spec]} + inputs = json_format.ParseDict(inputs_dict, Value()) + + data_labeling_job = { + "display_name": display_name, + # Full resource name: projects/{project}/locations/{location}/datasets/{dataset_id} + "datasets": [dataset], + "labeler_count": 1, + "instruction_uri": instruction_uri, + "inputs_schema_uri": inputs_schema_uri, + "inputs": inputs, + "annotation_labels": { + "aiplatform.googleapis.com/annotation_set_name": "data_labeling_job_specialist_pool" + }, + # Full resource name: projects/{project}/locations/{location}/specialistPools/{specialist_pool_id} + "specialist_pools": [specialist_pool], + } + parent = f"projects/{project}/locations/{location}" + response = client.create_data_labeling_job( + parent=parent, data_labeling_job=data_labeling_job + ) + print("response:", response) + + +# [END aiplatform_create_data_labeling_job_specialist_pool_sample] diff --git a/samples/snippets/create_data_labeling_job_specialist_pool_test.py b/samples/snippets/create_data_labeling_job_specialist_pool_test.py new file mode 100644 index 0000000000..8936fa6776 --- /dev/null +++ b/samples/snippets/create_data_labeling_job_specialist_pool_test.py @@ 
-0,0 +1,98 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +import os +import uuid +from google.cloud import aiplatform + +import helpers + +import create_data_labeling_job_specialist_pool_sample + +API_ENDPOINT = os.getenv("DATA_LABELING_API_ENDPOINT") +PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") +LOCATION = "us-central1" +DATASET_ID = "1905673553261363200" +SPECIALIST_POOL_ID = "5898026661995085824" +INPUTS_SCHEMA_URI = "gs://google-cloud-aiplatform/schema/datalabelingjob/inputs/image_classification_1.0.0.yaml" +DISPLAY_NAME = f"temp_create_data_labeling_job_specialist_pool_test_{uuid.uuid4()}" + +INSTRUCTIONS_GCS_URI = ( + "gs://ucaip-sample-resources/images/datalabeling_instructions.pdf" +) +ANNOTATION_SPEC = "rose" + + +@pytest.fixture +def shared_state(): + state = {} + yield state + + +@pytest.fixture +def job_client(): + client_options = {"api_endpoint": API_ENDPOINT} + job_client = aiplatform.gapic.JobServiceClient(client_options=client_options) + yield job_client + + +@pytest.fixture(scope="function", autouse=True) +def teardown(capsys, shared_state, job_client): + yield + + job_client.cancel_data_labeling_job(name=shared_state["data_labeling_job_name"]) + + # Verify Data Labeling Job is cancelled, or time out after 400 seconds + helpers.wait_for_job_state( + get_job_method=job_client.get_data_labeling_job, + name=shared_state["data_labeling_job_name"], + timeout=400, + freq=10, + ) + + # Delete 
the data labeling job + response = job_client.delete_data_labeling_job( + name=shared_state["data_labeling_job_name"] + ) + + print("Delete LRO:", response.operation.name) + delete_data_labeling_job_response = response.result(timeout=300) + print("delete_data_labeling_job_response", delete_data_labeling_job_response) + + out, _ = capsys.readouterr() + assert "delete_data_labeling_job_response" in out + + +# Creating a data labeling job for images +def test_create_data_labeling_job_specialist_pool_sample(capsys, shared_state): + + dataset = f"projects/{PROJECT_ID}/locations/{LOCATION}/datasets/{DATASET_ID}" + specialist_pool = f"projects/{PROJECT_ID}/locations/{LOCATION}/specialistPools/{SPECIALIST_POOL_ID}" + + create_data_labeling_job_specialist_pool_sample.create_data_labeling_job_specialist_pool_sample( + project=PROJECT_ID, + display_name=DISPLAY_NAME, + dataset=dataset, + specialist_pool=specialist_pool, + instruction_uri=INSTRUCTIONS_GCS_URI, + inputs_schema_uri=INPUTS_SCHEMA_URI, + annotation_spec=ANNOTATION_SPEC, + api_endpoint=API_ENDPOINT, + ) + + out, _ = capsys.readouterr() + + # Save resource name of the newly created data labeling job + shared_state["data_labeling_job_name"] = helpers.get_name(out)