From 1d8efd9d74b8ae4c865751f60a01baed5a8d8d24 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Wed, 1 Apr 2020 10:40:03 -0700 Subject: [PATCH] feat: add v1beta2, remove v1beta1 (#13) * feat: generate v1beta2 * chore: remove extra line in changelog --- .eggs/README.txt | 6 + .flake8 | 2 +- .repo-metadata.json | 2 +- CHANGELOG.md | 2 +- README.rst | 15 +- docs/documentai_v1beta2/services.rst | 6 + docs/documentai_v1beta2/types.rst | 5 + docs/gapic/v1beta1/api.rst | 6 - docs/gapic/v1beta1/types.rst | 5 - docs/index.rst | 9 +- google/cloud/documentai.py | 25 - google/cloud/documentai/__init__.py | 83 + google/cloud/documentai/py.typed | 2 + google/cloud/documentai_v1beta1/__init__.py | 45 - .../documentai_v1beta1/gapic/__init__.py | 0 .../document_understanding_service_client.py | 291 -- ...ent_understanding_service_client_config.py | 28 - .../cloud/documentai_v1beta1/gapic/enums.py | 84 - .../gapic/transports/__init__.py | 0 ...nt_understanding_service_grpc_transport.py | 131 - .../documentai_v1beta1/proto/__init__.py | 0 .../documentai_v1beta1/proto/document.proto | 446 --- .../documentai_v1beta1/proto/document_pb2.py | 2698 ----------------- .../proto/document_pb2_grpc.py | 2 - .../proto/document_understanding.proto | 299 -- .../proto/document_understanding_pb2.py | 1559 ---------- .../proto/document_understanding_pb2_grpc.py | 57 - .../documentai_v1beta1/proto/geometry.proto | 55 - .../documentai_v1beta1/proto/geometry_pb2.py | 270 -- .../proto/geometry_pb2_grpc.py | 2 - google/cloud/documentai_v1beta1/types.py | 58 - google/cloud/documentai_v1beta2/__init__.py | 64 + google/cloud/documentai_v1beta2/py.typed | 2 + .../services}/__init__.py | 16 +- .../__init__.py | 16 +- .../document_understanding_service/client.py | 281 ++ .../transports/__init__.py | 35 + .../transports/base.py | 86 + .../transports/grpc.py | 194 ++ .../documentai_v1beta2/types/__init__.py | 61 + .../documentai_v1beta2/types/document.py | 756 +++++ 
.../types/document_understanding.py | 432 +++ .../documentai_v1beta2/types/geometry.py | 75 + mypy.ini | 3 + noxfile.py | 4 +- scripts/fixup_keywords.py | 178 ++ setup.py | 56 +- synth.metadata | 33 +- synth.py | 29 +- .../test_document_understanding_service.py | 296 ++ ...nt_understanding_service_client_v1beta1.py | 118 - 51 files changed, 2641 insertions(+), 6287 deletions(-) create mode 100644 .eggs/README.txt create mode 100644 docs/documentai_v1beta2/services.rst create mode 100644 docs/documentai_v1beta2/types.rst delete mode 100644 docs/gapic/v1beta1/api.rst delete mode 100644 docs/gapic/v1beta1/types.rst delete mode 100644 google/cloud/documentai.py create mode 100644 google/cloud/documentai/__init__.py create mode 100644 google/cloud/documentai/py.typed delete mode 100644 google/cloud/documentai_v1beta1/__init__.py delete mode 100644 google/cloud/documentai_v1beta1/gapic/__init__.py delete mode 100644 google/cloud/documentai_v1beta1/gapic/document_understanding_service_client.py delete mode 100644 google/cloud/documentai_v1beta1/gapic/document_understanding_service_client_config.py delete mode 100644 google/cloud/documentai_v1beta1/gapic/enums.py delete mode 100644 google/cloud/documentai_v1beta1/gapic/transports/__init__.py delete mode 100644 google/cloud/documentai_v1beta1/gapic/transports/document_understanding_service_grpc_transport.py delete mode 100644 google/cloud/documentai_v1beta1/proto/__init__.py delete mode 100644 google/cloud/documentai_v1beta1/proto/document.proto delete mode 100644 google/cloud/documentai_v1beta1/proto/document_pb2.py delete mode 100644 google/cloud/documentai_v1beta1/proto/document_pb2_grpc.py delete mode 100644 google/cloud/documentai_v1beta1/proto/document_understanding.proto delete mode 100644 google/cloud/documentai_v1beta1/proto/document_understanding_pb2.py delete mode 100644 google/cloud/documentai_v1beta1/proto/document_understanding_pb2_grpc.py delete mode 100644 google/cloud/documentai_v1beta1/proto/geometry.proto 
delete mode 100644 google/cloud/documentai_v1beta1/proto/geometry_pb2.py delete mode 100644 google/cloud/documentai_v1beta1/proto/geometry_pb2_grpc.py delete mode 100644 google/cloud/documentai_v1beta1/types.py create mode 100644 google/cloud/documentai_v1beta2/__init__.py create mode 100644 google/cloud/documentai_v1beta2/py.typed rename google/cloud/{ => documentai_v1beta2/services}/__init__.py (67%) rename google/{ => cloud/documentai_v1beta2/services/document_understanding_service}/__init__.py (67%) create mode 100644 google/cloud/documentai_v1beta2/services/document_understanding_service/client.py create mode 100644 google/cloud/documentai_v1beta2/services/document_understanding_service/transports/__init__.py create mode 100644 google/cloud/documentai_v1beta2/services/document_understanding_service/transports/base.py create mode 100644 google/cloud/documentai_v1beta2/services/document_understanding_service/transports/grpc.py create mode 100644 google/cloud/documentai_v1beta2/types/__init__.py create mode 100644 google/cloud/documentai_v1beta2/types/document.py create mode 100644 google/cloud/documentai_v1beta2/types/document_understanding.py create mode 100644 google/cloud/documentai_v1beta2/types/geometry.py create mode 100644 mypy.ini create mode 100644 scripts/fixup_keywords.py create mode 100644 tests/unit/documentai_v1beta2/test_document_understanding_service.py delete mode 100644 tests/unit/gapic/v1beta1/test_document_understanding_service_client_v1beta1.py diff --git a/.eggs/README.txt b/.eggs/README.txt new file mode 100644 index 00000000..5d016688 --- /dev/null +++ b/.eggs/README.txt @@ -0,0 +1,6 @@ +This directory contains eggs that were downloaded by setuptools to build, test, and run plug-ins. + +This directory caches those eggs to prevent repeated downloads. + +However, it is safe to delete this directory. + diff --git a/.flake8 b/.flake8 index 0268ecc9..ed758f48 100644 --- a/.flake8 +++ b/.flake8 @@ -1,6 +1,6 @@ # Generated by synthtool. 
DO NOT EDIT! [flake8] -ignore = E203, E266, E501, W503 +ignore = E203, E266, E501, W503, F401, F841 exclude = # Exclude generated code. **/proto/** diff --git a/.repo-metadata.json b/.repo-metadata.json index 06ac13a3..44c100cb 100644 --- a/.repo-metadata.json +++ b/.repo-metadata.json @@ -4,7 +4,7 @@ "product_documentation": "https://cloud.google.com/document-understanding/docs/", "client_documentation": "https://googleapis.dev/python/documentai/latest", "issue_tracker": "", - "release_level": "alpha", + "release_level": "beta", "language": "python", "repo": "googleapis/python-documentai", "distribution_name": "google-cloud-documentai", diff --git a/CHANGELOG.md b/CHANGELOG.md index 825c32f0..5ddad421 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1 +1 @@ -# Changelog +# Changelog \ No newline at end of file diff --git a/README.rst b/README.rst index 13151dbc..f797d7a3 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,8 @@ -Python Client for Cloud Document AI API (`Alpha`_) +Python Client for Cloud Document AI API ================================================== +|beta| |pypi| |versions| + `Cloud Document AI API`_: Service to parse structured information from unstructured or semi-structured documents using state-of-the-art Google AI such as natural language, computer vision, translation, and AutoML. @@ -8,7 +10,12 @@ language, computer vision, translation, and AutoML. - `Client Library Documentation`_ - `Product Documentation`_ -.. _Alpha: https://github.com/googleapis/google-cloud-python/blob/master/README.rst +.. |beta| image:: https://img.shields.io/badge/support-beta-orange.svg + :target: https://github.com/googleapis/google-cloud-python/blob/master/README.rst#beta-support +.. |pypi| image:: https://img.shields.io/pypi/v/google-cloud-documentai.svg + :target: https://pypi.org/project/google-cloud-documentai/ +.. 
|versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-documentai.svg + :target: https://pypi.org/project/google-cloud-documentai/ .. _Cloud Document AI API: https://cloud.google.com/document-understanding/docs/ .. _Client Library Documentation: https://googleapis.dev/python/documentai/latest .. _Product Documentation: https://cloud.google.com/document-understanding/docs/ @@ -70,8 +77,8 @@ Next Steps API to see other available methods on the client. - Read the `Cloud Document AI API Product documentation`_ to learn more about the product and see How-to Guides. -- View this `repository’s main README`_ to see the full list of Cloud +- View this `README`_ to see the full list of Cloud APIs that we cover. .. _Cloud Document AI API Product documentation: https://cloud.google.com/document-understanding/docs/ -.. _repository’s main README: https://github.com/googleapis/google-cloud-python/blob/master/README.rst \ No newline at end of file +.. _README: https://github.com/googleapis/google-cloud-python/blob/master/README.rst \ No newline at end of file diff --git a/docs/documentai_v1beta2/services.rst b/docs/documentai_v1beta2/services.rst new file mode 100644 index 00000000..ea9bfbe4 --- /dev/null +++ b/docs/documentai_v1beta2/services.rst @@ -0,0 +1,6 @@ +Client for Google Cloud Documentai API +====================================== + +.. automodule:: google.cloud.documentai_v1beta2 + :members: + :inherited-members: diff --git a/docs/documentai_v1beta2/types.rst b/docs/documentai_v1beta2/types.rst new file mode 100644 index 00000000..d116ddab --- /dev/null +++ b/docs/documentai_v1beta2/types.rst @@ -0,0 +1,5 @@ +Types for Google Cloud Documentai API +===================================== + +.. 
automodule:: google.cloud.documentai_v1beta2.types + :members: diff --git a/docs/gapic/v1beta1/api.rst b/docs/gapic/v1beta1/api.rst deleted file mode 100644 index e588392f..00000000 --- a/docs/gapic/v1beta1/api.rst +++ /dev/null @@ -1,6 +0,0 @@ -Client for Cloud Document AI API -================================ - -.. automodule:: google.cloud.documentai_v1beta1 - :members: - :inherited-members: \ No newline at end of file diff --git a/docs/gapic/v1beta1/types.rst b/docs/gapic/v1beta1/types.rst deleted file mode 100644 index 469eb107..00000000 --- a/docs/gapic/v1beta1/types.rst +++ /dev/null @@ -1,5 +0,0 @@ -Types for Cloud Document AI API Client -====================================== - -.. automodule:: google.cloud.documentai_v1beta1.types - :members: \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index f15473e5..2ae541b5 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,12 +1,13 @@ .. include:: README.rst -Api Reference +API Reference ------------- .. toctree:: :maxdepth: 2 - gapic/v1beta1/api - gapic/v1beta1/types + documentai_v1beta2/services + documentai_v1beta2/types + Changelog --------- @@ -16,4 +17,4 @@ For a list of all ``google-cloud-documentai`` releases: .. toctree:: :maxdepth: 2 - changelog \ No newline at end of file + changelog diff --git a/google/cloud/documentai.py b/google/cloud/documentai.py deleted file mode 100644 index de0e1a7e..00000000 --- a/google/cloud/documentai.py +++ /dev/null @@ -1,25 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - - -from __future__ import absolute_import - -from google.cloud.documentai_v1beta1 import DocumentUnderstandingServiceClient -from google.cloud.documentai_v1beta1 import enums -from google.cloud.documentai_v1beta1 import types - - -__all__ = ("enums", "types", "DocumentUnderstandingServiceClient") diff --git a/google/cloud/documentai/__init__.py b/google/cloud/documentai/__init__.py new file mode 100644 index 00000000..a14d0857 --- /dev/null +++ b/google/cloud/documentai/__init__.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +from google.cloud.documentai_v1beta2.services.document_understanding_service.client import ( + DocumentUnderstandingServiceClient, +) +from google.cloud.documentai_v1beta2.types.document import Document +from google.cloud.documentai_v1beta2.types.document_understanding import AutoMlParams +from google.cloud.documentai_v1beta2.types.document_understanding import ( + BatchProcessDocumentsRequest, +) +from google.cloud.documentai_v1beta2.types.document_understanding import ( + BatchProcessDocumentsResponse, +) +from google.cloud.documentai_v1beta2.types.document_understanding import ( + EntityExtractionParams, +) +from google.cloud.documentai_v1beta2.types.document_understanding import ( + FormExtractionParams, +) +from google.cloud.documentai_v1beta2.types.document_understanding import GcsDestination +from google.cloud.documentai_v1beta2.types.document_understanding import GcsSource +from google.cloud.documentai_v1beta2.types.document_understanding import InputConfig +from google.cloud.documentai_v1beta2.types.document_understanding import ( + KeyValuePairHint, +) +from google.cloud.documentai_v1beta2.types.document_understanding import OcrParams +from google.cloud.documentai_v1beta2.types.document_understanding import ( + OperationMetadata, +) +from google.cloud.documentai_v1beta2.types.document_understanding import OutputConfig +from google.cloud.documentai_v1beta2.types.document_understanding import ( + ProcessDocumentRequest, +) +from google.cloud.documentai_v1beta2.types.document_understanding import ( + ProcessDocumentResponse, +) +from google.cloud.documentai_v1beta2.types.document_understanding import TableBoundHint +from google.cloud.documentai_v1beta2.types.document_understanding import ( + TableExtractionParams, +) +from google.cloud.documentai_v1beta2.types.geometry import BoundingPoly +from google.cloud.documentai_v1beta2.types.geometry import NormalizedVertex +from google.cloud.documentai_v1beta2.types.geometry import Vertex + +__all__ = ( + 
"AutoMlParams", + "BatchProcessDocumentsRequest", + "BatchProcessDocumentsResponse", + "BoundingPoly", + "Document", + "DocumentUnderstandingServiceClient", + "EntityExtractionParams", + "FormExtractionParams", + "GcsDestination", + "GcsSource", + "InputConfig", + "KeyValuePairHint", + "NormalizedVertex", + "OcrParams", + "OperationMetadata", + "OutputConfig", + "ProcessDocumentRequest", + "ProcessDocumentResponse", + "TableBoundHint", + "TableExtractionParams", + "Vertex", +) diff --git a/google/cloud/documentai/py.typed b/google/cloud/documentai/py.typed new file mode 100644 index 00000000..81b45001 --- /dev/null +++ b/google/cloud/documentai/py.typed @@ -0,0 +1,2 @@ +# Marker file for PEP 561. +# The google-cloud-documentai package uses inline types. diff --git a/google/cloud/documentai_v1beta1/__init__.py b/google/cloud/documentai_v1beta1/__init__.py deleted file mode 100644 index a8d5baad..00000000 --- a/google/cloud/documentai_v1beta1/__init__.py +++ /dev/null @@ -1,45 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -from __future__ import absolute_import -import sys -import warnings - -from google.cloud.documentai_v1beta1 import types -from google.cloud.documentai_v1beta1.gapic import document_understanding_service_client -from google.cloud.documentai_v1beta1.gapic import enums - - -if sys.version_info[:2] == (2, 7): - message = ( - "A future version of this library will drop support for Python 2.7." - "More details about Python 2 support for Google Cloud Client Libraries" - "can be found at https://cloud.google.com/python/docs/python2-sunset/" - ) - warnings.warn(message, DeprecationWarning) - - -class DocumentUnderstandingServiceClient( - document_understanding_service_client.DocumentUnderstandingServiceClient -): - __doc__ = ( - document_understanding_service_client.DocumentUnderstandingServiceClient.__doc__ - ) - enums = enums - - -__all__ = ("enums", "types", "DocumentUnderstandingServiceClient") diff --git a/google/cloud/documentai_v1beta1/gapic/__init__.py b/google/cloud/documentai_v1beta1/gapic/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/google/cloud/documentai_v1beta1/gapic/document_understanding_service_client.py b/google/cloud/documentai_v1beta1/gapic/document_understanding_service_client.py deleted file mode 100644 index 4627e1dd..00000000 --- a/google/cloud/documentai_v1beta1/gapic/document_understanding_service_client.py +++ /dev/null @@ -1,291 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""Accesses the google.cloud.documentai.v1beta1 DocumentUnderstandingService API.""" - -import pkg_resources -import warnings - -from google.oauth2 import service_account -import google.api_core.client_options -import google.api_core.gapic_v1.client_info -import google.api_core.gapic_v1.config -import google.api_core.gapic_v1.method -import google.api_core.gapic_v1.routing_header -import google.api_core.grpc_helpers -import google.api_core.operation -import google.api_core.operations_v1 -import grpc - -from google.cloud.documentai_v1beta1.gapic import ( - document_understanding_service_client_config, -) -from google.cloud.documentai_v1beta1.gapic import enums -from google.cloud.documentai_v1beta1.gapic.transports import ( - document_understanding_service_grpc_transport, -) -from google.cloud.documentai_v1beta1.proto import document_understanding_pb2 -from google.cloud.documentai_v1beta1.proto import document_understanding_pb2_grpc -from google.longrunning import operations_pb2 - - -_GAPIC_LIBRARY_VERSION = pkg_resources.get_distribution( - "google-cloud-documentai" -).version - - -class DocumentUnderstandingServiceClient(object): - """ - Service to parse structured information from unstructured or semi-structured - documents using state-of-the-art Google AI such as natural language, - computer vision, and translation. - """ - - SERVICE_ADDRESS = "documentai.googleapis.com:443" - """The default address of the service.""" - - # The name of the interface for this client. This is the key used to - # find the method configuration in the client_config dictionary. - _INTERFACE_NAME = "google.cloud.documentai.v1beta1.DocumentUnderstandingService" - - @classmethod - def from_service_account_file(cls, filename, *args, **kwargs): - """Creates an instance of this client using the provided credentials - file. 
- - Args: - filename (str): The path to the service account private key json - file. - args: Additional arguments to pass to the constructor. - kwargs: Additional arguments to pass to the constructor. - - Returns: - DocumentUnderstandingServiceClient: The constructed client. - """ - credentials = service_account.Credentials.from_service_account_file(filename) - kwargs["credentials"] = credentials - return cls(*args, **kwargs) - - from_service_account_json = from_service_account_file - - def __init__( - self, - transport=None, - channel=None, - credentials=None, - client_config=None, - client_info=None, - client_options=None, - ): - """Constructor. - - Args: - transport (Union[~.DocumentUnderstandingServiceGrpcTransport, - Callable[[~.Credentials, type], ~.DocumentUnderstandingServiceGrpcTransport]): A transport - instance, responsible for actually making the API calls. - The default transport uses the gRPC protocol. - This argument may also be a callable which returns a - transport instance. Callables will be sent the credentials - as the first argument and the default transport class as - the second argument. - channel (grpc.Channel): DEPRECATED. A ``Channel`` instance - through which to make calls. This argument is mutually exclusive - with ``credentials``; providing both will raise an exception. - credentials (google.auth.credentials.Credentials): The - authorization credentials to attach to requests. These - credentials identify this application to the service. If none - are specified, the client will attempt to ascertain the - credentials from the environment. - This argument is mutually exclusive with providing a - transport instance to ``transport``; doing so will raise - an exception. - client_config (dict): DEPRECATED. A dictionary of call options for - each method. If not specified, the default configuration is used. 
- client_info (google.api_core.gapic_v1.client_info.ClientInfo): - The client info used to send a user-agent string along with - API requests. If ``None``, then default info will be used. - Generally, you only need to set this if you're developing - your own client library. - client_options (Union[dict, google.api_core.client_options.ClientOptions]): - Client options used to set user options on the client. API Endpoint - should be set through client_options. - """ - # Raise deprecation warnings for things we want to go away. - if client_config is not None: - warnings.warn( - "The `client_config` argument is deprecated.", - PendingDeprecationWarning, - stacklevel=2, - ) - else: - client_config = document_understanding_service_client_config.config - - if channel: - warnings.warn( - "The `channel` argument is deprecated; use " "`transport` instead.", - PendingDeprecationWarning, - stacklevel=2, - ) - - api_endpoint = self.SERVICE_ADDRESS - if client_options: - if type(client_options) == dict: - client_options = google.api_core.client_options.from_dict( - client_options - ) - if client_options.api_endpoint: - api_endpoint = client_options.api_endpoint - - # Instantiate the transport. - # The transport is responsible for handling serialization and - # deserialization and actually sending data to the service. - if transport: - if callable(transport): - self.transport = transport( - credentials=credentials, - default_class=document_understanding_service_grpc_transport.DocumentUnderstandingServiceGrpcTransport, - address=api_endpoint, - ) - else: - if credentials: - raise ValueError( - "Received both a transport instance and " - "credentials; these are mutually exclusive." 
- ) - self.transport = transport - else: - self.transport = document_understanding_service_grpc_transport.DocumentUnderstandingServiceGrpcTransport( - address=api_endpoint, channel=channel, credentials=credentials - ) - - if client_info is None: - client_info = google.api_core.gapic_v1.client_info.ClientInfo( - gapic_version=_GAPIC_LIBRARY_VERSION - ) - else: - client_info.gapic_version = _GAPIC_LIBRARY_VERSION - self._client_info = client_info - - # Parse out the default settings for retry and timeout for each RPC - # from the client configuration. - # (Ordinarily, these are the defaults specified in the `*_config.py` - # file next to this one.) - self._method_configs = google.api_core.gapic_v1.config.parse_method_configs( - client_config["interfaces"][self._INTERFACE_NAME] - ) - - # Save a dictionary of cached API call functions. - # These are the actual callables which invoke the proper - # transport methods, wrapped with `wrap_method` to add retry, - # timeout, and the like. - self._inner_api_calls = {} - - # Service calls - def batch_process_documents( - self, - requests, - parent=None, - retry=google.api_core.gapic_v1.method.DEFAULT, - timeout=google.api_core.gapic_v1.method.DEFAULT, - metadata=None, - ): - """ - LRO endpoint to batch process many documents. - - Example: - >>> from google.cloud import documentai_v1beta1 - >>> - >>> client = documentai_v1beta1.DocumentUnderstandingServiceClient() - >>> - >>> # TODO: Initialize `requests`: - >>> requests = [] - >>> - >>> response = client.batch_process_documents(requests) - >>> - >>> def callback(operation_future): - ... # Handle result. - ... result = operation_future.result() - >>> - >>> response.add_done_callback(callback) - >>> - >>> # Handle metadata. - >>> metadata = response.metadata() - - Args: - requests (list[Union[dict, ~google.cloud.documentai_v1beta1.types.ProcessDocumentRequest]]): Required. Individual requests for each document. 
- - If a dict is provided, it must be of the same form as the protobuf - message :class:`~google.cloud.documentai_v1beta1.types.ProcessDocumentRequest` - parent (str): Target project and location to make a call. - - Format: ``projects/{project-id}/locations/{location-id}``. - - If no location is specified, a region will be chosen automatically. - retry (Optional[google.api_core.retry.Retry]): A retry object used - to retry requests. If ``None`` is specified, requests will - be retried using a default configuration. - timeout (Optional[float]): The amount of time, in seconds, to wait - for the request to complete. Note that if ``retry`` is - specified, the timeout applies to each individual attempt. - metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata - that is provided to the method. - - Returns: - A :class:`~google.cloud.documentai_v1beta1.types._OperationFuture` instance. - - Raises: - google.api_core.exceptions.GoogleAPICallError: If the request - failed for any reason. - google.api_core.exceptions.RetryError: If the request failed due - to a retryable error and retry attempts failed. - ValueError: If the parameters are invalid. - """ - # Wrap the transport method to add retry and timeout logic. 
- if "batch_process_documents" not in self._inner_api_calls: - self._inner_api_calls[ - "batch_process_documents" - ] = google.api_core.gapic_v1.method.wrap_method( - self.transport.batch_process_documents, - default_retry=self._method_configs["BatchProcessDocuments"].retry, - default_timeout=self._method_configs["BatchProcessDocuments"].timeout, - client_info=self._client_info, - ) - - request = document_understanding_pb2.BatchProcessDocumentsRequest( - requests=requests, parent=parent - ) - if metadata is None: - metadata = [] - metadata = list(metadata) - try: - routing_header = [("parent", parent)] - except AttributeError: - pass - else: - routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata( - routing_header - ) - metadata.append(routing_metadata) - - operation = self._inner_api_calls["batch_process_documents"]( - request, retry=retry, timeout=timeout, metadata=metadata - ) - return google.api_core.operation.from_gapic( - operation, - self.transport._operations_client, - document_understanding_pb2.BatchProcessDocumentsResponse, - metadata_type=document_understanding_pb2.OperationMetadata, - ) diff --git a/google/cloud/documentai_v1beta1/gapic/document_understanding_service_client_config.py b/google/cloud/documentai_v1beta1/gapic/document_understanding_service_client_config.py deleted file mode 100644 index 25695074..00000000 --- a/google/cloud/documentai_v1beta1/gapic/document_understanding_service_client_config.py +++ /dev/null @@ -1,28 +0,0 @@ -config = { - "interfaces": { - "google.cloud.documentai.v1beta1.DocumentUnderstandingService": { - "retry_codes": { - "idempotent": ["DEADLINE_EXCEEDED", "UNAVAILABLE"], - "non_idempotent": [], - }, - "retry_params": { - "default": { - "initial_retry_delay_millis": 100, - "retry_delay_multiplier": 1.3, - "max_retry_delay_millis": 60000, - "initial_rpc_timeout_millis": 20000, - "rpc_timeout_multiplier": 1.0, - "max_rpc_timeout_millis": 20000, - "total_timeout_millis": 600000, - } - }, - 
"methods": { - "BatchProcessDocuments": { - "timeout_millis": 60000, - "retry_codes_name": "idempotent", - "retry_params_name": "default", - } - }, - } - } -} diff --git a/google/cloud/documentai_v1beta1/gapic/enums.py b/google/cloud/documentai_v1beta1/gapic/enums.py deleted file mode 100644 index 92d66699..00000000 --- a/google/cloud/documentai_v1beta1/gapic/enums.py +++ /dev/null @@ -1,84 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Wrappers for protocol buffer enum types.""" - -import enum - - -class Document(object): - class Page(object): - class Layout(object): - class Orientation(enum.IntEnum): - """ - Detected human reading orientation. - - Attributes: - ORIENTATION_UNSPECIFIED (int): Unspecified orientation. - PAGE_UP (int): Orientation is aligned with page up. - PAGE_RIGHT (int): Orientation is aligned with page right. - Turn the head 90 degrees clockwise from upright to read. - PAGE_DOWN (int): Orientation is aligned with page down. - Turn the head 180 degrees from upright to read. - PAGE_LEFT (int): Orientation is aligned with page left. - Turn the head 90 degrees counterclockwise from upright to read. - """ - - ORIENTATION_UNSPECIFIED = 0 - PAGE_UP = 1 - PAGE_RIGHT = 2 - PAGE_DOWN = 3 - PAGE_LEFT = 4 - - class Token(object): - class DetectedBreak(object): - class Type(enum.IntEnum): - """ - Enum to denote the type of break found. 
- - Attributes: - TYPE_UNSPECIFIED (int): Unspecified break type. - SPACE (int): A single whitespace. - WIDE_SPACE (int): A wider whitespace. - HYPHEN (int): A hyphen that indicates that a token has been split across lines. - """ - - TYPE_UNSPECIFIED = 0 - SPACE = 1 - WIDE_SPACE = 2 - HYPHEN = 3 - - -class OperationMetadata(object): - class State(enum.IntEnum): - """ - Attributes: - STATE_UNSPECIFIED (int): The default value. This value is used if the state is omitted. - ACCEPTED (int): Request is received. - WAITING (int): Request operation is waiting for scheduling. - RUNNING (int): Request is being processed. - SUCCEEDED (int): The batch processing completed successfully. - CANCELLED (int): The batch processing was cancelled. - FAILED (int): The batch processing has failed. - """ - - STATE_UNSPECIFIED = 0 - ACCEPTED = 1 - WAITING = 2 - RUNNING = 3 - SUCCEEDED = 4 - CANCELLED = 5 - FAILED = 6 diff --git a/google/cloud/documentai_v1beta1/gapic/transports/__init__.py b/google/cloud/documentai_v1beta1/gapic/transports/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/google/cloud/documentai_v1beta1/gapic/transports/document_understanding_service_grpc_transport.py b/google/cloud/documentai_v1beta1/gapic/transports/document_understanding_service_grpc_transport.py deleted file mode 100644 index 46b9503a..00000000 --- a/google/cloud/documentai_v1beta1/gapic/transports/document_understanding_service_grpc_transport.py +++ /dev/null @@ -1,131 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - - -import google.api_core.grpc_helpers -import google.api_core.operations_v1 - -from google.cloud.documentai_v1beta1.proto import document_understanding_pb2_grpc - - -class DocumentUnderstandingServiceGrpcTransport(object): - """gRPC transport class providing stubs for - google.cloud.documentai.v1beta1 DocumentUnderstandingService API. - - The transport provides access to the raw gRPC stubs, - which can be used to take advantage of advanced - features of gRPC. - """ - - # The scopes needed to make gRPC calls to all of the methods defined - # in this service. - _OAUTH_SCOPES = ("https://www.googleapis.com/auth/cloud-platform",) - - def __init__( - self, channel=None, credentials=None, address="documentai.googleapis.com:443" - ): - """Instantiate the transport class. - - Args: - channel (grpc.Channel): A ``Channel`` instance through - which to make calls. This argument is mutually exclusive - with ``credentials``; providing both will raise an exception. - credentials (google.auth.credentials.Credentials): The - authorization credentials to attach to requests. These - credentials identify this application to the service. If none - are specified, the client will attempt to ascertain the - credentials from the environment. - address (str): The address where the service is hosted. - """ - # If both `channel` and `credentials` are specified, raise an - # exception (channels come with credentials baked in already). - if channel is not None and credentials is not None: - raise ValueError( - "The `channel` and `credentials` arguments are mutually " "exclusive." - ) - - # Create the channel. 
- if channel is None: - channel = self.create_channel( - address=address, - credentials=credentials, - options={ - "grpc.max_send_message_length": -1, - "grpc.max_receive_message_length": -1, - }.items(), - ) - - self._channel = channel - - # gRPC uses objects called "stubs" that are bound to the - # channel and provide a basic method for each RPC. - self._stubs = { - "document_understanding_service_stub": document_understanding_pb2_grpc.DocumentUnderstandingServiceStub( - channel - ) - } - - # Because this API includes a method that returns a - # long-running operation (proto: google.longrunning.Operation), - # instantiate an LRO client. - self._operations_client = google.api_core.operations_v1.OperationsClient( - channel - ) - - @classmethod - def create_channel( - cls, address="documentai.googleapis.com:443", credentials=None, **kwargs - ): - """Create and return a gRPC channel object. - - Args: - address (str): The host for the channel to use. - credentials (~.Credentials): The - authorization credentials to attach to requests. These - credentials identify this application to the service. If - none are specified, the client will attempt to ascertain - the credentials from the environment. - kwargs (dict): Keyword arguments, which are passed to the - channel creation. - - Returns: - grpc.Channel: A gRPC channel object. - """ - return google.api_core.grpc_helpers.create_channel( - address, credentials=credentials, scopes=cls._OAUTH_SCOPES, **kwargs - ) - - @property - def channel(self): - """The gRPC channel used by the transport. - - Returns: - grpc.Channel: A gRPC channel object. - """ - return self._channel - - @property - def batch_process_documents(self): - """Return the gRPC stub for :meth:`DocumentUnderstandingServiceClient.batch_process_documents`. - - LRO endpoint to batch process many documents. - - Returns: - Callable: A callable which accepts the appropriate - deserialized request object and returns a - deserialized response object. 
- """ - return self._stubs["document_understanding_service_stub"].BatchProcessDocuments diff --git a/google/cloud/documentai_v1beta1/proto/__init__.py b/google/cloud/documentai_v1beta1/proto/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/google/cloud/documentai_v1beta1/proto/document.proto b/google/cloud/documentai_v1beta1/proto/document.proto deleted file mode 100644 index 1303c32d..00000000 --- a/google/cloud/documentai_v1beta1/proto/document.proto +++ /dev/null @@ -1,446 +0,0 @@ -// Copyright 2019 Google LLC. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -syntax = "proto3"; - -package google.cloud.documentai.v1beta1; - -import "google/api/annotations.proto"; -import "google/cloud/documentai/v1beta1/geometry.proto"; -import "google/rpc/status.proto"; -import "google/type/color.proto"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta1;documentai"; -option java_multiple_files = true; -option java_outer_classname = "DocumentProto"; -option java_package = "com.google.cloud.documentai.v1beta1"; - -// Document represents the canonical document resource in Document Understanding -// AI. -// It is an interchange format that provides insights into documents and allows -// for collaboration between users and Document Understanding AI to iterate and -// optimize for quality. -message Document { - // For a large document, sharding may be performed to produce several - // document shards. 
Each document shard contains this field to detail which - // shard it is. - message ShardInfo { - // The 0-based index of this shard. - int64 shard_index = 1; - - // Total number of shards. - int64 shard_count = 2; - - // The index of the first character in - // [Document.text][google.cloud.documentai.v1beta1.Document.text] in the - // overall document global text. - int64 text_offset = 3; - } - - // Annotation for common text style attributes. This adheres to CSS - // conventions as much as possible. - message Style { - // Font size with unit. - message FontSize { - // Font size for the text. - float size = 1; - - // Unit for the font size. Follows CSS naming (in, px, pt, etc.). - string unit = 2; - } - - // Text anchor indexing into the - // [Document.text][google.cloud.documentai.v1beta1.Document.text]. - TextAnchor text_anchor = 1; - - // Text color. - google.type.Color color = 2; - - // Text background color. - google.type.Color background_color = 3; - - // Font weight. Possible values are normal, bold, bolder, and lighter. - // https://www.w3schools.com/cssref/pr_font_weight.asp - string font_weight = 4; - - // Text style. Possible values are normal, italic, and oblique. - // https://www.w3schools.com/cssref/pr_font_font-style.asp - string text_style = 5; - - // Text decoration. Follows CSS standard. - // - // https://www.w3schools.com/cssref/pr_text_text-decoration.asp - string text_decoration = 6; - - // Font size. - FontSize font_size = 7; - } - - // A page in a [Document][google.cloud.documentai.v1beta1.Document]. - message Page { - // Dimension for the page. - message Dimension { - // Page width. - float width = 1; - - // Page height. - float height = 2; - - // Dimension unit. - string unit = 3; - } - - // Visual element describing a layout unit on a page. - message Layout { - // Detected human reading orientation. - enum Orientation { - // Unspecified orientation. - ORIENTATION_UNSPECIFIED = 0; - - // Orientation is aligned with page up. 
- PAGE_UP = 1; - - // Orientation is aligned with page right. - // Turn the head 90 degrees clockwise from upright to read. - PAGE_RIGHT = 2; - - // Orientation is aligned with page down. - // Turn the head 180 degrees from upright to read. - PAGE_DOWN = 3; - - // Orientation is aligned with page left. - // Turn the head 90 degrees counterclockwise from upright to read. - PAGE_LEFT = 4; - } - - // Text anchor indexing into the - // [Document.text][google.cloud.documentai.v1beta1.Document.text]. - TextAnchor text_anchor = 1; - - // Confidence of the current - // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] within - // context of the object this layout is for. e.g. confidence can be for a - // single token, a table, a visual element, etc. depending on context. - // Range [0, 1]. - float confidence = 2; - - // The bounding polygon for the - // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout]. - BoundingPoly bounding_poly = 3; - - // Detected orientation for the - // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout]. - Orientation orientation = 4; - } - - // A block has a set of lines (collected into paragraphs) that have a - // common line-spacing and orientation. - message Block { - // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for - // [Block][google.cloud.documentai.v1beta1.Document.Page.Block]. - Layout layout = 1; - - // A list of detected languages together with confidence. - repeated DetectedLanguage detected_languages = 2; - } - - // A collection of lines that a human would perceive as a paragraph. - message Paragraph { - // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for - // [Paragraph][google.cloud.documentai.v1beta1.Document.Page.Paragraph]. - Layout layout = 1; - - // A list of detected languages together with confidence. - repeated DetectedLanguage detected_languages = 2; - } - - // A collection of tokens that a human would perceive as a line. 
- // Does not cross column boundaries, can be horizontal, vertical, etc. - message Line { - // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for - // [Line][google.cloud.documentai.v1beta1.Document.Page.Line]. - Layout layout = 1; - - // A list of detected languages together with confidence. - repeated DetectedLanguage detected_languages = 2; - } - - // A detected token. - message Token { - // Detected break at the end of a - // [Token][google.cloud.documentai.v1beta1.Document.Page.Token]. - message DetectedBreak { - // Enum to denote the type of break found. - enum Type { - // Unspecified break type. - TYPE_UNSPECIFIED = 0; - - // A single whitespace. - SPACE = 1; - - // A wider whitespace. - WIDE_SPACE = 2; - - // A hyphen that indicates that a token has been split across lines. - HYPHEN = 3; - } - - // Detected break type. - Type type = 1; - } - - // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for - // [Token][google.cloud.documentai.v1beta1.Document.Page.Token]. - Layout layout = 1; - - // Detected break at the end of a - // [Token][google.cloud.documentai.v1beta1.Document.Page.Token]. - DetectedBreak detected_break = 2; - - // A list of detected languages together with confidence. - repeated DetectedLanguage detected_languages = 3; - } - - // Detected non-text visual elements e.g. checkbox, signature etc. on the - // page. - message VisualElement { - // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for - // [Token][google.cloud.documentai.v1beta1.Document.Page.Token]. - Layout layout = 1; - - // Type of the - // [VisualElement][google.cloud.documentai.v1beta1.Document.Page.VisualElement]. - string type = 2; - - // A list of detected languages together with confidence. - repeated DetectedLanguage detected_languages = 3; - } - - // A table representation similar to HTML table structure. - message Table { - // A row of table cells. - message TableRow { - // Cells that make up this row. 
- repeated TableCell cells = 1; - } - - // A cell representation inside the table. - message TableCell { - // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for - // [TableCell][google.cloud.documentai.v1beta1.Document.Page.Table.TableCell]. - Layout layout = 1; - - // How many rows this cell spans. - int32 row_span = 2; - - // How many columns this cell spans. - int32 col_span = 3; - - // A list of detected languages together with confidence. - repeated DetectedLanguage detected_languages = 4; - } - - // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for - // [Table][google.cloud.documentai.v1beta1.Document.Page.Table]. - Layout layout = 1; - - // Header rows of the table. - repeated TableRow header_rows = 2; - - // Body rows of the table. - repeated TableRow body_rows = 3; - - // A list of detected languages together with confidence. - repeated DetectedLanguage detected_languages = 4; - } - - // A form field detected on the page. - message FormField { - // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for the - // [FormField][google.cloud.documentai.v1beta1.Document.Page.FormField] - // name. e.g. `Address`, `Email`, `Grand total`, `Phone number`, etc. - Layout field_name = 1; - - // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for the - // [FormField][google.cloud.documentai.v1beta1.Document.Page.FormField] - // value. - Layout field_value = 2; - - // A list of detected languages for name together with confidence. - repeated DetectedLanguage name_detected_languages = 3; - - // A list of detected languages for value together with confidence. - repeated DetectedLanguage value_detected_languages = 4; - } - - // Detected language for a structural component. - message DetectedLanguage { - // The BCP-47 language code, such as "en-US" or "sr-Latn". For more - // information, see - // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier. 
- string language_code = 1; - - // Confidence of detected language. Range [0, 1]. - float confidence = 2; - } - - // 1-based index for current - // [Page][google.cloud.documentai.v1beta1.Document.Page] in a parent - // [Document][google.cloud.documentai.v1beta1.Document]. Useful when a page - // is taken out of a [Document][google.cloud.documentai.v1beta1.Document] - // for individual processing. - int32 page_number = 1; - - // Physical dimension of the page. - Dimension dimension = 2; - - // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for the - // page. - Layout layout = 3; - - // A list of detected languages together with confidence. - repeated DetectedLanguage detected_languages = 4; - - // A list of visually detected text blocks on the page. - // A block has a set of lines (collected into paragraphs) that have a common - // line-spacing and orientation. - repeated Block blocks = 5; - - // A list of visually detected text paragraphs on the page. - // A collection of lines that a human would perceive as a paragraph. - repeated Paragraph paragraphs = 6; - - // A list of visually detected text lines on the page. - // A collection of tokens that a human would perceive as a line. - repeated Line lines = 7; - - // A list of visually detected tokens on the page. - repeated Token tokens = 8; - - // A list of detected non-text visual elements e.g. checkbox, - // signature etc. on the page. - repeated VisualElement visual_elements = 9; - - // A list of visually detected tables on the page. - repeated Table tables = 10; - - // A list of visually detected form fields on the page. - repeated FormField form_fields = 11; - } - - // A phrase in the text that is a known entity type, such as a person, an - // organization, or location. - message Entity { - // Provenance of the entity. - // Text anchor indexing into the - // [Document.text][google.cloud.documentai.v1beta1.Document.text]. - TextAnchor text_anchor = 1; - - // Entity type from a schema e.g. 
`Address`. - string type = 2; - - // Text value in the document e.g. `1600 Amphitheatre Pkwy`. - string mention_text = 3; - - // Canonical mention name. This will be a unique value in the entity list - // for this document. - string mention_id = 4; - } - - // Relationship between - // [Entities][google.cloud.documentai.v1beta1.Document.Entity]. - message EntityRelation { - // Subject entity mention_id. - string subject_id = 1; - - // Object entity mention_id. - string object_id = 2; - - // Relationship description. - string relation = 3; - } - - // Text reference indexing into the - // [Document.text][google.cloud.documentai.v1beta1.Document.text]. - message TextAnchor { - // A text segment in the - // [Document.text][google.cloud.documentai.v1beta1.Document.text]. The - // indices may be out of bounds which indicate that the text extends into - // another document shard for large sharded documents. See - // [ShardInfo.text_offset][google.cloud.documentai.v1beta1.Document.ShardInfo.text_offset] - message TextSegment { - // [TextSegment][google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment] - // start UTF-8 char index in the - // [Document.text][google.cloud.documentai.v1beta1.Document.text]. - int64 start_index = 1; - - // [TextSegment][google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment] - // half open end UTF-8 char index in the - // [Document.text][google.cloud.documentai.v1beta1.Document.text]. - int64 end_index = 2; - } - - // The text segments from the - // [Document.text][google.cloud.documentai.v1beta1.Document.text]. - repeated TextSegment text_segments = 1; - } - - // Original source document from the user. - oneof source { - // Currently supports Google Cloud Storage URI of the form - // `gs://bucket_name/object_name`. Object versioning is not supported. - // See [Google Cloud Storage Request - // URIs](https://cloud.google.com/storage/docs/reference-uris) for more - // info. 
- string uri = 1; - - // Inline document content, represented as a stream of bytes. - // Note: As with all `bytes` fields, protobuffers use a pure binary - // representation, whereas JSON representations use base64. - bytes content = 2; - } - - // An IANA published MIME type (also referred to as media type). For more - // information, see - // https://www.iana.org/assignments/media-types/media-types.xhtml. - string mime_type = 3; - - // UTF-8 encoded text in reading order from the document. - string text = 4; - - // Styles for the - // [Document.text][google.cloud.documentai.v1beta1.Document.text]. - repeated Style text_styles = 5; - - // Visual page layout for the - // [Document][google.cloud.documentai.v1beta1.Document]. - repeated Page pages = 6; - - // A list of entities detected on - // [Document.text][google.cloud.documentai.v1beta1.Document.text]. For - // document shards, entities in this list may cross shard boundaries. - repeated Entity entities = 7; - - // Relationship among - // [Document.entities][google.cloud.documentai.v1beta1.Document.entities]. - repeated EntityRelation entity_relations = 8; - - // Information about the sharding if this document is sharded part of a larger - // document. If the document is not sharded, this message is not specified. - ShardInfo shard_info = 9; - - // Any error that occurred while processing this document. - google.rpc.Status error = 10; -} diff --git a/google/cloud/documentai_v1beta1/proto/document_pb2.py b/google/cloud/documentai_v1beta1/proto/document_pb2.py deleted file mode 100644 index 5973694f..00000000 --- a/google/cloud/documentai_v1beta1/proto/document_pb2.py +++ /dev/null @@ -1,2698 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: google/cloud/documentai_v1beta1/proto/document.proto - -import sys - -_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1")) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 -from google.cloud.documentai_v1beta1.proto import ( - geometry_pb2 as google_dot_cloud_dot_documentai__v1beta1_dot_proto_dot_geometry__pb2, -) -from google.rpc import status_pb2 as google_dot_rpc_dot_status__pb2 -from google.type import color_pb2 as google_dot_type_dot_color__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/documentai_v1beta1/proto/document.proto", - package="google.cloud.documentai.v1beta1", - syntax="proto3", - serialized_options=_b( - "\n#com.google.cloud.documentai.v1beta1B\rDocumentProtoP\001ZIgoogle.golang.org/genproto/googleapis/cloud/documentai/v1beta1;documentai" - ), - serialized_pb=_b( - '\n4google/cloud/documentai_v1beta1/proto/document.proto\x12\x1fgoogle.cloud.documentai.v1beta1\x1a\x1cgoogle/api/annotations.proto\x1a\x34google/cloud/documentai_v1beta1/proto/geometry.proto\x1a\x17google/rpc/status.proto\x1a\x17google/type/color.proto"\xfa$\n\x08\x44ocument\x12\r\n\x03uri\x18\x01 \x01(\tH\x00\x12\x11\n\x07\x63ontent\x18\x02 \x01(\x0cH\x00\x12\x11\n\tmime_type\x18\x03 \x01(\t\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x44\n\x0btext_styles\x18\x05 \x03(\x0b\x32/.google.cloud.documentai.v1beta1.Document.Style\x12=\n\x05pages\x18\x06 \x03(\x0b\x32..google.cloud.documentai.v1beta1.Document.Page\x12\x42\n\x08\x65ntities\x18\x07 \x03(\x0b\x32\x30.google.cloud.documentai.v1beta1.Document.Entity\x12R\n\x10\x65ntity_relations\x18\x08 
\x03(\x0b\x32\x38.google.cloud.documentai.v1beta1.Document.EntityRelation\x12G\n\nshard_info\x18\t \x01(\x0b\x32\x33.google.cloud.documentai.v1beta1.Document.ShardInfo\x12!\n\x05\x65rror\x18\n \x01(\x0b\x32\x12.google.rpc.Status\x1aJ\n\tShardInfo\x12\x13\n\x0bshard_index\x18\x01 \x01(\x03\x12\x13\n\x0bshard_count\x18\x02 \x01(\x03\x12\x13\n\x0btext_offset\x18\x03 \x01(\x03\x1a\xda\x02\n\x05Style\x12I\n\x0btext_anchor\x18\x01 \x01(\x0b\x32\x34.google.cloud.documentai.v1beta1.Document.TextAnchor\x12!\n\x05\x63olor\x18\x02 \x01(\x0b\x32\x12.google.type.Color\x12,\n\x10\x62\x61\x63kground_color\x18\x03 \x01(\x0b\x32\x12.google.type.Color\x12\x13\n\x0b\x66ont_weight\x18\x04 \x01(\t\x12\x12\n\ntext_style\x18\x05 \x01(\t\x12\x17\n\x0ftext_decoration\x18\x06 \x01(\t\x12K\n\tfont_size\x18\x07 \x01(\x0b\x32\x38.google.cloud.documentai.v1beta1.Document.Style.FontSize\x1a&\n\x08\x46ontSize\x12\x0c\n\x04size\x18\x01 \x01(\x02\x12\x0c\n\x04unit\x18\x02 \x01(\t\x1a\xf6\x1a\n\x04Page\x12\x13\n\x0bpage_number\x18\x01 \x01(\x05\x12K\n\tdimension\x18\x02 \x01(\x0b\x32\x38.google.cloud.documentai.v1beta1.Document.Page.Dimension\x12\x45\n\x06layout\x18\x03 \x01(\x0b\x32\x35.google.cloud.documentai.v1beta1.Document.Page.Layout\x12[\n\x12\x64\x65tected_languages\x18\x04 \x03(\x0b\x32?.google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage\x12\x44\n\x06\x62locks\x18\x05 \x03(\x0b\x32\x34.google.cloud.documentai.v1beta1.Document.Page.Block\x12L\n\nparagraphs\x18\x06 \x03(\x0b\x32\x38.google.cloud.documentai.v1beta1.Document.Page.Paragraph\x12\x42\n\x05lines\x18\x07 \x03(\x0b\x32\x33.google.cloud.documentai.v1beta1.Document.Page.Line\x12\x44\n\x06tokens\x18\x08 \x03(\x0b\x32\x34.google.cloud.documentai.v1beta1.Document.Page.Token\x12U\n\x0fvisual_elements\x18\t \x03(\x0b\x32<.google.cloud.documentai.v1beta1.Document.Page.VisualElement\x12\x44\n\x06tables\x18\n \x03(\x0b\x32\x34.google.cloud.documentai.v1beta1.Document.Page.Table\x12M\n\x0b\x66orm_fields\x18\x0b 
\x03(\x0b\x32\x38.google.cloud.documentai.v1beta1.Document.Page.FormField\x1a\x38\n\tDimension\x12\r\n\x05width\x18\x01 \x01(\x02\x12\x0e\n\x06height\x18\x02 \x01(\x02\x12\x0c\n\x04unit\x18\x03 \x01(\t\x1a\xec\x02\n\x06Layout\x12I\n\x0btext_anchor\x18\x01 \x01(\x0b\x32\x34.google.cloud.documentai.v1beta1.Document.TextAnchor\x12\x12\n\nconfidence\x18\x02 \x01(\x02\x12\x44\n\rbounding_poly\x18\x03 \x01(\x0b\x32-.google.cloud.documentai.v1beta1.BoundingPoly\x12V\n\x0borientation\x18\x04 \x01(\x0e\x32\x41.google.cloud.documentai.v1beta1.Document.Page.Layout.Orientation"e\n\x0bOrientation\x12\x1b\n\x17ORIENTATION_UNSPECIFIED\x10\x00\x12\x0b\n\x07PAGE_UP\x10\x01\x12\x0e\n\nPAGE_RIGHT\x10\x02\x12\r\n\tPAGE_DOWN\x10\x03\x12\r\n\tPAGE_LEFT\x10\x04\x1a\xab\x01\n\x05\x42lock\x12\x45\n\x06layout\x18\x01 \x01(\x0b\x32\x35.google.cloud.documentai.v1beta1.Document.Page.Layout\x12[\n\x12\x64\x65tected_languages\x18\x02 \x03(\x0b\x32?.google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage\x1a\xaf\x01\n\tParagraph\x12\x45\n\x06layout\x18\x01 \x01(\x0b\x32\x35.google.cloud.documentai.v1beta1.Document.Page.Layout\x12[\n\x12\x64\x65tected_languages\x18\x02 \x03(\x0b\x32?.google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage\x1a\xaa\x01\n\x04Line\x12\x45\n\x06layout\x18\x01 \x01(\x0b\x32\x35.google.cloud.documentai.v1beta1.Document.Page.Layout\x12[\n\x12\x64\x65tected_languages\x18\x02 \x03(\x0b\x32?.google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage\x1a\xb5\x03\n\x05Token\x12\x45\n\x06layout\x18\x01 \x01(\x0b\x32\x35.google.cloud.documentai.v1beta1.Document.Page.Layout\x12Z\n\x0e\x64\x65tected_break\x18\x02 \x01(\x0b\x32\x42.google.cloud.documentai.v1beta1.Document.Page.Token.DetectedBreak\x12[\n\x12\x64\x65tected_languages\x18\x03 \x03(\x0b\x32?.google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage\x1a\xab\x01\n\rDetectedBreak\x12U\n\x04type\x18\x01 
\x01(\x0e\x32G.google.cloud.documentai.v1beta1.Document.Page.Token.DetectedBreak.Type"C\n\x04Type\x12\x14\n\x10TYPE_UNSPECIFIED\x10\x00\x12\t\n\x05SPACE\x10\x01\x12\x0e\n\nWIDE_SPACE\x10\x02\x12\n\n\x06HYPHEN\x10\x03\x1a\xc1\x01\n\rVisualElement\x12\x45\n\x06layout\x18\x01 \x01(\x0b\x32\x35.google.cloud.documentai.v1beta1.Document.Page.Layout\x12\x0c\n\x04type\x18\x02 \x01(\t\x12[\n\x12\x64\x65tected_languages\x18\x03 \x03(\x0b\x32?.google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage\x1a\x82\x05\n\x05Table\x12\x45\n\x06layout\x18\x01 \x01(\x0b\x32\x35.google.cloud.documentai.v1beta1.Document.Page.Layout\x12R\n\x0bheader_rows\x18\x02 \x03(\x0b\x32=.google.cloud.documentai.v1beta1.Document.Page.Table.TableRow\x12P\n\tbody_rows\x18\x03 \x03(\x0b\x32=.google.cloud.documentai.v1beta1.Document.Page.Table.TableRow\x12[\n\x12\x64\x65tected_languages\x18\x04 \x03(\x0b\x32?.google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage\x1aY\n\x08TableRow\x12M\n\x05\x63\x65lls\x18\x01 \x03(\x0b\x32>.google.cloud.documentai.v1beta1.Document.Page.Table.TableCell\x1a\xd3\x01\n\tTableCell\x12\x45\n\x06layout\x18\x01 \x01(\x0b\x32\x35.google.cloud.documentai.v1beta1.Document.Page.Layout\x12\x10\n\x08row_span\x18\x02 \x01(\x05\x12\x10\n\x08\x63ol_span\x18\x03 \x01(\x05\x12[\n\x12\x64\x65tected_languages\x18\x04 \x03(\x0b\x32?.google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage\x1a\xe7\x02\n\tFormField\x12I\n\nfield_name\x18\x01 \x01(\x0b\x32\x35.google.cloud.documentai.v1beta1.Document.Page.Layout\x12J\n\x0b\x66ield_value\x18\x02 \x01(\x0b\x32\x35.google.cloud.documentai.v1beta1.Document.Page.Layout\x12`\n\x17name_detected_languages\x18\x03 \x03(\x0b\x32?.google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage\x12\x61\n\x18value_detected_languages\x18\x04 \x03(\x0b\x32?.google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage\x1a=\n\x10\x44\x65tectedLanguage\x12\x15\n\rlanguage_code\x18\x01 \x01(\t\x12\x12\n\nconfidence\x18\x02 
\x01(\x02\x1a\x8b\x01\n\x06\x45ntity\x12I\n\x0btext_anchor\x18\x01 \x01(\x0b\x32\x34.google.cloud.documentai.v1beta1.Document.TextAnchor\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x14\n\x0cmention_text\x18\x03 \x01(\t\x12\x12\n\nmention_id\x18\x04 \x01(\t\x1aI\n\x0e\x45ntityRelation\x12\x12\n\nsubject_id\x18\x01 \x01(\t\x12\x11\n\tobject_id\x18\x02 \x01(\t\x12\x10\n\x08relation\x18\x03 \x01(\t\x1a\x9c\x01\n\nTextAnchor\x12W\n\rtext_segments\x18\x01 \x03(\x0b\x32@.google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment\x1a\x35\n\x0bTextSegment\x12\x13\n\x0bstart_index\x18\x01 \x01(\x03\x12\x11\n\tend_index\x18\x02 \x01(\x03\x42\x08\n\x06sourceB\x81\x01\n#com.google.cloud.documentai.v1beta1B\rDocumentProtoP\x01ZIgoogle.golang.org/genproto/googleapis/cloud/documentai/v1beta1;documentaib\x06proto3' - ), - dependencies=[ - google_dot_api_dot_annotations__pb2.DESCRIPTOR, - google_dot_cloud_dot_documentai__v1beta1_dot_proto_dot_geometry__pb2.DESCRIPTOR, - google_dot_rpc_dot_status__pb2.DESCRIPTOR, - google_dot_type_dot_color__pb2.DESCRIPTOR, - ], -) - - -_DOCUMENT_PAGE_LAYOUT_ORIENTATION = _descriptor.EnumDescriptor( - name="Orientation", - full_name="google.cloud.documentai.v1beta1.Document.Page.Layout.Orientation", - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name="ORIENTATION_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - ), - _descriptor.EnumValueDescriptor( - name="PAGE_UP", index=1, number=1, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="PAGE_RIGHT", index=2, number=2, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="PAGE_DOWN", index=3, number=3, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="PAGE_LEFT", index=4, number=4, serialized_options=None, type=None - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=2236, - serialized_end=2337, -) 
-_sym_db.RegisterEnumDescriptor(_DOCUMENT_PAGE_LAYOUT_ORIENTATION) - -_DOCUMENT_PAGE_TOKEN_DETECTEDBREAK_TYPE = _descriptor.EnumDescriptor( - name="Type", - full_name="google.cloud.documentai.v1beta1.Document.Page.Token.DetectedBreak.Type", - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name="TYPE_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - ), - _descriptor.EnumValueDescriptor( - name="SPACE", index=1, number=1, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="WIDE_SPACE", index=2, number=2, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="HYPHEN", index=3, number=3, serialized_options=None, type=None - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=3235, - serialized_end=3302, -) -_sym_db.RegisterEnumDescriptor(_DOCUMENT_PAGE_TOKEN_DETECTEDBREAK_TYPE) - - -_DOCUMENT_SHARDINFO = _descriptor.Descriptor( - name="ShardInfo", - full_name="google.cloud.documentai.v1beta1.Document.ShardInfo", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="shard_index", - full_name="google.cloud.documentai.v1beta1.Document.ShardInfo.shard_index", - index=0, - number=1, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="shard_count", - full_name="google.cloud.documentai.v1beta1.Document.ShardInfo.shard_count", - index=1, - number=2, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="text_offset", - 
full_name="google.cloud.documentai.v1beta1.Document.ShardInfo.text_offset", - index=2, - number=3, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=696, - serialized_end=770, -) - -_DOCUMENT_STYLE_FONTSIZE = _descriptor.Descriptor( - name="FontSize", - full_name="google.cloud.documentai.v1beta1.Document.Style.FontSize", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="size", - full_name="google.cloud.documentai.v1beta1.Document.Style.FontSize.size", - index=0, - number=1, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="unit", - full_name="google.cloud.documentai.v1beta1.Document.Style.FontSize.unit", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1081, - serialized_end=1119, -) - -_DOCUMENT_STYLE = _descriptor.Descriptor( - name="Style", - full_name="google.cloud.documentai.v1beta1.Document.Style", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - 
_descriptor.FieldDescriptor( - name="text_anchor", - full_name="google.cloud.documentai.v1beta1.Document.Style.text_anchor", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="color", - full_name="google.cloud.documentai.v1beta1.Document.Style.color", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="background_color", - full_name="google.cloud.documentai.v1beta1.Document.Style.background_color", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="font_weight", - full_name="google.cloud.documentai.v1beta1.Document.Style.font_weight", - index=3, - number=4, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="text_style", - full_name="google.cloud.documentai.v1beta1.Document.Style.text_style", - index=4, - number=5, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - 
file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="text_decoration", - full_name="google.cloud.documentai.v1beta1.Document.Style.text_decoration", - index=5, - number=6, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="font_size", - full_name="google.cloud.documentai.v1beta1.Document.Style.font_size", - index=6, - number=7, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[_DOCUMENT_STYLE_FONTSIZE], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=773, - serialized_end=1119, -) - -_DOCUMENT_PAGE_DIMENSION = _descriptor.Descriptor( - name="Dimension", - full_name="google.cloud.documentai.v1beta1.Document.Page.Dimension", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="width", - full_name="google.cloud.documentai.v1beta1.Document.Page.Dimension.width", - index=0, - number=1, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="height", - full_name="google.cloud.documentai.v1beta1.Document.Page.Dimension.height", - index=1, - number=2, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - 
is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="unit", - full_name="google.cloud.documentai.v1beta1.Document.Page.Dimension.unit", - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1914, - serialized_end=1970, -) - -_DOCUMENT_PAGE_LAYOUT = _descriptor.Descriptor( - name="Layout", - full_name="google.cloud.documentai.v1beta1.Document.Page.Layout", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="text_anchor", - full_name="google.cloud.documentai.v1beta1.Document.Page.Layout.text_anchor", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="confidence", - full_name="google.cloud.documentai.v1beta1.Document.Page.Layout.confidence", - index=1, - number=2, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="bounding_poly", - full_name="google.cloud.documentai.v1beta1.Document.Page.Layout.bounding_poly", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - 
enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="orientation", - full_name="google.cloud.documentai.v1beta1.Document.Page.Layout.orientation", - index=3, - number=4, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[_DOCUMENT_PAGE_LAYOUT_ORIENTATION], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1973, - serialized_end=2337, -) - -_DOCUMENT_PAGE_BLOCK = _descriptor.Descriptor( - name="Block", - full_name="google.cloud.documentai.v1beta1.Document.Page.Block", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="layout", - full_name="google.cloud.documentai.v1beta1.Document.Page.Block.layout", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="detected_languages", - full_name="google.cloud.documentai.v1beta1.Document.Page.Block.detected_languages", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2340, - 
serialized_end=2511, -) - -_DOCUMENT_PAGE_PARAGRAPH = _descriptor.Descriptor( - name="Paragraph", - full_name="google.cloud.documentai.v1beta1.Document.Page.Paragraph", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="layout", - full_name="google.cloud.documentai.v1beta1.Document.Page.Paragraph.layout", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="detected_languages", - full_name="google.cloud.documentai.v1beta1.Document.Page.Paragraph.detected_languages", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2514, - serialized_end=2689, -) - -_DOCUMENT_PAGE_LINE = _descriptor.Descriptor( - name="Line", - full_name="google.cloud.documentai.v1beta1.Document.Page.Line", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="layout", - full_name="google.cloud.documentai.v1beta1.Document.Page.Line.layout", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="detected_languages", - 
full_name="google.cloud.documentai.v1beta1.Document.Page.Line.detected_languages", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2692, - serialized_end=2862, -) - -_DOCUMENT_PAGE_TOKEN_DETECTEDBREAK = _descriptor.Descriptor( - name="DetectedBreak", - full_name="google.cloud.documentai.v1beta1.Document.Page.Token.DetectedBreak", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="type", - full_name="google.cloud.documentai.v1beta1.Document.Page.Token.DetectedBreak.type", - index=0, - number=1, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ) - ], - extensions=[], - nested_types=[], - enum_types=[_DOCUMENT_PAGE_TOKEN_DETECTEDBREAK_TYPE], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3131, - serialized_end=3302, -) - -_DOCUMENT_PAGE_TOKEN = _descriptor.Descriptor( - name="Token", - full_name="google.cloud.documentai.v1beta1.Document.Page.Token", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="layout", - full_name="google.cloud.documentai.v1beta1.Document.Page.Token.layout", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - 
serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="detected_break", - full_name="google.cloud.documentai.v1beta1.Document.Page.Token.detected_break", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="detected_languages", - full_name="google.cloud.documentai.v1beta1.Document.Page.Token.detected_languages", - index=2, - number=3, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[_DOCUMENT_PAGE_TOKEN_DETECTEDBREAK], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2865, - serialized_end=3302, -) - -_DOCUMENT_PAGE_VISUALELEMENT = _descriptor.Descriptor( - name="VisualElement", - full_name="google.cloud.documentai.v1beta1.Document.Page.VisualElement", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="layout", - full_name="google.cloud.documentai.v1beta1.Document.Page.VisualElement.layout", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="type", - full_name="google.cloud.documentai.v1beta1.Document.Page.VisualElement.type", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - 
message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="detected_languages", - full_name="google.cloud.documentai.v1beta1.Document.Page.VisualElement.detected_languages", - index=2, - number=3, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3305, - serialized_end=3498, -) - -_DOCUMENT_PAGE_TABLE_TABLEROW = _descriptor.Descriptor( - name="TableRow", - full_name="google.cloud.documentai.v1beta1.Document.Page.Table.TableRow", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="cells", - full_name="google.cloud.documentai.v1beta1.Document.Page.Table.TableRow.cells", - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ) - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3840, - serialized_end=3929, -) - -_DOCUMENT_PAGE_TABLE_TABLECELL = _descriptor.Descriptor( - name="TableCell", - full_name="google.cloud.documentai.v1beta1.Document.Page.Table.TableCell", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="layout", - full_name="google.cloud.documentai.v1beta1.Document.Page.Table.TableCell.layout", - index=0, - 
number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="row_span", - full_name="google.cloud.documentai.v1beta1.Document.Page.Table.TableCell.row_span", - index=1, - number=2, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="col_span", - full_name="google.cloud.documentai.v1beta1.Document.Page.Table.TableCell.col_span", - index=2, - number=3, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="detected_languages", - full_name="google.cloud.documentai.v1beta1.Document.Page.Table.TableCell.detected_languages", - index=3, - number=4, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3932, - serialized_end=4143, -) - -_DOCUMENT_PAGE_TABLE = _descriptor.Descriptor( - name="Table", - full_name="google.cloud.documentai.v1beta1.Document.Page.Table", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="layout", - 
full_name="google.cloud.documentai.v1beta1.Document.Page.Table.layout", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="header_rows", - full_name="google.cloud.documentai.v1beta1.Document.Page.Table.header_rows", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="body_rows", - full_name="google.cloud.documentai.v1beta1.Document.Page.Table.body_rows", - index=2, - number=3, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="detected_languages", - full_name="google.cloud.documentai.v1beta1.Document.Page.Table.detected_languages", - index=3, - number=4, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[_DOCUMENT_PAGE_TABLE_TABLEROW, _DOCUMENT_PAGE_TABLE_TABLECELL], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3501, - serialized_end=4143, -) - -_DOCUMENT_PAGE_FORMFIELD = _descriptor.Descriptor( - name="FormField", - full_name="google.cloud.documentai.v1beta1.Document.Page.FormField", - filename=None, - 
file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="field_name", - full_name="google.cloud.documentai.v1beta1.Document.Page.FormField.field_name", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="field_value", - full_name="google.cloud.documentai.v1beta1.Document.Page.FormField.field_value", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="name_detected_languages", - full_name="google.cloud.documentai.v1beta1.Document.Page.FormField.name_detected_languages", - index=2, - number=3, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="value_detected_languages", - full_name="google.cloud.documentai.v1beta1.Document.Page.FormField.value_detected_languages", - index=3, - number=4, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4146, - serialized_end=4505, -) - -_DOCUMENT_PAGE_DETECTEDLANGUAGE = _descriptor.Descriptor( - 
name="DetectedLanguage", - full_name="google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="language_code", - full_name="google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage.language_code", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="confidence", - full_name="google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage.confidence", - index=1, - number=2, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4507, - serialized_end=4568, -) - -_DOCUMENT_PAGE = _descriptor.Descriptor( - name="Page", - full_name="google.cloud.documentai.v1beta1.Document.Page", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="page_number", - full_name="google.cloud.documentai.v1beta1.Document.Page.page_number", - index=0, - number=1, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="dimension", - full_name="google.cloud.documentai.v1beta1.Document.Page.dimension", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - 
has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="layout", - full_name="google.cloud.documentai.v1beta1.Document.Page.layout", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="detected_languages", - full_name="google.cloud.documentai.v1beta1.Document.Page.detected_languages", - index=3, - number=4, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="blocks", - full_name="google.cloud.documentai.v1beta1.Document.Page.blocks", - index=4, - number=5, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="paragraphs", - full_name="google.cloud.documentai.v1beta1.Document.Page.paragraphs", - index=5, - number=6, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="lines", - full_name="google.cloud.documentai.v1beta1.Document.Page.lines", - index=6, - number=7, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - 
message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="tokens", - full_name="google.cloud.documentai.v1beta1.Document.Page.tokens", - index=7, - number=8, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="visual_elements", - full_name="google.cloud.documentai.v1beta1.Document.Page.visual_elements", - index=8, - number=9, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="tables", - full_name="google.cloud.documentai.v1beta1.Document.Page.tables", - index=9, - number=10, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="form_fields", - full_name="google.cloud.documentai.v1beta1.Document.Page.form_fields", - index=10, - number=11, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _DOCUMENT_PAGE_DIMENSION, - _DOCUMENT_PAGE_LAYOUT, - _DOCUMENT_PAGE_BLOCK, - _DOCUMENT_PAGE_PARAGRAPH, - _DOCUMENT_PAGE_LINE, - _DOCUMENT_PAGE_TOKEN, - _DOCUMENT_PAGE_VISUALELEMENT, - _DOCUMENT_PAGE_TABLE, - _DOCUMENT_PAGE_FORMFIELD, - 
_DOCUMENT_PAGE_DETECTEDLANGUAGE, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1122, - serialized_end=4568, -) - -_DOCUMENT_ENTITY = _descriptor.Descriptor( - name="Entity", - full_name="google.cloud.documentai.v1beta1.Document.Entity", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="text_anchor", - full_name="google.cloud.documentai.v1beta1.Document.Entity.text_anchor", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="type", - full_name="google.cloud.documentai.v1beta1.Document.Entity.type", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="mention_text", - full_name="google.cloud.documentai.v1beta1.Document.Entity.mention_text", - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="mention_id", - full_name="google.cloud.documentai.v1beta1.Document.Entity.mention_id", - index=3, - number=4, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - 
serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4571, - serialized_end=4710, -) - -_DOCUMENT_ENTITYRELATION = _descriptor.Descriptor( - name="EntityRelation", - full_name="google.cloud.documentai.v1beta1.Document.EntityRelation", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="subject_id", - full_name="google.cloud.documentai.v1beta1.Document.EntityRelation.subject_id", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="object_id", - full_name="google.cloud.documentai.v1beta1.Document.EntityRelation.object_id", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="relation", - full_name="google.cloud.documentai.v1beta1.Document.EntityRelation.relation", - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4712, - serialized_end=4785, -) - -_DOCUMENT_TEXTANCHOR_TEXTSEGMENT = 
_descriptor.Descriptor( - name="TextSegment", - full_name="google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="start_index", - full_name="google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment.start_index", - index=0, - number=1, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="end_index", - full_name="google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment.end_index", - index=1, - number=2, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4891, - serialized_end=4944, -) - -_DOCUMENT_TEXTANCHOR = _descriptor.Descriptor( - name="TextAnchor", - full_name="google.cloud.documentai.v1beta1.Document.TextAnchor", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="text_segments", - full_name="google.cloud.documentai.v1beta1.Document.TextAnchor.text_segments", - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ) - ], - extensions=[], - nested_types=[_DOCUMENT_TEXTANCHOR_TEXTSEGMENT], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - 
extension_ranges=[], - oneofs=[], - serialized_start=4788, - serialized_end=4944, -) - -_DOCUMENT = _descriptor.Descriptor( - name="Document", - full_name="google.cloud.documentai.v1beta1.Document", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="uri", - full_name="google.cloud.documentai.v1beta1.Document.uri", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="content", - full_name="google.cloud.documentai.v1beta1.Document.content", - index=1, - number=2, - type=12, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b(""), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="mime_type", - full_name="google.cloud.documentai.v1beta1.Document.mime_type", - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="text", - full_name="google.cloud.documentai.v1beta1.Document.text", - index=3, - number=4, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="text_styles", - full_name="google.cloud.documentai.v1beta1.Document.text_styles", - index=4, - number=5, - type=11, - 
cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="pages", - full_name="google.cloud.documentai.v1beta1.Document.pages", - index=5, - number=6, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="entities", - full_name="google.cloud.documentai.v1beta1.Document.entities", - index=6, - number=7, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="entity_relations", - full_name="google.cloud.documentai.v1beta1.Document.entity_relations", - index=7, - number=8, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="shard_info", - full_name="google.cloud.documentai.v1beta1.Document.shard_info", - index=8, - number=9, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="error", - full_name="google.cloud.documentai.v1beta1.Document.error", - index=9, - number=10, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - 
message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _DOCUMENT_SHARDINFO, - _DOCUMENT_STYLE, - _DOCUMENT_PAGE, - _DOCUMENT_ENTITY, - _DOCUMENT_ENTITYRELATION, - _DOCUMENT_TEXTANCHOR, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="source", - full_name="google.cloud.documentai.v1beta1.Document.source", - index=0, - containing_type=None, - fields=[], - ) - ], - serialized_start=224, - serialized_end=4954, -) - -_DOCUMENT_SHARDINFO.containing_type = _DOCUMENT -_DOCUMENT_STYLE_FONTSIZE.containing_type = _DOCUMENT_STYLE -_DOCUMENT_STYLE.fields_by_name["text_anchor"].message_type = _DOCUMENT_TEXTANCHOR -_DOCUMENT_STYLE.fields_by_name[ - "color" -].message_type = google_dot_type_dot_color__pb2._COLOR -_DOCUMENT_STYLE.fields_by_name[ - "background_color" -].message_type = google_dot_type_dot_color__pb2._COLOR -_DOCUMENT_STYLE.fields_by_name["font_size"].message_type = _DOCUMENT_STYLE_FONTSIZE -_DOCUMENT_STYLE.containing_type = _DOCUMENT -_DOCUMENT_PAGE_DIMENSION.containing_type = _DOCUMENT_PAGE -_DOCUMENT_PAGE_LAYOUT.fields_by_name["text_anchor"].message_type = _DOCUMENT_TEXTANCHOR -_DOCUMENT_PAGE_LAYOUT.fields_by_name[ - "bounding_poly" -].message_type = ( - google_dot_cloud_dot_documentai__v1beta1_dot_proto_dot_geometry__pb2._BOUNDINGPOLY -) -_DOCUMENT_PAGE_LAYOUT.fields_by_name[ - "orientation" -].enum_type = _DOCUMENT_PAGE_LAYOUT_ORIENTATION -_DOCUMENT_PAGE_LAYOUT.containing_type = _DOCUMENT_PAGE -_DOCUMENT_PAGE_LAYOUT_ORIENTATION.containing_type = _DOCUMENT_PAGE_LAYOUT -_DOCUMENT_PAGE_BLOCK.fields_by_name["layout"].message_type = _DOCUMENT_PAGE_LAYOUT -_DOCUMENT_PAGE_BLOCK.fields_by_name[ - "detected_languages" -].message_type = _DOCUMENT_PAGE_DETECTEDLANGUAGE -_DOCUMENT_PAGE_BLOCK.containing_type = 
_DOCUMENT_PAGE -_DOCUMENT_PAGE_PARAGRAPH.fields_by_name["layout"].message_type = _DOCUMENT_PAGE_LAYOUT -_DOCUMENT_PAGE_PARAGRAPH.fields_by_name[ - "detected_languages" -].message_type = _DOCUMENT_PAGE_DETECTEDLANGUAGE -_DOCUMENT_PAGE_PARAGRAPH.containing_type = _DOCUMENT_PAGE -_DOCUMENT_PAGE_LINE.fields_by_name["layout"].message_type = _DOCUMENT_PAGE_LAYOUT -_DOCUMENT_PAGE_LINE.fields_by_name[ - "detected_languages" -].message_type = _DOCUMENT_PAGE_DETECTEDLANGUAGE -_DOCUMENT_PAGE_LINE.containing_type = _DOCUMENT_PAGE -_DOCUMENT_PAGE_TOKEN_DETECTEDBREAK.fields_by_name[ - "type" -].enum_type = _DOCUMENT_PAGE_TOKEN_DETECTEDBREAK_TYPE -_DOCUMENT_PAGE_TOKEN_DETECTEDBREAK.containing_type = _DOCUMENT_PAGE_TOKEN -_DOCUMENT_PAGE_TOKEN_DETECTEDBREAK_TYPE.containing_type = ( - _DOCUMENT_PAGE_TOKEN_DETECTEDBREAK -) -_DOCUMENT_PAGE_TOKEN.fields_by_name["layout"].message_type = _DOCUMENT_PAGE_LAYOUT -_DOCUMENT_PAGE_TOKEN.fields_by_name[ - "detected_break" -].message_type = _DOCUMENT_PAGE_TOKEN_DETECTEDBREAK -_DOCUMENT_PAGE_TOKEN.fields_by_name[ - "detected_languages" -].message_type = _DOCUMENT_PAGE_DETECTEDLANGUAGE -_DOCUMENT_PAGE_TOKEN.containing_type = _DOCUMENT_PAGE -_DOCUMENT_PAGE_VISUALELEMENT.fields_by_name[ - "layout" -].message_type = _DOCUMENT_PAGE_LAYOUT -_DOCUMENT_PAGE_VISUALELEMENT.fields_by_name[ - "detected_languages" -].message_type = _DOCUMENT_PAGE_DETECTEDLANGUAGE -_DOCUMENT_PAGE_VISUALELEMENT.containing_type = _DOCUMENT_PAGE -_DOCUMENT_PAGE_TABLE_TABLEROW.fields_by_name[ - "cells" -].message_type = _DOCUMENT_PAGE_TABLE_TABLECELL -_DOCUMENT_PAGE_TABLE_TABLEROW.containing_type = _DOCUMENT_PAGE_TABLE -_DOCUMENT_PAGE_TABLE_TABLECELL.fields_by_name[ - "layout" -].message_type = _DOCUMENT_PAGE_LAYOUT -_DOCUMENT_PAGE_TABLE_TABLECELL.fields_by_name[ - "detected_languages" -].message_type = _DOCUMENT_PAGE_DETECTEDLANGUAGE -_DOCUMENT_PAGE_TABLE_TABLECELL.containing_type = _DOCUMENT_PAGE_TABLE -_DOCUMENT_PAGE_TABLE.fields_by_name["layout"].message_type = 
_DOCUMENT_PAGE_LAYOUT -_DOCUMENT_PAGE_TABLE.fields_by_name[ - "header_rows" -].message_type = _DOCUMENT_PAGE_TABLE_TABLEROW -_DOCUMENT_PAGE_TABLE.fields_by_name[ - "body_rows" -].message_type = _DOCUMENT_PAGE_TABLE_TABLEROW -_DOCUMENT_PAGE_TABLE.fields_by_name[ - "detected_languages" -].message_type = _DOCUMENT_PAGE_DETECTEDLANGUAGE -_DOCUMENT_PAGE_TABLE.containing_type = _DOCUMENT_PAGE -_DOCUMENT_PAGE_FORMFIELD.fields_by_name[ - "field_name" -].message_type = _DOCUMENT_PAGE_LAYOUT -_DOCUMENT_PAGE_FORMFIELD.fields_by_name[ - "field_value" -].message_type = _DOCUMENT_PAGE_LAYOUT -_DOCUMENT_PAGE_FORMFIELD.fields_by_name[ - "name_detected_languages" -].message_type = _DOCUMENT_PAGE_DETECTEDLANGUAGE -_DOCUMENT_PAGE_FORMFIELD.fields_by_name[ - "value_detected_languages" -].message_type = _DOCUMENT_PAGE_DETECTEDLANGUAGE -_DOCUMENT_PAGE_FORMFIELD.containing_type = _DOCUMENT_PAGE -_DOCUMENT_PAGE_DETECTEDLANGUAGE.containing_type = _DOCUMENT_PAGE -_DOCUMENT_PAGE.fields_by_name["dimension"].message_type = _DOCUMENT_PAGE_DIMENSION -_DOCUMENT_PAGE.fields_by_name["layout"].message_type = _DOCUMENT_PAGE_LAYOUT -_DOCUMENT_PAGE.fields_by_name[ - "detected_languages" -].message_type = _DOCUMENT_PAGE_DETECTEDLANGUAGE -_DOCUMENT_PAGE.fields_by_name["blocks"].message_type = _DOCUMENT_PAGE_BLOCK -_DOCUMENT_PAGE.fields_by_name["paragraphs"].message_type = _DOCUMENT_PAGE_PARAGRAPH -_DOCUMENT_PAGE.fields_by_name["lines"].message_type = _DOCUMENT_PAGE_LINE -_DOCUMENT_PAGE.fields_by_name["tokens"].message_type = _DOCUMENT_PAGE_TOKEN -_DOCUMENT_PAGE.fields_by_name[ - "visual_elements" -].message_type = _DOCUMENT_PAGE_VISUALELEMENT -_DOCUMENT_PAGE.fields_by_name["tables"].message_type = _DOCUMENT_PAGE_TABLE -_DOCUMENT_PAGE.fields_by_name["form_fields"].message_type = _DOCUMENT_PAGE_FORMFIELD -_DOCUMENT_PAGE.containing_type = _DOCUMENT -_DOCUMENT_ENTITY.fields_by_name["text_anchor"].message_type = _DOCUMENT_TEXTANCHOR -_DOCUMENT_ENTITY.containing_type = _DOCUMENT 
-_DOCUMENT_ENTITYRELATION.containing_type = _DOCUMENT -_DOCUMENT_TEXTANCHOR_TEXTSEGMENT.containing_type = _DOCUMENT_TEXTANCHOR -_DOCUMENT_TEXTANCHOR.fields_by_name[ - "text_segments" -].message_type = _DOCUMENT_TEXTANCHOR_TEXTSEGMENT -_DOCUMENT_TEXTANCHOR.containing_type = _DOCUMENT -_DOCUMENT.fields_by_name["text_styles"].message_type = _DOCUMENT_STYLE -_DOCUMENT.fields_by_name["pages"].message_type = _DOCUMENT_PAGE -_DOCUMENT.fields_by_name["entities"].message_type = _DOCUMENT_ENTITY -_DOCUMENT.fields_by_name["entity_relations"].message_type = _DOCUMENT_ENTITYRELATION -_DOCUMENT.fields_by_name["shard_info"].message_type = _DOCUMENT_SHARDINFO -_DOCUMENT.fields_by_name["error"].message_type = google_dot_rpc_dot_status__pb2._STATUS -_DOCUMENT.oneofs_by_name["source"].fields.append(_DOCUMENT.fields_by_name["uri"]) -_DOCUMENT.fields_by_name["uri"].containing_oneof = _DOCUMENT.oneofs_by_name["source"] -_DOCUMENT.oneofs_by_name["source"].fields.append(_DOCUMENT.fields_by_name["content"]) -_DOCUMENT.fields_by_name["content"].containing_oneof = _DOCUMENT.oneofs_by_name[ - "source" -] -DESCRIPTOR.message_types_by_name["Document"] = _DOCUMENT -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -Document = _reflection.GeneratedProtocolMessageType( - "Document", - (_message.Message,), - dict( - ShardInfo=_reflection.GeneratedProtocolMessageType( - "ShardInfo", - (_message.Message,), - dict( - DESCRIPTOR=_DOCUMENT_SHARDINFO, - __module__="google.cloud.documentai_v1beta1.proto.document_pb2", - __doc__="""For a large document, sharding may be performed to produce - several document shards. Each document shard contains this field to - detail which shard it is. - - - Attributes: - shard_index: - The 0-based index of this shard. - shard_count: - Total number of shards. - text_offset: - The index of the first character in - [Document.text][google.cloud.documentai.v1beta1.Document.text] - in the overall document global text. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.ShardInfo) - ), - ), - Style=_reflection.GeneratedProtocolMessageType( - "Style", - (_message.Message,), - dict( - FontSize=_reflection.GeneratedProtocolMessageType( - "FontSize", - (_message.Message,), - dict( - DESCRIPTOR=_DOCUMENT_STYLE_FONTSIZE, - __module__="google.cloud.documentai_v1beta1.proto.document_pb2", - __doc__="""Font size with unit. - - - Attributes: - size: - Font size for the text. - unit: - Unit for the font size. Follows CSS naming (in, px, pt, etc.). - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Style.FontSize) - ), - ), - DESCRIPTOR=_DOCUMENT_STYLE, - __module__="google.cloud.documentai_v1beta1.proto.document_pb2", - __doc__="""Annotation for common text style attributes. This adheres - to CSS conventions as much as possible. - - - Attributes: - text_anchor: - Text anchor indexing into the [Document.text][google.cloud.doc - umentai.v1beta1.Document.text]. - color: - Text color. - background_color: - Text background color. - font_weight: - Font weight. Possible values are normal, bold, bolder, and - lighter. https://www.w3schools.com/cssref/pr\_font\_weight.asp - text_style: - Text style. Possible values are normal, italic, and oblique. - https://www.w3schools.com/cssref/pr\_font\_font-style.asp - text_decoration: - Text decoration. Follows CSS standard. - https://www.w3schools.com/cssref/pr\_text\_text-decoration.asp - font_size: - Font size. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Style) - ), - ), - Page=_reflection.GeneratedProtocolMessageType( - "Page", - (_message.Message,), - dict( - Dimension=_reflection.GeneratedProtocolMessageType( - "Dimension", - (_message.Message,), - dict( - DESCRIPTOR=_DOCUMENT_PAGE_DIMENSION, - __module__="google.cloud.documentai_v1beta1.proto.document_pb2", - __doc__="""Dimension for the page. - - - Attributes: - width: - Page width. 
- height: - Page height. - unit: - Dimension unit. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.Dimension) - ), - ), - Layout=_reflection.GeneratedProtocolMessageType( - "Layout", - (_message.Message,), - dict( - DESCRIPTOR=_DOCUMENT_PAGE_LAYOUT, - __module__="google.cloud.documentai_v1beta1.proto.document_pb2", - __doc__="""Visual element describing a layout unit on a page. - - - Attributes: - text_anchor: - Text anchor indexing into the [Document.text][google.cloud.doc - umentai.v1beta1.Document.text]. - confidence: - Confidence of the current - [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] - within context of the object this layout is for. e.g. - confidence can be for a single token, a table, a visual - element, etc. depending on context. Range [0, 1]. - bounding_poly: - The bounding polygon for the [Layout][google.cloud.documentai. - v1beta1.Document.Page.Layout]. - orientation: - Detected orientation for the [Layout][google.cloud.documentai. - v1beta1.Document.Page.Layout]. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.Layout) - ), - ), - Block=_reflection.GeneratedProtocolMessageType( - "Block", - (_message.Message,), - dict( - DESCRIPTOR=_DOCUMENT_PAGE_BLOCK, - __module__="google.cloud.documentai_v1beta1.proto.document_pb2", - __doc__="""A block has a set of lines (collected into paragraphs) - that have a common line-spacing and orientation. - - - Attributes: - layout: - [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] - for - [Block][google.cloud.documentai.v1beta1.Document.Page.Block]. - detected_languages: - A list of detected languages together with confidence. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.Block) - ), - ), - Paragraph=_reflection.GeneratedProtocolMessageType( - "Paragraph", - (_message.Message,), - dict( - DESCRIPTOR=_DOCUMENT_PAGE_PARAGRAPH, - __module__="google.cloud.documentai_v1beta1.proto.document_pb2", - __doc__="""A collection of lines that a human would perceive as a - paragraph. - - - Attributes: - layout: - [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] - for [Paragraph][google.cloud.documentai.v1beta1.Document.Page. - Paragraph]. - detected_languages: - A list of detected languages together with confidence. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.Paragraph) - ), - ), - Line=_reflection.GeneratedProtocolMessageType( - "Line", - (_message.Message,), - dict( - DESCRIPTOR=_DOCUMENT_PAGE_LINE, - __module__="google.cloud.documentai_v1beta1.proto.document_pb2", - __doc__="""A collection of tokens that a human would perceive as a - line. Does not cross column boundaries, can be horizontal, vertical, - etc. - - - Attributes: - layout: - [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] - for - [Line][google.cloud.documentai.v1beta1.Document.Page.Line]. - detected_languages: - A list of detected languages together with confidence. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.Line) - ), - ), - Token=_reflection.GeneratedProtocolMessageType( - "Token", - (_message.Message,), - dict( - DetectedBreak=_reflection.GeneratedProtocolMessageType( - "DetectedBreak", - (_message.Message,), - dict( - DESCRIPTOR=_DOCUMENT_PAGE_TOKEN_DETECTEDBREAK, - __module__="google.cloud.documentai_v1beta1.proto.document_pb2", - __doc__="""Detected break at the end of a - [Token][google.cloud.documentai.v1beta1.Document.Page.Token]. - - - Attributes: - type: - Detected break type. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.Token.DetectedBreak) - ), - ), - DESCRIPTOR=_DOCUMENT_PAGE_TOKEN, - __module__="google.cloud.documentai_v1beta1.proto.document_pb2", - __doc__="""A detected token. - - - Attributes: - layout: - [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] - for - [Token][google.cloud.documentai.v1beta1.Document.Page.Token]. - detected_break: - Detected break at the end of a - [Token][google.cloud.documentai.v1beta1.Document.Page.Token]. - detected_languages: - A list of detected languages together with confidence. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.Token) - ), - ), - VisualElement=_reflection.GeneratedProtocolMessageType( - "VisualElement", - (_message.Message,), - dict( - DESCRIPTOR=_DOCUMENT_PAGE_VISUALELEMENT, - __module__="google.cloud.documentai_v1beta1.proto.document_pb2", - __doc__="""Detected non-text visual elements e.g. checkbox, signature - etc. on the page. - - - Attributes: - layout: - [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] - for - [Token][google.cloud.documentai.v1beta1.Document.Page.Token]. - type: - Type of the [VisualElement][google.cloud.documentai.v1beta1.Do - cument.Page.VisualElement]. - detected_languages: - A list of detected languages together with confidence. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.VisualElement) - ), - ), - Table=_reflection.GeneratedProtocolMessageType( - "Table", - (_message.Message,), - dict( - TableRow=_reflection.GeneratedProtocolMessageType( - "TableRow", - (_message.Message,), - dict( - DESCRIPTOR=_DOCUMENT_PAGE_TABLE_TABLEROW, - __module__="google.cloud.documentai_v1beta1.proto.document_pb2", - __doc__="""A row of table cells. - - - Attributes: - cells: - Cells that make up this row. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.Table.TableRow) - ), - ), - TableCell=_reflection.GeneratedProtocolMessageType( - "TableCell", - (_message.Message,), - dict( - DESCRIPTOR=_DOCUMENT_PAGE_TABLE_TABLECELL, - __module__="google.cloud.documentai_v1beta1.proto.document_pb2", - __doc__="""A cell representation inside the table. - - - Attributes: - layout: - [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] - for [TableCell][google.cloud.documentai.v1beta1.Document.Page. - Table.TableCell]. - row_span: - How many rows this cell spans. - col_span: - How many columns this cell spans. - detected_languages: - A list of detected languages together with confidence. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.Table.TableCell) - ), - ), - DESCRIPTOR=_DOCUMENT_PAGE_TABLE, - __module__="google.cloud.documentai_v1beta1.proto.document_pb2", - __doc__="""A table representation similar to HTML table structure. - - - Attributes: - layout: - [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] - for - [Table][google.cloud.documentai.v1beta1.Document.Page.Table]. - header_rows: - Header rows of the table. - body_rows: - Body rows of the table. - detected_languages: - A list of detected languages together with confidence. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.Table) - ), - ), - FormField=_reflection.GeneratedProtocolMessageType( - "FormField", - (_message.Message,), - dict( - DESCRIPTOR=_DOCUMENT_PAGE_FORMFIELD, - __module__="google.cloud.documentai_v1beta1.proto.document_pb2", - __doc__="""A form field detected on the page. - - - Attributes: - field_name: - [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] - for the [FormField][google.cloud.documentai.v1beta1.Document.P - age.FormField] name. e.g. ``Address``, ``Email``, ``Grand - total``, ``Phone number``, etc. 
- field_value: - [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] - for the [FormField][google.cloud.documentai.v1beta1.Document.P - age.FormField] value. - name_detected_languages: - A list of detected languages for name together with - confidence. - value_detected_languages: - A list of detected languages for value together with - confidence. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.FormField) - ), - ), - DetectedLanguage=_reflection.GeneratedProtocolMessageType( - "DetectedLanguage", - (_message.Message,), - dict( - DESCRIPTOR=_DOCUMENT_PAGE_DETECTEDLANGUAGE, - __module__="google.cloud.documentai_v1beta1.proto.document_pb2", - __doc__="""Detected language for a structural component. - - - Attributes: - language_code: - The BCP-47 language code, such as "en-US" or "sr-Latn". For - more information, see http://www.unicode.org/reports/tr35/#Uni - code\_locale\_identifier. - confidence: - Confidence of detected language. Range [0, 1]. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage) - ), - ), - DESCRIPTOR=_DOCUMENT_PAGE, - __module__="google.cloud.documentai_v1beta1.proto.document_pb2", - __doc__="""A page in a - [Document][google.cloud.documentai.v1beta1.Document]. - - - Attributes: - page_number: - 1-based index for current - [Page][google.cloud.documentai.v1beta1.Document.Page] in a - parent [Document][google.cloud.documentai.v1beta1.Document]. - Useful when a page is taken out of a - [Document][google.cloud.documentai.v1beta1.Document] for - individual processing. - dimension: - Physical dimension of the page. - layout: - [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] - for the page. - detected_languages: - A list of detected languages together with confidence. - blocks: - A list of visually detected text blocks on the page. 
A block - has a set of lines (collected into paragraphs) that have a - common line-spacing and orientation. - paragraphs: - A list of visually detected text paragraphs on the page. A - collection of lines that a human would perceive as a - paragraph. - lines: - A list of visually detected text lines on the page. A - collection of tokens that a human would perceive as a line. - tokens: - A list of visually detected tokens on the page. - visual_elements: - A list of detected non-text visual elements e.g. checkbox, - signature etc. on the page. - tables: - A list of visually detected tables on the page. - form_fields: - A list of visually detected form fields on the page. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page) - ), - ), - Entity=_reflection.GeneratedProtocolMessageType( - "Entity", - (_message.Message,), - dict( - DESCRIPTOR=_DOCUMENT_ENTITY, - __module__="google.cloud.documentai_v1beta1.proto.document_pb2", - __doc__="""A phrase in the text that is a known entity type, such as - a person, an organization, or location. - - - Attributes: - text_anchor: - Provenance of the entity. Text anchor indexing into the [Docum - ent.text][google.cloud.documentai.v1beta1.Document.text]. - type: - Entity type from a schema e.g. ``Address``. - mention_text: - Text value in the document e.g. ``1600 Amphitheatre Pkwy``. - mention_id: - Canonical mention name. This will be a unique value in the - entity list for this document. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Entity) - ), - ), - EntityRelation=_reflection.GeneratedProtocolMessageType( - "EntityRelation", - (_message.Message,), - dict( - DESCRIPTOR=_DOCUMENT_ENTITYRELATION, - __module__="google.cloud.documentai_v1beta1.proto.document_pb2", - __doc__="""Relationship between - [Entities][google.cloud.documentai.v1beta1.Document.Entity]. - - - Attributes: - subject_id: - Subject entity mention\_id. 
- object_id: - Object entity mention\_id. - relation: - Relationship description. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.EntityRelation) - ), - ), - TextAnchor=_reflection.GeneratedProtocolMessageType( - "TextAnchor", - (_message.Message,), - dict( - TextSegment=_reflection.GeneratedProtocolMessageType( - "TextSegment", - (_message.Message,), - dict( - DESCRIPTOR=_DOCUMENT_TEXTANCHOR_TEXTSEGMENT, - __module__="google.cloud.documentai_v1beta1.proto.document_pb2", - __doc__="""A text segment in the - [Document.text][google.cloud.documentai.v1beta1.Document.text]. The - indices may be out of bounds which indicate that the text extends into - another document shard for large sharded documents. See - [ShardInfo.text\_offset][google.cloud.documentai.v1beta1.Document.ShardInfo.text\_offset] - - - Attributes: - start_index: - [TextSegment][google.cloud.documentai.v1beta1.Document.TextAnc - hor.TextSegment] start UTF-8 char index in the [Document.text] - [google.cloud.documentai.v1beta1.Document.text]. - end_index: - [TextSegment][google.cloud.documentai.v1beta1.Document.TextAnc - hor.TextSegment] half open end UTF-8 char index in the [Docume - nt.text][google.cloud.documentai.v1beta1.Document.text]. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment) - ), - ), - DESCRIPTOR=_DOCUMENT_TEXTANCHOR, - __module__="google.cloud.documentai_v1beta1.proto.document_pb2", - __doc__="""Text reference indexing into the - [Document.text][google.cloud.documentai.v1beta1.Document.text]. - - - Attributes: - text_segments: - The text segments from the [Document.text][google.cloud.docume - ntai.v1beta1.Document.text]. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.TextAnchor) - ), - ), - DESCRIPTOR=_DOCUMENT, - __module__="google.cloud.documentai_v1beta1.proto.document_pb2", - __doc__="""Document represents the canonical document resource in - Document Understanding AI. It is an interchange format that provides - insights into documents and allows for collaboration between users and - Document Understanding AI to iterate and optimize for quality. - - - Attributes: - source: - Original source document from the user. - uri: - Currently supports Google Cloud Storage URI of the form - ``gs://bucket_name/object_name``. Object versioning is not - supported. See `Google Cloud Storage Request URIs - `__ for - more info. - content: - Inline document content, represented as a stream of bytes. - Note: As with all ``bytes`` fields, protobuffers use a pure - binary representation, whereas JSON representations use - base64. - mime_type: - An IANA published MIME type (also referred to as media type). - For more information, see - https://www.iana.org/assignments/media-types/media- - types.xhtml. - text: - UTF-8 encoded text in reading order from the document. - text_styles: - Styles for the [Document.text][google.cloud.documentai.v1beta1 - .Document.text]. - pages: - Visual page layout for the - [Document][google.cloud.documentai.v1beta1.Document]. - entities: - A list of entities detected on [Document.text][google.cloud.do - cumentai.v1beta1.Document.text]. For document shards, entities - in this list may cross shard boundaries. - entity_relations: - Relationship among [Document.entities][google.cloud.documentai - .v1beta1.Document.entities]. - shard_info: - Information about the sharding if this document is sharded - part of a larger document. If the document is not sharded, - this message is not specified. - error: - Any error that occurred while processing this document. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document) - ), -) -_sym_db.RegisterMessage(Document) -_sym_db.RegisterMessage(Document.ShardInfo) -_sym_db.RegisterMessage(Document.Style) -_sym_db.RegisterMessage(Document.Style.FontSize) -_sym_db.RegisterMessage(Document.Page) -_sym_db.RegisterMessage(Document.Page.Dimension) -_sym_db.RegisterMessage(Document.Page.Layout) -_sym_db.RegisterMessage(Document.Page.Block) -_sym_db.RegisterMessage(Document.Page.Paragraph) -_sym_db.RegisterMessage(Document.Page.Line) -_sym_db.RegisterMessage(Document.Page.Token) -_sym_db.RegisterMessage(Document.Page.Token.DetectedBreak) -_sym_db.RegisterMessage(Document.Page.VisualElement) -_sym_db.RegisterMessage(Document.Page.Table) -_sym_db.RegisterMessage(Document.Page.Table.TableRow) -_sym_db.RegisterMessage(Document.Page.Table.TableCell) -_sym_db.RegisterMessage(Document.Page.FormField) -_sym_db.RegisterMessage(Document.Page.DetectedLanguage) -_sym_db.RegisterMessage(Document.Entity) -_sym_db.RegisterMessage(Document.EntityRelation) -_sym_db.RegisterMessage(Document.TextAnchor) -_sym_db.RegisterMessage(Document.TextAnchor.TextSegment) - - -DESCRIPTOR._options = None -# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/documentai_v1beta1/proto/document_pb2_grpc.py b/google/cloud/documentai_v1beta1/proto/document_pb2_grpc.py deleted file mode 100644 index 07cb78fe..00000000 --- a/google/cloud/documentai_v1beta1/proto/document_pb2_grpc.py +++ /dev/null @@ -1,2 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -import grpc diff --git a/google/cloud/documentai_v1beta1/proto/document_understanding.proto b/google/cloud/documentai_v1beta1/proto/document_understanding.proto deleted file mode 100644 index 4f8dfb72..00000000 --- a/google/cloud/documentai_v1beta1/proto/document_understanding.proto +++ /dev/null @@ -1,299 +0,0 @@ -// Copyright 2019 Google LLC. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -syntax = "proto3"; - -package google.cloud.documentai.v1beta1; - -import "google/api/annotations.proto"; -import "google/api/client.proto"; -import "google/api/field_behavior.proto"; -import "google/cloud/documentai/v1beta1/geometry.proto"; -import "google/longrunning/operations.proto"; -import "google/protobuf/timestamp.proto"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta1;documentai"; -option java_multiple_files = true; -option java_outer_classname = "DocumentAiProto"; -option java_package = "com.google.cloud.documentai.v1beta1"; - -// Service to parse structured information from unstructured or semi-structured -// documents using state-of-the-art Google AI such as natural language, -// computer vision, and translation. -service DocumentUnderstandingService { - option (google.api.default_host) = "documentai.googleapis.com"; - option (google.api.oauth_scopes) = - "https://www.googleapis.com/auth/cloud-platform"; - - // LRO endpoint to batch process many documents. 
- rpc BatchProcessDocuments(BatchProcessDocumentsRequest) - returns (google.longrunning.Operation) { - option (google.api.http) = { - post: "/v1beta1/{parent=projects/*/locations/*}/documents:batchProcess" - body: "*" - additional_bindings { - post: "/v1beta1/{parent=projects/*}/documents:batchProcess" - body: "*" - } - }; - option (google.api.method_signature) = "requests"; - option (google.longrunning.operation_info) = { - response_type: "BatchProcessDocumentsResponse" - metadata_type: "OperationMetadata" - }; - } -} - -// Request to batch process documents as an asynchronous operation. -message BatchProcessDocumentsRequest { - // Required. Individual requests for each document. - repeated ProcessDocumentRequest requests = 1 - [(google.api.field_behavior) = REQUIRED]; - - // Target project and location to make a call. - // - // Format: `projects/{project-id}/locations/{location-id}`. - // - // If no location is specified, a region will be chosen automatically. - string parent = 2; -} - -// Request to process one document. -message ProcessDocumentRequest { - // Required. Information about the input file. - InputConfig input_config = 1 [(google.api.field_behavior) = REQUIRED]; - - // Required. The desired output location. - OutputConfig output_config = 2 [(google.api.field_behavior) = REQUIRED]; - - // Specifies a known document type for deeper structure detection. Valid - // values are currently "general" and "invoice". If not provided, "general"\ - // is used as default. If any other value is given, the request is rejected. - string document_type = 3; - - // Controls table extraction behavior. If not specified, the system will - // decide reasonable defaults. - TableExtractionParams table_extraction_params = 4; - - // Controls form extraction behavior. If not specified, the system will - // decide reasonable defaults. - FormExtractionParams form_extraction_params = 5; - - // Controls entity extraction behavior. 
If not specified, the system will - // decide reasonable defaults. - EntityExtractionParams entity_extraction_params = 6; - - // Controls OCR behavior. If not specified, the system will decide reasonable - // defaults. - OcrParams ocr_params = 7; -} - -// Response to an batch document processing request. This is returned in -// the LRO Operation after the operation is complete. -message BatchProcessDocumentsResponse { - // Responses for each individual document. - repeated ProcessDocumentResponse responses = 1; -} - -// Response to a single document processing request. -message ProcessDocumentResponse { - // Information about the input file. This is the same as the corresponding - // input config in the request. - InputConfig input_config = 1; - - // The output location of the parsed responses. The responses are written to - // this location as JSON-serialized `Document` objects. - OutputConfig output_config = 2; -} - -// Parameters to control Optical Character Recognition (OCR) behavior. -message OcrParams { - // List of languages to use for OCR. In most cases, an empty value - // yields the best results since it enables automatic language detection. For - // languages based on the Latin alphabet, setting `language_hints` is not - // needed. In rare cases, when the language of the text in the image is known, - // setting a hint will help get better results (although it will be a - // significant hindrance if the hint is wrong). Document processing returns an - // error if one or more of the specified languages is not one of the - // supported languages. - repeated string language_hints = 1; -} - -// Parameters to control table extraction behavior. -message TableExtractionParams { - // Whether to enable table extraction. - bool enabled = 1; - - // Optional. Table bounding box hints that can be provided to complex cases - // which our algorithm cannot locate the table(s) in. 
- repeated TableBoundHint table_bound_hints = 2 - [(google.api.field_behavior) = OPTIONAL]; - - // Optional. Table header hints. The extraction will bias towards producing - // these terms as table headers, which may improve accuracy. - repeated string header_hints = 3 [(google.api.field_behavior) = OPTIONAL]; - - // Model version of the table extraction system. Default is "builtin/stable". - // Specify "builtin/latest" for the latest model. - string model_version = 4; -} - -// A hint for a table bounding box on the page for table parsing. -message TableBoundHint { - // Optional. Page number for multi-paged inputs this hint applies to. If not - // provided, this hint will apply to all pages by default. This value is - // 1-based. - int32 page_number = 1 [(google.api.field_behavior) = OPTIONAL]; - - // Bounding box hint for a table on this page. The coordinates must be - // normalized to [0,1] and the bounding box must be an axis-aligned rectangle. - BoundingPoly bounding_box = 2; -} - -// Parameters to control form extraction behavior. -message FormExtractionParams { - // Whether to enable form extraction. - bool enabled = 1; - - // User can provide pairs of (key text, value type) to improve the parsing - // result. - // - // For example, if a document has a field called "Date" that holds a date - // value and a field called "Amount" that may hold either a currency value - // (e.g., "$500.00") or a simple number value (e.g., "20"), you could use the - // following hints: [ {"key": "Date", value_types: [ "DATE"]}, {"key": - // "Amount", "value_types": [ "PRICE", "NUMBER" ]} ] - // - // If the value type is unknown, but you want to provide hints for the keys, - // you can leave the value_types field blank. e.g. {"key": "Date", - // "value_types": []} - repeated KeyValuePairHint key_value_pair_hints = 2; - - // Model version of the form extraction system. Default is - // "builtin/stable". Specify "builtin/latest" for the latest model. 
- string model_version = 3; -} - -// User-provided hint for key value pair. -message KeyValuePairHint { - // The key text for the hint. - string key = 1; - - // Type of the value. This is case-insensitive, and could be one of: - // ADDRESS, LOCATION, ORGANIZATION, PERSON, PHONE_NUMBER, - // ID, NUMBER, EMAIL, PRICE, TERMS, DATE, NAME. Types not in this list will - // be ignored. - repeated string value_types = 2; -} - -// Parameters to control entity extraction behavior. -message EntityExtractionParams { - // Whether to enable entity extraction. - bool enabled = 1; - - // Model version of the entity extraction. Default is - // "builtin/stable". Specify "builtin/latest" for the latest model. - string model_version = 2; -} - -// The desired input location and metadata. -message InputConfig { - // Required. - oneof source { - // The Google Cloud Storage location to read the input from. This must be a - // single file. - GcsSource gcs_source = 1; - } - - // Required. Mimetype of the input. Current supported mimetypes are - // application/pdf, image/tiff, and image/gif. - string mime_type = 2 [(google.api.field_behavior) = REQUIRED]; -} - -// The desired output location and metadata. -message OutputConfig { - // Required. - oneof destination { - // The Google Cloud Storage location to write the output to. - GcsDestination gcs_destination = 1; - } - - // The max number of pages to include into each output Document shard JSON on - // Google Cloud Storage. - // - // The valid range is [1, 100]. If not specified, the default value is 20. - // - // For example, for one pdf file with 100 pages, 100 parsed pages will be - // produced. If `pages_per_shard` = 20, then 5 Document shard JSON files each - // containing 20 parsed pages will be written under the prefix - // [OutputConfig.gcs_destination.uri][] and suffix pages-x-to-y.json where - // x and y are 1-indexed page numbers. 
- // - // Example GCS outputs with 157 pages and pages_per_shard = 50: - // - // pages-001-to-050.json - // pages-051-to-100.json - // pages-101-to-150.json - // pages-151-to-157.json - int32 pages_per_shard = 2; -} - -// The Google Cloud Storage location where the input file will be read from. -message GcsSource { - string uri = 1 [(google.api.field_behavior) = REQUIRED]; -} - -// The Google Cloud Storage location where the output file will be written to. -message GcsDestination { - string uri = 1 [(google.api.field_behavior) = REQUIRED]; -} - -// Contains metadata for the BatchProcessDocuments operation. -message OperationMetadata { - enum State { - // The default value. This value is used if the state is omitted. - STATE_UNSPECIFIED = 0; - - // Request is received. - ACCEPTED = 1; - - // Request operation is waiting for scheduling. - WAITING = 2; - - // Request is being processed. - RUNNING = 3; - - // The batch processing completed successfully. - SUCCEEDED = 4; - - // The batch processing was cancelled. - CANCELLED = 5; - - // The batch processing has failed. - FAILED = 6; - } - - // The state of the current batch processing. - State state = 1; - - // A message providing more details about the current state of processing. - string state_message = 2; - - // The creation time of the operation. - google.protobuf.Timestamp create_time = 3; - - // The last update time of the operation. - google.protobuf.Timestamp update_time = 4; -} diff --git a/google/cloud/documentai_v1beta1/proto/document_understanding_pb2.py b/google/cloud/documentai_v1beta1/proto/document_understanding_pb2.py deleted file mode 100644 index 02fbee61..00000000 --- a/google/cloud/documentai_v1beta1/proto/document_understanding_pb2.py +++ /dev/null @@ -1,1559 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: google/cloud/documentai_v1beta1/proto/document_understanding.proto - -import sys - -_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1")) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 -from google.api import client_pb2 as google_dot_api_dot_client__pb2 -from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2 -from google.cloud.documentai_v1beta1.proto import ( - geometry_pb2 as google_dot_cloud_dot_documentai__v1beta1_dot_proto_dot_geometry__pb2, -) -from google.longrunning import ( - operations_pb2 as google_dot_longrunning_dot_operations__pb2, -) -from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/documentai_v1beta1/proto/document_understanding.proto", - package="google.cloud.documentai.v1beta1", - syntax="proto3", - serialized_options=_b( - "\n#com.google.cloud.documentai.v1beta1B\017DocumentAiProtoP\001ZIgoogle.golang.org/genproto/googleapis/cloud/documentai/v1beta1;documentai" - ), - serialized_pb=_b( - '\nBgoogle/cloud/documentai_v1beta1/proto/document_understanding.proto\x12\x1fgoogle.cloud.documentai.v1beta1\x1a\x1cgoogle/api/annotations.proto\x1a\x17google/api/client.proto\x1a\x1fgoogle/api/field_behavior.proto\x1a\x34google/cloud/documentai_v1beta1/proto/geometry.proto\x1a#google/longrunning/operations.proto\x1a\x1fgoogle/protobuf/timestamp.proto"~\n\x1c\x42\x61tchProcessDocumentsRequest\x12N\n\x08requests\x18\x01 \x03(\x0b\x32\x37.google.cloud.documentai.v1beta1.ProcessDocumentRequestB\x03\xe0\x41\x02\x12\x0e\n\x06parent\x18\x02 
\x01(\t"\x8e\x04\n\x16ProcessDocumentRequest\x12G\n\x0cinput_config\x18\x01 \x01(\x0b\x32,.google.cloud.documentai.v1beta1.InputConfigB\x03\xe0\x41\x02\x12I\n\routput_config\x18\x02 \x01(\x0b\x32-.google.cloud.documentai.v1beta1.OutputConfigB\x03\xe0\x41\x02\x12\x15\n\rdocument_type\x18\x03 \x01(\t\x12W\n\x17table_extraction_params\x18\x04 \x01(\x0b\x32\x36.google.cloud.documentai.v1beta1.TableExtractionParams\x12U\n\x16\x66orm_extraction_params\x18\x05 \x01(\x0b\x32\x35.google.cloud.documentai.v1beta1.FormExtractionParams\x12Y\n\x18\x65ntity_extraction_params\x18\x06 \x01(\x0b\x32\x37.google.cloud.documentai.v1beta1.EntityExtractionParams\x12>\n\nocr_params\x18\x07 \x01(\x0b\x32*.google.cloud.documentai.v1beta1.OcrParams"l\n\x1d\x42\x61tchProcessDocumentsResponse\x12K\n\tresponses\x18\x01 \x03(\x0b\x32\x38.google.cloud.documentai.v1beta1.ProcessDocumentResponse"\xa3\x01\n\x17ProcessDocumentResponse\x12\x42\n\x0cinput_config\x18\x01 \x01(\x0b\x32,.google.cloud.documentai.v1beta1.InputConfig\x12\x44\n\routput_config\x18\x02 \x01(\x0b\x32-.google.cloud.documentai.v1beta1.OutputConfig"#\n\tOcrParams\x12\x16\n\x0elanguage_hints\x18\x01 \x03(\t"\xab\x01\n\x15TableExtractionParams\x12\x0f\n\x07\x65nabled\x18\x01 \x01(\x08\x12O\n\x11table_bound_hints\x18\x02 \x03(\x0b\x32/.google.cloud.documentai.v1beta1.TableBoundHintB\x03\xe0\x41\x01\x12\x19\n\x0cheader_hints\x18\x03 \x03(\tB\x03\xe0\x41\x01\x12\x15\n\rmodel_version\x18\x04 \x01(\t"o\n\x0eTableBoundHint\x12\x18\n\x0bpage_number\x18\x01 \x01(\x05\x42\x03\xe0\x41\x01\x12\x43\n\x0c\x62ounding_box\x18\x02 \x01(\x0b\x32-.google.cloud.documentai.v1beta1.BoundingPoly"\x8f\x01\n\x14\x46ormExtractionParams\x12\x0f\n\x07\x65nabled\x18\x01 \x01(\x08\x12O\n\x14key_value_pair_hints\x18\x02 \x03(\x0b\x32\x31.google.cloud.documentai.v1beta1.KeyValuePairHint\x12\x15\n\rmodel_version\x18\x03 \x01(\t"4\n\x10KeyValuePairHint\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x13\n\x0bvalue_types\x18\x02 
\x03(\t"@\n\x16\x45ntityExtractionParams\x12\x0f\n\x07\x65nabled\x18\x01 \x01(\x08\x12\x15\n\rmodel_version\x18\x02 \x01(\t"q\n\x0bInputConfig\x12@\n\ngcs_source\x18\x01 \x01(\x0b\x32*.google.cloud.documentai.v1beta1.GcsSourceH\x00\x12\x16\n\tmime_type\x18\x02 \x01(\tB\x03\xe0\x41\x02\x42\x08\n\x06source"\x82\x01\n\x0cOutputConfig\x12J\n\x0fgcs_destination\x18\x01 \x01(\x0b\x32/.google.cloud.documentai.v1beta1.GcsDestinationH\x00\x12\x17\n\x0fpages_per_shard\x18\x02 \x01(\x05\x42\r\n\x0b\x64\x65stination"\x1d\n\tGcsSource\x12\x10\n\x03uri\x18\x01 \x01(\tB\x03\xe0\x41\x02""\n\x0eGcsDestination\x12\x10\n\x03uri\x18\x01 \x01(\tB\x03\xe0\x41\x02"\xc7\x02\n\x11OperationMetadata\x12G\n\x05state\x18\x01 \x01(\x0e\x32\x38.google.cloud.documentai.v1beta1.OperationMetadata.State\x12\x15\n\rstate_message\x18\x02 \x01(\t\x12/\n\x0b\x63reate_time\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12/\n\x0bupdate_time\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.Timestamp"p\n\x05State\x12\x15\n\x11STATE_UNSPECIFIED\x10\x00\x12\x0c\n\x08\x41\x43\x43\x45PTED\x10\x01\x12\x0b\n\x07WAITING\x10\x02\x12\x0b\n\x07RUNNING\x10\x03\x12\r\n\tSUCCEEDED\x10\x04\x12\r\n\tCANCELLED\x10\x05\x12\n\n\x06\x46\x41ILED\x10\x06\x32\xac\x03\n\x1c\x44ocumentUnderstandingService\x12\xbc\x02\n\x15\x42\x61tchProcessDocuments\x12=.google.cloud.documentai.v1beta1.BatchProcessDocumentsRequest\x1a\x1d.google.longrunning.Operation"\xc4\x01\x82\xd3\xe4\x93\x02~"?/v1beta1/{parent=projects/*/locations/*}/documents:batchProcess:\x01*Z8"3/v1beta1/{parent=projects/*}/documents:batchProcess:\x01*\xda\x41\x08requests\xca\x41\x32\n\x1d\x42\x61tchProcessDocumentsResponse\x12\x11OperationMetadata\x1aM\xca\x41\x19\x64ocumentai.googleapis.com\xd2\x41.https://www.googleapis.com/auth/cloud-platformB\x83\x01\n#com.google.cloud.documentai.v1beta1B\x0f\x44ocumentAiProtoP\x01ZIgoogle.golang.org/genproto/googleapis/cloud/documentai/v1beta1;documentaib\x06proto3' - ), - dependencies=[ - 
google_dot_api_dot_annotations__pb2.DESCRIPTOR, - google_dot_api_dot_client__pb2.DESCRIPTOR, - google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, - google_dot_cloud_dot_documentai__v1beta1_dot_proto_dot_geometry__pb2.DESCRIPTOR, - google_dot_longrunning_dot_operations__pb2.DESCRIPTOR, - google_dot_protobuf_dot_timestamp__pb2.DESCRIPTOR, - ], -) - - -_OPERATIONMETADATA_STATE = _descriptor.EnumDescriptor( - name="State", - full_name="google.cloud.documentai.v1beta1.OperationMetadata.State", - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name="STATE_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - ), - _descriptor.EnumValueDescriptor( - name="ACCEPTED", index=1, number=1, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="WAITING", index=2, number=2, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="RUNNING", index=3, number=3, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="SUCCEEDED", index=4, number=4, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="CANCELLED", index=5, number=5, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="FAILED", index=6, number=6, serialized_options=None, type=None - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=2369, - serialized_end=2481, -) -_sym_db.RegisterEnumDescriptor(_OPERATIONMETADATA_STATE) - - -_BATCHPROCESSDOCUMENTSREQUEST = _descriptor.Descriptor( - name="BatchProcessDocumentsRequest", - full_name="google.cloud.documentai.v1beta1.BatchProcessDocumentsRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="requests", - full_name="google.cloud.documentai.v1beta1.BatchProcessDocumentsRequest.requests", - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - 
default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=_b("\340A\002"), - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="parent", - full_name="google.cloud.documentai.v1beta1.BatchProcessDocumentsRequest.parent", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=315, - serialized_end=441, -) - - -_PROCESSDOCUMENTREQUEST = _descriptor.Descriptor( - name="ProcessDocumentRequest", - full_name="google.cloud.documentai.v1beta1.ProcessDocumentRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="input_config", - full_name="google.cloud.documentai.v1beta1.ProcessDocumentRequest.input_config", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=_b("\340A\002"), - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="output_config", - full_name="google.cloud.documentai.v1beta1.ProcessDocumentRequest.output_config", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=_b("\340A\002"), - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="document_type", - 
full_name="google.cloud.documentai.v1beta1.ProcessDocumentRequest.document_type", - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="table_extraction_params", - full_name="google.cloud.documentai.v1beta1.ProcessDocumentRequest.table_extraction_params", - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="form_extraction_params", - full_name="google.cloud.documentai.v1beta1.ProcessDocumentRequest.form_extraction_params", - index=4, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="entity_extraction_params", - full_name="google.cloud.documentai.v1beta1.ProcessDocumentRequest.entity_extraction_params", - index=5, - number=6, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="ocr_params", - full_name="google.cloud.documentai.v1beta1.ProcessDocumentRequest.ocr_params", - index=6, - number=7, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - 
extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=444, - serialized_end=970, -) - - -_BATCHPROCESSDOCUMENTSRESPONSE = _descriptor.Descriptor( - name="BatchProcessDocumentsResponse", - full_name="google.cloud.documentai.v1beta1.BatchProcessDocumentsResponse", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="responses", - full_name="google.cloud.documentai.v1beta1.BatchProcessDocumentsResponse.responses", - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ) - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=972, - serialized_end=1080, -) - - -_PROCESSDOCUMENTRESPONSE = _descriptor.Descriptor( - name="ProcessDocumentResponse", - full_name="google.cloud.documentai.v1beta1.ProcessDocumentResponse", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="input_config", - full_name="google.cloud.documentai.v1beta1.ProcessDocumentResponse.input_config", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="output_config", - full_name="google.cloud.documentai.v1beta1.ProcessDocumentResponse.output_config", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - 
has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1083, - serialized_end=1246, -) - - -_OCRPARAMS = _descriptor.Descriptor( - name="OcrParams", - full_name="google.cloud.documentai.v1beta1.OcrParams", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="language_hints", - full_name="google.cloud.documentai.v1beta1.OcrParams.language_hints", - index=0, - number=1, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ) - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1248, - serialized_end=1283, -) - - -_TABLEEXTRACTIONPARAMS = _descriptor.Descriptor( - name="TableExtractionParams", - full_name="google.cloud.documentai.v1beta1.TableExtractionParams", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="enabled", - full_name="google.cloud.documentai.v1beta1.TableExtractionParams.enabled", - index=0, - number=1, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="table_bound_hints", - full_name="google.cloud.documentai.v1beta1.TableExtractionParams.table_bound_hints", - index=1, - 
number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=_b("\340A\001"), - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="header_hints", - full_name="google.cloud.documentai.v1beta1.TableExtractionParams.header_hints", - index=2, - number=3, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=_b("\340A\001"), - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="model_version", - full_name="google.cloud.documentai.v1beta1.TableExtractionParams.model_version", - index=3, - number=4, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1286, - serialized_end=1457, -) - - -_TABLEBOUNDHINT = _descriptor.Descriptor( - name="TableBoundHint", - full_name="google.cloud.documentai.v1beta1.TableBoundHint", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="page_number", - full_name="google.cloud.documentai.v1beta1.TableBoundHint.page_number", - index=0, - number=1, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=_b("\340A\001"), - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="bounding_box", - 
full_name="google.cloud.documentai.v1beta1.TableBoundHint.bounding_box", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1459, - serialized_end=1570, -) - - -_FORMEXTRACTIONPARAMS = _descriptor.Descriptor( - name="FormExtractionParams", - full_name="google.cloud.documentai.v1beta1.FormExtractionParams", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="enabled", - full_name="google.cloud.documentai.v1beta1.FormExtractionParams.enabled", - index=0, - number=1, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="key_value_pair_hints", - full_name="google.cloud.documentai.v1beta1.FormExtractionParams.key_value_pair_hints", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="model_version", - full_name="google.cloud.documentai.v1beta1.FormExtractionParams.model_version", - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - 
file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1573, - serialized_end=1716, -) - - -_KEYVALUEPAIRHINT = _descriptor.Descriptor( - name="KeyValuePairHint", - full_name="google.cloud.documentai.v1beta1.KeyValuePairHint", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="google.cloud.documentai.v1beta1.KeyValuePairHint.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="value_types", - full_name="google.cloud.documentai.v1beta1.KeyValuePairHint.value_types", - index=1, - number=2, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1718, - serialized_end=1770, -) - - -_ENTITYEXTRACTIONPARAMS = _descriptor.Descriptor( - name="EntityExtractionParams", - full_name="google.cloud.documentai.v1beta1.EntityExtractionParams", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="enabled", - full_name="google.cloud.documentai.v1beta1.EntityExtractionParams.enabled", - index=0, - number=1, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - 
is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="model_version", - full_name="google.cloud.documentai.v1beta1.EntityExtractionParams.model_version", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1772, - serialized_end=1836, -) - - -_INPUTCONFIG = _descriptor.Descriptor( - name="InputConfig", - full_name="google.cloud.documentai.v1beta1.InputConfig", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="gcs_source", - full_name="google.cloud.documentai.v1beta1.InputConfig.gcs_source", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="mime_type", - full_name="google.cloud.documentai.v1beta1.InputConfig.mime_type", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=_b("\340A\002"), - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="source", - 
full_name="google.cloud.documentai.v1beta1.InputConfig.source", - index=0, - containing_type=None, - fields=[], - ) - ], - serialized_start=1838, - serialized_end=1951, -) - - -_OUTPUTCONFIG = _descriptor.Descriptor( - name="OutputConfig", - full_name="google.cloud.documentai.v1beta1.OutputConfig", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="gcs_destination", - full_name="google.cloud.documentai.v1beta1.OutputConfig.gcs_destination", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="pages_per_shard", - full_name="google.cloud.documentai.v1beta1.OutputConfig.pages_per_shard", - index=1, - number=2, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="destination", - full_name="google.cloud.documentai.v1beta1.OutputConfig.destination", - index=0, - containing_type=None, - fields=[], - ) - ], - serialized_start=1954, - serialized_end=2084, -) - - -_GCSSOURCE = _descriptor.Descriptor( - name="GcsSource", - full_name="google.cloud.documentai.v1beta1.GcsSource", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="uri", - full_name="google.cloud.documentai.v1beta1.GcsSource.uri", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - 
message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=_b("\340A\002"), - file=DESCRIPTOR, - ) - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2086, - serialized_end=2115, -) - - -_GCSDESTINATION = _descriptor.Descriptor( - name="GcsDestination", - full_name="google.cloud.documentai.v1beta1.GcsDestination", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="uri", - full_name="google.cloud.documentai.v1beta1.GcsDestination.uri", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=_b("\340A\002"), - file=DESCRIPTOR, - ) - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2117, - serialized_end=2151, -) - - -_OPERATIONMETADATA = _descriptor.Descriptor( - name="OperationMetadata", - full_name="google.cloud.documentai.v1beta1.OperationMetadata", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="state", - full_name="google.cloud.documentai.v1beta1.OperationMetadata.state", - index=0, - number=1, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="state_message", - full_name="google.cloud.documentai.v1beta1.OperationMetadata.state_message", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - 
has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="create_time", - full_name="google.cloud.documentai.v1beta1.OperationMetadata.create_time", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="update_time", - full_name="google.cloud.documentai.v1beta1.OperationMetadata.update_time", - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[_OPERATIONMETADATA_STATE], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2154, - serialized_end=2481, -) - -_BATCHPROCESSDOCUMENTSREQUEST.fields_by_name[ - "requests" -].message_type = _PROCESSDOCUMENTREQUEST -_PROCESSDOCUMENTREQUEST.fields_by_name["input_config"].message_type = _INPUTCONFIG -_PROCESSDOCUMENTREQUEST.fields_by_name["output_config"].message_type = _OUTPUTCONFIG -_PROCESSDOCUMENTREQUEST.fields_by_name[ - "table_extraction_params" -].message_type = _TABLEEXTRACTIONPARAMS -_PROCESSDOCUMENTREQUEST.fields_by_name[ - "form_extraction_params" -].message_type = _FORMEXTRACTIONPARAMS -_PROCESSDOCUMENTREQUEST.fields_by_name[ - "entity_extraction_params" -].message_type = _ENTITYEXTRACTIONPARAMS -_PROCESSDOCUMENTREQUEST.fields_by_name["ocr_params"].message_type = _OCRPARAMS -_BATCHPROCESSDOCUMENTSRESPONSE.fields_by_name[ - 
"responses" -].message_type = _PROCESSDOCUMENTRESPONSE -_PROCESSDOCUMENTRESPONSE.fields_by_name["input_config"].message_type = _INPUTCONFIG -_PROCESSDOCUMENTRESPONSE.fields_by_name["output_config"].message_type = _OUTPUTCONFIG -_TABLEEXTRACTIONPARAMS.fields_by_name[ - "table_bound_hints" -].message_type = _TABLEBOUNDHINT -_TABLEBOUNDHINT.fields_by_name[ - "bounding_box" -].message_type = ( - google_dot_cloud_dot_documentai__v1beta1_dot_proto_dot_geometry__pb2._BOUNDINGPOLY -) -_FORMEXTRACTIONPARAMS.fields_by_name[ - "key_value_pair_hints" -].message_type = _KEYVALUEPAIRHINT -_INPUTCONFIG.fields_by_name["gcs_source"].message_type = _GCSSOURCE -_INPUTCONFIG.oneofs_by_name["source"].fields.append( - _INPUTCONFIG.fields_by_name["gcs_source"] -) -_INPUTCONFIG.fields_by_name[ - "gcs_source" -].containing_oneof = _INPUTCONFIG.oneofs_by_name["source"] -_OUTPUTCONFIG.fields_by_name["gcs_destination"].message_type = _GCSDESTINATION -_OUTPUTCONFIG.oneofs_by_name["destination"].fields.append( - _OUTPUTCONFIG.fields_by_name["gcs_destination"] -) -_OUTPUTCONFIG.fields_by_name[ - "gcs_destination" -].containing_oneof = _OUTPUTCONFIG.oneofs_by_name["destination"] -_OPERATIONMETADATA.fields_by_name["state"].enum_type = _OPERATIONMETADATA_STATE -_OPERATIONMETADATA.fields_by_name[ - "create_time" -].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP -_OPERATIONMETADATA.fields_by_name[ - "update_time" -].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP -_OPERATIONMETADATA_STATE.containing_type = _OPERATIONMETADATA -DESCRIPTOR.message_types_by_name[ - "BatchProcessDocumentsRequest" -] = _BATCHPROCESSDOCUMENTSREQUEST -DESCRIPTOR.message_types_by_name["ProcessDocumentRequest"] = _PROCESSDOCUMENTREQUEST -DESCRIPTOR.message_types_by_name[ - "BatchProcessDocumentsResponse" -] = _BATCHPROCESSDOCUMENTSRESPONSE -DESCRIPTOR.message_types_by_name["ProcessDocumentResponse"] = _PROCESSDOCUMENTRESPONSE -DESCRIPTOR.message_types_by_name["OcrParams"] = _OCRPARAMS 
-DESCRIPTOR.message_types_by_name["TableExtractionParams"] = _TABLEEXTRACTIONPARAMS -DESCRIPTOR.message_types_by_name["TableBoundHint"] = _TABLEBOUNDHINT -DESCRIPTOR.message_types_by_name["FormExtractionParams"] = _FORMEXTRACTIONPARAMS -DESCRIPTOR.message_types_by_name["KeyValuePairHint"] = _KEYVALUEPAIRHINT -DESCRIPTOR.message_types_by_name["EntityExtractionParams"] = _ENTITYEXTRACTIONPARAMS -DESCRIPTOR.message_types_by_name["InputConfig"] = _INPUTCONFIG -DESCRIPTOR.message_types_by_name["OutputConfig"] = _OUTPUTCONFIG -DESCRIPTOR.message_types_by_name["GcsSource"] = _GCSSOURCE -DESCRIPTOR.message_types_by_name["GcsDestination"] = _GCSDESTINATION -DESCRIPTOR.message_types_by_name["OperationMetadata"] = _OPERATIONMETADATA -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -BatchProcessDocumentsRequest = _reflection.GeneratedProtocolMessageType( - "BatchProcessDocumentsRequest", - (_message.Message,), - dict( - DESCRIPTOR=_BATCHPROCESSDOCUMENTSREQUEST, - __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", - __doc__="""Request to batch process documents as an asynchronous - operation. - - - Attributes: - requests: - Required. Individual requests for each document. - parent: - Target project and location to make a call. Format: - ``projects/{project-id}/locations/{location-id}``. If no - location is specified, a region will be chosen automatically. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.BatchProcessDocumentsRequest) - ), -) -_sym_db.RegisterMessage(BatchProcessDocumentsRequest) - -ProcessDocumentRequest = _reflection.GeneratedProtocolMessageType( - "ProcessDocumentRequest", - (_message.Message,), - dict( - DESCRIPTOR=_PROCESSDOCUMENTREQUEST, - __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", - __doc__="""Request to process one document. - - - Attributes: - input_config: - Required. Information about the input file. - output_config: - Required. The desired output location. 
| - document_type: - Specifies a known document type for deeper structure - detection. Valid values are currently "general" and "invoice". - If not provided, "general" | is used as default. If any - other value is given, the request is rejected. - table_extraction_params: - Controls table extraction behavior. If not specified, the - system will decide reasonable defaults. - form_extraction_params: - Controls form extraction behavior. If not specified, the - system will decide reasonable defaults. - entity_extraction_params: - Controls entity extraction behavior. If not specified, the - system will decide reasonable defaults. - ocr_params: - Controls OCR behavior. If not specified, the system will - decide reasonable defaults. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.ProcessDocumentRequest) - ), -) -_sym_db.RegisterMessage(ProcessDocumentRequest) - -BatchProcessDocumentsResponse = _reflection.GeneratedProtocolMessageType( - "BatchProcessDocumentsResponse", - (_message.Message,), - dict( - DESCRIPTOR=_BATCHPROCESSDOCUMENTSRESPONSE, - __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", - __doc__="""Response to an batch document processing request. This is - returned in the LRO Operation after the operation is complete. - - - Attributes: - responses: - Responses for each individual document. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.BatchProcessDocumentsResponse) - ), -) -_sym_db.RegisterMessage(BatchProcessDocumentsResponse) - -ProcessDocumentResponse = _reflection.GeneratedProtocolMessageType( - "ProcessDocumentResponse", - (_message.Message,), - dict( - DESCRIPTOR=_PROCESSDOCUMENTRESPONSE, - __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", - __doc__="""Response to a single document processing request. - - - Attributes: - input_config: - Information about the input file. 
This is the same as the - corresponding input config in the request. - output_config: - The output location of the parsed responses. The responses are - written to this location as JSON-serialized ``Document`` - objects. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.ProcessDocumentResponse) - ), -) -_sym_db.RegisterMessage(ProcessDocumentResponse) - -OcrParams = _reflection.GeneratedProtocolMessageType( - "OcrParams", - (_message.Message,), - dict( - DESCRIPTOR=_OCRPARAMS, - __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", - __doc__="""Parameters to control Optical Character Recognition (OCR) - behavior. - - - Attributes: - language_hints: - List of languages to use for OCR. In most cases, an empty - value yields the best results since it enables automatic - language detection. For languages based on the Latin alphabet, - setting ``language_hints`` is not needed. In rare cases, when - the language of the text in the image is known, setting a hint - will help get better results (although it will be a - significant hindrance if the hint is wrong). Document - processing returns an error if one or more of the specified - languages is not one of the supported languages. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.OcrParams) - ), -) -_sym_db.RegisterMessage(OcrParams) - -TableExtractionParams = _reflection.GeneratedProtocolMessageType( - "TableExtractionParams", - (_message.Message,), - dict( - DESCRIPTOR=_TABLEEXTRACTIONPARAMS, - __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", - __doc__="""Parameters to control table extraction behavior. - - - Attributes: - enabled: - Whether to enable table extraction. - table_bound_hints: - Optional. Table bounding box hints that can be provided to - complex cases which our algorithm cannot locate the table(s) - in. - header_hints: - Optional. Table header hints. 
The extraction will bias towards - producing these terms as table headers, which may improve - accuracy. - model_version: - Model version of the table extraction system. Default is - "builtin/stable". Specify "builtin/latest" for the latest - model. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.TableExtractionParams) - ), -) -_sym_db.RegisterMessage(TableExtractionParams) - -TableBoundHint = _reflection.GeneratedProtocolMessageType( - "TableBoundHint", - (_message.Message,), - dict( - DESCRIPTOR=_TABLEBOUNDHINT, - __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", - __doc__="""A hint for a table bounding box on the page for table - parsing. - - - Attributes: - page_number: - Optional. Page number for multi-paged inputs this hint applies - to. If not provided, this hint will apply to all pages by - default. This value is 1-based. - bounding_box: - Bounding box hint for a table on this page. The coordinates - must be normalized to [0,1] and the bounding box must be an - axis-aligned rectangle. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.TableBoundHint) - ), -) -_sym_db.RegisterMessage(TableBoundHint) - -FormExtractionParams = _reflection.GeneratedProtocolMessageType( - "FormExtractionParams", - (_message.Message,), - dict( - DESCRIPTOR=_FORMEXTRACTIONPARAMS, - __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", - __doc__="""Parameters to control form extraction behavior. - - - Attributes: - enabled: - Whether to enable form extraction. - key_value_pair_hints: - User can provide pairs of (key text, value type) to improve - the parsing result. 
For example, if a document has a field - called "Date" that holds a date value and a field called - "Amount" that may hold either a currency value (e.g., - "$500.00") or a simple number value (e.g., "20"), you could - use the following hints: [ {"key": "Date", value\_types: [ - "DATE"]}, {"key": "Amount", "value\_types": [ "PRICE", - "NUMBER" ]} ] If the value type is unknown, but you want to - provide hints for the keys, you can leave the value\_types - field blank. e.g. {"key": "Date", "value\_types": []} - model_version: - Model version of the form extraction system. Default is - "builtin/stable". Specify "builtin/latest" for the latest - model. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.FormExtractionParams) - ), -) -_sym_db.RegisterMessage(FormExtractionParams) - -KeyValuePairHint = _reflection.GeneratedProtocolMessageType( - "KeyValuePairHint", - (_message.Message,), - dict( - DESCRIPTOR=_KEYVALUEPAIRHINT, - __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", - __doc__="""User-provided hint for key value pair. - - - Attributes: - key: - The key text for the hint. - value_types: - Type of the value. This is case-insensitive, and could be one - of: ADDRESS, LOCATION, ORGANIZATION, PERSON, PHONE\_NUMBER, - ID, NUMBER, EMAIL, PRICE, TERMS, DATE, NAME. Types not in this - list will be ignored. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.KeyValuePairHint) - ), -) -_sym_db.RegisterMessage(KeyValuePairHint) - -EntityExtractionParams = _reflection.GeneratedProtocolMessageType( - "EntityExtractionParams", - (_message.Message,), - dict( - DESCRIPTOR=_ENTITYEXTRACTIONPARAMS, - __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", - __doc__="""Parameters to control entity extraction behavior. - - - Attributes: - enabled: - Whether to enable entity extraction. - model_version: - Model version of the entity extraction. Default is - "builtin/stable". 
Specify "builtin/latest" for the latest - model. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.EntityExtractionParams) - ), -) -_sym_db.RegisterMessage(EntityExtractionParams) - -InputConfig = _reflection.GeneratedProtocolMessageType( - "InputConfig", - (_message.Message,), - dict( - DESCRIPTOR=_INPUTCONFIG, - __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", - __doc__="""The desired input location and metadata. - - - Attributes: - source: - Required. - gcs_source: - The Google Cloud Storage location to read the input from. This - must be a single file. - mime_type: - Required. Mimetype of the input. Current supported mimetypes - are application/pdf, image/tiff, and image/gif. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.InputConfig) - ), -) -_sym_db.RegisterMessage(InputConfig) - -OutputConfig = _reflection.GeneratedProtocolMessageType( - "OutputConfig", - (_message.Message,), - dict( - DESCRIPTOR=_OUTPUTCONFIG, - __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", - __doc__="""The desired output location and metadata. - - - Attributes: - destination: - Required. - gcs_destination: - The Google Cloud Storage location to write the output to. - pages_per_shard: - The max number of pages to include into each output Document - shard JSON on Google Cloud Storage. The valid range is [1, - 100]. If not specified, the default value is 20. For example, - for one pdf file with 100 pages, 100 parsed pages will be - produced. If ``pages_per_shard`` = 20, then 5 Document shard - JSON files each containing 20 parsed pages will be written - under the prefix [OutputConfig.gcs\_destination.uri][] and - suffix pages-x-to-y.json where x and y are 1-indexed page - numbers. 
Example GCS outputs with 157 pages and - pages\_per\_shard = 50: pages-001-to-050.json - pages-051-to-100.json pages-101-to-150.json - pages-151-to-157.json - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.OutputConfig) - ), -) -_sym_db.RegisterMessage(OutputConfig) - -GcsSource = _reflection.GeneratedProtocolMessageType( - "GcsSource", - (_message.Message,), - dict( - DESCRIPTOR=_GCSSOURCE, - __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", - __doc__="""The Google Cloud Storage location where the input file - will be read from. - - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.GcsSource) - ), -) -_sym_db.RegisterMessage(GcsSource) - -GcsDestination = _reflection.GeneratedProtocolMessageType( - "GcsDestination", - (_message.Message,), - dict( - DESCRIPTOR=_GCSDESTINATION, - __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", - __doc__="""The Google Cloud Storage location where the output file - will be written to. - - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.GcsDestination) - ), -) -_sym_db.RegisterMessage(GcsDestination) - -OperationMetadata = _reflection.GeneratedProtocolMessageType( - "OperationMetadata", - (_message.Message,), - dict( - DESCRIPTOR=_OPERATIONMETADATA, - __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", - __doc__="""Contains metadata for the BatchProcessDocuments operation. - - - Attributes: - state: - The state of the current batch processing. - state_message: - A message providing more details about the current state of - processing. - create_time: - The creation time of the operation. - update_time: - The last update time of the operation. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.OperationMetadata) - ), -) -_sym_db.RegisterMessage(OperationMetadata) - - -DESCRIPTOR._options = None -_BATCHPROCESSDOCUMENTSREQUEST.fields_by_name["requests"]._options = None -_PROCESSDOCUMENTREQUEST.fields_by_name["input_config"]._options = None -_PROCESSDOCUMENTREQUEST.fields_by_name["output_config"]._options = None -_TABLEEXTRACTIONPARAMS.fields_by_name["table_bound_hints"]._options = None -_TABLEEXTRACTIONPARAMS.fields_by_name["header_hints"]._options = None -_TABLEBOUNDHINT.fields_by_name["page_number"]._options = None -_INPUTCONFIG.fields_by_name["mime_type"]._options = None -_GCSSOURCE.fields_by_name["uri"]._options = None -_GCSDESTINATION.fields_by_name["uri"]._options = None - -_DOCUMENTUNDERSTANDINGSERVICE = _descriptor.ServiceDescriptor( - name="DocumentUnderstandingService", - full_name="google.cloud.documentai.v1beta1.DocumentUnderstandingService", - file=DESCRIPTOR, - index=0, - serialized_options=_b( - "\312A\031documentai.googleapis.com\322A.https://www.googleapis.com/auth/cloud-platform" - ), - serialized_start=2484, - serialized_end=2912, - methods=[ - _descriptor.MethodDescriptor( - name="BatchProcessDocuments", - full_name="google.cloud.documentai.v1beta1.DocumentUnderstandingService.BatchProcessDocuments", - index=0, - containing_service=None, - input_type=_BATCHPROCESSDOCUMENTSREQUEST, - output_type=google_dot_longrunning_dot_operations__pb2._OPERATION, - serialized_options=_b( - '\202\323\344\223\002~"?/v1beta1/{parent=projects/*/locations/*}/documents:batchProcess:\001*Z8"3/v1beta1/{parent=projects/*}/documents:batchProcess:\001*\332A\010requests\312A2\n\035BatchProcessDocumentsResponse\022\021OperationMetadata' - ), - ) - ], -) -_sym_db.RegisterServiceDescriptor(_DOCUMENTUNDERSTANDINGSERVICE) - -DESCRIPTOR.services_by_name[ - "DocumentUnderstandingService" -] = _DOCUMENTUNDERSTANDINGSERVICE - -# @@protoc_insertion_point(module_scope) diff --git 
a/google/cloud/documentai_v1beta1/proto/document_understanding_pb2_grpc.py b/google/cloud/documentai_v1beta1/proto/document_understanding_pb2_grpc.py deleted file mode 100644 index 6e6d2308..00000000 --- a/google/cloud/documentai_v1beta1/proto/document_understanding_pb2_grpc.py +++ /dev/null @@ -1,57 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -import grpc - -from google.cloud.documentai_v1beta1.proto import ( - document_understanding_pb2 as google_dot_cloud_dot_documentai__v1beta1_dot_proto_dot_document__understanding__pb2, -) -from google.longrunning import ( - operations_pb2 as google_dot_longrunning_dot_operations__pb2, -) - - -class DocumentUnderstandingServiceStub(object): - """Service to parse structured information from unstructured or semi-structured - documents using state-of-the-art Google AI such as natural language, - computer vision, and translation. - """ - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.BatchProcessDocuments = channel.unary_unary( - "/google.cloud.documentai.v1beta1.DocumentUnderstandingService/BatchProcessDocuments", - request_serializer=google_dot_cloud_dot_documentai__v1beta1_dot_proto_dot_document__understanding__pb2.BatchProcessDocumentsRequest.SerializeToString, - response_deserializer=google_dot_longrunning_dot_operations__pb2.Operation.FromString, - ) - - -class DocumentUnderstandingServiceServicer(object): - """Service to parse structured information from unstructured or semi-structured - documents using state-of-the-art Google AI such as natural language, - computer vision, and translation. - """ - - def BatchProcessDocuments(self, request, context): - """LRO endpoint to batch process many documents. 
- """ - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - -def add_DocumentUnderstandingServiceServicer_to_server(servicer, server): - rpc_method_handlers = { - "BatchProcessDocuments": grpc.unary_unary_rpc_method_handler( - servicer.BatchProcessDocuments, - request_deserializer=google_dot_cloud_dot_documentai__v1beta1_dot_proto_dot_document__understanding__pb2.BatchProcessDocumentsRequest.FromString, - response_serializer=google_dot_longrunning_dot_operations__pb2.Operation.SerializeToString, - ) - } - generic_handler = grpc.method_handlers_generic_handler( - "google.cloud.documentai.v1beta1.DocumentUnderstandingService", - rpc_method_handlers, - ) - server.add_generic_rpc_handlers((generic_handler,)) diff --git a/google/cloud/documentai_v1beta1/proto/geometry.proto b/google/cloud/documentai_v1beta1/proto/geometry.proto deleted file mode 100644 index 9dbe2b78..00000000 --- a/google/cloud/documentai_v1beta1/proto/geometry.proto +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2019 Google LLC. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// - -syntax = "proto3"; - -package google.cloud.documentai.v1beta1; - -import "google/api/annotations.proto"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta1;documentai"; -option java_multiple_files = true; -option java_outer_classname = "GeometryProto"; -option java_package = "com.google.cloud.documentai.v1beta1"; - -// A vertex represents a 2D point in the image. -// NOTE: the vertex coordinates are in the same scale as the original image. -message Vertex { - // X coordinate. - int32 x = 1; - - // Y coordinate. - int32 y = 2; -} - -// A vertex represents a 2D point in the image. -// NOTE: the normalized vertex coordinates are relative to the original image -// and range from 0 to 1. -message NormalizedVertex { - // X coordinate. - float x = 1; - - // Y coordinate. - float y = 2; -} - -// A bounding polygon for the detected image annotation. -message BoundingPoly { - // The bounding polygon vertices. - repeated Vertex vertices = 1; - - // The bounding polygon normalized vertices. - repeated NormalizedVertex normalized_vertices = 2; -} diff --git a/google/cloud/documentai_v1beta1/proto/geometry_pb2.py b/google/cloud/documentai_v1beta1/proto/geometry_pb2.py deleted file mode 100644 index 2e4217e3..00000000 --- a/google/cloud/documentai_v1beta1/proto/geometry_pb2.py +++ /dev/null @@ -1,270 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: google/cloud/documentai_v1beta1/proto/geometry.proto - -import sys - -_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1")) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/documentai_v1beta1/proto/geometry.proto", - package="google.cloud.documentai.v1beta1", - syntax="proto3", - serialized_options=_b( - "\n#com.google.cloud.documentai.v1beta1B\rGeometryProtoP\001ZIgoogle.golang.org/genproto/googleapis/cloud/documentai/v1beta1;documentai" - ), - serialized_pb=_b( - '\n4google/cloud/documentai_v1beta1/proto/geometry.proto\x12\x1fgoogle.cloud.documentai.v1beta1\x1a\x1cgoogle/api/annotations.proto"\x1e\n\x06Vertex\x12\t\n\x01x\x18\x01 \x01(\x05\x12\t\n\x01y\x18\x02 \x01(\x05"(\n\x10NormalizedVertex\x12\t\n\x01x\x18\x01 \x01(\x02\x12\t\n\x01y\x18\x02 \x01(\x02"\x99\x01\n\x0c\x42oundingPoly\x12\x39\n\x08vertices\x18\x01 \x03(\x0b\x32\'.google.cloud.documentai.v1beta1.Vertex\x12N\n\x13normalized_vertices\x18\x02 \x03(\x0b\x32\x31.google.cloud.documentai.v1beta1.NormalizedVertexB\x81\x01\n#com.google.cloud.documentai.v1beta1B\rGeometryProtoP\x01ZIgoogle.golang.org/genproto/googleapis/cloud/documentai/v1beta1;documentaib\x06proto3' - ), - dependencies=[google_dot_api_dot_annotations__pb2.DESCRIPTOR], -) - - -_VERTEX = _descriptor.Descriptor( - name="Vertex", - full_name="google.cloud.documentai.v1beta1.Vertex", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="x", - full_name="google.cloud.documentai.v1beta1.Vertex.x", - index=0, - number=1, - type=5, - cpp_type=1, - label=1, - 
has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="y", - full_name="google.cloud.documentai.v1beta1.Vertex.y", - index=1, - number=2, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=119, - serialized_end=149, -) - - -_NORMALIZEDVERTEX = _descriptor.Descriptor( - name="NormalizedVertex", - full_name="google.cloud.documentai.v1beta1.NormalizedVertex", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="x", - full_name="google.cloud.documentai.v1beta1.NormalizedVertex.x", - index=0, - number=1, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="y", - full_name="google.cloud.documentai.v1beta1.NormalizedVertex.y", - index=1, - number=2, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=151, - serialized_end=191, -) - - -_BOUNDINGPOLY = 
_descriptor.Descriptor( - name="BoundingPoly", - full_name="google.cloud.documentai.v1beta1.BoundingPoly", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="vertices", - full_name="google.cloud.documentai.v1beta1.BoundingPoly.vertices", - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="normalized_vertices", - full_name="google.cloud.documentai.v1beta1.BoundingPoly.normalized_vertices", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=194, - serialized_end=347, -) - -_BOUNDINGPOLY.fields_by_name["vertices"].message_type = _VERTEX -_BOUNDINGPOLY.fields_by_name["normalized_vertices"].message_type = _NORMALIZEDVERTEX -DESCRIPTOR.message_types_by_name["Vertex"] = _VERTEX -DESCRIPTOR.message_types_by_name["NormalizedVertex"] = _NORMALIZEDVERTEX -DESCRIPTOR.message_types_by_name["BoundingPoly"] = _BOUNDINGPOLY -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -Vertex = _reflection.GeneratedProtocolMessageType( - "Vertex", - (_message.Message,), - dict( - DESCRIPTOR=_VERTEX, - __module__="google.cloud.documentai_v1beta1.proto.geometry_pb2", - __doc__="""X coordinate. - - - Attributes: - y: - Y coordinate. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Vertex) - ), -) -_sym_db.RegisterMessage(Vertex) - -NormalizedVertex = _reflection.GeneratedProtocolMessageType( - "NormalizedVertex", - (_message.Message,), - dict( - DESCRIPTOR=_NORMALIZEDVERTEX, - __module__="google.cloud.documentai_v1beta1.proto.geometry_pb2", - __doc__="""X coordinate. - - - Attributes: - y: - Y coordinate. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.NormalizedVertex) - ), -) -_sym_db.RegisterMessage(NormalizedVertex) - -BoundingPoly = _reflection.GeneratedProtocolMessageType( - "BoundingPoly", - (_message.Message,), - dict( - DESCRIPTOR=_BOUNDINGPOLY, - __module__="google.cloud.documentai_v1beta1.proto.geometry_pb2", - __doc__="""A bounding polygon for the detected image annotation. - - - Attributes: - vertices: - The bounding polygon vertices. - normalized_vertices: - The bounding polygon normalized vertices. - """, - # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.BoundingPoly) - ), -) -_sym_db.RegisterMessage(BoundingPoly) - - -DESCRIPTOR._options = None -# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/documentai_v1beta1/proto/geometry_pb2_grpc.py b/google/cloud/documentai_v1beta1/proto/geometry_pb2_grpc.py deleted file mode 100644 index 07cb78fe..00000000 --- a/google/cloud/documentai_v1beta1/proto/geometry_pb2_grpc.py +++ /dev/null @@ -1,2 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -import grpc diff --git a/google/cloud/documentai_v1beta1/types.py b/google/cloud/documentai_v1beta1/types.py deleted file mode 100644 index ccc4adc2..00000000 --- a/google/cloud/documentai_v1beta1/types.py +++ /dev/null @@ -1,58 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from __future__ import absolute_import -import sys - -from google.api_core.protobuf_helpers import get_messages - -from google.cloud.documentai_v1beta1.proto import document_pb2 -from google.cloud.documentai_v1beta1.proto import document_understanding_pb2 -from google.cloud.documentai_v1beta1.proto import geometry_pb2 -from google.longrunning import operations_pb2 -from google.protobuf import any_pb2 -from google.protobuf import timestamp_pb2 -from google.protobuf import wrappers_pb2 -from google.rpc import status_pb2 -from google.type import color_pb2 - - -_shared_modules = [ - operations_pb2, - any_pb2, - timestamp_pb2, - wrappers_pb2, - status_pb2, - color_pb2, -] - -_local_modules = [document_pb2, document_understanding_pb2, geometry_pb2] - -names = [] - -for module in _shared_modules: # pragma: NO COVER - for name, message in get_messages(module).items(): - setattr(sys.modules[__name__], name, message) - names.append(name) -for module in _local_modules: - for name, message in get_messages(module).items(): - message.__module__ = "google.cloud.documentai_v1beta1.types" - setattr(sys.modules[__name__], name, message) - names.append(name) - - -__all__ = tuple(sorted(names)) diff --git a/google/cloud/documentai_v1beta2/__init__.py b/google/cloud/documentai_v1beta2/__init__.py new file mode 100644 index 00000000..f855da07 --- /dev/null +++ b/google/cloud/documentai_v1beta2/__init__.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this 
file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +from .services.document_understanding_service import DocumentUnderstandingServiceClient +from .types.document import Document +from .types.document_understanding import AutoMlParams +from .types.document_understanding import BatchProcessDocumentsRequest +from .types.document_understanding import BatchProcessDocumentsResponse +from .types.document_understanding import EntityExtractionParams +from .types.document_understanding import FormExtractionParams +from .types.document_understanding import GcsDestination +from .types.document_understanding import GcsSource +from .types.document_understanding import InputConfig +from .types.document_understanding import KeyValuePairHint +from .types.document_understanding import OcrParams +from .types.document_understanding import OperationMetadata +from .types.document_understanding import OutputConfig +from .types.document_understanding import ProcessDocumentRequest +from .types.document_understanding import ProcessDocumentResponse +from .types.document_understanding import TableBoundHint +from .types.document_understanding import TableExtractionParams +from .types.geometry import BoundingPoly +from .types.geometry import NormalizedVertex +from .types.geometry import Vertex + + +__all__ = ( + "AutoMlParams", + "BatchProcessDocumentsRequest", + "BatchProcessDocumentsResponse", + "BoundingPoly", + "Document", + "EntityExtractionParams", + "FormExtractionParams", + "GcsDestination", + "GcsSource", + "InputConfig", + "KeyValuePairHint", + "NormalizedVertex", + 
"OcrParams", + "OperationMetadata", + "OutputConfig", + "ProcessDocumentRequest", + "ProcessDocumentResponse", + "TableBoundHint", + "TableExtractionParams", + "Vertex", + "DocumentUnderstandingServiceClient", +) diff --git a/google/cloud/documentai_v1beta2/py.typed b/google/cloud/documentai_v1beta2/py.typed new file mode 100644 index 00000000..81b45001 --- /dev/null +++ b/google/cloud/documentai_v1beta2/py.typed @@ -0,0 +1,2 @@ +# Marker file for PEP 561. +# The google-cloud-documentai package uses inline types. diff --git a/google/cloud/__init__.py b/google/cloud/documentai_v1beta2/services/__init__.py similarity index 67% rename from google/cloud/__init__.py rename to google/cloud/documentai_v1beta2/services/__init__.py index 9a1b64a6..2c56c537 100644 --- a/google/cloud/__init__.py +++ b/google/cloud/documentai_v1beta2/services/__init__.py @@ -1,24 +1,16 @@ # -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC + +# Copyright (C) 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# https://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- -try: - import pkg_resources - - pkg_resources.declare_namespace(__name__) -except ImportError: - import pkgutil - - __path__ = pkgutil.extend_path(__path__, __name__) +# diff --git a/google/__init__.py b/google/cloud/documentai_v1beta2/services/document_understanding_service/__init__.py similarity index 67% rename from google/__init__.py rename to google/cloud/documentai_v1beta2/services/document_understanding_service/__init__.py index 9a1b64a6..8e39d0b8 100644 --- a/google/__init__.py +++ b/google/cloud/documentai_v1beta2/services/document_understanding_service/__init__.py @@ -1,24 +1,20 @@ # -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC + +# Copyright (C) 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# https://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+# -try: - import pkg_resources - - pkg_resources.declare_namespace(__name__) -except ImportError: - import pkgutil +from .client import DocumentUnderstandingServiceClient - __path__ = pkgutil.extend_path(__path__, __name__) +__all__ = ("DocumentUnderstandingServiceClient",) diff --git a/google/cloud/documentai_v1beta2/services/document_understanding_service/client.py b/google/cloud/documentai_v1beta2/services/document_understanding_service/client.py new file mode 100644 index 00000000..1035b8c5 --- /dev/null +++ b/google/cloud/documentai_v1beta2/services/document_understanding_service/client.py @@ -0,0 +1,281 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from collections import OrderedDict +from typing import Dict, Sequence, Tuple, Type, Union +import pkg_resources + +import google.api_core.client_options as ClientOptions # type: ignore +from google.api_core import exceptions # type: ignore +from google.api_core import gapic_v1 # type: ignore +from google.api_core import retry as retries # type: ignore +from google.auth import credentials # type: ignore +from google.oauth2 import service_account # type: ignore + +from google.api_core import operation +from google.cloud.documentai_v1beta2.types import document +from google.cloud.documentai_v1beta2.types import document_understanding +from google.rpc import status_pb2 as status # type: ignore + +from .transports.base import DocumentUnderstandingServiceTransport +from .transports.grpc import DocumentUnderstandingServiceGrpcTransport + + +class DocumentUnderstandingServiceClientMeta(type): + """Metaclass for the DocumentUnderstandingService client. + + This provides class-level methods for building and retrieving + support objects (e.g. transport) without polluting the client instance + objects. + """ + + _transport_registry = ( + OrderedDict() + ) # type: Dict[str, Type[DocumentUnderstandingServiceTransport]] + _transport_registry["grpc"] = DocumentUnderstandingServiceGrpcTransport + + def get_transport_class( + cls, label: str = None + ) -> Type[DocumentUnderstandingServiceTransport]: + """Return an appropriate transport class. + + Args: + label: The name of the desired transport. If none is + provided, then the first transport in the registry is used. + + Returns: + The transport class to use. + """ + # If a specific transport is requested, return that one. + if label: + return cls._transport_registry[label] + + # No transport is requested; return the default (that is, the first one + # in the dictionary). 
+ return next(iter(cls._transport_registry.values())) + + +class DocumentUnderstandingServiceClient( + metaclass=DocumentUnderstandingServiceClientMeta +): + """Service to parse structured information from unstructured or + semi-structured documents using state-of-the-art Google AI such + as natural language, computer vision, and translation. + """ + + DEFAULT_OPTIONS = ClientOptions.ClientOptions( + api_endpoint="us-documentai.googleapis.com" + ) + + @classmethod + def from_service_account_file(cls, filename: str, *args, **kwargs): + """Creates an instance of this client using the provided credentials + file. + + Args: + filename (str): The path to the service account private key json + file. + args: Additional arguments to pass to the constructor. + kwargs: Additional arguments to pass to the constructor. + + Returns: + DocumentUnderstandingServiceClient: The constructed client. + """ + credentials = service_account.Credentials.from_service_account_file(filename) + kwargs["credentials"] = credentials + return cls(*args, **kwargs) + + from_service_account_json = from_service_account_file + + def __init__( + self, + *, + credentials: credentials.Credentials = None, + transport: Union[str, DocumentUnderstandingServiceTransport] = None, + client_options: ClientOptions = DEFAULT_OPTIONS, + ) -> None: + """Instantiate the document understanding service client. + + Args: + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + transport (Union[str, ~.DocumentUnderstandingServiceTransport]): The + transport to use. If set to None, a transport is chosen + automatically. + client_options (ClientOptions): Custom options for the client.
+ """ + if isinstance(client_options, dict): + client_options = ClientOptions.from_dict(client_options) + + # Save or instantiate the transport. + # Ordinarily, we provide the transport, but allowing a custom transport + # instance provides an extensibility point for unusual situations. + if isinstance(transport, DocumentUnderstandingServiceTransport): + if credentials: + raise ValueError( + "When providing a transport instance, " + "provide its credentials directly." + ) + self._transport = transport + else: + Transport = type(self).get_transport_class(transport) + self._transport = Transport( + credentials=credentials, + host=client_options.api_endpoint or "us-documentai.googleapis.com", + ) + + def batch_process_documents( + self, + request: document_understanding.BatchProcessDocumentsRequest = None, + *, + requests: Sequence[document_understanding.ProcessDocumentRequest] = None, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> operation.Operation: + r"""LRO endpoint to batch process many documents. The output is + written to Cloud Storage as JSON in the [Document] format. + + Args: + request (:class:`~.document_understanding.BatchProcessDocumentsRequest`): + The request object. Request to batch process documents + as an asynchronous operation. The output is written to + Cloud Storage as JSON in the [Document] format. + requests (:class:`Sequence[~.document_understanding.ProcessDocumentRequest]`): + Required. Individual requests for + each document. + This corresponds to the ``requests`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. 
+ + Returns: + ~.operation.Operation: + An object representing a long-running operation. + + The result type for the operation will be + :class:`~.document_understanding.BatchProcessDocumentsResponse`: + Response to a batch document processing request. This + is returned in the LRO Operation after the operation is + complete. + + """ + # Create or coerce a protobuf request object. + # Sanity check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + if request is not None and any([requests]): + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + request = document_understanding.BatchProcessDocumentsRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + if requests is not None: + request.requests = requests + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = gapic_v1.method.wrap_method( + self._transport.batch_process_documents, + default_timeout=None, + client_info=_client_info, + ) + + # Send the request. + response = rpc(request, retry=retry, timeout=timeout, metadata=metadata) + + # Wrap the response in an operation future. + response = operation.from_gapic( + response, + self._transport.operations_client, + document_understanding.BatchProcessDocumentsResponse, + metadata_type=document_understanding.OperationMetadata, + ) + + # Done; return the response. + return response + + def process_document( + self, + request: document_understanding.ProcessDocumentRequest = None, + *, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> document.Document: + r"""Processes a single document. + + Args: + request (:class:`~.document_understanding.ProcessDocumentRequest`): + The request object. Request to process one document.
+ + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + ~.document.Document: + Document represents the canonical + document resource in Document + Understanding AI. It is an interchange + format that provides insights into + documents and allows for collaboration + between users and Document Understanding + AI to iterate and optimize for quality. + + """ + # Create or coerce a protobuf request object. + + request = document_understanding.ProcessDocumentRequest(request) + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = gapic_v1.method.wrap_method( + self._transport.process_document, + default_timeout=None, + client_info=_client_info, + ) + + # Send the request. + response = rpc(request, retry=retry, timeout=timeout, metadata=metadata) + + # Done; return the response. + return response + + +try: + _client_info = gapic_v1.client_info.ClientInfo( + gapic_version=pkg_resources.get_distribution("google-cloud-documentai").version + ) +except pkg_resources.DistributionNotFound: + _client_info = gapic_v1.client_info.ClientInfo() + + +__all__ = ("DocumentUnderstandingServiceClient",) diff --git a/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/__init__.py b/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/__init__.py new file mode 100644 index 00000000..f3a91e0d --- /dev/null +++ b/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/__init__.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from collections import OrderedDict +from typing import Dict, Type + +from .base import DocumentUnderstandingServiceTransport +from .grpc import DocumentUnderstandingServiceGrpcTransport + + +# Compile a registry of transports. +_transport_registry = ( + OrderedDict() +) # type: Dict[str, Type[DocumentUnderstandingServiceTransport]] +_transport_registry["grpc"] = DocumentUnderstandingServiceGrpcTransport + + +__all__ = ( + "DocumentUnderstandingServiceTransport", + "DocumentUnderstandingServiceGrpcTransport", +) diff --git a/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/base.py b/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/base.py new file mode 100644 index 00000000..956f3a2c --- /dev/null +++ b/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/base.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import abc +import typing + +from google import auth +from google.api_core import operations_v1 # type: ignore +from google.auth import credentials # type: ignore + +from google.cloud.documentai_v1beta2.types import document +from google.cloud.documentai_v1beta2.types import document_understanding +from google.longrunning import operations_pb2 as operations # type: ignore + + +class DocumentUnderstandingServiceTransport(metaclass=abc.ABCMeta): + """Abstract transport class for DocumentUnderstandingService.""" + + AUTH_SCOPES = ("https://www.googleapis.com/auth/cloud-platform",) + + def __init__( + self, + *, + host: str = "us-documentai.googleapis.com", + credentials: credentials.Credentials = None, + ) -> None: + """Instantiate the transport. + + Args: + host (Optional[str]): The hostname to connect to. + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + """ + # Save the hostname. Default to port 443 (HTTPS) if none is specified. + if ":" not in host: + host += ":443" + self._host = host + + # If no credentials are provided, then determine the appropriate + # defaults. + if credentials is None: + credentials, _ = auth.default(scopes=self.AUTH_SCOPES) + + # Save the credentials. 
+ self._credentials = credentials + + @property + def operations_client(self) -> operations_v1.OperationsClient: + """Return the client designed to process long-running operations.""" + raise NotImplementedError + + @property + def batch_process_documents( + self + ) -> typing.Callable[ + [document_understanding.BatchProcessDocumentsRequest], operations.Operation + ]: + raise NotImplementedError + + @property + def process_document( + self + ) -> typing.Callable[ + [document_understanding.ProcessDocumentRequest], document.Document + ]: + raise NotImplementedError + + +__all__ = ("DocumentUnderstandingServiceTransport",) diff --git a/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/grpc.py b/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/grpc.py new file mode 100644 index 00000000..80519aa8 --- /dev/null +++ b/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/grpc.py @@ -0,0 +1,194 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from typing import Callable, Dict + +from google.api_core import grpc_helpers # type: ignore +from google.api_core import operations_v1 # type: ignore +from google.auth import credentials # type: ignore + +import grpc # type: ignore + +from google.cloud.documentai_v1beta2.types import document +from google.cloud.documentai_v1beta2.types import document_understanding +from google.longrunning import operations_pb2 as operations # type: ignore + +from .base import DocumentUnderstandingServiceTransport + + +class DocumentUnderstandingServiceGrpcTransport(DocumentUnderstandingServiceTransport): + """gRPC backend transport for DocumentUnderstandingService. + + Service to parse structured information from unstructured or + semi-structured documents using state-of-the-art Google AI such + as natural language, computer vision, and translation. + + This class defines the same methods as the primary client, so the + primary client can load the underlying transport implementation + and call it. + + It sends protocol buffers over the wire using gRPC (which is built on + top of HTTP/2); the ``grpcio`` package must be installed. + """ + + def __init__( + self, + *, + host: str = "us-documentai.googleapis.com", + credentials: credentials.Credentials = None, + channel: grpc.Channel = None + ) -> None: + """Instantiate the transport. + + Args: + host (Optional[str]): The hostname to connect to. + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + This argument is ignored if ``channel`` is provided. + channel (Optional[grpc.Channel]): A ``Channel`` instance through + which to make calls. + """ + # Sanity check: Ensure that channel and credentials are not both + # provided. + if channel: + credentials = False + + # Run the base constructor. 
+ super().__init__(host=host, credentials=credentials) + self._stubs = {} # type: Dict[str, Callable] + + # If a channel was explicitly provided, set it. + if channel: + self._grpc_channel = channel + + @classmethod + def create_channel( + cls, + host: str = "us-documentai.googleapis.com", + credentials: credentials.Credentials = None, + **kwargs + ) -> grpc.Channel: + """Create and return a gRPC channel object. + Args: + host (Optional[str]): The host for the channel to use. + credentials (Optional[~.Credentials]): The + authorization credentials to attach to requests. These + credentials identify this application to the service. If + none are specified, the client will attempt to ascertain + the credentials from the environment. + kwargs (Optional[dict]): Keyword arguments, which are passed to the + channel creation. + Returns: + grpc.Channel: A gRPC channel object. + """ + return grpc_helpers.create_channel( + host, credentials=credentials, scopes=cls.AUTH_SCOPES, **kwargs + ) + + @property + def grpc_channel(self) -> grpc.Channel: + """Create the channel designed to connect to this service. + + This property caches on the instance; repeated calls return + the same channel. + """ + # Sanity check: Only create a new channel if we do not already + # have one. + if not hasattr(self, "_grpc_channel"): + self._grpc_channel = self.create_channel( + self._host, credentials=self._credentials + ) + + # Return the channel from cache. + return self._grpc_channel + + @property + def operations_client(self) -> operations_v1.OperationsClient: + """Create the client designed to process long-running operations. + + This property caches on the instance; repeated calls return the same + client. + """ + # Sanity check: Only create a new client if we do not already have one. + if "operations_client" not in self.__dict__: + self.__dict__["operations_client"] = operations_v1.OperationsClient( + self.grpc_channel + ) + + # Return the client from cache.
+ return self.__dict__["operations_client"] + + @property + def batch_process_documents( + self + ) -> Callable[ + [document_understanding.BatchProcessDocumentsRequest], operations.Operation + ]: + r"""Return a callable for the batch process documents method over gRPC. + + LRO endpoint to batch process many documents. The output is + written to Cloud Storage as JSON in the [Document] format. + + Returns: + Callable[[~.BatchProcessDocumentsRequest], + ~.Operation]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "batch_process_documents" not in self._stubs: + self._stubs["batch_process_documents"] = self.grpc_channel.unary_unary( + "/google.cloud.documentai.v1beta2.DocumentUnderstandingService/BatchProcessDocuments", + request_serializer=document_understanding.BatchProcessDocumentsRequest.serialize, + response_deserializer=operations.Operation.FromString, + ) + return self._stubs["batch_process_documents"] + + @property + def process_document( + self + ) -> Callable[[document_understanding.ProcessDocumentRequest], document.Document]: + r"""Return a callable for the process document method over gRPC. + + Processes a single document. + + Returns: + Callable[[~.ProcessDocumentRequest], + ~.Document]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. 
+ if "process_document" not in self._stubs: + self._stubs["process_document"] = self.grpc_channel.unary_unary( + "/google.cloud.documentai.v1beta2.DocumentUnderstandingService/ProcessDocument", + request_serializer=document_understanding.ProcessDocumentRequest.serialize, + response_deserializer=document.Document.deserialize, + ) + return self._stubs["process_document"] + + +__all__ = ("DocumentUnderstandingServiceGrpcTransport",) diff --git a/google/cloud/documentai_v1beta2/types/__init__.py b/google/cloud/documentai_v1beta2/types/__init__.py new file mode 100644 index 00000000..c22af483 --- /dev/null +++ b/google/cloud/documentai_v1beta2/types/__init__.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from .geometry import Vertex, NormalizedVertex, BoundingPoly +from .document import Document +from .document_understanding import ( + BatchProcessDocumentsRequest, + ProcessDocumentRequest, + BatchProcessDocumentsResponse, + ProcessDocumentResponse, + OcrParams, + TableExtractionParams, + TableBoundHint, + FormExtractionParams, + KeyValuePairHint, + EntityExtractionParams, + AutoMlParams, + InputConfig, + OutputConfig, + GcsSource, + GcsDestination, + OperationMetadata, +) + + +__all__ = ( + "Vertex", + "NormalizedVertex", + "BoundingPoly", + "Document", + "BatchProcessDocumentsRequest", + "ProcessDocumentRequest", + "BatchProcessDocumentsResponse", + "ProcessDocumentResponse", + "OcrParams", + "TableExtractionParams", + "TableBoundHint", + "FormExtractionParams", + "KeyValuePairHint", + "EntityExtractionParams", + "AutoMlParams", + "InputConfig", + "OutputConfig", + "GcsSource", + "GcsDestination", + "OperationMetadata", +) diff --git a/google/cloud/documentai_v1beta2/types/document.py b/google/cloud/documentai_v1beta2/types/document.py new file mode 100644 index 00000000..d9ca295f --- /dev/null +++ b/google/cloud/documentai_v1beta2/types/document.py @@ -0,0 +1,756 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import proto # type: ignore + + +from google.cloud.documentai_v1beta2.types import geometry +from google.rpc import status_pb2 as status # type: ignore +from google.type import color_pb2 as gt_color # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.documentai.v1beta2", manifest={"Document"} +) + + +class Document(proto.Message): + r"""Document represents the canonical document resource in + Document Understanding AI. + It is an interchange format that provides insights into + documents and allows for collaboration between users and + Document Understanding AI to iterate and optimize for quality. + + Attributes: + uri (str): + Currently supports Google Cloud Storage URI of the form + ``gs://bucket_name/object_name``. Object versioning is not + supported. See `Google Cloud Storage Request + URIs <https://cloud.google.com/storage/docs/reference-uris>`__ + for more info. + content (bytes): + Inline document content, represented as a stream of bytes. + Note: As with all ``bytes`` fields, protobuffers use a pure + binary representation, whereas JSON representations use + base64. + mime_type (str): + An IANA published MIME type (also referred to + as media type). + For more information, see + https://www.iana.org/assignments/media-types/media-types.xhtml. + text (str): + UTF-8 encoded text in reading order from the + document. + text_styles (Sequence[~.document.Document.Style]): + Styles for the + [Document.text][google.cloud.documentai.v1beta2.Document.text]. + pages (Sequence[~.document.Document.Page]): + Visual page layout for the + [Document][google.cloud.documentai.v1beta2.Document]. + entities (Sequence[~.document.Document.Entity]): + A list of entities detected on + [Document.text][google.cloud.documentai.v1beta2.Document.text]. + For document shards, entities in this list may cross shard + boundaries. + entity_relations (Sequence[~.document.Document.EntityRelation]): + Relationship among + [Document.entities][google.cloud.documentai.v1beta2.Document.entities].
+ shard_info (~.document.Document.ShardInfo): + Information about the sharding if this + document is a sharded part of a larger document. + If the document is not sharded, this message is + not specified. + labels (Sequence[~.document.Document.Label]): + [Label][google.cloud.documentai.v1beta2.Document.Label]s for + this document. + error (~.status.Status): + Any error that occurred while processing this + document. + """ + + class ShardInfo(proto.Message): + r"""For a large document, sharding may be performed to produce + several document shards. Each document shard contains this field + to detail which shard it is. + + Attributes: + shard_index (int): + The 0-based index of this shard. + shard_count (int): + Total number of shards. + text_offset (int): + The index of the first character in + [Document.text][google.cloud.documentai.v1beta2.Document.text] + in the overall document global text. + """ + + shard_index = proto.Field(proto.INT64, number=1) + shard_count = proto.Field(proto.INT64, number=2) + text_offset = proto.Field(proto.INT64, number=3) + + class Label(proto.Message): + r"""Label attaches schema information and/or other metadata to segments + within a [Document][google.cloud.documentai.v1beta2.Document]. + Multiple [Label][google.cloud.documentai.v1beta2.Document.Label]s on + a single field can denote either different labels, different + instances of the same label created at different times, or some + combination of both. + + Attributes: + automl_model (str): + Label is generated by an AutoML model. This field stores the full + resource name of the AutoML model. + + Format: + ``projects/{project-id}/locations/{location-id}/models/{model-id}`` + name (str): + Name of the label. + When the label is generated from an AutoML Text + Classification model, this field represents the + name of the category. + confidence (float): + Confidence score between 0 and 1 for label + assignment.
+ """ + + automl_model = proto.Field(proto.STRING, number=2) + name = proto.Field(proto.STRING, number=1) + confidence = proto.Field(proto.FLOAT, number=3) + + class Style(proto.Message): + r"""Annotation for common text style attributes. This adheres to + CSS conventions as much as possible. + + Attributes: + text_anchor (~.document.Document.TextAnchor): + Text anchor indexing into the + [Document.text][google.cloud.documentai.v1beta2.Document.text]. + color (~.gt_color.Color): + Text color. + background_color (~.gt_color.Color): + Text background color. + font_weight (str): + Font weight. Possible values are normal, bold, bolder, and + lighter. https://www.w3schools.com/cssref/pr_font_weight.asp + text_style (str): + Text style. Possible values are normal, italic, and oblique. + https://www.w3schools.com/cssref/pr_font_font-style.asp + text_decoration (str): + Text decoration. Follows CSS standard. + https://www.w3schools.com/cssref/pr_text_text-decoration.asp + font_size (~.document.Document.Style.FontSize): + Font size. + """ + + class FontSize(proto.Message): + r"""Font size with unit. + + Attributes: + size (float): + Font size for the text. + unit (str): + Unit for the font size. Follows CSS naming + (in, px, pt, etc.). + """ + + size = proto.Field(proto.FLOAT, number=1) + unit = proto.Field(proto.STRING, number=2) + + text_anchor = proto.Field( + proto.MESSAGE, number=1, message="Document.TextAnchor" + ) + color = proto.Field(proto.MESSAGE, number=2, message=gt_color.Color) + background_color = proto.Field(proto.MESSAGE, number=3, message=gt_color.Color) + font_weight = proto.Field(proto.STRING, number=4) + text_style = proto.Field(proto.STRING, number=5) + text_decoration = proto.Field(proto.STRING, number=6) + font_size = proto.Field( + proto.MESSAGE, number=7, message="Document.Style.FontSize" + ) + + class Page(proto.Message): + r"""A page in a [Document][google.cloud.documentai.v1beta2.Document]. 
+ + Attributes: + page_number (int): + 1-based index for current + [Page][google.cloud.documentai.v1beta2.Document.Page] in a + parent [Document][google.cloud.documentai.v1beta2.Document]. + Useful when a page is taken out of a + [Document][google.cloud.documentai.v1beta2.Document] for + individual processing. + dimension (~.document.Document.Page.Dimension): + Physical dimension of the page. + layout (~.document.Document.Page.Layout): + [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] + for the page. + detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + A list of detected languages together with + confidence. + blocks (Sequence[~.document.Document.Page.Block]): + A list of visually detected text blocks on + the page. A block has a set of lines (collected + into paragraphs) that have a common line-spacing + and orientation. + paragraphs (Sequence[~.document.Document.Page.Paragraph]): + A list of visually detected text paragraphs + on the page. A collection of lines that a human + would perceive as a paragraph. + lines (Sequence[~.document.Document.Page.Line]): + A list of visually detected text lines on the + page. A collection of tokens that a human would + perceive as a line. + tokens (Sequence[~.document.Document.Page.Token]): + A list of visually detected tokens on the + page. + visual_elements (Sequence[~.document.Document.Page.VisualElement]): + A list of detected non-text visual elements + e.g. checkbox, signature etc. on the page. + tables (Sequence[~.document.Document.Page.Table]): + A list of visually detected tables on the + page. + form_fields (Sequence[~.document.Document.Page.FormField]): + A list of visually detected form fields on + the page. + """ + + class Dimension(proto.Message): + r"""Dimension for the page. + + Attributes: + width (float): + Page width. + height (float): + Page height. + unit (str): + Dimension unit. 
+ """ + + width = proto.Field(proto.FLOAT, number=1) + height = proto.Field(proto.FLOAT, number=2) + unit = proto.Field(proto.STRING, number=3) + + class Layout(proto.Message): + r"""Visual element describing a layout unit on a page. + + Attributes: + text_anchor (~.document.Document.TextAnchor): + Text anchor indexing into the + [Document.text][google.cloud.documentai.v1beta2.Document.text]. + confidence (float): + Confidence of the current + [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] + within context of the object this layout is for. e.g. + confidence can be for a single token, a table, a visual + element, etc. depending on context. Range [0, 1]. + bounding_poly (~.geometry.BoundingPoly): + The bounding polygon for the + [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout]. + orientation (~.document.Document.Page.Layout.Orientation): + Detected orientation for the + [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout]. + id (str): + Optional. This is the identifier used by referencing + [PageAnchor][google.cloud.documentai.v1beta2.Document.PageAnchor]s. + """ + + class Orientation(proto.Enum): + r"""Detected human reading orientation.""" + ORIENTATION_UNSPECIFIED = 0 + PAGE_UP = 1 + PAGE_RIGHT = 2 + PAGE_DOWN = 3 + PAGE_LEFT = 4 + + text_anchor = proto.Field( + proto.MESSAGE, number=1, message="Document.TextAnchor" + ) + confidence = proto.Field(proto.FLOAT, number=2) + bounding_poly = proto.Field( + proto.MESSAGE, number=3, message=geometry.BoundingPoly + ) + orientation = proto.Field( + proto.ENUM, number=4, enum="Document.Page.Layout.Orientation" + ) + id = proto.Field(proto.STRING, number=5) + + class Block(proto.Message): + r"""A block has a set of lines (collected into paragraphs) that + have a common line-spacing and orientation. 
+ + Attributes: + layout (~.document.Document.Page.Layout): + [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] + for + [Block][google.cloud.documentai.v1beta2.Document.Page.Block]. + detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + A list of detected languages together with + confidence. + """ + + layout = proto.Field( + proto.MESSAGE, number=1, message="Document.Page.Layout" + ) + detected_languages = proto.RepeatedField( + proto.MESSAGE, number=2, message="Document.Page.DetectedLanguage" + ) + + class Paragraph(proto.Message): + r"""A collection of lines that a human would perceive as a + paragraph. + + Attributes: + layout (~.document.Document.Page.Layout): + [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] + for + [Paragraph][google.cloud.documentai.v1beta2.Document.Page.Paragraph]. + detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + A list of detected languages together with + confidence. + """ + + layout = proto.Field( + proto.MESSAGE, number=1, message="Document.Page.Layout" + ) + detected_languages = proto.RepeatedField( + proto.MESSAGE, number=2, message="Document.Page.DetectedLanguage" + ) + + class Line(proto.Message): + r"""A collection of tokens that a human would perceive as a line. + Does not cross column boundaries, can be horizontal, vertical, + etc. + + Attributes: + layout (~.document.Document.Page.Layout): + [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] + for + [Line][google.cloud.documentai.v1beta2.Document.Page.Line]. + detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + A list of detected languages together with + confidence. + """ + + layout = proto.Field( + proto.MESSAGE, number=1, message="Document.Page.Layout" + ) + detected_languages = proto.RepeatedField( + proto.MESSAGE, number=2, message="Document.Page.DetectedLanguage" + ) + + class Token(proto.Message): + r"""A detected token. 
+ + Attributes: + layout (~.document.Document.Page.Layout): + [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] + for + [Token][google.cloud.documentai.v1beta2.Document.Page.Token]. + detected_break (~.document.Document.Page.Token.DetectedBreak): + Detected break at the end of a + [Token][google.cloud.documentai.v1beta2.Document.Page.Token]. + detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + A list of detected languages together with + confidence. + """ + + class DetectedBreak(proto.Message): + r"""Detected break at the end of a + [Token][google.cloud.documentai.v1beta2.Document.Page.Token]. + + Attributes: + type (~.document.Document.Page.Token.DetectedBreak.Type): + Detected break type. + """ + + class Type(proto.Enum): + r"""Enum to denote the type of break found.""" + TYPE_UNSPECIFIED = 0 + SPACE = 1 + WIDE_SPACE = 2 + HYPHEN = 3 + + type = proto.Field( + proto.ENUM, number=1, enum="Document.Page.Token.DetectedBreak.Type" + ) + + layout = proto.Field( + proto.MESSAGE, number=1, message="Document.Page.Layout" + ) + detected_break = proto.Field( + proto.MESSAGE, number=2, message="Document.Page.Token.DetectedBreak" + ) + detected_languages = proto.RepeatedField( + proto.MESSAGE, number=3, message="Document.Page.DetectedLanguage" + ) + + class VisualElement(proto.Message): + r"""Detected non-text visual elements e.g. checkbox, signature + etc. on the page. + + Attributes: + layout (~.document.Document.Page.Layout): + [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] + for + [VisualElement][google.cloud.documentai.v1beta2.Document.Page.VisualElement]. + type (str): + Type of the + [VisualElement][google.cloud.documentai.v1beta2.Document.Page.VisualElement]. + detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + A list of detected languages together with + confidence. 
+ """ + + layout = proto.Field( + proto.MESSAGE, number=1, message="Document.Page.Layout" + ) + type = proto.Field(proto.STRING, number=2) + detected_languages = proto.RepeatedField( + proto.MESSAGE, number=3, message="Document.Page.DetectedLanguage" + ) + + class Table(proto.Message): + r"""A table representation similar to HTML table structure. + + Attributes: + layout (~.document.Document.Page.Layout): + [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] + for + [Table][google.cloud.documentai.v1beta2.Document.Page.Table]. + header_rows (Sequence[~.document.Document.Page.Table.TableRow]): + Header rows of the table. + body_rows (Sequence[~.document.Document.Page.Table.TableRow]): + Body rows of the table. + detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + A list of detected languages together with + confidence. + """ + + class TableRow(proto.Message): + r"""A row of table cells. + + Attributes: + cells (Sequence[~.document.Document.Page.Table.TableCell]): + Cells that make up this row. + """ + + cells = proto.RepeatedField( + proto.MESSAGE, number=1, message="Document.Page.Table.TableCell" + ) + + class TableCell(proto.Message): + r"""A cell representation inside the table. + + Attributes: + layout (~.document.Document.Page.Layout): + [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] + for + [TableCell][google.cloud.documentai.v1beta2.Document.Page.Table.TableCell]. + row_span (int): + How many rows this cell spans. + col_span (int): + How many columns this cell spans. + detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + A list of detected languages together with + confidence. 
+ """ + + layout = proto.Field( + proto.MESSAGE, number=1, message="Document.Page.Layout" + ) + row_span = proto.Field(proto.INT32, number=2) + col_span = proto.Field(proto.INT32, number=3) + detected_languages = proto.RepeatedField( + proto.MESSAGE, number=4, message="Document.Page.DetectedLanguage" + ) + + layout = proto.Field( + proto.MESSAGE, number=1, message="Document.Page.Layout" + ) + header_rows = proto.RepeatedField( + proto.MESSAGE, number=2, message="Document.Page.Table.TableRow" + ) + body_rows = proto.RepeatedField( + proto.MESSAGE, number=3, message="Document.Page.Table.TableRow" + ) + detected_languages = proto.RepeatedField( + proto.MESSAGE, number=4, message="Document.Page.DetectedLanguage" + ) + + class FormField(proto.Message): + r"""A form field detected on the page. + + Attributes: + field_name (~.document.Document.Page.Layout): + [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] + for the + [FormField][google.cloud.documentai.v1beta2.Document.Page.FormField] + name. e.g. ``Address``, ``Email``, ``Grand total``, + ``Phone number``, etc. + field_value (~.document.Document.Page.Layout): + [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] + for the + [FormField][google.cloud.documentai.v1beta2.Document.Page.FormField] + value. + name_detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + A list of detected languages for name + together with confidence. + value_detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + A list of detected languages for value + together with confidence. + value_type (str): + If the value is non-textual, this field represents the type. + Current valid values are: + + - blank (this indicates the field_value is normal text) + - "unfilled_checkbox" + - "filled_checkbox". + corrected_key_text (str): + An internal field, created for Labeling UI to + export key text. 
+ corrected_value_text (str): + An internal field, created for Labeling UI to + export value text. + """ + + field_name = proto.Field( + proto.MESSAGE, number=1, message="Document.Page.Layout" + ) + field_value = proto.Field( + proto.MESSAGE, number=2, message="Document.Page.Layout" + ) + name_detected_languages = proto.RepeatedField( + proto.MESSAGE, number=3, message="Document.Page.DetectedLanguage" + ) + value_detected_languages = proto.RepeatedField( + proto.MESSAGE, number=4, message="Document.Page.DetectedLanguage" + ) + value_type = proto.Field(proto.STRING, number=5) + corrected_key_text = proto.Field(proto.STRING, number=6) + corrected_value_text = proto.Field(proto.STRING, number=7) + + class DetectedLanguage(proto.Message): + r"""Detected language for a structural component. + + Attributes: + language_code (str): + The BCP-47 language code, such as "en-US" or "sr-Latn". For + more information, see + http://www.unicode.org/reports/tr35/#Unicode_locale_identifier. + confidence (float): + Confidence of detected language. Range [0, 1]. 
+ """ + + language_code = proto.Field(proto.STRING, number=1) + confidence = proto.Field(proto.FLOAT, number=2) + + page_number = proto.Field(proto.INT32, number=1) + dimension = proto.Field( + proto.MESSAGE, number=2, message="Document.Page.Dimension" + ) + layout = proto.Field(proto.MESSAGE, number=3, message="Document.Page.Layout") + detected_languages = proto.RepeatedField( + proto.MESSAGE, number=4, message="Document.Page.DetectedLanguage" + ) + blocks = proto.RepeatedField( + proto.MESSAGE, number=5, message="Document.Page.Block" + ) + paragraphs = proto.RepeatedField( + proto.MESSAGE, number=6, message="Document.Page.Paragraph" + ) + lines = proto.RepeatedField( + proto.MESSAGE, number=7, message="Document.Page.Line" + ) + tokens = proto.RepeatedField( + proto.MESSAGE, number=8, message="Document.Page.Token" + ) + visual_elements = proto.RepeatedField( + proto.MESSAGE, number=9, message="Document.Page.VisualElement" + ) + tables = proto.RepeatedField( + proto.MESSAGE, number=10, message="Document.Page.Table" + ) + form_fields = proto.RepeatedField( + proto.MESSAGE, number=11, message="Document.Page.FormField" + ) + + class Entity(proto.Message): + r"""A phrase in the text that is a known entity type, such as a + person, an organization, or location. + + Attributes: + text_anchor (~.document.Document.TextAnchor): + Provenance of the entity. Text anchor indexing into the + [Document.text][google.cloud.documentai.v1beta2.Document.text]. + type (str): + Entity type from a schema e.g. ``Address``. + mention_text (str): + Text value in the document e.g. ``1600 Amphitheatre Pkwy``. + mention_id (str): + Deprecated. Use ``id`` field instead. + confidence (float): + Optional. Confidence of detected Schema entity. Range [0, + 1]. + page_anchor (~.document.Document.PageAnchor): + Optional. Represents the provenance of this + entity wrt. the location on the page where it + was found. + id (str): + Optional. Canonical id. 
This will be a unique + value in the entity list for this document. + bounding_poly_for_demo_frontend (~.geometry.BoundingPoly): + Optional. Temporary field to store the + bounding poly for short-term POCs. Used by the + frontend only. Do not use before you talk to + ybo@ and lukasr@. + """ + + text_anchor = proto.Field( + proto.MESSAGE, number=1, message="Document.TextAnchor" + ) + type = proto.Field(proto.STRING, number=2) + mention_text = proto.Field(proto.STRING, number=3) + mention_id = proto.Field(proto.STRING, number=4) + confidence = proto.Field(proto.FLOAT, number=5) + page_anchor = proto.Field( + proto.MESSAGE, number=6, message="Document.PageAnchor" + ) + id = proto.Field(proto.STRING, number=7) + bounding_poly_for_demo_frontend = proto.Field( + proto.MESSAGE, number=8, message=geometry.BoundingPoly + ) + + class EntityRelation(proto.Message): + r"""Relationship between + [Entities][google.cloud.documentai.v1beta2.Document.Entity]. + + Attributes: + subject_id (str): + Subject entity id. + object_id (str): + Object entity id. + relation (str): + Relationship description. + """ + + subject_id = proto.Field(proto.STRING, number=1) + object_id = proto.Field(proto.STRING, number=2) + relation = proto.Field(proto.STRING, number=3) + + class TextAnchor(proto.Message): + r"""Text reference indexing into the + [Document.text][google.cloud.documentai.v1beta2.Document.text]. + + Attributes: + text_segments (Sequence[~.document.Document.TextAnchor.TextSegment]): + The text segments from the + [Document.text][google.cloud.documentai.v1beta2.Document.text]. + """ + + class TextSegment(proto.Message): + r"""A text segment in the + [Document.text][google.cloud.documentai.v1beta2.Document.text]. The + indices may be out of bounds which indicate that the text extends + into another document shard for large sharded documents. 
See + [ShardInfo.text_offset][google.cloud.documentai.v1beta2.Document.ShardInfo.text_offset] + + Attributes: + start_index (int): + [TextSegment][google.cloud.documentai.v1beta2.Document.TextAnchor.TextSegment] + start UTF-8 char index in the + [Document.text][google.cloud.documentai.v1beta2.Document.text]. + end_index (int): + [TextSegment][google.cloud.documentai.v1beta2.Document.TextAnchor.TextSegment] + half open end UTF-8 char index in the + [Document.text][google.cloud.documentai.v1beta2.Document.text]. + """ + + start_index = proto.Field(proto.INT64, number=1) + end_index = proto.Field(proto.INT64, number=2) + + text_segments = proto.RepeatedField( + proto.MESSAGE, number=1, message="Document.TextAnchor.TextSegment" + ) + + class PageAnchor(proto.Message): + r"""Referencing elements in + [Document.pages][google.cloud.documentai.v1beta2.Document.pages]. + + Attributes: + page_refs (Sequence[~.document.Document.PageAnchor.PageRef]): + One or more references to visual page + elements + """ + + class PageRef(proto.Message): + r"""Represents a weak reference to a page element within a + document. + + Attributes: + page (int): + Required. Index into the + [Document.pages][google.cloud.documentai.v1beta2.Document.pages] + element + layout_type (~.document.Document.PageAnchor.PageRef.LayoutType): + Optional. The type of the layout element that + is being referenced. If not specified the whole + page is assumed to be referenced. + layout_id (str): + Optional. The + [Page.Layout.id][google.cloud.documentai.v1beta2.Document.Page.Layout.id] + on the page that this element references. If + [LayoutRef.type][] is specified this id must also be + specified. 
+ """ + + class LayoutType(proto.Enum): + r"""The type of layout that is being referenced.""" + LAYOUT_TYPE_UNSPECIFIED = 0 + BLOCK = 1 + PARAGRAPH = 2 + LINE = 3 + TOKEN = 4 + VISUAL_ELEMENT = 5 + TABLE = 6 + FORM_FIELD = 7 + + page = proto.Field(proto.INT64, number=1) + layout_type = proto.Field( + proto.ENUM, number=2, enum="Document.PageAnchor.PageRef.LayoutType" + ) + layout_id = proto.Field(proto.STRING, number=3) + + page_refs = proto.RepeatedField( + proto.MESSAGE, number=1, message="Document.PageAnchor.PageRef" + ) + + uri = proto.Field(proto.STRING, number=1) + content = proto.Field(proto.BYTES, number=2) + mime_type = proto.Field(proto.STRING, number=3) + text = proto.Field(proto.STRING, number=4) + text_styles = proto.RepeatedField(proto.MESSAGE, number=5, message=Style) + pages = proto.RepeatedField(proto.MESSAGE, number=6, message=Page) + entities = proto.RepeatedField(proto.MESSAGE, number=7, message=Entity) + entity_relations = proto.RepeatedField( + proto.MESSAGE, number=8, message=EntityRelation + ) + shard_info = proto.Field(proto.MESSAGE, number=9, message=ShardInfo) + labels = proto.RepeatedField(proto.MESSAGE, number=11, message=Label) + error = proto.Field(proto.MESSAGE, number=10, message=status.Status) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/documentai_v1beta2/types/document_understanding.py b/google/cloud/documentai_v1beta2/types/document_understanding.py new file mode 100644 index 00000000..3b36d129 --- /dev/null +++ b/google/cloud/documentai_v1beta2/types/document_understanding.py @@ -0,0 +1,432 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import proto # type: ignore + + +from google.cloud.documentai_v1beta2.types import geometry +from google.protobuf import timestamp_pb2 as timestamp # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.documentai.v1beta2", + manifest={ + "BatchProcessDocumentsRequest", + "ProcessDocumentRequest", + "BatchProcessDocumentsResponse", + "ProcessDocumentResponse", + "OcrParams", + "TableExtractionParams", + "TableBoundHint", + "FormExtractionParams", + "KeyValuePairHint", + "EntityExtractionParams", + "AutoMlParams", + "InputConfig", + "OutputConfig", + "GcsSource", + "GcsDestination", + "OperationMetadata", + }, +) + + +class BatchProcessDocumentsRequest(proto.Message): + r"""Request to batch process documents as an asynchronous operation. The + output is written to Cloud Storage as JSON in the [Document] format. + + Attributes: + requests (Sequence[~.document_understanding.ProcessDocumentRequest]): + Required. Individual requests for each + document. + parent (str): + Target project and location to make a call. + + Format: ``projects/{project-id}/locations/{location-id}``. + + If no location is specified, a region will be chosen + automatically. + """ + + requests = proto.RepeatedField( + proto.MESSAGE, number=1, message="ProcessDocumentRequest" + ) + parent = proto.Field(proto.STRING, number=2) + + +class ProcessDocumentRequest(proto.Message): + r"""Request to process one document. + + Attributes: + parent (str): + Target project and location to make a call. + + Format: ``projects/{project-id}/locations/{location-id}``. 
+ + If no location is specified, a region will be chosen + automatically. This field is only populated when used in + ProcessDocument method. + input_config (~.document_understanding.InputConfig): + Required. Information about the input file. + output_config (~.document_understanding.OutputConfig): + Optional. The desired output location. This + field is only needed in + BatchProcessDocumentsRequest. + document_type (str): + Specifies a known document type for deeper + structure detection. Valid values are currently + "general" and "invoice". If not provided, + "general" is used as default. If any other + value is given, the request is rejected. + table_extraction_params (~.document_understanding.TableExtractionParams): + Controls table extraction behavior. If not + specified, the system will decide reasonable + defaults. + form_extraction_params (~.document_understanding.FormExtractionParams): + Controls form extraction behavior. If not + specified, the system will decide reasonable + defaults. + entity_extraction_params (~.document_understanding.EntityExtractionParams): + Controls entity extraction behavior. If not + specified, the system will decide reasonable + defaults. + ocr_params (~.document_understanding.OcrParams): + Controls OCR behavior. If not specified, the + system will decide reasonable defaults. + automl_params (~.document_understanding.AutoMlParams): + Controls AutoML model prediction behavior. + AutoMlParams cannot be used together with other + Params.
+ """ + + parent = proto.Field(proto.STRING, number=9) + input_config = proto.Field(proto.MESSAGE, number=1, message="InputConfig") + output_config = proto.Field(proto.MESSAGE, number=2, message="OutputConfig") + document_type = proto.Field(proto.STRING, number=3) + table_extraction_params = proto.Field( + proto.MESSAGE, number=4, message="TableExtractionParams" + ) + form_extraction_params = proto.Field( + proto.MESSAGE, number=5, message="FormExtractionParams" + ) + entity_extraction_params = proto.Field( + proto.MESSAGE, number=6, message="EntityExtractionParams" + ) + ocr_params = proto.Field(proto.MESSAGE, number=7, message="OcrParams") + automl_params = proto.Field(proto.MESSAGE, number=8, message="AutoMlParams") + + +class BatchProcessDocumentsResponse(proto.Message): + r"""Response to a batch document processing request. This is + returned in the LRO Operation after the operation is complete. + + Attributes: + responses (Sequence[~.document_understanding.ProcessDocumentResponse]): + Responses for each individual document. + """ + + responses = proto.RepeatedField( + proto.MESSAGE, number=1, message="ProcessDocumentResponse" + ) + + +class ProcessDocumentResponse(proto.Message): + r"""Response to a single document processing request. + + Attributes: + input_config (~.document_understanding.InputConfig): + Information about the input file. This is the + same as the corresponding input config in the + request. + output_config (~.document_understanding.OutputConfig): + The output location of the parsed responses. The responses + are written to this location as JSON-serialized ``Document`` + objects. + """ + + input_config = proto.Field(proto.MESSAGE, number=1, message="InputConfig") + output_config = proto.Field(proto.MESSAGE, number=2, message="OutputConfig") + + +class OcrParams(proto.Message): + r"""Parameters to control Optical Character Recognition (OCR) + behavior. + + Attributes: + language_hints (Sequence[str]): + List of languages to use for OCR.
In most cases, an empty + value yields the best results since it enables automatic + language detection. For languages based on the Latin + alphabet, setting ``language_hints`` is not needed. In rare + cases, when the language of the text in the image is known, + setting a hint will help get better results (although it + will be a significant hindrance if the hint is wrong). + Document processing returns an error if one or more of the + specified languages is not one of the supported languages. + """ + + language_hints = proto.RepeatedField(proto.STRING, number=1) + + +class TableExtractionParams(proto.Message): + r"""Parameters to control table extraction behavior. + + Attributes: + enabled (bool): + Whether to enable table extraction. + table_bound_hints (Sequence[~.document_understanding.TableBoundHint]): + Optional. Table bounding box hints that can + be provided to complex cases which our algorithm + cannot locate the table(s) in. + header_hints (Sequence[str]): + Optional. Table header hints. The extraction + will bias towards producing these terms as table + headers, which may improve accuracy. + model_version (str): + Model version of the table extraction system. + Default is "builtin/stable". Specify + "builtin/latest" for the latest model. + """ + + enabled = proto.Field(proto.BOOL, number=1) + table_bound_hints = proto.RepeatedField( + proto.MESSAGE, number=2, message="TableBoundHint" + ) + header_hints = proto.RepeatedField(proto.STRING, number=3) + model_version = proto.Field(proto.STRING, number=4) + + +class TableBoundHint(proto.Message): + r"""A hint for a table bounding box on the page for table + parsing. + + Attributes: + page_number (int): + Optional. Page number for multi-paged inputs + this hint applies to. If not provided, this hint + will apply to all pages by default. This value + is 1-based. + bounding_box (~.geometry.BoundingPoly): + Bounding box hint for a table on this page. 
The coordinates + must be normalized to [0,1] and the bounding box must be an + axis-aligned rectangle. + """ + + page_number = proto.Field(proto.INT32, number=1) + bounding_box = proto.Field(proto.MESSAGE, number=2, message=geometry.BoundingPoly) + + +class FormExtractionParams(proto.Message): + r"""Parameters to control form extraction behavior. + + Attributes: + enabled (bool): + Whether to enable form extraction. + key_value_pair_hints (Sequence[~.document_understanding.KeyValuePairHint]): + User can provide pairs of (key text, value type) to improve + the parsing result. + + For example, if a document has a field called "Date" that + holds a date value and a field called "Amount" that may hold + either a currency value (e.g., "$500.00") or a simple number + value (e.g., "20"), you could use the following hints: [ + {"key": "Date", value_types: [ "DATE"]}, {"key": "Amount", + "value_types": [ "PRICE", "NUMBER" ]} ] + + If the value type is unknown, but you want to provide hints + for the keys, you can leave the value_types field blank. + e.g. {"key": "Date", "value_types": []} + model_version (str): + Model version of the form extraction system. Default is + "builtin/stable". Specify "builtin/latest" for the latest + model. For custom form models, specify: + “custom/{model_name}". Model name format is + "bucket_name/path/to/modeldir" corresponding to + "gs://bucket_name/path/to/modeldir" where annotated examples + are stored. + """ + + enabled = proto.Field(proto.BOOL, number=1) + key_value_pair_hints = proto.RepeatedField( + proto.MESSAGE, number=2, message="KeyValuePairHint" + ) + model_version = proto.Field(proto.STRING, number=3) + + +class KeyValuePairHint(proto.Message): + r"""User-provided hint for key value pair. + + Attributes: + key (str): + The key text for the hint. + value_types (Sequence[str]): + Type of the value. 
This is case-insensitive, and could be + one of: ADDRESS, LOCATION, ORGANIZATION, PERSON, + PHONE_NUMBER, ID, NUMBER, EMAIL, PRICE, TERMS, DATE, NAME. + Types not in this list will be ignored. + """ + + key = proto.Field(proto.STRING, number=1) + value_types = proto.RepeatedField(proto.STRING, number=2) + + +class EntityExtractionParams(proto.Message): + r"""Parameters to control entity extraction behavior. + + Attributes: + enabled (bool): + Whether to enable entity extraction. + model_version (str): + Model version of the entity extraction. + Default is "builtin/stable". Specify + "builtin/latest" for the latest model. + """ + + enabled = proto.Field(proto.BOOL, number=1) + model_version = proto.Field(proto.STRING, number=2) + + +class AutoMlParams(proto.Message): + r"""Parameters to control AutoML model prediction behavior. + + Attributes: + model (str): + Resource name of the AutoML model. + + Format: + ``projects/{project-id}/locations/{location-id}/models/{model-id}``. + """ + + model = proto.Field(proto.STRING, number=1) + + +class InputConfig(proto.Message): + r"""The desired input location and metadata. + + Attributes: + gcs_source (~.document_understanding.GcsSource): + The Google Cloud Storage location to read the + input from. This must be a single file. + contents (bytes): + Content in bytes, represented as a stream of bytes. Note: As + with all ``bytes`` fields, proto buffer messages use a pure + binary representation, whereas JSON representations use + base64. + + This field only works for synchronous ProcessDocument + method. + mime_type (str): + Required. Mimetype of the input. Current supported mimetypes + are application/pdf, image/tiff, and image/gif. In addition, + application/json type is supported for requests with + [ProcessDocumentRequest.automl_params][google.cloud.documentai.v1beta2.ProcessDocumentRequest.automl_params] + field set. The JSON file needs to be in + [Document][google.cloud.documentai.v1beta2.Document] format. 
+ """ + + gcs_source = proto.Field(proto.MESSAGE, number=1, message="GcsSource") + contents = proto.Field(proto.BYTES, number=3) + mime_type = proto.Field(proto.STRING, number=2) + + +class OutputConfig(proto.Message): + r"""The desired output location and metadata. + + Attributes: + gcs_destination (~.document_understanding.GcsDestination): + The Google Cloud Storage location to write + the output to. + pages_per_shard (int): + The max number of pages to include into each output Document + shard JSON on Google Cloud Storage. + + The valid range is [1, 100]. If not specified, the default + value is 20. + + For example, for one pdf file with 100 pages, 100 parsed + pages will be produced. If ``pages_per_shard`` = 20, then 5 + Document shard JSON files each containing 20 parsed pages + will be written under the prefix + [OutputConfig.gcs_destination.uri][] and suffix + pages-x-to-y.json where x and y are 1-indexed page numbers. + + Example GCS outputs with 157 pages and pages_per_shard = 50: + + pages-001-to-050.json pages-051-to-100.json + pages-101-to-150.json pages-151-to-157.json + """ + + gcs_destination = proto.Field(proto.MESSAGE, number=1, message="GcsDestination") + pages_per_shard = proto.Field(proto.INT32, number=2) + + +class GcsSource(proto.Message): + r"""The Google Cloud Storage location where the input file will + be read from. + + Attributes: + uri (str): + + """ + + uri = proto.Field(proto.STRING, number=1) + + +class GcsDestination(proto.Message): + r"""The Google Cloud Storage location where the output file will + be written to. + + Attributes: + uri (str): + + """ + + uri = proto.Field(proto.STRING, number=1) + + +class OperationMetadata(proto.Message): + r"""Contains metadata for the BatchProcessDocuments operation. + + Attributes: + state (~.document_understanding.OperationMetadata.State): + The state of the current batch processing. + state_message (str): + A message providing more details about the + current state of processing. 
+ create_time (~.timestamp.Timestamp): + The creation time of the operation. + update_time (~.timestamp.Timestamp): + The last update time of the operation. + """ + + class State(proto.Enum): + r"""""" + STATE_UNSPECIFIED = 0 + ACCEPTED = 1 + WAITING = 2 + RUNNING = 3 + SUCCEEDED = 4 + CANCELLED = 5 + FAILED = 6 + + state = proto.Field(proto.ENUM, number=1, enum=State) + state_message = proto.Field(proto.STRING, number=2) + create_time = proto.Field(proto.MESSAGE, number=3, message=timestamp.Timestamp) + update_time = proto.Field(proto.MESSAGE, number=4, message=timestamp.Timestamp) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/documentai_v1beta2/types/geometry.py b/google/cloud/documentai_v1beta2/types/geometry.py new file mode 100644 index 00000000..69166b38 --- /dev/null +++ b/google/cloud/documentai_v1beta2/types/geometry.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import proto # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.documentai.v1beta2", + manifest={"Vertex", "NormalizedVertex", "BoundingPoly"}, +) + + +class Vertex(proto.Message): + r"""A vertex represents a 2D point in the image. + NOTE: the vertex coordinates are in the same scale as the + original image. + + Attributes: + x (int): + X coordinate. + y (int): + Y coordinate. 
+ """ + + x = proto.Field(proto.INT32, number=1) + y = proto.Field(proto.INT32, number=2) + + +class NormalizedVertex(proto.Message): + r"""A vertex represents a 2D point in the image. + NOTE: the normalized vertex coordinates are relative to the + original image and range from 0 to 1. + + Attributes: + x (float): + X coordinate. + y (float): + Y coordinate. + """ + + x = proto.Field(proto.FLOAT, number=1) + y = proto.Field(proto.FLOAT, number=2) + + +class BoundingPoly(proto.Message): + r"""A bounding polygon for the detected image annotation. + + Attributes: + vertices (Sequence[~.geometry.Vertex]): + The bounding polygon vertices. + normalized_vertices (Sequence[~.geometry.NormalizedVertex]): + The bounding polygon normalized vertices. + """ + + vertices = proto.RepeatedField(proto.MESSAGE, number=1, message=Vertex) + normalized_vertices = proto.RepeatedField( + proto.MESSAGE, number=2, message=NormalizedVertex + ) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 00000000..f23e6b53 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,3 @@ +[mypy] +python_version = 3.5 +namespace_packages = True diff --git a/noxfile.py b/noxfile.py index ac831388..5cc185a1 100644 --- a/noxfile.py +++ b/noxfile.py @@ -84,13 +84,13 @@ def default(session): ) -@nox.session(python=["2.7", "3.5", "3.6", "3.7", "3.8"]) +@nox.session(python=["3.6", "3.7", "3.8"]) def unit(session): """Run the unit test suite.""" default(session) -@nox.session(python=["2.7", "3.7"]) +@nox.session(python=["3.7"]) def system(session): """Run the system test suite.""" system_test_path = os.path.join("tests", "system.py") diff --git a/scripts/fixup_keywords.py b/scripts/fixup_keywords.py new file mode 100644 index 00000000..295732fa --- /dev/null +++ b/scripts/fixup_keywords.py @@ -0,0 +1,178 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except 
in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +import os +import libcst as cst +import pathlib +import sys +from typing import (Any, Callable, Dict, List, Sequence, Tuple) + + +def partition( + predicate: Callable[[Any], bool], + iterator: Sequence[Any] +) -> Tuple[List[Any], List[Any]]: + """A stable, out-of-place partition.""" + results = ([], []) + + for i in iterator: + results[int(predicate(i))].append(i) + + # Returns trueList, falseList + return results[1], results[0] + + +class documentaiCallTransformer(cst.CSTTransformer): + CTRL_PARAMS: Tuple[str] = ('retry', 'timeout', 'metadata') + METHOD_TO_PARAMS: Dict[str, Tuple[str]] = { + 'batch_process_documents': ('requests', 'parent', ), + 'process_document': ('input_config', 'parent', 'output_config', 'document_type', 'table_extraction_params', 'form_extraction_params', 'entity_extraction_params', 'ocr_params', 'automl_params', ), + } + + def leave_Call(self, original: cst.Call, updated: cst.Call) -> cst.CSTNode: + try: + key = original.func.attr.value + kword_params = self.METHOD_TO_PARAMS[key] + except (AttributeError, KeyError): + # Either not a method from the API or too convoluted to be sure. + return updated + + # If the existing code is valid, keyword args come after positional args. + # Therefore, all positional args must map to the first parameters. + args, kwargs = partition(lambda a: not bool(a.keyword), updated.args) + if any(k.keyword.value == "request" for k in kwargs): + # We've already fixed this file, don't fix it again. 
+ return updated + + kwargs, ctrl_kwargs = partition( + lambda a: not a.keyword.value in self.CTRL_PARAMS, + kwargs + ) + + args, ctrl_args = args[:len(kword_params)], args[len(kword_params):] + ctrl_kwargs.extend(cst.Arg(value=a.value, keyword=cst.Name(value=ctrl)) + for a, ctrl in zip(ctrl_args, self.CTRL_PARAMS)) + + request_arg = cst.Arg( + value=cst.Dict([ + cst.DictElement( + cst.SimpleString("'{}'".format(name)), + cst.Element(value=arg.value) + ) + # Note: the args + kwargs looks silly, but keep in mind that + # the control parameters had to be stripped out, and that + # those could have been passed positionally or by keyword. + for name, arg in zip(kword_params, args + kwargs)]), + keyword=cst.Name("request") + ) + + return updated.with_changes( + args=[request_arg] + ctrl_kwargs + ) + + +def fix_files( + in_dir: pathlib.Path, + out_dir: pathlib.Path, + *, + transformer=documentaiCallTransformer(), +): + """Duplicate the input dir to the output dir, fixing file method calls. + + Preconditions: + * in_dir is a real directory + * out_dir is a real, empty directory + """ + pyfile_gen = ( + pathlib.Path(os.path.join(root, f)) + for root, _, files in os.walk(in_dir) + for f in files if os.path.splitext(f)[1] == ".py" + ) + + for fpath in pyfile_gen: + with open(fpath, 'r') as f: + src = f.read() + + # Parse the code and insert method call fixes. + tree = cst.parse_module(src) + updated = tree.visit(transformer) + + # Create the path and directory structure for the new file. + updated_path = out_dir.joinpath(fpath.relative_to(in_dir)) + updated_path.parent.mkdir(parents=True, exist_ok=True) + + # Generate the updated source file at the corresponding path. + with open(updated_path, 'w') as f: + f.write(updated.code) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description="""Fix up source that uses the documentai client library. + +The existing sources are NOT overwritten but are copied to output_dir with changes made. 
+ +Note: This tool operates at a best-effort level at converting positional + parameters in client method calls to keyword based parameters. + Cases where it WILL FAIL include + A) * or ** expansion in a method call. + B) Calls via function or method alias (includes free function calls) + C) Indirect or dispatched calls (e.g. the method is looked up dynamically) + + These all constitute false negatives. The tool will also detect false + positives when an API method shares a name with another method. +""") + parser.add_argument( + '-d', + '--input-directory', + required=True, + dest='input_dir', + help='the input directory to walk for python files to fix up', + ) + parser.add_argument( + '-o', + '--output-directory', + required=True, + dest='output_dir', + help='the directory to output files fixed via un-flattening', + ) + args = parser.parse_args() + input_dir = pathlib.Path(args.input_dir) + output_dir = pathlib.Path(args.output_dir) + if not input_dir.is_dir(): + print( + f"input directory '{input_dir}' does not exist or is not a directory", + file=sys.stderr, + ) + sys.exit(-1) + + if not output_dir.is_dir(): + print( + f"output directory '{output_dir}' does not exist or is not a directory", + file=sys.stderr, + ) + sys.exit(-1) + + if os.listdir(output_dir): + print( + f"output directory '{output_dir}' is not empty", + file=sys.stderr, + ) + sys.exit(-1) + + fix_files(input_dir, output_dir) diff --git a/setup.py b/setup.py index ff498f2a..1645c0ed 100644 --- a/setup.py +++ b/setup.py @@ -1,32 +1,25 @@ # -*- coding: utf-8 -*- -# -# Copyright 2018 Google LLC + +# Copyright (C) 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # -# https://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# import io import os +import setuptools # type: ignore -import setuptools - -name = "google-cloud-documentai" -description = "Cloud Document AI API API client library" version = "0.1.0" -release_status = "Development Status :: 3 - Alpha" -dependencies = [ - "google-api-core[grpc] >= 1.14.0, < 2.0.0dev", - 'enum34; python_version < "3.4"', -] package_root = os.path.abspath(os.path.dirname(__file__)) @@ -34,40 +27,37 @@ with io.open(readme_filename, encoding="utf-8") as readme_file: readme = readme_file.read() -packages = [ - package for package in setuptools.find_packages() if package.startswith("google") -] - -namespaces = ["google"] -if "google.cloud" in packages: - namespaces.append("google.cloud") setuptools.setup( - name=name, + name="google-cloud-documentai", version=version, - description=description, long_description=readme, author="Google LLC", author_email="googleapis-packages@google.com", license="Apache 2.0", url="https://github.com/googleapis/python-documentai", + packages=setuptools.PEP420PackageFinder.find(), + namespace_packages=("google", "google.cloud"), + platforms="Posix; MacOS X; Windows", + include_package_data=True, + install_requires=( + "google-api-core >= 1.8.0, < 2.0.0dev", + "googleapis-common-protos >= 1.5.8", + "grpcio >= 1.10.0", + "proto-plus >= 0.4.0", + ), + python_requires=">=3.6", + setup_requires=["libcst >= 0.2.5"], + scripts=["scripts/fixup_keywords.py"], classifiers=[ - release_status, + "Development Status :: 4 - Beta", "Intended Audience :: Developers", - "License :: OSI 
Approved :: Apache Software License", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.5", + "Operating System :: OS Independent", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", - "Operating System :: OS Independent", + "Programming Language :: Python :: 3.8", "Topic :: Internet", + "Topic :: Software Development :: Libraries :: Python Modules", ], - platforms="Posix; MacOS X; Windows", - packages=packages, - namespace_packages=namespaces, - install_requires=dependencies, - python_requires=">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*", - include_package_data=True, zip_safe=False, ) diff --git a/synth.metadata b/synth.metadata index 8c82e9ff..14bc94de 100644 --- a/synth.metadata +++ b/synth.metadata @@ -1,39 +1,25 @@ { - "updateTime": "2020-02-19T02:23:28.267436Z", + "updateTime": "2020-03-23T22:13:00.832554Z", "sources": [ - { - "generator": { - "name": "artman", - "version": "0.45.0", - "dockerImage": "googleapis/artman@sha256:6aec9c34db0e4be221cdaf6faba27bdc07cfea846808b3d3b964dfce3a9a0f9b" - } - }, { "git": { "name": ".", - "remote": "https://github.com/googleapis/python-documentai.git", - "sha": "e295b32d360294c2c5af51c75150fdffcfe7cc23" + "remote": "git@github.com:googleapis/python-documentai", + "sha": "1d9adb1b77dbf11e09567e9fe7bf511c1d313231" } }, { "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "ce4f4c21d9dd2bfab18873a80449b9d9851efde8", - "internalRef": "295861722", - "log": "ce4f4c21d9dd2bfab18873a80449b9d9851efde8\nasset: v1p1beta1 remove SearchResources and SearchIamPolicies\n\nPiperOrigin-RevId: 295861722\n\ncb61d6c2d070b589980c779b68ffca617f789116\nasset: v1p1beta1 remove SearchResources and SearchIamPolicies\n\nPiperOrigin-RevId: 295855449\n\nab2685d8d3a0e191dc8aef83df36773c07cb3d06\nfix: Dataproc v1 - AutoscalingPolicy annotation\n\nThis adds the second resource name pattern to the\nAutoscalingPolicy 
resource.\n\nCommitter: @lukesneeringer\nPiperOrigin-RevId: 295738415\n\n8a1020bf6828f6e3c84c3014f2c51cb62b739140\nUpdate cloud asset api v1p4beta1.\n\nPiperOrigin-RevId: 295286165\n\n5cfa105206e77670369e4b2225597386aba32985\nAdd service control related proto build rule.\n\nPiperOrigin-RevId: 295262088\n\nee4dddf805072004ab19ac94df2ce669046eec26\nmonitoring v3: Add prefix \"https://cloud.google.com/\" into the link for global access\ncl 295167522, get ride of synth.py hacks\n\nPiperOrigin-RevId: 295238095\n\nd9835e922ea79eed8497db270d2f9f85099a519c\nUpdate some minor docs changes about user event proto\n\nPiperOrigin-RevId: 295185610\n\n5f311e416e69c170243de722023b22f3df89ec1c\nfix: use correct PHP package name in gapic configuration\n\nPiperOrigin-RevId: 295161330\n\n6cdd74dcdb071694da6a6b5a206e3a320b62dd11\npubsub: v1 add client config annotations and retry config\n\nPiperOrigin-RevId: 295158776\n\n5169f46d9f792e2934d9fa25c36d0515b4fd0024\nAdded cloud asset api v1p4beta1.\n\nPiperOrigin-RevId: 295026522\n\n56b55aa8818cd0a532a7d779f6ef337ba809ccbd\nFix: Resource annotations for CreateTimeSeriesRequest and ListTimeSeriesRequest should refer to valid resources. TimeSeries is not a named resource.\n\nPiperOrigin-RevId: 294931650\n\n0646bc775203077226c2c34d3e4d50cc4ec53660\nRemove unnecessary languages from bigquery-related artman configuration files.\n\nPiperOrigin-RevId: 294809380\n\n8b78aa04382e3d4147112ad6d344666771bb1909\nUpdate backend.proto for schemes and protocol\n\nPiperOrigin-RevId: 294788800\n\n80b8f8b3de2359831295e24e5238641a38d8488f\nAdds artman config files for bigquerystorage endpoints v1beta2, v1alpha2, v1\n\nPiperOrigin-RevId: 294763931\n\n2c17ac33b226194041155bb5340c3f34733f1b3a\nAdd parameter to sample generated for UpdateInstance. 
Related to https://github.com/googleapis/python-redis/issues/4\n\nPiperOrigin-RevId: 294734008\n\nd5e8a8953f2acdfe96fb15e85eb2f33739623957\nMove bigquery datatransfer to gapic v2.\n\nPiperOrigin-RevId: 294703703\n\nefd36705972cfcd7d00ab4c6dfa1135bafacd4ae\nfix: Add two annotations that we missed.\n\nPiperOrigin-RevId: 294664231\n\n8a36b928873ff9c05b43859b9d4ea14cd205df57\nFix: Define the \"bigquery.googleapis.com/Table\" resource in the BigQuery Storage API (v1beta2).\n\nPiperOrigin-RevId: 294459768\n\nc7a3caa2c40c49f034a3c11079dd90eb24987047\nFix: Define the \"bigquery.googleapis.com/Table\" resource in the BigQuery Storage API (v1).\n\nPiperOrigin-RevId: 294456889\n\n5006247aa157e59118833658084345ee59af7c09\nFix: Make deprecated fields optional\nFix: Deprecate SetLoggingServiceRequest.zone in line with the comments\nFeature: Add resource name method signatures where appropriate\n\nPiperOrigin-RevId: 294383128\n\neabba40dac05c5cbe0fca3a35761b17e372036c4\nFix: C# and PHP package/namespace capitalization for BigQuery Storage v1.\n\nPiperOrigin-RevId: 294382444\n\nf8d9a858a7a55eba8009a23aa3f5cc5fe5e88dde\nfix: artman configuration file for bigtable-admin\n\nPiperOrigin-RevId: 294322616\n\n0f29555d1cfcf96add5c0b16b089235afbe9b1a9\nAPI definition for (not-yet-launched) GCS gRPC.\n\nPiperOrigin-RevId: 294321472\n\nfcc86bee0e84dc11e9abbff8d7c3529c0626f390\nfix: Bigtable Admin v2\n\nChange LRO metadata from PartialUpdateInstanceMetadata\nto UpdateInstanceMetadata. 
(Otherwise, it will not build.)\n\nPiperOrigin-RevId: 294264582\n\n6d9361eae2ebb3f42d8c7ce5baf4bab966fee7c0\nrefactor: Add annotations to Bigtable Admin v2.\n\nPiperOrigin-RevId: 294243406\n\nad7616f3fc8e123451c8b3a7987bc91cea9e6913\nFix: Resource type in CreateLogMetricRequest should use logging.googleapis.com.\nFix: ListLogEntries should have a method signature for convenience of calling it.\n\nPiperOrigin-RevId: 294222165\n\n63796fcbb08712676069e20a3e455c9f7aa21026\nFix: Remove extraneous resource definition for cloudkms.googleapis.com/CryptoKey.\n\nPiperOrigin-RevId: 294176658\n\ne7d8a694f4559201e6913f6610069cb08b39274e\nDepend on the latest gapic-generator and resource names plugin.\n\nThis fixes the very old an very annoying bug: https://github.com/googleapis/gapic-generator/pull/3087\n\nPiperOrigin-RevId: 293903652\n\n806b2854a966d55374ee26bb0cef4e30eda17b58\nfix: correct capitalization of Ruby namespaces in SecurityCenter V1p1beta1\n\nPiperOrigin-RevId: 293903613\n\n1b83c92462b14d67a7644e2980f723112472e03a\nPublish annotations and grpc service config for Logging API.\n\nPiperOrigin-RevId: 293893514\n\ne46f761cd6ec15a9e3d5ed4ff321a4bcba8e8585\nGenerate the Bazel build file for recommendengine public api\n\nPiperOrigin-RevId: 293710856\n\n68477017c4173c98addac0373950c6aa9d7b375f\nMake `language_code` optional for UpdateIntentRequest and BatchUpdateIntentsRequest.\n\nThe comments and proto annotations describe this parameter as optional.\n\nPiperOrigin-RevId: 293703548\n\n16f823f578bca4e845a19b88bb9bc5870ea71ab2\nAdd BUILD.bazel files for managedidentities API\n\nPiperOrigin-RevId: 293698246\n\n2f53fd8178c9a9de4ad10fae8dd17a7ba36133f2\nAdd v1p1beta1 config file\n\nPiperOrigin-RevId: 293696729\n\n052b274138fce2be80f97b6dcb83ab343c7c8812\nAdd source field for user event and add field behavior annotations\n\nPiperOrigin-RevId: 293693115\n\n1e89732b2d69151b1b3418fff3d4cc0434f0dded\ndatacatalog: v1beta1 add three new RPCs to gapic v1beta1 
config\n\nPiperOrigin-RevId: 293692823\n\n9c8bd09bbdc7c4160a44f1fbab279b73cd7a2337\nchange the name of AccessApproval service to AccessApprovalAdmin\n\nPiperOrigin-RevId: 293690934\n\n2e23b8fbc45f5d9e200572ca662fe1271bcd6760\nAdd ListEntryGroups method, add http bindings to support entry group tagging, and update some comments.\n\nPiperOrigin-RevId: 293666452\n\n0275e38a4ca03a13d3f47a9613aac8c8b0d3f1f2\nAdd proto_package field to managedidentities API. It is needed for APIs that still depend on artman generation.\n\nPiperOrigin-RevId: 293643323\n\n4cdfe8278cb6f308106580d70648001c9146e759\nRegenerating public protos for Data Catalog to add new Custom Type Entry feature.\n\nPiperOrigin-RevId: 293614782\n\n45d2a569ab526a1fad3720f95eefb1c7330eaada\nEnable client generation for v1 ManagedIdentities API.\n\nPiperOrigin-RevId: 293515675\n\n2c17086b77e6f3bcf04a1f65758dfb0c3da1568f\nAdd the Actions on Google common types (//google/actions/type/*).\n\nPiperOrigin-RevId: 293478245\n\n781aadb932e64a12fb6ead7cd842698d99588433\nDialogflow weekly v2/v2beta1 library update:\n- Documentation updates\nImportant updates are also posted at\nhttps://cloud.google.com/dialogflow/docs/release-notes\n\nPiperOrigin-RevId: 293443396\n\ne2602608c9138c2fca24162720e67f9307c30b95\nDialogflow weekly v2/v2beta1 library update:\n- Documentation updates\nImportant updates are also posted at\nhttps://cloud.google.com/dialogflow/docs/release-notes\n\nPiperOrigin-RevId: 293442964\n\nc8aef82028d06b7992278fa9294c18570dc86c3d\nAdd cc_proto_library and cc_grpc_library targets for Bigtable protos.\n\nAlso fix indentation of cc_grpc_library targets in Spanner and IAM protos.\n\nPiperOrigin-RevId: 293440538\n\ne2faab04f4cb7f9755072330866689b1943a16e9\ncloudtasks: v2 replace non-standard retry params in gapic config v2\n\nPiperOrigin-RevId: 293424055\n\ndfb4097ea628a8470292c6590a4313aee0c675bd\nerrorreporting: v1beta1 add legacy artman config for php\n\nPiperOrigin-RevId: 
293423790\n\nb18aed55b45bfe5b62476292c72759e6c3e573c6\nasset: v1p1beta1 updated comment for `page_size` limit.\n\nPiperOrigin-RevId: 293421386\n\nc9ef36b7956d9859a2fc86ad35fcaa16958ab44f\nbazel: Refactor CI build scripts\n\nPiperOrigin-RevId: 293387911\n\na8ed9d921fdddc61d8467bfd7c1668f0ad90435c\nfix: set Ruby module name for OrgPolicy\n\nPiperOrigin-RevId: 293257997\n\n6c7d28509bd8315de8af0889688ee20099594269\nredis: v1beta1 add UpgradeInstance and connect_mode field to Instance\n\nPiperOrigin-RevId: 293242878\n\nae0abed4fcb4c21f5cb67a82349a049524c4ef68\nredis: v1 add connect_mode field to Instance\n\nPiperOrigin-RevId: 293241914\n\n3f7a0d29b28ee9365771da2b66edf7fa2b4e9c56\nAdds service config definition for bigqueryreservation v1beta1\n\nPiperOrigin-RevId: 293234418\n\n0c88168d5ed6fe353a8cf8cbdc6bf084f6bb66a5\naddition of BUILD & configuration for accessapproval v1\n\nPiperOrigin-RevId: 293219198\n\n39bedc2e30f4778ce81193f6ba1fec56107bcfc4\naccessapproval: v1 publish protos\n\nPiperOrigin-RevId: 293167048\n\n69d9945330a5721cd679f17331a78850e2618226\nAdd file-level `Session` resource definition\n\nPiperOrigin-RevId: 293080182\n\nf6a1a6b417f39694275ca286110bc3c1ca4db0dc\nAdd file-level `Session` resource definition\n\nPiperOrigin-RevId: 293080178\n\n29d40b78e3dc1579b0b209463fbcb76e5767f72a\nExpose managedidentities/v1beta1/ API for client library usage.\n\nPiperOrigin-RevId: 292979741\n\na22129a1fb6e18056d576dfb7717aef74b63734a\nExpose managedidentities/v1/ API for client library usage.\n\nPiperOrigin-RevId: 292968186\n\nb5cbe4a4ba64ab19e6627573ff52057a1657773d\nSecurityCenter v1p1beta1: move file-level option on top to workaround protobuf.js bug.\n\nPiperOrigin-RevId: 292647187\n\nb224b317bf20c6a4fbc5030b4a969c3147f27ad3\nAdds API definitions for bigqueryreservation v1beta1.\n\nPiperOrigin-RevId: 292634722\n\n" - } - }, - { - "git": { - "name": "synthtool", - "remote": "rpc://devrel/cloud/libraries/tools/autosynth", - "sha": 
"b4b7af4a16a07b40bfd8dcdda89f9f193ff4e2ed" + "sha": "0be7105dc52590fa9a24e784052298ae37ce53aa", + "internalRef": "302154871", + "log": "0be7105dc52590fa9a24e784052298ae37ce53aa\nAdd BUILD.bazel file to asset/v1p1beta1\n\nPiperOrigin-RevId: 302154871\n\n6c248fd13e8543f8d22cbf118d978301a9fbe2a8\nAdd missing resource annotations and additional_bindings to dialogflow v2 API.\n\nPiperOrigin-RevId: 302063117\n\n9a3a7f33be9eeacf7b3e98435816b7022d206bd7\nChange the service name from \"chromeos-moblab.googleapis.com\" to \"chromeosmoblab.googleapis.com\"\n\nPiperOrigin-RevId: 302060989\n\n98a339237577e3de26cb4921f75fb5c57cc7a19f\nfeat: devtools/build/v1 publish client library config annotations\n\n* add details field to some of the BuildEvents\n* add final_invocation_id and build_tool_exit_code fields to BuildStatus\n\nPiperOrigin-RevId: 302044087\n\ncfabc98c6bbbb22d1aeaf7612179c0be193b3a13\nfeat: home/graph/v1 publish client library config annotations & comment updates\n\nThis change includes adding the client library configuration annotations, updated proto comments, and some client library configuration files.\n\nPiperOrigin-RevId: 302042647\n\nc8c8c0bd15d082db9546253dbaad1087c7a9782c\nchore: use latest gapic-generator in bazel WORKSPACE.\nincluding the following commits from gapic-generator:\n- feat: take source protos in all sub-packages (#3144)\n\nPiperOrigin-RevId: 301843591\n\ne4daf5202ea31cb2cb6916fdbfa9d6bd771aeb4c\nAdd bazel file for v1 client lib generation\n\nPiperOrigin-RevId: 301802926\n\n" } }, { "template": { - "name": "python_split_library", + "name": "python_library", "origin": "synthtool.gcp", "version": "2020.2.4" } @@ -44,10 +30,9 @@ "client": { "source": "googleapis", "apiName": "documentai", - "apiVersion": "v1beta1", + "apiVersion": "v1beta2", "language": "python", - "generator": "gapic", - "config": "google/cloud/documentai/artman_documentai_v1beta1.yaml" + "generator": "gapic-generator-python" } } ] diff --git a/synth.py b/synth.py index 
332cf273..e6792c25 100644 --- a/synth.py +++ b/synth.py @@ -20,34 +20,31 @@ logging.basicConfig(level=logging.DEBUG) -gapic = gcp.GAPICGenerator() +gapic = gcp.GAPICMicrogenerator() common = gcp.CommonTemplates() # ---------------------------------------------------------------------------- # Generate document AI GAPIC layer # ---------------------------------------------------------------------------- -library = gapic.py_library("documentai", "v1beta1", include_protos=True) +library = gapic.py_library("documentai", "v1beta2") -excludes = ["README.rst", "nox.py", "setup.py", "docs/index.rst"] +excludes = ["README.rst", "nox.py", "docs/index.rst", "setup.py"] s.move(library, excludes=excludes) -# Fix bad docstring with stray pipe characters -s.replace( - "google/cloud/**/document_understanding_pb2.py", - """\| Specifies a known document type for deeper structure - detection\. Valid values are currently "general" and - "invoice"\. If not provided, "general" \| is used as default. - If any other value is given, the request is rejected\.""", - """Specifies a known document type for deeper structure - detection. Valid values are currently "general" and - "invoice". If not provided, "general" is used as default. 
- If any other value is given, the request is rejected.""", -) - # ---------------------------------------------------------------------------- # Add templated files # ---------------------------------------------------------------------------- templated_files = common.py_library(cov_level=73) s.move(templated_files) +# Remove 2.7 and 3.5 tests from noxfile.py +s.replace("noxfile.py", '''\["2\.7", ''', '[') +s.replace("noxfile.py", '''"3.5", ''', '') + +# Expand flake errors permitted to accomodate the Microgenerator +# TODO: remove extra error codes once issues below are resolved +# F401: https://github.com/googleapis/gapic-generator-python/issues/324 +# F841: local variable 'client'/'response' is assigned to but never use +s.replace(".flake8", "ignore = .*", "ignore = E203, E266, E501, W503, F401, F841") + s.shell.run(["nox", "-s", "blacken"], hide_output=False) diff --git a/tests/unit/documentai_v1beta2/test_document_understanding_service.py b/tests/unit/documentai_v1beta2/test_document_understanding_service.py new file mode 100644 index 00000000..8a442050 --- /dev/null +++ b/tests/unit/documentai_v1beta2/test_document_understanding_service.py @@ -0,0 +1,296 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from unittest import mock + +import grpc +import math +import pytest + +from google import auth +from google.api_core import client_options +from google.api_core import future +from google.api_core import operations_v1 +from google.auth import credentials +from google.cloud.documentai_v1beta2.services.document_understanding_service import ( + DocumentUnderstandingServiceClient, +) +from google.cloud.documentai_v1beta2.services.document_understanding_service import ( + transports, +) +from google.cloud.documentai_v1beta2.types import document +from google.cloud.documentai_v1beta2.types import document_understanding +from google.longrunning import operations_pb2 +from google.oauth2 import service_account +from google.rpc import status_pb2 as status # type: ignore + + +def test_document_understanding_service_client_from_service_account_file(): + creds = credentials.AnonymousCredentials() + with mock.patch.object( + service_account.Credentials, "from_service_account_file" + ) as factory: + factory.return_value = creds + client = DocumentUnderstandingServiceClient.from_service_account_file( + "dummy/file/path.json" + ) + assert client._transport._credentials == creds + + client = DocumentUnderstandingServiceClient.from_service_account_json( + "dummy/file/path.json" + ) + assert client._transport._credentials == creds + + assert client._transport._host == "us-documentai.googleapis.com:443" + + +def test_document_understanding_service_client_client_options(): + # Check the default options have their expected values. + assert ( + DocumentUnderstandingServiceClient.DEFAULT_OPTIONS.api_endpoint + == "us-documentai.googleapis.com" + ) + + # Check that options can be customized. 
+ options = client_options.ClientOptions(api_endpoint="squid.clam.whelk") + with mock.patch( + "google.cloud.documentai_v1beta2.services.document_understanding_service.DocumentUnderstandingServiceClient.get_transport_class" + ) as gtc: + transport = gtc.return_value = mock.MagicMock() + client = DocumentUnderstandingServiceClient(client_options=options) + transport.assert_called_once_with(credentials=None, host="squid.clam.whelk") + + +def test_document_understanding_service_client_client_options_from_dict(): + with mock.patch( + "google.cloud.documentai_v1beta2.services.document_understanding_service.DocumentUnderstandingServiceClient.get_transport_class" + ) as gtc: + transport = gtc.return_value = mock.MagicMock() + client = DocumentUnderstandingServiceClient( + client_options={"api_endpoint": "squid.clam.whelk"} + ) + transport.assert_called_once_with(credentials=None, host="squid.clam.whelk") + + +def test_batch_process_documents(transport: str = "grpc"): + client = DocumentUnderstandingServiceClient( + credentials=credentials.AnonymousCredentials(), transport=transport + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = document_understanding.BatchProcessDocumentsRequest() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client._transport.batch_process_documents), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = operations_pb2.Operation(name="operations/spam") + + response = client.batch_process_documents(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0] == request + + # Establish that the response is the type that we expect. 
+ assert isinstance(response, future.Future) + + +def test_batch_process_documents_flattened(): + client = DocumentUnderstandingServiceClient( + credentials=credentials.AnonymousCredentials() + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client._transport.batch_process_documents), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = operations_pb2.Operation(name="operations/op") + + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + response = client.batch_process_documents( + requests=[ + document_understanding.ProcessDocumentRequest(parent="parent_value") + ] + ) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + assert args[0].requests == [ + document_understanding.ProcessDocumentRequest(parent="parent_value") + ] + + +def test_batch_process_documents_flattened_error(): + client = DocumentUnderstandingServiceClient( + credentials=credentials.AnonymousCredentials() + ) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + client.batch_process_documents( + document_understanding.BatchProcessDocumentsRequest(), + requests=[ + document_understanding.ProcessDocumentRequest(parent="parent_value") + ], + ) + + +def test_process_document(transport: str = "grpc"): + client = DocumentUnderstandingServiceClient( + credentials=credentials.AnonymousCredentials(), transport=transport + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = document_understanding.ProcessDocumentRequest() + + # Mock the actual call within the gRPC stub, and fake the request. 
+ with mock.patch.object( + type(client._transport.process_document), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = document.Document( + uri="uri_value", + content=b"content_blob", + mime_type="mime_type_value", + text="text_value", + ) + + response = client.process_document(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0] == request + + # Establish that the response is the type that we expect. + assert isinstance(response, document.Document) + assert response.uri == "uri_value" + assert response.content == b"content_blob" + assert response.mime_type == "mime_type_value" + assert response.text == "text_value" + + +def test_credentials_transport_error(): + # It is an error to provide credentials and a transport instance. + transport = transports.DocumentUnderstandingServiceGrpcTransport( + credentials=credentials.AnonymousCredentials() + ) + with pytest.raises(ValueError): + client = DocumentUnderstandingServiceClient( + credentials=credentials.AnonymousCredentials(), transport=transport + ) + + +def test_transport_instance(): + # A client may be instantiated with a custom transport instance. + transport = transports.DocumentUnderstandingServiceGrpcTransport( + credentials=credentials.AnonymousCredentials() + ) + client = DocumentUnderstandingServiceClient(transport=transport) + assert client._transport is transport + + +def test_transport_grpc_default(): + # A client should use the gRPC transport by default. + client = DocumentUnderstandingServiceClient( + credentials=credentials.AnonymousCredentials() + ) + assert isinstance( + client._transport, transports.DocumentUnderstandingServiceGrpcTransport + ) + + +def test_document_understanding_service_base_transport(): + # Instantiate the base transport. 
+ transport = transports.DocumentUnderstandingServiceTransport( + credentials=credentials.AnonymousCredentials() + ) + + # Every method on the transport should just blindly + # raise NotImplementedError. + methods = ("batch_process_documents", "process_document") + for method in methods: + with pytest.raises(NotImplementedError): + getattr(transport, method)(request=object()) + + # Additionally, the LRO client (a property) should + # also raise NotImplementedError + with pytest.raises(NotImplementedError): + transport.operations_client + + +def test_document_understanding_service_auth_adc(): + # If no credentials are provided, we should use ADC credentials. + with mock.patch.object(auth, "default") as adc: + adc.return_value = (credentials.AnonymousCredentials(), None) + DocumentUnderstandingServiceClient() + adc.assert_called_once_with( + scopes=("https://www.googleapis.com/auth/cloud-platform",) + ) + + +def test_document_understanding_service_host_no_port(): + client = DocumentUnderstandingServiceClient( + credentials=credentials.AnonymousCredentials(), + client_options=client_options.ClientOptions( + api_endpoint="us-documentai.googleapis.com" + ), + transport="grpc", + ) + assert client._transport._host == "us-documentai.googleapis.com:443" + + +def test_document_understanding_service_host_with_port(): + client = DocumentUnderstandingServiceClient( + credentials=credentials.AnonymousCredentials(), + client_options=client_options.ClientOptions( + api_endpoint="us-documentai.googleapis.com:8000" + ), + transport="grpc", + ) + assert client._transport._host == "us-documentai.googleapis.com:8000" + + +def test_document_understanding_service_grpc_transport_channel(): + channel = grpc.insecure_channel("http://localhost/") + transport = transports.DocumentUnderstandingServiceGrpcTransport(channel=channel) + assert transport.grpc_channel is channel + + +def test_document_understanding_service_grpc_lro_client(): + client = DocumentUnderstandingServiceClient( + 
credentials=credentials.AnonymousCredentials(), transport="grpc" + ) + transport = client._transport + + # Ensure that we have an api-core operations client. + assert isinstance(transport.operations_client, operations_v1.OperationsClient) + + # Ensure that subsequent calls to the property send the exact same object. + assert transport.operations_client is transport.operations_client diff --git a/tests/unit/gapic/v1beta1/test_document_understanding_service_client_v1beta1.py b/tests/unit/gapic/v1beta1/test_document_understanding_service_client_v1beta1.py deleted file mode 100644 index ec66fab0..00000000 --- a/tests/unit/gapic/v1beta1/test_document_understanding_service_client_v1beta1.py +++ /dev/null @@ -1,118 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Unit tests.""" - -import mock -import pytest - -from google.rpc import status_pb2 - -from google.cloud import documentai_v1beta1 -from google.cloud.documentai_v1beta1.proto import document_understanding_pb2 -from google.longrunning import operations_pb2 - - -class MultiCallableStub(object): - """Stub for the grpc.UnaryUnaryMultiCallable interface.""" - - def __init__(self, method, channel_stub): - self.method = method - self.channel_stub = channel_stub - - def __call__(self, request, timeout=None, metadata=None, credentials=None): - self.channel_stub.requests.append((self.method, request)) - - response = None - if self.channel_stub.responses: - response = self.channel_stub.responses.pop() - - if isinstance(response, Exception): - raise response - - if response: - return response - - -class ChannelStub(object): - """Stub for the grpc.Channel interface.""" - - def __init__(self, responses=[]): - self.responses = responses - self.requests = [] - - def unary_unary(self, method, request_serializer=None, response_deserializer=None): - return MultiCallableStub(method, self) - - -class CustomException(Exception): - pass - - -class TestDocumentUnderstandingServiceClient(object): - def test_batch_process_documents(self): - # Setup Expected Response - expected_response = {} - expected_response = document_understanding_pb2.BatchProcessDocumentsResponse( - **expected_response - ) - operation = operations_pb2.Operation( - name="operations/test_batch_process_documents", done=True - ) - operation.response.Pack(expected_response) - - # Mock the API response - channel = ChannelStub(responses=[operation]) - patch = mock.patch("google.api_core.grpc_helpers.create_channel") - with patch as create_channel: - create_channel.return_value = channel - client = documentai_v1beta1.DocumentUnderstandingServiceClient() - - # Setup Request - requests = [] - - response = client.batch_process_documents(requests) - result = response.result() - assert expected_response == result - - assert 
len(channel.requests) == 1 - expected_request = document_understanding_pb2.BatchProcessDocumentsRequest( - requests=requests - ) - actual_request = channel.requests[0][1] - assert expected_request == actual_request - - def test_batch_process_documents_exception(self): - # Setup Response - error = status_pb2.Status() - operation = operations_pb2.Operation( - name="operations/test_batch_process_documents_exception", done=True - ) - operation.error.CopyFrom(error) - - # Mock the API response - channel = ChannelStub(responses=[operation]) - patch = mock.patch("google.api_core.grpc_helpers.create_channel") - with patch as create_channel: - create_channel.return_value = channel - client = documentai_v1beta1.DocumentUnderstandingServiceClient() - - # Setup Request - requests = [] - - response = client.batch_process_documents(requests) - exception = response.exception() - assert exception.errors[0] == error