From fc3d29fd388531b0e7d2812ceef4bc26a3cb1c7b Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Thu, 7 Nov 2019 13:58:29 -0800 Subject: [PATCH] feat(documentai): initial generation of documentai (#9623) --- .coveragerc | 19 + .flake8 | 14 + .repo-metadata.json | 13 + CHANGELOG.md | 1 + LICENSE | 201 ++ MANIFEST.in | 5 + README.rst | 77 + docs/README.rst | 1 + docs/changelog.md | 1 + docs/conf.py | 363 +++ docs/gapic/v1beta1/api.rst | 6 + docs/gapic/v1beta1/types.rst | 5 + docs/index.rst | 19 + google/__init__.py | 24 + google/cloud/__init__.py | 24 + google/cloud/documentai.py | 25 + google/cloud/documentai_v1beta1/__init__.py | 34 + .../documentai_v1beta1/gapic/__init__.py | 0 .../document_understanding_service_client.py | 291 ++ ...ent_understanding_service_client_config.py | 28 + .../cloud/documentai_v1beta1/gapic/enums.py | 84 + .../gapic/transports/__init__.py | 0 ...nt_understanding_service_grpc_transport.py | 131 + .../documentai_v1beta1/proto/__init__.py | 0 .../documentai_v1beta1/proto/document.proto | 446 +++ .../documentai_v1beta1/proto/document_pb2.py | 2695 +++++++++++++++++ .../proto/document_pb2_grpc.py | 2 + .../proto/document_understanding.proto | 299 ++ .../proto/document_understanding_pb2.py | 1554 ++++++++++ .../proto/document_understanding_pb2_grpc.py | 57 + .../documentai_v1beta1/proto/geometry.proto | 55 + .../documentai_v1beta1/proto/geometry_pb2.py | 270 ++ .../proto/geometry_pb2_grpc.py | 2 + google/cloud/documentai_v1beta1/types.py | 58 + noxfile.py | 160 + setup.cfg | 3 + setup.py | 73 + synth.metadata | 39 + synth.py | 53 + ...nt_understanding_service_client_v1beta1.py | 118 + 40 files changed, 7250 insertions(+) create mode 100644 .coveragerc create mode 100644 .flake8 create mode 100644 .repo-metadata.json create mode 100644 CHANGELOG.md create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 README.rst create mode 120000 docs/README.rst create mode 120000 
docs/changelog.md create mode 100644 docs/conf.py create mode 100644 docs/gapic/v1beta1/api.rst create mode 100644 docs/gapic/v1beta1/types.rst create mode 100644 docs/index.rst create mode 100644 google/__init__.py create mode 100644 google/cloud/__init__.py create mode 100644 google/cloud/documentai.py create mode 100644 google/cloud/documentai_v1beta1/__init__.py create mode 100644 google/cloud/documentai_v1beta1/gapic/__init__.py create mode 100644 google/cloud/documentai_v1beta1/gapic/document_understanding_service_client.py create mode 100644 google/cloud/documentai_v1beta1/gapic/document_understanding_service_client_config.py create mode 100644 google/cloud/documentai_v1beta1/gapic/enums.py create mode 100644 google/cloud/documentai_v1beta1/gapic/transports/__init__.py create mode 100644 google/cloud/documentai_v1beta1/gapic/transports/document_understanding_service_grpc_transport.py create mode 100644 google/cloud/documentai_v1beta1/proto/__init__.py create mode 100644 google/cloud/documentai_v1beta1/proto/document.proto create mode 100644 google/cloud/documentai_v1beta1/proto/document_pb2.py create mode 100644 google/cloud/documentai_v1beta1/proto/document_pb2_grpc.py create mode 100644 google/cloud/documentai_v1beta1/proto/document_understanding.proto create mode 100644 google/cloud/documentai_v1beta1/proto/document_understanding_pb2.py create mode 100644 google/cloud/documentai_v1beta1/proto/document_understanding_pb2_grpc.py create mode 100644 google/cloud/documentai_v1beta1/proto/geometry.proto create mode 100644 google/cloud/documentai_v1beta1/proto/geometry_pb2.py create mode 100644 google/cloud/documentai_v1beta1/proto/geometry_pb2_grpc.py create mode 100644 google/cloud/documentai_v1beta1/types.py create mode 100644 noxfile.py create mode 100644 setup.cfg create mode 100644 setup.py create mode 100644 synth.metadata create mode 100644 synth.py create mode 100644 tests/unit/gapic/v1beta1/test_document_understanding_service_client_v1beta1.py diff 
--git a/.coveragerc b/.coveragerc new file mode 100644 index 00000000..b178b094 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,19 @@ +# Generated by synthtool. DO NOT EDIT! +[run] +branch = True + +[report] +fail_under = 100 +show_missing = True +exclude_lines = + # Re-enable the standard pragma + pragma: NO COVER + # Ignore debug-only repr + def __repr__ + # Ignore abstract methods + raise NotImplementedError +omit = + */gapic/*.py + */proto/*.py + */core/*.py + */site-packages/*.py \ No newline at end of file diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..0268ecc9 --- /dev/null +++ b/.flake8 @@ -0,0 +1,14 @@ +# Generated by synthtool. DO NOT EDIT! +[flake8] +ignore = E203, E266, E501, W503 +exclude = + # Exclude generated code. + **/proto/** + **/gapic/** + *_pb2.py + + # Standard linting exemptions. + __pycache__, + .git, + *.pyc, + conf.py diff --git a/.repo-metadata.json b/.repo-metadata.json new file mode 100644 index 00000000..3f883298 --- /dev/null +++ b/.repo-metadata.json @@ -0,0 +1,13 @@ +{ + "name": "documentai", + "name_pretty": "Cloud Document Understanding API", + "product_documentation": "https://cloud.google.com/document-understanding/docs/", + "client_documentation": "https://googleapis.dev/python/documentai/latest", + "issue_tracker": "", + "release_level": "alpha", + "language": "python", + "repo": "googleapis/google-cloud-python", + "distribution_name": "google-cloud-documentai", + "api_id": "documentai.googleapis.com", + "requires_billing": true +} \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..825c32f0 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1 @@ +# Changelog diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..a8ee855d --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..9cbf175a --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,5 @@ +include README.rst LICENSE +recursive-include google *.json *.proto +recursive-include tests * +global-exclude *.py[co] +global-exclude __pycache__ diff --git a/README.rst b/README.rst new file mode 100644 index 00000000..13151dbc --- /dev/null +++ b/README.rst @@ -0,0 +1,77 @@ +Python Client for Cloud Document AI API (`Alpha`_) +================================================== + +`Cloud Document AI API`_: Service to parse structured information from unstructured or +semi-structured documents using state-of-the-art Google AI such as natural +language, computer vision, translation, and AutoML. + +- `Client Library Documentation`_ +- `Product Documentation`_ + +.. 
_Alpha: https://github.com/googleapis/google-cloud-python/blob/master/README.rst +.. _Cloud Document AI API: https://cloud.google.com/document-understanding/docs/ +.. _Client Library Documentation: https://googleapis.dev/python/documentai/latest +.. _Product Documentation: https://cloud.google.com/document-understanding/docs/ + +Quick Start +----------- + +In order to use this library, you first need to go through the following steps: + +1. `Select or create a Cloud Platform project.`_ +2. `Enable billing for your project.`_ +3. `Enable the Cloud Document AI API.`_ +4. `Setup Authentication.`_ + +.. _Select or create a Cloud Platform project.: https://console.cloud.google.com/project +.. _Enable billing for your project.: https://cloud.google.com/billing/docs/how-to/modify-project#enable_billing_for_a_project +.. _Enable the Cloud Document AI API.: https://cloud.google.com/document-understanding/docs/ +.. _Setup Authentication.: https://googleapis.dev/python/google-api-core/latest/auth.html + +Installation +~~~~~~~~~~~~ + +Install this library in a `virtualenv`_ using pip. `virtualenv`_ is a tool to +create isolated Python environments. The basic problem it addresses is one of +dependencies and versions, and indirectly permissions. + +With `virtualenv`_, it's possible to install this library without needing system +install permissions, and without clashing with the installed system +dependencies. + +.. _`virtualenv`: https://virtualenv.pypa.io/en/latest/ + + +Mac/Linux +^^^^^^^^^ + +.. code-block:: console + + pip install virtualenv + virtualenv <your-env> + source <your-env>/bin/activate + <your-env>/bin/pip install google-cloud-documentai + + +Windows +^^^^^^^ + +.. code-block:: console + + pip install virtualenv + virtualenv <your-env> + <your-env>\Scripts\activate + <your-env>\Scripts\pip.exe install google-cloud-documentai + +Next Steps +~~~~~~~~~~ + +- Read the `Client Library Documentation`_ for Cloud Document AI API + to see other available methods on the client. 
+- Read the `Cloud Document AI API Product documentation`_ to learn + more about the product and see How-to Guides. +- View this `repository’s main README`_ to see the full list of Cloud + APIs that we cover. + +.. _Cloud Document AI API Product documentation: https://cloud.google.com/document-understanding/docs/ +.. _repository’s main README: https://github.com/googleapis/google-cloud-python/blob/master/README.rst \ No newline at end of file diff --git a/docs/README.rst b/docs/README.rst new file mode 120000 index 00000000..89a01069 --- /dev/null +++ b/docs/README.rst @@ -0,0 +1 @@ +../README.rst \ No newline at end of file diff --git a/docs/changelog.md b/docs/changelog.md new file mode 120000 index 00000000..04c99a55 --- /dev/null +++ b/docs/changelog.md @@ -0,0 +1 @@ +../CHANGELOG.md \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 00000000..682903ab --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,363 @@ +# -*- coding: utf-8 -*- +# +# google-cloud-documentai documentation build configuration file +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys +import os +import shlex + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +sys.path.insert(0, os.path.abspath("..")) + +__version__ = "0.1.0" + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +needs_sphinx = "1.6.3" + +# Add any Sphinx extension module names here, as strings. 
They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.intersphinx", + "sphinx.ext.coverage", + "sphinx.ext.napoleon", + "sphinx.ext.todo", + "sphinx.ext.viewcode", +] + +# autodoc/autosummary flags +autoclass_content = "both" +autodoc_default_flags = ["members"] +autosummary_generate = True + + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# Allow markdown includes (so releases.md can include CHANGELOG.md) +# http://www.sphinx-doc.org/en/master/markdown.html +source_parsers = {".md": "recommonmark.parser.CommonMarkParser"} + +# The suffix(es) of source filenames. +# You can specify multiple suffixes as a list of strings: +# source_suffix = ['.rst', '.md'] +source_suffix = [".rst", ".md"] + +# The encoding of source files. +# source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = "index" + +# General information about the project. +project = u"google-cloud-documentai" +copyright = u"2017, Google" +author = u"Google APIs" + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The full version, including alpha/beta/rc tags. +release = __version__ +# The short X.Y version. +version = ".".join(release.split(".")[0:2]) + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +# today = '' +# Else, today_fmt is used as the format for a strftime call. 
+# today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ["_build"] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +# default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +# add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +# add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +# show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = "sphinx" + +# A list of ignored prefixes for module index sorting. +# modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +# keep_warnings = False + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = True + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = "alabaster" + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +html_theme_options = { + "description": "Google Cloud Client Libraries for Python", + "github_user": "googleapis", + "github_repo": "google-cloud-python", + "github_banner": True, + "font_family": "'Roboto', Georgia, sans", + "head_font_family": "'Roboto', Georgia, serif", + "code_font_family": "'Roboto Mono', 'Consolas', monospace", +} + +# Add any paths that contain custom themes here, relative to this directory. +# html_theme_path = [] + +# The name for this set of Sphinx documents. 
If None, it defaults to +# "<project> v<release> documentation". +# html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +# html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +# html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +# html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +# html_static_path = [] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +# html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +# html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +# html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +# html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +# html_additional_pages = {} + +# If false, no module index is generated. +# html_domain_indices = True + +# If false, no index is generated. +# html_use_index = True + +# If true, the index is split into individual pages for each letter. +# html_split_index = False + +# If true, links to the reST sources are added to the pages. +# html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +# html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. 
Default is True. +# html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +# html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +# html_file_suffix = None + +# Language to be used for generating the HTML full-text search index. +# Sphinx supports the following languages: +# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' +# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' +# html_search_language = 'en' + +# A dictionary with options for the search language support, empty by default. +# Now only 'ja' uses this config value +# html_search_options = {'type': 'default'} + +# The name of a javascript file (relative to the configuration directory) that +# implements a search results scorer. If empty, the default will be used. +# html_search_scorer = 'scorer.js' + +# Output file base name for HTML help builder. +htmlhelp_basename = "google-cloud-documentai-doc" + +# -- Options for warnings ------------------------------------------------------ + + +suppress_warnings = [ + # Temporarily suppress this to avoid "more than one target found for + # cross-reference" warnings, which are intractable for us to avoid while in + # a mono-repo. + # See https://github.com/sphinx-doc/sphinx/blob + # /2a65ffeef5c107c19084fabdd706cdff3f52d93c/sphinx/domains/python.py#L843 + "ref.python" +] + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + #'preamble': '', + # Latex figure (float) alignment + #'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. 
List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + ( + master_doc, + "google-cloud-documentai.tex", + u"google-cloud-documentai Documentation", + author, + "manual", + ) +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +# latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +# latex_use_parts = False + +# If true, show page references after internal links. +# latex_show_pagerefs = False + +# If true, show URL addresses after external links. +# latex_show_urls = False + +# Documents to append as an appendix to all manuals. +# latex_appendices = [] + +# If false, no module index is generated. +# latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ( + master_doc, + "google-cloud-documentai", + u"google-cloud-documentai Documentation", + [author], + 1, + ) +] + +# If true, show URL addresses after external links. +# man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ( + master_doc, + "google-cloud-documentai", + u"google-cloud-documentai Documentation", + author, + "google-cloud-documentai", + "GAPIC library for the {metadata.shortName} v1beta1 service", + "APIs", + ) +] + +# Documents to append as an appendix to all manuals. +# texinfo_appendices = [] + +# If false, no module index is generated. +# texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. 
+# texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +# texinfo_no_detailmenu = False + + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = { + "python": ("http://python.readthedocs.org/en/latest/", None), + "gax": ("https://gax-python.readthedocs.org/en/latest/", None), + "google-auth": ("https://google-auth.readthedocs.io/en/stable", None), + "google-gax": ("https://gax-python.readthedocs.io/en/latest/", None), + "google.api_core": ("https://googleapis.dev/python/google-api-core/latest", None), + "grpc": ("https://grpc.io/grpc/python/", None), + "requests": ("https://requests.kennethreitz.org/en/master/", None), + "fastavro": ("https://fastavro.readthedocs.io/en/stable/", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), +} + + +# Napoleon settings +napoleon_google_docstring = True +napoleon_numpy_docstring = True +napoleon_include_private_with_doc = False +napoleon_include_special_with_doc = True +napoleon_use_admonition_for_examples = False +napoleon_use_admonition_for_notes = False +napoleon_use_admonition_for_references = False +napoleon_use_ivar = False +napoleon_use_param = True +napoleon_use_rtype = True diff --git a/docs/gapic/v1beta1/api.rst b/docs/gapic/v1beta1/api.rst new file mode 100644 index 00000000..e588392f --- /dev/null +++ b/docs/gapic/v1beta1/api.rst @@ -0,0 +1,6 @@ +Client for Cloud Document AI API +================================ + +.. automodule:: google.cloud.documentai_v1beta1 + :members: + :inherited-members: \ No newline at end of file diff --git a/docs/gapic/v1beta1/types.rst b/docs/gapic/v1beta1/types.rst new file mode 100644 index 00000000..469eb107 --- /dev/null +++ b/docs/gapic/v1beta1/types.rst @@ -0,0 +1,5 @@ +Types for Cloud Document AI API Client +====================================== + +.. 
automodule:: google.cloud.documentai_v1beta1.types + :members: \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 00000000..f15473e5 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,19 @@ +.. include:: README.rst + +Api Reference +------------- +.. toctree:: + :maxdepth: 2 + + gapic/v1beta1/api + gapic/v1beta1/types + +Changelog +--------- + +For a list of all ``google-cloud-documentai`` releases: + +.. toctree:: + :maxdepth: 2 + + changelog \ No newline at end of file diff --git a/google/__init__.py b/google/__init__.py new file mode 100644 index 00000000..8fcc60e2 --- /dev/null +++ b/google/__init__.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +try: + import pkg_resources + + pkg_resources.declare_namespace(__name__) +except ImportError: + import pkgutil + + __path__ = pkgutil.extend_path(__path__, __name__) diff --git a/google/cloud/__init__.py b/google/cloud/__init__.py new file mode 100644 index 00000000..8fcc60e2 --- /dev/null +++ b/google/cloud/__init__.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +try: + import pkg_resources + + pkg_resources.declare_namespace(__name__) +except ImportError: + import pkgutil + + __path__ = pkgutil.extend_path(__path__, __name__) diff --git a/google/cloud/documentai.py b/google/cloud/documentai.py new file mode 100644 index 00000000..436c2d77 --- /dev/null +++ b/google/cloud/documentai.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from __future__ import absolute_import + +from google.cloud.documentai_v1beta1 import DocumentUnderstandingServiceClient +from google.cloud.documentai_v1beta1 import enums +from google.cloud.documentai_v1beta1 import types + + +__all__ = ("enums", "types", "DocumentUnderstandingServiceClient") diff --git a/google/cloud/documentai_v1beta1/__init__.py b/google/cloud/documentai_v1beta1/__init__.py new file mode 100644 index 00000000..beaf5faa --- /dev/null +++ b/google/cloud/documentai_v1beta1/__init__.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from __future__ import absolute_import + +from google.cloud.documentai_v1beta1 import types +from google.cloud.documentai_v1beta1.gapic import document_understanding_service_client +from google.cloud.documentai_v1beta1.gapic import enums + + +class DocumentUnderstandingServiceClient( + document_understanding_service_client.DocumentUnderstandingServiceClient +): + __doc__ = ( + document_understanding_service_client.DocumentUnderstandingServiceClient.__doc__ + ) + enums = enums + + +__all__ = ("enums", "types", "DocumentUnderstandingServiceClient") diff --git a/google/cloud/documentai_v1beta1/gapic/__init__.py b/google/cloud/documentai_v1beta1/gapic/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/google/cloud/documentai_v1beta1/gapic/document_understanding_service_client.py b/google/cloud/documentai_v1beta1/gapic/document_understanding_service_client.py new file mode 100644 index 00000000..2e1b20fa --- /dev/null +++ b/google/cloud/documentai_v1beta1/gapic/document_understanding_service_client.py @@ -0,0 +1,291 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Accesses the google.cloud.documentai.v1beta1 DocumentUnderstandingService API.""" + +import pkg_resources +import warnings + +from google.oauth2 import service_account +import google.api_core.client_options +import google.api_core.gapic_v1.client_info +import google.api_core.gapic_v1.config +import google.api_core.gapic_v1.method +import google.api_core.gapic_v1.routing_header +import google.api_core.grpc_helpers +import google.api_core.operation +import google.api_core.operations_v1 +import grpc + +from google.cloud.documentai_v1beta1.gapic import ( + document_understanding_service_client_config, +) +from google.cloud.documentai_v1beta1.gapic import enums +from google.cloud.documentai_v1beta1.gapic.transports import ( + document_understanding_service_grpc_transport, +) +from google.cloud.documentai_v1beta1.proto import document_understanding_pb2 +from google.cloud.documentai_v1beta1.proto import document_understanding_pb2_grpc +from google.longrunning import operations_pb2 + + +_GAPIC_LIBRARY_VERSION = pkg_resources.get_distribution( + "google-cloud-documentai" +).version + + +class DocumentUnderstandingServiceClient(object): + """ + Service to parse structured information from unstructured or semi-structured + documents using state-of-the-art Google AI such as natural language, + computer vision, and translation. + """ + + SERVICE_ADDRESS = "documentai.googleapis.com:443" + """The default address of the service.""" + + # The name of the interface for this client. This is the key used to + # find the method configuration in the client_config dictionary. + _INTERFACE_NAME = "google.cloud.documentai.v1beta1.DocumentUnderstandingService" + + @classmethod + def from_service_account_file(cls, filename, *args, **kwargs): + """Creates an instance of this client using the provided credentials + file. + + Args: + filename (str): The path to the service account private key json + file. + args: Additional arguments to pass to the constructor. 
+ kwargs: Additional arguments to pass to the constructor. + + Returns: + DocumentUnderstandingServiceClient: The constructed client. + """ + credentials = service_account.Credentials.from_service_account_file(filename) + kwargs["credentials"] = credentials + return cls(*args, **kwargs) + + from_service_account_json = from_service_account_file + + def __init__( + self, + transport=None, + channel=None, + credentials=None, + client_config=None, + client_info=None, + client_options=None, + ): + """Constructor. + + Args: + transport (Union[~.DocumentUnderstandingServiceGrpcTransport, + Callable[[~.Credentials, type], ~.DocumentUnderstandingServiceGrpcTransport]): A transport + instance, responsible for actually making the API calls. + The default transport uses the gRPC protocol. + This argument may also be a callable which returns a + transport instance. Callables will be sent the credentials + as the first argument and the default transport class as + the second argument. + channel (grpc.Channel): DEPRECATED. A ``Channel`` instance + through which to make calls. This argument is mutually exclusive + with ``credentials``; providing both will raise an exception. + credentials (google.auth.credentials.Credentials): The + authorization credentials to attach to requests. These + credentials identify this application to the service. If none + are specified, the client will attempt to ascertain the + credentials from the environment. + This argument is mutually exclusive with providing a + transport instance to ``transport``; doing so will raise + an exception. + client_config (dict): DEPRECATED. A dictionary of call options for + each method. If not specified, the default configuration is used. + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you're developing + your own client library. 
+ client_options (Union[dict, google.api_core.client_options.ClientOptions]): + Client options used to set user options on the client. API Endpoint + should be set through client_options. + """ + # Raise deprecation warnings for things we want to go away. + if client_config is not None: + warnings.warn( + "The `client_config` argument is deprecated.", + PendingDeprecationWarning, + stacklevel=2, + ) + else: + client_config = document_understanding_service_client_config.config + + if channel: + warnings.warn( + "The `channel` argument is deprecated; use " "`transport` instead.", + PendingDeprecationWarning, + stacklevel=2, + ) + + api_endpoint = self.SERVICE_ADDRESS + if client_options: + if type(client_options) == dict: + client_options = google.api_core.client_options.from_dict( + client_options + ) + if client_options.api_endpoint: + api_endpoint = client_options.api_endpoint + + # Instantiate the transport. + # The transport is responsible for handling serialization and + # deserialization and actually sending data to the service. + if transport: + if callable(transport): + self.transport = transport( + credentials=credentials, + default_class=document_understanding_service_grpc_transport.DocumentUnderstandingServiceGrpcTransport, + address=api_endpoint, + ) + else: + if credentials: + raise ValueError( + "Received both a transport instance and " + "credentials; these are mutually exclusive." + ) + self.transport = transport + else: + self.transport = document_understanding_service_grpc_transport.DocumentUnderstandingServiceGrpcTransport( + address=api_endpoint, channel=channel, credentials=credentials + ) + + if client_info is None: + client_info = google.api_core.gapic_v1.client_info.ClientInfo( + gapic_version=_GAPIC_LIBRARY_VERSION + ) + else: + client_info.gapic_version = _GAPIC_LIBRARY_VERSION + self._client_info = client_info + + # Parse out the default settings for retry and timeout for each RPC + # from the client configuration. 
+ # (Ordinarily, these are the defaults specified in the `*_config.py` + # file next to this one.) + self._method_configs = google.api_core.gapic_v1.config.parse_method_configs( + client_config["interfaces"][self._INTERFACE_NAME] + ) + + # Save a dictionary of cached API call functions. + # These are the actual callables which invoke the proper + # transport methods, wrapped with `wrap_method` to add retry, + # timeout, and the like. + self._inner_api_calls = {} + + # Service calls + def batch_process_documents( + self, + requests, + parent=None, + retry=google.api_core.gapic_v1.method.DEFAULT, + timeout=google.api_core.gapic_v1.method.DEFAULT, + metadata=None, + ): + """ + LRO endpoint to batch process many documents. + + Example: + >>> from google.cloud import documentai_v1beta1 + >>> + >>> client = documentai_v1beta1.DocumentUnderstandingServiceClient() + >>> + >>> # TODO: Initialize `requests`: + >>> requests = [] + >>> + >>> response = client.batch_process_documents(requests) + >>> + >>> def callback(operation_future): + ... # Handle result. + ... result = operation_future.result() + >>> + >>> response.add_done_callback(callback) + >>> + >>> # Handle metadata. + >>> metadata = response.metadata() + + Args: + requests (list[Union[dict, ~google.cloud.documentai_v1beta1.types.ProcessDocumentRequest]]): Required. Individual requests for each document. + + If a dict is provided, it must be of the same form as the protobuf + message :class:`~google.cloud.documentai_v1beta1.types.ProcessDocumentRequest` + parent (str): Target project and location to make a call. + + Format: ``projects/{project-id}/locations/{location-id}``. + + If no location is specified, a region will be chosen automatically. + retry (Optional[google.api_core.retry.Retry]): A retry object used + to retry requests. If ``None`` is specified, requests will + be retried using a default configuration. + timeout (Optional[float]): The amount of time, in seconds, to wait + for the request to complete. 
Note that if ``retry`` is + specified, the timeout applies to each individual attempt. + metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata + that is provided to the method. + + Returns: + A :class:`~google.cloud.documentai_v1beta1.types._OperationFuture` instance. + + Raises: + google.api_core.exceptions.GoogleAPICallError: If the request + failed for any reason. + google.api_core.exceptions.RetryError: If the request failed due + to a retryable error and retry attempts failed. + ValueError: If the parameters are invalid. + """ + # Wrap the transport method to add retry and timeout logic. + if "batch_process_documents" not in self._inner_api_calls: + self._inner_api_calls[ + "batch_process_documents" + ] = google.api_core.gapic_v1.method.wrap_method( + self.transport.batch_process_documents, + default_retry=self._method_configs["BatchProcessDocuments"].retry, + default_timeout=self._method_configs["BatchProcessDocuments"].timeout, + client_info=self._client_info, + ) + + request = document_understanding_pb2.BatchProcessDocumentsRequest( + requests=requests, parent=parent + ) + if metadata is None: + metadata = [] + metadata = list(metadata) + try: + routing_header = [("parent", parent)] + except AttributeError: + pass + else: + routing_metadata = google.api_core.gapic_v1.routing_header.to_grpc_metadata( + routing_header + ) + metadata.append(routing_metadata) + + operation = self._inner_api_calls["batch_process_documents"]( + request, retry=retry, timeout=timeout, metadata=metadata + ) + return google.api_core.operation.from_gapic( + operation, + self.transport._operations_client, + document_understanding_pb2.BatchProcessDocumentsResponse, + metadata_type=document_understanding_pb2.OperationMetadata, + ) diff --git a/google/cloud/documentai_v1beta1/gapic/document_understanding_service_client_config.py b/google/cloud/documentai_v1beta1/gapic/document_understanding_service_client_config.py new file mode 100644 index 00000000..25695074 --- /dev/null 
+++ b/google/cloud/documentai_v1beta1/gapic/document_understanding_service_client_config.py @@ -0,0 +1,28 @@ +config = { + "interfaces": { + "google.cloud.documentai.v1beta1.DocumentUnderstandingService": { + "retry_codes": { + "idempotent": ["DEADLINE_EXCEEDED", "UNAVAILABLE"], + "non_idempotent": [], + }, + "retry_params": { + "default": { + "initial_retry_delay_millis": 100, + "retry_delay_multiplier": 1.3, + "max_retry_delay_millis": 60000, + "initial_rpc_timeout_millis": 20000, + "rpc_timeout_multiplier": 1.0, + "max_rpc_timeout_millis": 20000, + "total_timeout_millis": 600000, + } + }, + "methods": { + "BatchProcessDocuments": { + "timeout_millis": 60000, + "retry_codes_name": "idempotent", + "retry_params_name": "default", + } + }, + } + } +} diff --git a/google/cloud/documentai_v1beta1/gapic/enums.py b/google/cloud/documentai_v1beta1/gapic/enums.py new file mode 100644 index 00000000..4c907fee --- /dev/null +++ b/google/cloud/documentai_v1beta1/gapic/enums.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Wrappers for protocol buffer enum types.""" + +import enum + + +class Document(object): + class Page(object): + class Layout(object): + class Orientation(enum.IntEnum): + """ + Detected human reading orientation. + + Attributes: + ORIENTATION_UNSPECIFIED (int): Unspecified orientation. + PAGE_UP (int): Orientation is aligned with page up. 
+ PAGE_RIGHT (int): Orientation is aligned with page right. + Turn the head 90 degrees clockwise from upright to read. + PAGE_DOWN (int): Orientation is aligned with page down. + Turn the head 180 degrees from upright to read. + PAGE_LEFT (int): Orientation is aligned with page left. + Turn the head 90 degrees counterclockwise from upright to read. + """ + + ORIENTATION_UNSPECIFIED = 0 + PAGE_UP = 1 + PAGE_RIGHT = 2 + PAGE_DOWN = 3 + PAGE_LEFT = 4 + + class Token(object): + class DetectedBreak(object): + class Type(enum.IntEnum): + """ + Enum to denote the type of break found. + + Attributes: + TYPE_UNSPECIFIED (int): Unspecified break type. + SPACE (int): A single whitespace. + WIDE_SPACE (int): A wider whitespace. + HYPHEN (int): A hyphen that indicates that a token has been split across lines. + """ + + TYPE_UNSPECIFIED = 0 + SPACE = 1 + WIDE_SPACE = 2 + HYPHEN = 3 + + +class OperationMetadata(object): + class State(enum.IntEnum): + """ + Attributes: + STATE_UNSPECIFIED (int): The default value. This value is used if the state is omitted. + ACCEPTED (int): Request is received. + WAITING (int): Request operation is waiting for scheduling. + RUNNING (int): Request is being processed. + SUCCEEDED (int): The batch processing completed successfully. + CANCELLED (int): The batch processing was cancelled. + FAILED (int): The batch processing has failed. 
+ """ + + STATE_UNSPECIFIED = 0 + ACCEPTED = 1 + WAITING = 2 + RUNNING = 3 + SUCCEEDED = 4 + CANCELLED = 5 + FAILED = 6 diff --git a/google/cloud/documentai_v1beta1/gapic/transports/__init__.py b/google/cloud/documentai_v1beta1/gapic/transports/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/google/cloud/documentai_v1beta1/gapic/transports/document_understanding_service_grpc_transport.py b/google/cloud/documentai_v1beta1/gapic/transports/document_understanding_service_grpc_transport.py new file mode 100644 index 00000000..7d2f1391 --- /dev/null +++ b/google/cloud/documentai_v1beta1/gapic/transports/document_understanding_service_grpc_transport.py @@ -0,0 +1,131 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import google.api_core.grpc_helpers +import google.api_core.operations_v1 + +from google.cloud.documentai_v1beta1.proto import document_understanding_pb2_grpc + + +class DocumentUnderstandingServiceGrpcTransport(object): + """gRPC transport class providing stubs for + google.cloud.documentai.v1beta1 DocumentUnderstandingService API. + + The transport provides access to the raw gRPC stubs, + which can be used to take advantage of advanced + features of gRPC. + """ + + # The scopes needed to make gRPC calls to all of the methods defined + # in this service. 
+ _OAUTH_SCOPES = ("https://www.googleapis.com/auth/cloud-platform",) + + def __init__( + self, channel=None, credentials=None, address="documentai.googleapis.com:443" + ): + """Instantiate the transport class. + + Args: + channel (grpc.Channel): A ``Channel`` instance through + which to make calls. This argument is mutually exclusive + with ``credentials``; providing both will raise an exception. + credentials (google.auth.credentials.Credentials): The + authorization credentials to attach to requests. These + credentials identify this application to the service. If none + are specified, the client will attempt to ascertain the + credentials from the environment. + address (str): The address where the service is hosted. + """ + # If both `channel` and `credentials` are specified, raise an + # exception (channels come with credentials baked in already). + if channel is not None and credentials is not None: + raise ValueError( + "The `channel` and `credentials` arguments are mutually " "exclusive." + ) + + # Create the channel. + if channel is None: + channel = self.create_channel( + address=address, + credentials=credentials, + options={ + "grpc.max_send_message_length": -1, + "grpc.max_receive_message_length": -1, + }.items(), + ) + + self._channel = channel + + # gRPC uses objects called "stubs" that are bound to the + # channel and provide a basic method for each RPC. + self._stubs = { + "document_understanding_service_stub": document_understanding_pb2_grpc.DocumentUnderstandingServiceStub( + channel + ) + } + + # Because this API includes a method that returns a + # long-running operation (proto: google.longrunning.Operation), + # instantiate an LRO client. + self._operations_client = google.api_core.operations_v1.OperationsClient( + channel + ) + + @classmethod + def create_channel( + cls, address="documentai.googleapis.com:443", credentials=None, **kwargs + ): + """Create and return a gRPC channel object. 
+ + Args: + address (str): The host for the channel to use. + credentials (~.Credentials): The + authorization credentials to attach to requests. These + credentials identify this application to the service. If + none are specified, the client will attempt to ascertain + the credentials from the environment. + kwargs (dict): Keyword arguments, which are passed to the + channel creation. + + Returns: + grpc.Channel: A gRPC channel object. + """ + return google.api_core.grpc_helpers.create_channel( + address, credentials=credentials, scopes=cls._OAUTH_SCOPES, **kwargs + ) + + @property + def channel(self): + """The gRPC channel used by the transport. + + Returns: + grpc.Channel: A gRPC channel object. + """ + return self._channel + + @property + def batch_process_documents(self): + """Return the gRPC stub for :meth:`DocumentUnderstandingServiceClient.batch_process_documents`. + + LRO endpoint to batch process many documents. + + Returns: + Callable: A callable which accepts the appropriate + deserialized request object and returns a + deserialized response object. + """ + return self._stubs["document_understanding_service_stub"].BatchProcessDocuments diff --git a/google/cloud/documentai_v1beta1/proto/__init__.py b/google/cloud/documentai_v1beta1/proto/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/google/cloud/documentai_v1beta1/proto/document.proto b/google/cloud/documentai_v1beta1/proto/document.proto new file mode 100644 index 00000000..1303c32d --- /dev/null +++ b/google/cloud/documentai_v1beta1/proto/document.proto @@ -0,0 +1,446 @@ +// Copyright 2019 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +syntax = "proto3"; + +package google.cloud.documentai.v1beta1; + +import "google/api/annotations.proto"; +import "google/cloud/documentai/v1beta1/geometry.proto"; +import "google/rpc/status.proto"; +import "google/type/color.proto"; + +option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta1;documentai"; +option java_multiple_files = true; +option java_outer_classname = "DocumentProto"; +option java_package = "com.google.cloud.documentai.v1beta1"; + +// Document represents the canonical document resource in Document Understanding +// AI. +// It is an interchange format that provides insights into documents and allows +// for collaboration between users and Document Understanding AI to iterate and +// optimize for quality. +message Document { + // For a large document, sharding may be performed to produce several + // document shards. Each document shard contains this field to detail which + // shard it is. + message ShardInfo { + // The 0-based index of this shard. + int64 shard_index = 1; + + // Total number of shards. + int64 shard_count = 2; + + // The index of the first character in + // [Document.text][google.cloud.documentai.v1beta1.Document.text] in the + // overall document global text. + int64 text_offset = 3; + } + + // Annotation for common text style attributes. This adheres to CSS + // conventions as much as possible. + message Style { + // Font size with unit. + message FontSize { + // Font size for the text. + float size = 1; + + // Unit for the font size. Follows CSS naming (in, px, pt, etc.). 
+ string unit = 2; + } + + // Text anchor indexing into the + // [Document.text][google.cloud.documentai.v1beta1.Document.text]. + TextAnchor text_anchor = 1; + + // Text color. + google.type.Color color = 2; + + // Text background color. + google.type.Color background_color = 3; + + // Font weight. Possible values are normal, bold, bolder, and lighter. + // https://www.w3schools.com/cssref/pr_font_weight.asp + string font_weight = 4; + + // Text style. Possible values are normal, italic, and oblique. + // https://www.w3schools.com/cssref/pr_font_font-style.asp + string text_style = 5; + + // Text decoration. Follows CSS standard. + // + // https://www.w3schools.com/cssref/pr_text_text-decoration.asp + string text_decoration = 6; + + // Font size. + FontSize font_size = 7; + } + + // A page in a [Document][google.cloud.documentai.v1beta1.Document]. + message Page { + // Dimension for the page. + message Dimension { + // Page width. + float width = 1; + + // Page height. + float height = 2; + + // Dimension unit. + string unit = 3; + } + + // Visual element describing a layout unit on a page. + message Layout { + // Detected human reading orientation. + enum Orientation { + // Unspecified orientation. + ORIENTATION_UNSPECIFIED = 0; + + // Orientation is aligned with page up. + PAGE_UP = 1; + + // Orientation is aligned with page right. + // Turn the head 90 degrees clockwise from upright to read. + PAGE_RIGHT = 2; + + // Orientation is aligned with page down. + // Turn the head 180 degrees from upright to read. + PAGE_DOWN = 3; + + // Orientation is aligned with page left. + // Turn the head 90 degrees counterclockwise from upright to read. + PAGE_LEFT = 4; + } + + // Text anchor indexing into the + // [Document.text][google.cloud.documentai.v1beta1.Document.text]. + TextAnchor text_anchor = 1; + + // Confidence of the current + // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] within + // context of the object this layout is for. e.g. 
confidence can be for a + // single token, a table, a visual element, etc. depending on context. + // Range [0, 1]. + float confidence = 2; + + // The bounding polygon for the + // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout]. + BoundingPoly bounding_poly = 3; + + // Detected orientation for the + // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout]. + Orientation orientation = 4; + } + + // A block has a set of lines (collected into paragraphs) that have a + // common line-spacing and orientation. + message Block { + // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for + // [Block][google.cloud.documentai.v1beta1.Document.Page.Block]. + Layout layout = 1; + + // A list of detected languages together with confidence. + repeated DetectedLanguage detected_languages = 2; + } + + // A collection of lines that a human would perceive as a paragraph. + message Paragraph { + // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for + // [Paragraph][google.cloud.documentai.v1beta1.Document.Page.Paragraph]. + Layout layout = 1; + + // A list of detected languages together with confidence. + repeated DetectedLanguage detected_languages = 2; + } + + // A collection of tokens that a human would perceive as a line. + // Does not cross column boundaries, can be horizontal, vertical, etc. + message Line { + // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for + // [Line][google.cloud.documentai.v1beta1.Document.Page.Line]. + Layout layout = 1; + + // A list of detected languages together with confidence. + repeated DetectedLanguage detected_languages = 2; + } + + // A detected token. + message Token { + // Detected break at the end of a + // [Token][google.cloud.documentai.v1beta1.Document.Page.Token]. + message DetectedBreak { + // Enum to denote the type of break found. + enum Type { + // Unspecified break type. + TYPE_UNSPECIFIED = 0; + + // A single whitespace. + SPACE = 1; + + // A wider whitespace. 
+ WIDE_SPACE = 2; + + // A hyphen that indicates that a token has been split across lines. + HYPHEN = 3; + } + + // Detected break type. + Type type = 1; + } + + // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for + // [Token][google.cloud.documentai.v1beta1.Document.Page.Token]. + Layout layout = 1; + + // Detected break at the end of a + // [Token][google.cloud.documentai.v1beta1.Document.Page.Token]. + DetectedBreak detected_break = 2; + + // A list of detected languages together with confidence. + repeated DetectedLanguage detected_languages = 3; + } + + // Detected non-text visual elements e.g. checkbox, signature etc. on the + // page. + message VisualElement { + // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for + // [Token][google.cloud.documentai.v1beta1.Document.Page.Token]. + Layout layout = 1; + + // Type of the + // [VisualElement][google.cloud.documentai.v1beta1.Document.Page.VisualElement]. + string type = 2; + + // A list of detected languages together with confidence. + repeated DetectedLanguage detected_languages = 3; + } + + // A table representation similar to HTML table structure. + message Table { + // A row of table cells. + message TableRow { + // Cells that make up this row. + repeated TableCell cells = 1; + } + + // A cell representation inside the table. + message TableCell { + // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for + // [TableCell][google.cloud.documentai.v1beta1.Document.Page.Table.TableCell]. + Layout layout = 1; + + // How many rows this cell spans. + int32 row_span = 2; + + // How many columns this cell spans. + int32 col_span = 3; + + // A list of detected languages together with confidence. + repeated DetectedLanguage detected_languages = 4; + } + + // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for + // [Table][google.cloud.documentai.v1beta1.Document.Page.Table]. + Layout layout = 1; + + // Header rows of the table. 
+ repeated TableRow header_rows = 2; + + // Body rows of the table. + repeated TableRow body_rows = 3; + + // A list of detected languages together with confidence. + repeated DetectedLanguage detected_languages = 4; + } + + // A form field detected on the page. + message FormField { + // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for the + // [FormField][google.cloud.documentai.v1beta1.Document.Page.FormField] + // name. e.g. `Address`, `Email`, `Grand total`, `Phone number`, etc. + Layout field_name = 1; + + // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for the + // [FormField][google.cloud.documentai.v1beta1.Document.Page.FormField] + // value. + Layout field_value = 2; + + // A list of detected languages for name together with confidence. + repeated DetectedLanguage name_detected_languages = 3; + + // A list of detected languages for value together with confidence. + repeated DetectedLanguage value_detected_languages = 4; + } + + // Detected language for a structural component. + message DetectedLanguage { + // The BCP-47 language code, such as "en-US" or "sr-Latn". For more + // information, see + // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier. + string language_code = 1; + + // Confidence of detected language. Range [0, 1]. + float confidence = 2; + } + + // 1-based index for current + // [Page][google.cloud.documentai.v1beta1.Document.Page] in a parent + // [Document][google.cloud.documentai.v1beta1.Document]. Useful when a page + // is taken out of a [Document][google.cloud.documentai.v1beta1.Document] + // for individual processing. + int32 page_number = 1; + + // Physical dimension of the page. + Dimension dimension = 2; + + // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for the + // page. + Layout layout = 3; + + // A list of detected languages together with confidence. 
+ repeated DetectedLanguage detected_languages = 4; + + // A list of visually detected text blocks on the page. + // A block has a set of lines (collected into paragraphs) that have a common + // line-spacing and orientation. + repeated Block blocks = 5; + + // A list of visually detected text paragraphs on the page. + // A collection of lines that a human would perceive as a paragraph. + repeated Paragraph paragraphs = 6; + + // A list of visually detected text lines on the page. + // A collection of tokens that a human would perceive as a line. + repeated Line lines = 7; + + // A list of visually detected tokens on the page. + repeated Token tokens = 8; + + // A list of detected non-text visual elements e.g. checkbox, + // signature etc. on the page. + repeated VisualElement visual_elements = 9; + + // A list of visually detected tables on the page. + repeated Table tables = 10; + + // A list of visually detected form fields on the page. + repeated FormField form_fields = 11; + } + + // A phrase in the text that is a known entity type, such as a person, an + // organization, or location. + message Entity { + // Provenance of the entity. + // Text anchor indexing into the + // [Document.text][google.cloud.documentai.v1beta1.Document.text]. + TextAnchor text_anchor = 1; + + // Entity type from a schema e.g. `Address`. + string type = 2; + + // Text value in the document e.g. `1600 Amphitheatre Pkwy`. + string mention_text = 3; + + // Canonical mention name. This will be a unique value in the entity list + // for this document. + string mention_id = 4; + } + + // Relationship between + // [Entities][google.cloud.documentai.v1beta1.Document.Entity]. + message EntityRelation { + // Subject entity mention_id. + string subject_id = 1; + + // Object entity mention_id. + string object_id = 2; + + // Relationship description. + string relation = 3; + } + + // Text reference indexing into the + // [Document.text][google.cloud.documentai.v1beta1.Document.text]. 
+ message TextAnchor { + // A text segment in the + // [Document.text][google.cloud.documentai.v1beta1.Document.text]. The + // indices may be out of bounds, which indicates that the text extends into + // another document shard for large sharded documents. See + // [ShardInfo.text_offset][google.cloud.documentai.v1beta1.Document.ShardInfo.text_offset]. + message TextSegment { + // [TextSegment][google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment] + // start UTF-8 char index in the + // [Document.text][google.cloud.documentai.v1beta1.Document.text]. + int64 start_index = 1; + + // [TextSegment][google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment] + // half open end UTF-8 char index in the + // [Document.text][google.cloud.documentai.v1beta1.Document.text]. + int64 end_index = 2; + } + + // The text segments from the + // [Document.text][google.cloud.documentai.v1beta1.Document.text]. + repeated TextSegment text_segments = 1; + } + + // Original source document from the user. + oneof source { + // Currently supports Google Cloud Storage URI of the form + // `gs://bucket_name/object_name`. Object versioning is not supported. + // See [Google Cloud Storage Request + // URIs](https://cloud.google.com/storage/docs/reference-uris) for more + // info. + string uri = 1; + + // Inline document content, represented as a stream of bytes. + // Note: As with all `bytes` fields, protobuffers use a pure binary + // representation, whereas JSON representations use base64. + bytes content = 2; + } + + // An IANA published MIME type (also referred to as media type). For more + // information, see + // https://www.iana.org/assignments/media-types/media-types.xhtml. + string mime_type = 3; + + // UTF-8 encoded text in reading order from the document. + string text = 4; + + // Styles for the + // [Document.text][google.cloud.documentai.v1beta1.Document.text].
+ repeated Style text_styles = 5; + + // Visual page layout for the + // [Document][google.cloud.documentai.v1beta1.Document]. + repeated Page pages = 6; + + // A list of entities detected on + // [Document.text][google.cloud.documentai.v1beta1.Document.text]. For + // document shards, entities in this list may cross shard boundaries. + repeated Entity entities = 7; + + // Relationship among + // [Document.entities][google.cloud.documentai.v1beta1.Document.entities]. + repeated EntityRelation entity_relations = 8; + + // Information about the sharding if this document is a sharded part of a larger + // document. If the document is not sharded, this message is not specified. + ShardInfo shard_info = 9; + + // Any error that occurred while processing this document. + google.rpc.Status error = 10; +} diff --git a/google/cloud/documentai_v1beta1/proto/document_pb2.py b/google/cloud/documentai_v1beta1/proto/document_pb2.py new file mode 100644 index 00000000..73c90d36 --- /dev/null +++ b/google/cloud/documentai_v1beta1/proto/document_pb2.py @@ -0,0 +1,2695 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: google/cloud/documentai_v1beta1/proto/document.proto + +import sys + +_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1")) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database + +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 +from google.cloud.documentai_v1beta1.proto import ( + geometry_pb2 as google_dot_cloud_dot_documentai__v1beta1_dot_proto_dot_geometry__pb2, +) +from google.rpc import status_pb2 as google_dot_rpc_dot_status__pb2 +from google.type import color_pb2 as google_dot_type_dot_color__pb2 + + +DESCRIPTOR = _descriptor.FileDescriptor( + name="google/cloud/documentai_v1beta1/proto/document.proto", + package="google.cloud.documentai.v1beta1", + syntax="proto3", + serialized_options=_b( + "\n#com.google.cloud.documentai.v1beta1B\rDocumentProtoP\001ZIgoogle.golang.org/genproto/googleapis/cloud/documentai/v1beta1;documentai" + ), + serialized_pb=_b( + '\n4google/cloud/documentai_v1beta1/proto/document.proto\x12\x1fgoogle.cloud.documentai.v1beta1\x1a\x1cgoogle/api/annotations.proto\x1a\x34google/cloud/documentai_v1beta1/proto/geometry.proto\x1a\x17google/rpc/status.proto\x1a\x17google/type/color.proto"\xfa$\n\x08\x44ocument\x12\r\n\x03uri\x18\x01 \x01(\tH\x00\x12\x11\n\x07\x63ontent\x18\x02 \x01(\x0cH\x00\x12\x11\n\tmime_type\x18\x03 \x01(\t\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x44\n\x0btext_styles\x18\x05 \x03(\x0b\x32/.google.cloud.documentai.v1beta1.Document.Style\x12=\n\x05pages\x18\x06 \x03(\x0b\x32..google.cloud.documentai.v1beta1.Document.Page\x12\x42\n\x08\x65ntities\x18\x07 \x03(\x0b\x32\x30.google.cloud.documentai.v1beta1.Document.Entity\x12R\n\x10\x65ntity_relations\x18\x08 
\x03(\x0b\x32\x38.google.cloud.documentai.v1beta1.Document.EntityRelation\x12G\n\nshard_info\x18\t \x01(\x0b\x32\x33.google.cloud.documentai.v1beta1.Document.ShardInfo\x12!\n\x05\x65rror\x18\n \x01(\x0b\x32\x12.google.rpc.Status\x1aJ\n\tShardInfo\x12\x13\n\x0bshard_index\x18\x01 \x01(\x03\x12\x13\n\x0bshard_count\x18\x02 \x01(\x03\x12\x13\n\x0btext_offset\x18\x03 \x01(\x03\x1a\xda\x02\n\x05Style\x12I\n\x0btext_anchor\x18\x01 \x01(\x0b\x32\x34.google.cloud.documentai.v1beta1.Document.TextAnchor\x12!\n\x05\x63olor\x18\x02 \x01(\x0b\x32\x12.google.type.Color\x12,\n\x10\x62\x61\x63kground_color\x18\x03 \x01(\x0b\x32\x12.google.type.Color\x12\x13\n\x0b\x66ont_weight\x18\x04 \x01(\t\x12\x12\n\ntext_style\x18\x05 \x01(\t\x12\x17\n\x0ftext_decoration\x18\x06 \x01(\t\x12K\n\tfont_size\x18\x07 \x01(\x0b\x32\x38.google.cloud.documentai.v1beta1.Document.Style.FontSize\x1a&\n\x08\x46ontSize\x12\x0c\n\x04size\x18\x01 \x01(\x02\x12\x0c\n\x04unit\x18\x02 \x01(\t\x1a\xf6\x1a\n\x04Page\x12\x13\n\x0bpage_number\x18\x01 \x01(\x05\x12K\n\tdimension\x18\x02 \x01(\x0b\x32\x38.google.cloud.documentai.v1beta1.Document.Page.Dimension\x12\x45\n\x06layout\x18\x03 \x01(\x0b\x32\x35.google.cloud.documentai.v1beta1.Document.Page.Layout\x12[\n\x12\x64\x65tected_languages\x18\x04 \x03(\x0b\x32?.google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage\x12\x44\n\x06\x62locks\x18\x05 \x03(\x0b\x32\x34.google.cloud.documentai.v1beta1.Document.Page.Block\x12L\n\nparagraphs\x18\x06 \x03(\x0b\x32\x38.google.cloud.documentai.v1beta1.Document.Page.Paragraph\x12\x42\n\x05lines\x18\x07 \x03(\x0b\x32\x33.google.cloud.documentai.v1beta1.Document.Page.Line\x12\x44\n\x06tokens\x18\x08 \x03(\x0b\x32\x34.google.cloud.documentai.v1beta1.Document.Page.Token\x12U\n\x0fvisual_elements\x18\t \x03(\x0b\x32<.google.cloud.documentai.v1beta1.Document.Page.VisualElement\x12\x44\n\x06tables\x18\n \x03(\x0b\x32\x34.google.cloud.documentai.v1beta1.Document.Page.Table\x12M\n\x0b\x66orm_fields\x18\x0b 
\x03(\x0b\x32\x38.google.cloud.documentai.v1beta1.Document.Page.FormField\x1a\x38\n\tDimension\x12\r\n\x05width\x18\x01 \x01(\x02\x12\x0e\n\x06height\x18\x02 \x01(\x02\x12\x0c\n\x04unit\x18\x03 \x01(\t\x1a\xec\x02\n\x06Layout\x12I\n\x0btext_anchor\x18\x01 \x01(\x0b\x32\x34.google.cloud.documentai.v1beta1.Document.TextAnchor\x12\x12\n\nconfidence\x18\x02 \x01(\x02\x12\x44\n\rbounding_poly\x18\x03 \x01(\x0b\x32-.google.cloud.documentai.v1beta1.BoundingPoly\x12V\n\x0borientation\x18\x04 \x01(\x0e\x32\x41.google.cloud.documentai.v1beta1.Document.Page.Layout.Orientation"e\n\x0bOrientation\x12\x1b\n\x17ORIENTATION_UNSPECIFIED\x10\x00\x12\x0b\n\x07PAGE_UP\x10\x01\x12\x0e\n\nPAGE_RIGHT\x10\x02\x12\r\n\tPAGE_DOWN\x10\x03\x12\r\n\tPAGE_LEFT\x10\x04\x1a\xab\x01\n\x05\x42lock\x12\x45\n\x06layout\x18\x01 \x01(\x0b\x32\x35.google.cloud.documentai.v1beta1.Document.Page.Layout\x12[\n\x12\x64\x65tected_languages\x18\x02 \x03(\x0b\x32?.google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage\x1a\xaf\x01\n\tParagraph\x12\x45\n\x06layout\x18\x01 \x01(\x0b\x32\x35.google.cloud.documentai.v1beta1.Document.Page.Layout\x12[\n\x12\x64\x65tected_languages\x18\x02 \x03(\x0b\x32?.google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage\x1a\xaa\x01\n\x04Line\x12\x45\n\x06layout\x18\x01 \x01(\x0b\x32\x35.google.cloud.documentai.v1beta1.Document.Page.Layout\x12[\n\x12\x64\x65tected_languages\x18\x02 \x03(\x0b\x32?.google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage\x1a\xb5\x03\n\x05Token\x12\x45\n\x06layout\x18\x01 \x01(\x0b\x32\x35.google.cloud.documentai.v1beta1.Document.Page.Layout\x12Z\n\x0e\x64\x65tected_break\x18\x02 \x01(\x0b\x32\x42.google.cloud.documentai.v1beta1.Document.Page.Token.DetectedBreak\x12[\n\x12\x64\x65tected_languages\x18\x03 \x03(\x0b\x32?.google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage\x1a\xab\x01\n\rDetectedBreak\x12U\n\x04type\x18\x01 
\x01(\x0e\x32G.google.cloud.documentai.v1beta1.Document.Page.Token.DetectedBreak.Type"C\n\x04Type\x12\x14\n\x10TYPE_UNSPECIFIED\x10\x00\x12\t\n\x05SPACE\x10\x01\x12\x0e\n\nWIDE_SPACE\x10\x02\x12\n\n\x06HYPHEN\x10\x03\x1a\xc1\x01\n\rVisualElement\x12\x45\n\x06layout\x18\x01 \x01(\x0b\x32\x35.google.cloud.documentai.v1beta1.Document.Page.Layout\x12\x0c\n\x04type\x18\x02 \x01(\t\x12[\n\x12\x64\x65tected_languages\x18\x03 \x03(\x0b\x32?.google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage\x1a\x82\x05\n\x05Table\x12\x45\n\x06layout\x18\x01 \x01(\x0b\x32\x35.google.cloud.documentai.v1beta1.Document.Page.Layout\x12R\n\x0bheader_rows\x18\x02 \x03(\x0b\x32=.google.cloud.documentai.v1beta1.Document.Page.Table.TableRow\x12P\n\tbody_rows\x18\x03 \x03(\x0b\x32=.google.cloud.documentai.v1beta1.Document.Page.Table.TableRow\x12[\n\x12\x64\x65tected_languages\x18\x04 \x03(\x0b\x32?.google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage\x1aY\n\x08TableRow\x12M\n\x05\x63\x65lls\x18\x01 \x03(\x0b\x32>.google.cloud.documentai.v1beta1.Document.Page.Table.TableCell\x1a\xd3\x01\n\tTableCell\x12\x45\n\x06layout\x18\x01 \x01(\x0b\x32\x35.google.cloud.documentai.v1beta1.Document.Page.Layout\x12\x10\n\x08row_span\x18\x02 \x01(\x05\x12\x10\n\x08\x63ol_span\x18\x03 \x01(\x05\x12[\n\x12\x64\x65tected_languages\x18\x04 \x03(\x0b\x32?.google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage\x1a\xe7\x02\n\tFormField\x12I\n\nfield_name\x18\x01 \x01(\x0b\x32\x35.google.cloud.documentai.v1beta1.Document.Page.Layout\x12J\n\x0b\x66ield_value\x18\x02 \x01(\x0b\x32\x35.google.cloud.documentai.v1beta1.Document.Page.Layout\x12`\n\x17name_detected_languages\x18\x03 \x03(\x0b\x32?.google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage\x12\x61\n\x18value_detected_languages\x18\x04 \x03(\x0b\x32?.google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage\x1a=\n\x10\x44\x65tectedLanguage\x12\x15\n\rlanguage_code\x18\x01 \x01(\t\x12\x12\n\nconfidence\x18\x02 
\x01(\x02\x1a\x8b\x01\n\x06\x45ntity\x12I\n\x0btext_anchor\x18\x01 \x01(\x0b\x32\x34.google.cloud.documentai.v1beta1.Document.TextAnchor\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x14\n\x0cmention_text\x18\x03 \x01(\t\x12\x12\n\nmention_id\x18\x04 \x01(\t\x1aI\n\x0e\x45ntityRelation\x12\x12\n\nsubject_id\x18\x01 \x01(\t\x12\x11\n\tobject_id\x18\x02 \x01(\t\x12\x10\n\x08relation\x18\x03 \x01(\t\x1a\x9c\x01\n\nTextAnchor\x12W\n\rtext_segments\x18\x01 \x03(\x0b\x32@.google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment\x1a\x35\n\x0bTextSegment\x12\x13\n\x0bstart_index\x18\x01 \x01(\x03\x12\x11\n\tend_index\x18\x02 \x01(\x03\x42\x08\n\x06sourceB\x81\x01\n#com.google.cloud.documentai.v1beta1B\rDocumentProtoP\x01ZIgoogle.golang.org/genproto/googleapis/cloud/documentai/v1beta1;documentaib\x06proto3' + ), + dependencies=[ + google_dot_api_dot_annotations__pb2.DESCRIPTOR, + google_dot_cloud_dot_documentai__v1beta1_dot_proto_dot_geometry__pb2.DESCRIPTOR, + google_dot_rpc_dot_status__pb2.DESCRIPTOR, + google_dot_type_dot_color__pb2.DESCRIPTOR, + ], +) + + +_DOCUMENT_PAGE_LAYOUT_ORIENTATION = _descriptor.EnumDescriptor( + name="Orientation", + full_name="google.cloud.documentai.v1beta1.Document.Page.Layout.Orientation", + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name="ORIENTATION_UNSPECIFIED", + index=0, + number=0, + serialized_options=None, + type=None, + ), + _descriptor.EnumValueDescriptor( + name="PAGE_UP", index=1, number=1, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="PAGE_RIGHT", index=2, number=2, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="PAGE_DOWN", index=3, number=3, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="PAGE_LEFT", index=4, number=4, serialized_options=None, type=None + ), + ], + containing_type=None, + serialized_options=None, + serialized_start=2236, + serialized_end=2337, +) 
+_sym_db.RegisterEnumDescriptor(_DOCUMENT_PAGE_LAYOUT_ORIENTATION) + +_DOCUMENT_PAGE_TOKEN_DETECTEDBREAK_TYPE = _descriptor.EnumDescriptor( + name="Type", + full_name="google.cloud.documentai.v1beta1.Document.Page.Token.DetectedBreak.Type", + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name="TYPE_UNSPECIFIED", + index=0, + number=0, + serialized_options=None, + type=None, + ), + _descriptor.EnumValueDescriptor( + name="SPACE", index=1, number=1, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="WIDE_SPACE", index=2, number=2, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="HYPHEN", index=3, number=3, serialized_options=None, type=None + ), + ], + containing_type=None, + serialized_options=None, + serialized_start=3235, + serialized_end=3302, +) +_sym_db.RegisterEnumDescriptor(_DOCUMENT_PAGE_TOKEN_DETECTEDBREAK_TYPE) + + +_DOCUMENT_SHARDINFO = _descriptor.Descriptor( + name="ShardInfo", + full_name="google.cloud.documentai.v1beta1.Document.ShardInfo", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="shard_index", + full_name="google.cloud.documentai.v1beta1.Document.ShardInfo.shard_index", + index=0, + number=1, + type=3, + cpp_type=2, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="shard_count", + full_name="google.cloud.documentai.v1beta1.Document.ShardInfo.shard_count", + index=1, + number=2, + type=3, + cpp_type=2, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="text_offset", + 
full_name="google.cloud.documentai.v1beta1.Document.ShardInfo.text_offset", + index=2, + number=3, + type=3, + cpp_type=2, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=696, + serialized_end=770, +) + +_DOCUMENT_STYLE_FONTSIZE = _descriptor.Descriptor( + name="FontSize", + full_name="google.cloud.documentai.v1beta1.Document.Style.FontSize", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="size", + full_name="google.cloud.documentai.v1beta1.Document.Style.FontSize.size", + index=0, + number=1, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="unit", + full_name="google.cloud.documentai.v1beta1.Document.Style.FontSize.unit", + index=1, + number=2, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=1081, + serialized_end=1119, +) + +_DOCUMENT_STYLE = _descriptor.Descriptor( + name="Style", + full_name="google.cloud.documentai.v1beta1.Document.Style", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + 
_descriptor.FieldDescriptor( + name="text_anchor", + full_name="google.cloud.documentai.v1beta1.Document.Style.text_anchor", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="color", + full_name="google.cloud.documentai.v1beta1.Document.Style.color", + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="background_color", + full_name="google.cloud.documentai.v1beta1.Document.Style.background_color", + index=2, + number=3, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="font_weight", + full_name="google.cloud.documentai.v1beta1.Document.Style.font_weight", + index=3, + number=4, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="text_style", + full_name="google.cloud.documentai.v1beta1.Document.Style.text_style", + index=4, + number=5, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + 
file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="text_decoration", + full_name="google.cloud.documentai.v1beta1.Document.Style.text_decoration", + index=5, + number=6, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="font_size", + full_name="google.cloud.documentai.v1beta1.Document.Style.font_size", + index=6, + number=7, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[_DOCUMENT_STYLE_FONTSIZE], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=773, + serialized_end=1119, +) + +_DOCUMENT_PAGE_DIMENSION = _descriptor.Descriptor( + name="Dimension", + full_name="google.cloud.documentai.v1beta1.Document.Page.Dimension", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="width", + full_name="google.cloud.documentai.v1beta1.Document.Page.Dimension.width", + index=0, + number=1, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="height", + full_name="google.cloud.documentai.v1beta1.Document.Page.Dimension.height", + index=1, + number=2, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + 
is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="unit", + full_name="google.cloud.documentai.v1beta1.Document.Page.Dimension.unit", + index=2, + number=3, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=1914, + serialized_end=1970, +) + +_DOCUMENT_PAGE_LAYOUT = _descriptor.Descriptor( + name="Layout", + full_name="google.cloud.documentai.v1beta1.Document.Page.Layout", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="text_anchor", + full_name="google.cloud.documentai.v1beta1.Document.Page.Layout.text_anchor", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="confidence", + full_name="google.cloud.documentai.v1beta1.Document.Page.Layout.confidence", + index=1, + number=2, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="bounding_poly", + full_name="google.cloud.documentai.v1beta1.Document.Page.Layout.bounding_poly", + index=2, + number=3, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + 
enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="orientation", + full_name="google.cloud.documentai.v1beta1.Document.Page.Layout.orientation", + index=3, + number=4, + type=14, + cpp_type=8, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[_DOCUMENT_PAGE_LAYOUT_ORIENTATION], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=1973, + serialized_end=2337, +) + +_DOCUMENT_PAGE_BLOCK = _descriptor.Descriptor( + name="Block", + full_name="google.cloud.documentai.v1beta1.Document.Page.Block", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="layout", + full_name="google.cloud.documentai.v1beta1.Document.Page.Block.layout", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="detected_languages", + full_name="google.cloud.documentai.v1beta1.Document.Page.Block.detected_languages", + index=1, + number=2, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=2340, + 
serialized_end=2511, +) + +_DOCUMENT_PAGE_PARAGRAPH = _descriptor.Descriptor( + name="Paragraph", + full_name="google.cloud.documentai.v1beta1.Document.Page.Paragraph", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="layout", + full_name="google.cloud.documentai.v1beta1.Document.Page.Paragraph.layout", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="detected_languages", + full_name="google.cloud.documentai.v1beta1.Document.Page.Paragraph.detected_languages", + index=1, + number=2, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=2514, + serialized_end=2689, +) + +_DOCUMENT_PAGE_LINE = _descriptor.Descriptor( + name="Line", + full_name="google.cloud.documentai.v1beta1.Document.Page.Line", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="layout", + full_name="google.cloud.documentai.v1beta1.Document.Page.Line.layout", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="detected_languages", + 
full_name="google.cloud.documentai.v1beta1.Document.Page.Line.detected_languages", + index=1, + number=2, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=2692, + serialized_end=2862, +) + +_DOCUMENT_PAGE_TOKEN_DETECTEDBREAK = _descriptor.Descriptor( + name="DetectedBreak", + full_name="google.cloud.documentai.v1beta1.Document.Page.Token.DetectedBreak", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="type", + full_name="google.cloud.documentai.v1beta1.Document.Page.Token.DetectedBreak.type", + index=0, + number=1, + type=14, + cpp_type=8, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ) + ], + extensions=[], + nested_types=[], + enum_types=[_DOCUMENT_PAGE_TOKEN_DETECTEDBREAK_TYPE], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=3131, + serialized_end=3302, +) + +_DOCUMENT_PAGE_TOKEN = _descriptor.Descriptor( + name="Token", + full_name="google.cloud.documentai.v1beta1.Document.Page.Token", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="layout", + full_name="google.cloud.documentai.v1beta1.Document.Page.Token.layout", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + 
serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="detected_break", + full_name="google.cloud.documentai.v1beta1.Document.Page.Token.detected_break", + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="detected_languages", + full_name="google.cloud.documentai.v1beta1.Document.Page.Token.detected_languages", + index=2, + number=3, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[_DOCUMENT_PAGE_TOKEN_DETECTEDBREAK], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=2865, + serialized_end=3302, +) + +_DOCUMENT_PAGE_VISUALELEMENT = _descriptor.Descriptor( + name="VisualElement", + full_name="google.cloud.documentai.v1beta1.Document.Page.VisualElement", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="layout", + full_name="google.cloud.documentai.v1beta1.Document.Page.VisualElement.layout", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="type", + full_name="google.cloud.documentai.v1beta1.Document.Page.VisualElement.type", + index=1, + number=2, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + 
message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="detected_languages", + full_name="google.cloud.documentai.v1beta1.Document.Page.VisualElement.detected_languages", + index=2, + number=3, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=3305, + serialized_end=3498, +) + +_DOCUMENT_PAGE_TABLE_TABLEROW = _descriptor.Descriptor( + name="TableRow", + full_name="google.cloud.documentai.v1beta1.Document.Page.Table.TableRow", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="cells", + full_name="google.cloud.documentai.v1beta1.Document.Page.Table.TableRow.cells", + index=0, + number=1, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ) + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=3840, + serialized_end=3929, +) + +_DOCUMENT_PAGE_TABLE_TABLECELL = _descriptor.Descriptor( + name="TableCell", + full_name="google.cloud.documentai.v1beta1.Document.Page.Table.TableCell", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="layout", + full_name="google.cloud.documentai.v1beta1.Document.Page.Table.TableCell.layout", + index=0, + 
number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="row_span", + full_name="google.cloud.documentai.v1beta1.Document.Page.Table.TableCell.row_span", + index=1, + number=2, + type=5, + cpp_type=1, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="col_span", + full_name="google.cloud.documentai.v1beta1.Document.Page.Table.TableCell.col_span", + index=2, + number=3, + type=5, + cpp_type=1, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="detected_languages", + full_name="google.cloud.documentai.v1beta1.Document.Page.Table.TableCell.detected_languages", + index=3, + number=4, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=3932, + serialized_end=4143, +) + +_DOCUMENT_PAGE_TABLE = _descriptor.Descriptor( + name="Table", + full_name="google.cloud.documentai.v1beta1.Document.Page.Table", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="layout", + 
full_name="google.cloud.documentai.v1beta1.Document.Page.Table.layout", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="header_rows", + full_name="google.cloud.documentai.v1beta1.Document.Page.Table.header_rows", + index=1, + number=2, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="body_rows", + full_name="google.cloud.documentai.v1beta1.Document.Page.Table.body_rows", + index=2, + number=3, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="detected_languages", + full_name="google.cloud.documentai.v1beta1.Document.Page.Table.detected_languages", + index=3, + number=4, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[_DOCUMENT_PAGE_TABLE_TABLEROW, _DOCUMENT_PAGE_TABLE_TABLECELL], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=3501, + serialized_end=4143, +) + +_DOCUMENT_PAGE_FORMFIELD = _descriptor.Descriptor( + name="FormField", + full_name="google.cloud.documentai.v1beta1.Document.Page.FormField", + filename=None, + 
file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="field_name", + full_name="google.cloud.documentai.v1beta1.Document.Page.FormField.field_name", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="field_value", + full_name="google.cloud.documentai.v1beta1.Document.Page.FormField.field_value", + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="name_detected_languages", + full_name="google.cloud.documentai.v1beta1.Document.Page.FormField.name_detected_languages", + index=2, + number=3, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="value_detected_languages", + full_name="google.cloud.documentai.v1beta1.Document.Page.FormField.value_detected_languages", + index=3, + number=4, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=4146, + serialized_end=4505, +) + +_DOCUMENT_PAGE_DETECTEDLANGUAGE = _descriptor.Descriptor( + 
name="DetectedLanguage", + full_name="google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="language_code", + full_name="google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage.language_code", + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="confidence", + full_name="google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage.confidence", + index=1, + number=2, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=4507, + serialized_end=4568, +) + +_DOCUMENT_PAGE = _descriptor.Descriptor( + name="Page", + full_name="google.cloud.documentai.v1beta1.Document.Page", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="page_number", + full_name="google.cloud.documentai.v1beta1.Document.Page.page_number", + index=0, + number=1, + type=5, + cpp_type=1, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="dimension", + full_name="google.cloud.documentai.v1beta1.Document.Page.dimension", + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + 
has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="layout", + full_name="google.cloud.documentai.v1beta1.Document.Page.layout", + index=2, + number=3, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="detected_languages", + full_name="google.cloud.documentai.v1beta1.Document.Page.detected_languages", + index=3, + number=4, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="blocks", + full_name="google.cloud.documentai.v1beta1.Document.Page.blocks", + index=4, + number=5, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="paragraphs", + full_name="google.cloud.documentai.v1beta1.Document.Page.paragraphs", + index=5, + number=6, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="lines", + full_name="google.cloud.documentai.v1beta1.Document.Page.lines", + index=6, + number=7, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + 
message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="tokens", + full_name="google.cloud.documentai.v1beta1.Document.Page.tokens", + index=7, + number=8, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="visual_elements", + full_name="google.cloud.documentai.v1beta1.Document.Page.visual_elements", + index=8, + number=9, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="tables", + full_name="google.cloud.documentai.v1beta1.Document.Page.tables", + index=9, + number=10, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="form_fields", + full_name="google.cloud.documentai.v1beta1.Document.Page.form_fields", + index=10, + number=11, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[ + _DOCUMENT_PAGE_DIMENSION, + _DOCUMENT_PAGE_LAYOUT, + _DOCUMENT_PAGE_BLOCK, + _DOCUMENT_PAGE_PARAGRAPH, + _DOCUMENT_PAGE_LINE, + _DOCUMENT_PAGE_TOKEN, + _DOCUMENT_PAGE_VISUALELEMENT, + _DOCUMENT_PAGE_TABLE, + _DOCUMENT_PAGE_FORMFIELD, + 
_DOCUMENT_PAGE_DETECTEDLANGUAGE, + ], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=1122, + serialized_end=4568, +) + +_DOCUMENT_ENTITY = _descriptor.Descriptor( + name="Entity", + full_name="google.cloud.documentai.v1beta1.Document.Entity", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="text_anchor", + full_name="google.cloud.documentai.v1beta1.Document.Entity.text_anchor", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="type", + full_name="google.cloud.documentai.v1beta1.Document.Entity.type", + index=1, + number=2, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="mention_text", + full_name="google.cloud.documentai.v1beta1.Document.Entity.mention_text", + index=2, + number=3, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="mention_id", + full_name="google.cloud.documentai.v1beta1.Document.Entity.mention_id", + index=3, + number=4, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + 
serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=4571, + serialized_end=4710, +) + +_DOCUMENT_ENTITYRELATION = _descriptor.Descriptor( + name="EntityRelation", + full_name="google.cloud.documentai.v1beta1.Document.EntityRelation", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="subject_id", + full_name="google.cloud.documentai.v1beta1.Document.EntityRelation.subject_id", + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="object_id", + full_name="google.cloud.documentai.v1beta1.Document.EntityRelation.object_id", + index=1, + number=2, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="relation", + full_name="google.cloud.documentai.v1beta1.Document.EntityRelation.relation", + index=2, + number=3, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=4712, + serialized_end=4785, +) + +_DOCUMENT_TEXTANCHOR_TEXTSEGMENT = 
_descriptor.Descriptor( + name="TextSegment", + full_name="google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="start_index", + full_name="google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment.start_index", + index=0, + number=1, + type=3, + cpp_type=2, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="end_index", + full_name="google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment.end_index", + index=1, + number=2, + type=3, + cpp_type=2, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=4891, + serialized_end=4944, +) + +_DOCUMENT_TEXTANCHOR = _descriptor.Descriptor( + name="TextAnchor", + full_name="google.cloud.documentai.v1beta1.Document.TextAnchor", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="text_segments", + full_name="google.cloud.documentai.v1beta1.Document.TextAnchor.text_segments", + index=0, + number=1, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ) + ], + extensions=[], + nested_types=[_DOCUMENT_TEXTANCHOR_TEXTSEGMENT], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + 
extension_ranges=[], + oneofs=[], + serialized_start=4788, + serialized_end=4944, +) + +_DOCUMENT = _descriptor.Descriptor( + name="Document", + full_name="google.cloud.documentai.v1beta1.Document", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="uri", + full_name="google.cloud.documentai.v1beta1.Document.uri", + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="content", + full_name="google.cloud.documentai.v1beta1.Document.content", + index=1, + number=2, + type=12, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b(""), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="mime_type", + full_name="google.cloud.documentai.v1beta1.Document.mime_type", + index=2, + number=3, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="text", + full_name="google.cloud.documentai.v1beta1.Document.text", + index=3, + number=4, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="text_styles", + full_name="google.cloud.documentai.v1beta1.Document.text_styles", + index=4, + number=5, + type=11, + 
cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="pages", + full_name="google.cloud.documentai.v1beta1.Document.pages", + index=5, + number=6, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="entities", + full_name="google.cloud.documentai.v1beta1.Document.entities", + index=6, + number=7, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="entity_relations", + full_name="google.cloud.documentai.v1beta1.Document.entity_relations", + index=7, + number=8, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="shard_info", + full_name="google.cloud.documentai.v1beta1.Document.shard_info", + index=8, + number=9, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="error", + full_name="google.cloud.documentai.v1beta1.Document.error", + index=9, + number=10, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + 
message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[ + _DOCUMENT_SHARDINFO, + _DOCUMENT_STYLE, + _DOCUMENT_PAGE, + _DOCUMENT_ENTITY, + _DOCUMENT_ENTITYRELATION, + _DOCUMENT_TEXTANCHOR, + ], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[ + _descriptor.OneofDescriptor( + name="source", + full_name="google.cloud.documentai.v1beta1.Document.source", + index=0, + containing_type=None, + fields=[], + ) + ], + serialized_start=224, + serialized_end=4954, +) + +_DOCUMENT_SHARDINFO.containing_type = _DOCUMENT +_DOCUMENT_STYLE_FONTSIZE.containing_type = _DOCUMENT_STYLE +_DOCUMENT_STYLE.fields_by_name["text_anchor"].message_type = _DOCUMENT_TEXTANCHOR +_DOCUMENT_STYLE.fields_by_name[ + "color" +].message_type = google_dot_type_dot_color__pb2._COLOR +_DOCUMENT_STYLE.fields_by_name[ + "background_color" +].message_type = google_dot_type_dot_color__pb2._COLOR +_DOCUMENT_STYLE.fields_by_name["font_size"].message_type = _DOCUMENT_STYLE_FONTSIZE +_DOCUMENT_STYLE.containing_type = _DOCUMENT +_DOCUMENT_PAGE_DIMENSION.containing_type = _DOCUMENT_PAGE +_DOCUMENT_PAGE_LAYOUT.fields_by_name["text_anchor"].message_type = _DOCUMENT_TEXTANCHOR +_DOCUMENT_PAGE_LAYOUT.fields_by_name[ + "bounding_poly" +].message_type = ( + google_dot_cloud_dot_documentai__v1beta1_dot_proto_dot_geometry__pb2._BOUNDINGPOLY +) +_DOCUMENT_PAGE_LAYOUT.fields_by_name[ + "orientation" +].enum_type = _DOCUMENT_PAGE_LAYOUT_ORIENTATION +_DOCUMENT_PAGE_LAYOUT.containing_type = _DOCUMENT_PAGE +_DOCUMENT_PAGE_LAYOUT_ORIENTATION.containing_type = _DOCUMENT_PAGE_LAYOUT +_DOCUMENT_PAGE_BLOCK.fields_by_name["layout"].message_type = _DOCUMENT_PAGE_LAYOUT +_DOCUMENT_PAGE_BLOCK.fields_by_name[ + "detected_languages" +].message_type = _DOCUMENT_PAGE_DETECTEDLANGUAGE +_DOCUMENT_PAGE_BLOCK.containing_type = 
_DOCUMENT_PAGE +_DOCUMENT_PAGE_PARAGRAPH.fields_by_name["layout"].message_type = _DOCUMENT_PAGE_LAYOUT +_DOCUMENT_PAGE_PARAGRAPH.fields_by_name[ + "detected_languages" +].message_type = _DOCUMENT_PAGE_DETECTEDLANGUAGE +_DOCUMENT_PAGE_PARAGRAPH.containing_type = _DOCUMENT_PAGE +_DOCUMENT_PAGE_LINE.fields_by_name["layout"].message_type = _DOCUMENT_PAGE_LAYOUT +_DOCUMENT_PAGE_LINE.fields_by_name[ + "detected_languages" +].message_type = _DOCUMENT_PAGE_DETECTEDLANGUAGE +_DOCUMENT_PAGE_LINE.containing_type = _DOCUMENT_PAGE +_DOCUMENT_PAGE_TOKEN_DETECTEDBREAK.fields_by_name[ + "type" +].enum_type = _DOCUMENT_PAGE_TOKEN_DETECTEDBREAK_TYPE +_DOCUMENT_PAGE_TOKEN_DETECTEDBREAK.containing_type = _DOCUMENT_PAGE_TOKEN +_DOCUMENT_PAGE_TOKEN_DETECTEDBREAK_TYPE.containing_type = ( + _DOCUMENT_PAGE_TOKEN_DETECTEDBREAK +) +_DOCUMENT_PAGE_TOKEN.fields_by_name["layout"].message_type = _DOCUMENT_PAGE_LAYOUT +_DOCUMENT_PAGE_TOKEN.fields_by_name[ + "detected_break" +].message_type = _DOCUMENT_PAGE_TOKEN_DETECTEDBREAK +_DOCUMENT_PAGE_TOKEN.fields_by_name[ + "detected_languages" +].message_type = _DOCUMENT_PAGE_DETECTEDLANGUAGE +_DOCUMENT_PAGE_TOKEN.containing_type = _DOCUMENT_PAGE +_DOCUMENT_PAGE_VISUALELEMENT.fields_by_name[ + "layout" +].message_type = _DOCUMENT_PAGE_LAYOUT +_DOCUMENT_PAGE_VISUALELEMENT.fields_by_name[ + "detected_languages" +].message_type = _DOCUMENT_PAGE_DETECTEDLANGUAGE +_DOCUMENT_PAGE_VISUALELEMENT.containing_type = _DOCUMENT_PAGE +_DOCUMENT_PAGE_TABLE_TABLEROW.fields_by_name[ + "cells" +].message_type = _DOCUMENT_PAGE_TABLE_TABLECELL +_DOCUMENT_PAGE_TABLE_TABLEROW.containing_type = _DOCUMENT_PAGE_TABLE +_DOCUMENT_PAGE_TABLE_TABLECELL.fields_by_name[ + "layout" +].message_type = _DOCUMENT_PAGE_LAYOUT +_DOCUMENT_PAGE_TABLE_TABLECELL.fields_by_name[ + "detected_languages" +].message_type = _DOCUMENT_PAGE_DETECTEDLANGUAGE +_DOCUMENT_PAGE_TABLE_TABLECELL.containing_type = _DOCUMENT_PAGE_TABLE +_DOCUMENT_PAGE_TABLE.fields_by_name["layout"].message_type = 
_DOCUMENT_PAGE_LAYOUT +_DOCUMENT_PAGE_TABLE.fields_by_name[ + "header_rows" +].message_type = _DOCUMENT_PAGE_TABLE_TABLEROW +_DOCUMENT_PAGE_TABLE.fields_by_name[ + "body_rows" +].message_type = _DOCUMENT_PAGE_TABLE_TABLEROW +_DOCUMENT_PAGE_TABLE.fields_by_name[ + "detected_languages" +].message_type = _DOCUMENT_PAGE_DETECTEDLANGUAGE +_DOCUMENT_PAGE_TABLE.containing_type = _DOCUMENT_PAGE +_DOCUMENT_PAGE_FORMFIELD.fields_by_name[ + "field_name" +].message_type = _DOCUMENT_PAGE_LAYOUT +_DOCUMENT_PAGE_FORMFIELD.fields_by_name[ + "field_value" +].message_type = _DOCUMENT_PAGE_LAYOUT +_DOCUMENT_PAGE_FORMFIELD.fields_by_name[ + "name_detected_languages" +].message_type = _DOCUMENT_PAGE_DETECTEDLANGUAGE +_DOCUMENT_PAGE_FORMFIELD.fields_by_name[ + "value_detected_languages" +].message_type = _DOCUMENT_PAGE_DETECTEDLANGUAGE +_DOCUMENT_PAGE_FORMFIELD.containing_type = _DOCUMENT_PAGE +_DOCUMENT_PAGE_DETECTEDLANGUAGE.containing_type = _DOCUMENT_PAGE +_DOCUMENT_PAGE.fields_by_name["dimension"].message_type = _DOCUMENT_PAGE_DIMENSION +_DOCUMENT_PAGE.fields_by_name["layout"].message_type = _DOCUMENT_PAGE_LAYOUT +_DOCUMENT_PAGE.fields_by_name[ + "detected_languages" +].message_type = _DOCUMENT_PAGE_DETECTEDLANGUAGE +_DOCUMENT_PAGE.fields_by_name["blocks"].message_type = _DOCUMENT_PAGE_BLOCK +_DOCUMENT_PAGE.fields_by_name["paragraphs"].message_type = _DOCUMENT_PAGE_PARAGRAPH +_DOCUMENT_PAGE.fields_by_name["lines"].message_type = _DOCUMENT_PAGE_LINE +_DOCUMENT_PAGE.fields_by_name["tokens"].message_type = _DOCUMENT_PAGE_TOKEN +_DOCUMENT_PAGE.fields_by_name[ + "visual_elements" +].message_type = _DOCUMENT_PAGE_VISUALELEMENT +_DOCUMENT_PAGE.fields_by_name["tables"].message_type = _DOCUMENT_PAGE_TABLE +_DOCUMENT_PAGE.fields_by_name["form_fields"].message_type = _DOCUMENT_PAGE_FORMFIELD +_DOCUMENT_PAGE.containing_type = _DOCUMENT +_DOCUMENT_ENTITY.fields_by_name["text_anchor"].message_type = _DOCUMENT_TEXTANCHOR +_DOCUMENT_ENTITY.containing_type = _DOCUMENT 
+_DOCUMENT_ENTITYRELATION.containing_type = _DOCUMENT +_DOCUMENT_TEXTANCHOR_TEXTSEGMENT.containing_type = _DOCUMENT_TEXTANCHOR +_DOCUMENT_TEXTANCHOR.fields_by_name[ + "text_segments" +].message_type = _DOCUMENT_TEXTANCHOR_TEXTSEGMENT +_DOCUMENT_TEXTANCHOR.containing_type = _DOCUMENT +_DOCUMENT.fields_by_name["text_styles"].message_type = _DOCUMENT_STYLE +_DOCUMENT.fields_by_name["pages"].message_type = _DOCUMENT_PAGE +_DOCUMENT.fields_by_name["entities"].message_type = _DOCUMENT_ENTITY +_DOCUMENT.fields_by_name["entity_relations"].message_type = _DOCUMENT_ENTITYRELATION +_DOCUMENT.fields_by_name["shard_info"].message_type = _DOCUMENT_SHARDINFO +_DOCUMENT.fields_by_name["error"].message_type = google_dot_rpc_dot_status__pb2._STATUS +_DOCUMENT.oneofs_by_name["source"].fields.append(_DOCUMENT.fields_by_name["uri"]) +_DOCUMENT.fields_by_name["uri"].containing_oneof = _DOCUMENT.oneofs_by_name["source"] +_DOCUMENT.oneofs_by_name["source"].fields.append(_DOCUMENT.fields_by_name["content"]) +_DOCUMENT.fields_by_name["content"].containing_oneof = _DOCUMENT.oneofs_by_name[ + "source" +] +DESCRIPTOR.message_types_by_name["Document"] = _DOCUMENT +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +Document = _reflection.GeneratedProtocolMessageType( + "Document", + (_message.Message,), + dict( + ShardInfo=_reflection.GeneratedProtocolMessageType( + "ShardInfo", + (_message.Message,), + dict( + DESCRIPTOR=_DOCUMENT_SHARDINFO, + __module__="google.cloud.documentai_v1beta1.proto.document_pb2", + __doc__="""For a large document, sharding may be performed to produce several + document shards. Each document shard contains this field to detail which + shard it is. + + + Attributes: + shard_index: + The 0-based index of this shard. + shard_count: + Total number of shards. + text_offset: + The index of the first character in + [Document.text][google.cloud.documentai.v1beta1.Document.text] + in the overall document global text. 
+ """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.ShardInfo) + ), + ), + Style=_reflection.GeneratedProtocolMessageType( + "Style", + (_message.Message,), + dict( + FontSize=_reflection.GeneratedProtocolMessageType( + "FontSize", + (_message.Message,), + dict( + DESCRIPTOR=_DOCUMENT_STYLE_FONTSIZE, + __module__="google.cloud.documentai_v1beta1.proto.document_pb2", + __doc__="""Font size with unit. + + + Attributes: + size: + Font size for the text. + unit: + Unit for the font size. Follows CSS naming (in, px, pt, etc.). + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Style.FontSize) + ), + ), + DESCRIPTOR=_DOCUMENT_STYLE, + __module__="google.cloud.documentai_v1beta1.proto.document_pb2", + __doc__="""Annotation for common text style attributes. This adheres to CSS + conventions as much as possible. + + + Attributes: + text_anchor: + Text anchor indexing into the [Document.text][google.cloud.doc + umentai.v1beta1.Document.text]. + color: + Text color. + background_color: + Text background color. + font_weight: + Font weight. Possible values are normal, bold, bolder, and + lighter. https://www.w3schools.com/cssref/pr\_font\_weight.asp + text_style: + Text style. Possible values are normal, italic, and oblique. + https://www.w3schools.com/cssref/pr\_font\_font-style.asp + text_decoration: + Text decoration. Follows CSS standard. + https://www.w3schools.com/cssref/pr\_text\_text-decoration.asp + font_size: + Font size. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Style) + ), + ), + Page=_reflection.GeneratedProtocolMessageType( + "Page", + (_message.Message,), + dict( + Dimension=_reflection.GeneratedProtocolMessageType( + "Dimension", + (_message.Message,), + dict( + DESCRIPTOR=_DOCUMENT_PAGE_DIMENSION, + __module__="google.cloud.documentai_v1beta1.proto.document_pb2", + __doc__="""Dimension for the page. + + + Attributes: + width: + Page width. 
+ height: + Page height. + unit: + Dimension unit. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.Dimension) + ), + ), + Layout=_reflection.GeneratedProtocolMessageType( + "Layout", + (_message.Message,), + dict( + DESCRIPTOR=_DOCUMENT_PAGE_LAYOUT, + __module__="google.cloud.documentai_v1beta1.proto.document_pb2", + __doc__="""Visual element describing a layout unit on a page. + + + Attributes: + text_anchor: + Text anchor indexing into the [Document.text][google.cloud.doc + umentai.v1beta1.Document.text]. + confidence: + Confidence of the current + [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] + within context of the object this layout is for. e.g. + confidence can be for a single token, a table, a visual + element, etc. depending on context. Range [0, 1]. + bounding_poly: + The bounding polygon for the [Layout][google.cloud.documentai. + v1beta1.Document.Page.Layout]. + orientation: + Detected orientation for the [Layout][google.cloud.documentai. + v1beta1.Document.Page.Layout]. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.Layout) + ), + ), + Block=_reflection.GeneratedProtocolMessageType( + "Block", + (_message.Message,), + dict( + DESCRIPTOR=_DOCUMENT_PAGE_BLOCK, + __module__="google.cloud.documentai_v1beta1.proto.document_pb2", + __doc__="""A block has a set of lines (collected into paragraphs) that have a + common line-spacing and orientation. + + + Attributes: + layout: + [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] + for + [Block][google.cloud.documentai.v1beta1.Document.Page.Block]. + detected_languages: + A list of detected languages together with confidence. 
+ """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.Block) + ), + ), + Paragraph=_reflection.GeneratedProtocolMessageType( + "Paragraph", + (_message.Message,), + dict( + DESCRIPTOR=_DOCUMENT_PAGE_PARAGRAPH, + __module__="google.cloud.documentai_v1beta1.proto.document_pb2", + __doc__="""A collection of lines that a human would perceive as a paragraph. + + + Attributes: + layout: + [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] + for [Paragraph][google.cloud.documentai.v1beta1.Document.Page. + Paragraph]. + detected_languages: + A list of detected languages together with confidence. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.Paragraph) + ), + ), + Line=_reflection.GeneratedProtocolMessageType( + "Line", + (_message.Message,), + dict( + DESCRIPTOR=_DOCUMENT_PAGE_LINE, + __module__="google.cloud.documentai_v1beta1.proto.document_pb2", + __doc__="""A collection of tokens that a human would perceive as a line. Does not + cross column boundaries, can be horizontal, vertical, etc. + + + Attributes: + layout: + [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] + for + [Line][google.cloud.documentai.v1beta1.Document.Page.Line]. + detected_languages: + A list of detected languages together with confidence. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.Line) + ), + ), + Token=_reflection.GeneratedProtocolMessageType( + "Token", + (_message.Message,), + dict( + DetectedBreak=_reflection.GeneratedProtocolMessageType( + "DetectedBreak", + (_message.Message,), + dict( + DESCRIPTOR=_DOCUMENT_PAGE_TOKEN_DETECTEDBREAK, + __module__="google.cloud.documentai_v1beta1.proto.document_pb2", + __doc__="""Detected break at the end of a + [Token][google.cloud.documentai.v1beta1.Document.Page.Token]. + + + Attributes: + type: + Detected break type. 
+ """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.Token.DetectedBreak) + ), + ), + DESCRIPTOR=_DOCUMENT_PAGE_TOKEN, + __module__="google.cloud.documentai_v1beta1.proto.document_pb2", + __doc__="""A detected token. + + + Attributes: + layout: + [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] + for + [Token][google.cloud.documentai.v1beta1.Document.Page.Token]. + detected_break: + Detected break at the end of a + [Token][google.cloud.documentai.v1beta1.Document.Page.Token]. + detected_languages: + A list of detected languages together with confidence. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.Token) + ), + ), + VisualElement=_reflection.GeneratedProtocolMessageType( + "VisualElement", + (_message.Message,), + dict( + DESCRIPTOR=_DOCUMENT_PAGE_VISUALELEMENT, + __module__="google.cloud.documentai_v1beta1.proto.document_pb2", + __doc__="""Detected non-text visual elements e.g. checkbox, signature etc. on the + page. + + + Attributes: + layout: + [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] + for + [Token][google.cloud.documentai.v1beta1.Document.Page.Token]. + type: + Type of the [VisualElement][google.cloud.documentai.v1beta1.Do + cument.Page.VisualElement]. + detected_languages: + A list of detected languages together with confidence. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.VisualElement) + ), + ), + Table=_reflection.GeneratedProtocolMessageType( + "Table", + (_message.Message,), + dict( + TableRow=_reflection.GeneratedProtocolMessageType( + "TableRow", + (_message.Message,), + dict( + DESCRIPTOR=_DOCUMENT_PAGE_TABLE_TABLEROW, + __module__="google.cloud.documentai_v1beta1.proto.document_pb2", + __doc__="""A row of table cells. + + + Attributes: + cells: + Cells that make up this row. 
+ """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.Table.TableRow) + ), + ), + TableCell=_reflection.GeneratedProtocolMessageType( + "TableCell", + (_message.Message,), + dict( + DESCRIPTOR=_DOCUMENT_PAGE_TABLE_TABLECELL, + __module__="google.cloud.documentai_v1beta1.proto.document_pb2", + __doc__="""A cell representation inside the table. + + + Attributes: + layout: + [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] + for [TableCell][google.cloud.documentai.v1beta1.Document.Page. + Table.TableCell]. + row_span: + How many rows this cell spans. + col_span: + How many columns this cell spans. + detected_languages: + A list of detected languages together with confidence. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.Table.TableCell) + ), + ), + DESCRIPTOR=_DOCUMENT_PAGE_TABLE, + __module__="google.cloud.documentai_v1beta1.proto.document_pb2", + __doc__="""A table representation similar to HTML table structure. + + + Attributes: + layout: + [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] + for + [Table][google.cloud.documentai.v1beta1.Document.Page.Table]. + header_rows: + Header rows of the table. + body_rows: + Body rows of the table. + detected_languages: + A list of detected languages together with confidence. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.Table) + ), + ), + FormField=_reflection.GeneratedProtocolMessageType( + "FormField", + (_message.Message,), + dict( + DESCRIPTOR=_DOCUMENT_PAGE_FORMFIELD, + __module__="google.cloud.documentai_v1beta1.proto.document_pb2", + __doc__="""A form field detected on the page. + + + Attributes: + field_name: + [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] + for the [FormField][google.cloud.documentai.v1beta1.Document.P + age.FormField] name. e.g. ``Address``, ``Email``, ``Grand + total``, ``Phone number``, etc. 
+ field_value: + [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] + for the [FormField][google.cloud.documentai.v1beta1.Document.P + age.FormField] value. + name_detected_languages: + A list of detected languages for name together with + confidence. + value_detected_languages: + A list of detected languages for value together with + confidence. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.FormField) + ), + ), + DetectedLanguage=_reflection.GeneratedProtocolMessageType( + "DetectedLanguage", + (_message.Message,), + dict( + DESCRIPTOR=_DOCUMENT_PAGE_DETECTEDLANGUAGE, + __module__="google.cloud.documentai_v1beta1.proto.document_pb2", + __doc__="""Detected language for a structural component. + + + Attributes: + language_code: + The BCP-47 language code, such as "en-US" or "sr-Latn". For + more information, see http://www.unicode.org/reports/tr35/#Uni + code\_locale\_identifier. + confidence: + Confidence of detected language. Range [0, 1]. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page.DetectedLanguage) + ), + ), + DESCRIPTOR=_DOCUMENT_PAGE, + __module__="google.cloud.documentai_v1beta1.proto.document_pb2", + __doc__="""A page in a [Document][google.cloud.documentai.v1beta1.Document]. + + + Attributes: + page_number: + 1-based index for current + [Page][google.cloud.documentai.v1beta1.Document.Page] in a + parent [Document][google.cloud.documentai.v1beta1.Document]. + Useful when a page is taken out of a + [Document][google.cloud.documentai.v1beta1.Document] for + individual processing. + dimension: + Physical dimension of the page. + layout: + [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] + for the page. + detected_languages: + A list of detected languages together with confidence. + blocks: + A list of visually detected text blocks on the page. 
A block + has a set of lines (collected into paragraphs) that have a + common line-spacing and orientation. + paragraphs: + A list of visually detected text paragraphs on the page. A + collection of lines that a human would perceive as a + paragraph. + lines: + A list of visually detected text lines on the page. A + collection of tokens that a human would perceive as a line. + tokens: + A list of visually detected tokens on the page. + visual_elements: + A list of detected non-text visual elements e.g. checkbox, + signature etc. on the page. + tables: + A list of visually detected tables on the page. + form_fields: + A list of visually detected form fields on the page. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Page) + ), + ), + Entity=_reflection.GeneratedProtocolMessageType( + "Entity", + (_message.Message,), + dict( + DESCRIPTOR=_DOCUMENT_ENTITY, + __module__="google.cloud.documentai_v1beta1.proto.document_pb2", + __doc__="""A phrase in the text that is a known entity type, such as a person, an + organization, or location. + + + Attributes: + text_anchor: + Provenance of the entity. Text anchor indexing into the [Docum + ent.text][google.cloud.documentai.v1beta1.Document.text]. + type: + Entity type from a schema e.g. ``Address``. + mention_text: + Text value in the document e.g. ``1600 Amphitheatre Pkwy``. + mention_id: + Canonical mention name. This will be a unique value in the + entity list for this document. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.Entity) + ), + ), + EntityRelation=_reflection.GeneratedProtocolMessageType( + "EntityRelation", + (_message.Message,), + dict( + DESCRIPTOR=_DOCUMENT_ENTITYRELATION, + __module__="google.cloud.documentai_v1beta1.proto.document_pb2", + __doc__="""Relationship between + [Entities][google.cloud.documentai.v1beta1.Document.Entity]. + + + Attributes: + subject_id: + Subject entity mention\_id. 
+ object_id: + Object entity mention\_id. + relation: + Relationship description. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.EntityRelation) + ), + ), + TextAnchor=_reflection.GeneratedProtocolMessageType( + "TextAnchor", + (_message.Message,), + dict( + TextSegment=_reflection.GeneratedProtocolMessageType( + "TextSegment", + (_message.Message,), + dict( + DESCRIPTOR=_DOCUMENT_TEXTANCHOR_TEXTSEGMENT, + __module__="google.cloud.documentai_v1beta1.proto.document_pb2", + __doc__="""A text segment in the + [Document.text][google.cloud.documentai.v1beta1.Document.text]. The + indices may be out of bounds which indicate that the text extends into + another document shard for large sharded documents. See + [ShardInfo.text\_offset][google.cloud.documentai.v1beta1.Document.ShardInfo.text\_offset] + + + Attributes: + start_index: + [TextSegment][google.cloud.documentai.v1beta1.Document.TextAnc + hor.TextSegment] start UTF-8 char index in the [Document.text] + [google.cloud.documentai.v1beta1.Document.text]. + end_index: + [TextSegment][google.cloud.documentai.v1beta1.Document.TextAnc + hor.TextSegment] half open end UTF-8 char index in the [Docume + nt.text][google.cloud.documentai.v1beta1.Document.text]. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment) + ), + ), + DESCRIPTOR=_DOCUMENT_TEXTANCHOR, + __module__="google.cloud.documentai_v1beta1.proto.document_pb2", + __doc__="""Text reference indexing into the + [Document.text][google.cloud.documentai.v1beta1.Document.text]. + + + Attributes: + text_segments: + The text segments from the [Document.text][google.cloud.docume + ntai.v1beta1.Document.text]. 
+ """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document.TextAnchor) + ), + ), + DESCRIPTOR=_DOCUMENT, + __module__="google.cloud.documentai_v1beta1.proto.document_pb2", + __doc__="""Document represents the canonical document resource in Document + Understanding AI. It is an interchange format that provides insights + into documents and allows for collaboration between users and Document + Understanding AI to iterate and optimize for quality. + + + Attributes: + source: + Original source document from the user. + uri: + Currently supports Google Cloud Storage URI of the form + ``gs://bucket_name/object_name``. Object versioning is not + supported. See `Google Cloud Storage Request URIs + `__ for + more info. + content: + Inline document content, represented as a stream of bytes. + Note: As with all ``bytes`` fields, protobuffers use a pure + binary representation, whereas JSON representations use + base64. + mime_type: + An IANA published MIME type (also referred to as media type). + For more information, see + https://www.iana.org/assignments/media-types/media- + types.xhtml. + text: + UTF-8 encoded text in reading order from the document. + text_styles: + Styles for the [Document.text][google.cloud.documentai.v1beta1 + .Document.text]. + pages: + Visual page layout for the + [Document][google.cloud.documentai.v1beta1.Document]. + entities: + A list of entities detected on [Document.text][google.cloud.do + cumentai.v1beta1.Document.text]. For document shards, entities + in this list may cross shard boundaries. + entity_relations: + Relationship among [Document.entities][google.cloud.documentai + .v1beta1.Document.entities]. + shard_info: + Information about the sharding if this document is sharded + part of a larger document. If the document is not sharded, + this message is not specified. + error: + Any error that occurred while processing this document. 
+ """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Document) + ), +) +_sym_db.RegisterMessage(Document) +_sym_db.RegisterMessage(Document.ShardInfo) +_sym_db.RegisterMessage(Document.Style) +_sym_db.RegisterMessage(Document.Style.FontSize) +_sym_db.RegisterMessage(Document.Page) +_sym_db.RegisterMessage(Document.Page.Dimension) +_sym_db.RegisterMessage(Document.Page.Layout) +_sym_db.RegisterMessage(Document.Page.Block) +_sym_db.RegisterMessage(Document.Page.Paragraph) +_sym_db.RegisterMessage(Document.Page.Line) +_sym_db.RegisterMessage(Document.Page.Token) +_sym_db.RegisterMessage(Document.Page.Token.DetectedBreak) +_sym_db.RegisterMessage(Document.Page.VisualElement) +_sym_db.RegisterMessage(Document.Page.Table) +_sym_db.RegisterMessage(Document.Page.Table.TableRow) +_sym_db.RegisterMessage(Document.Page.Table.TableCell) +_sym_db.RegisterMessage(Document.Page.FormField) +_sym_db.RegisterMessage(Document.Page.DetectedLanguage) +_sym_db.RegisterMessage(Document.Entity) +_sym_db.RegisterMessage(Document.EntityRelation) +_sym_db.RegisterMessage(Document.TextAnchor) +_sym_db.RegisterMessage(Document.TextAnchor.TextSegment) + + +DESCRIPTOR._options = None +# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/documentai_v1beta1/proto/document_pb2_grpc.py b/google/cloud/documentai_v1beta1/proto/document_pb2_grpc.py new file mode 100644 index 00000000..07cb78fe --- /dev/null +++ b/google/cloud/documentai_v1beta1/proto/document_pb2_grpc.py @@ -0,0 +1,2 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +import grpc diff --git a/google/cloud/documentai_v1beta1/proto/document_understanding.proto b/google/cloud/documentai_v1beta1/proto/document_understanding.proto new file mode 100644 index 00000000..4f8dfb72 --- /dev/null +++ b/google/cloud/documentai_v1beta1/proto/document_understanding.proto @@ -0,0 +1,299 @@ +// Copyright 2019 Google LLC. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +syntax = "proto3"; + +package google.cloud.documentai.v1beta1; + +import "google/api/annotations.proto"; +import "google/api/client.proto"; +import "google/api/field_behavior.proto"; +import "google/cloud/documentai/v1beta1/geometry.proto"; +import "google/longrunning/operations.proto"; +import "google/protobuf/timestamp.proto"; + +option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta1;documentai"; +option java_multiple_files = true; +option java_outer_classname = "DocumentAiProto"; +option java_package = "com.google.cloud.documentai.v1beta1"; + +// Service to parse structured information from unstructured or semi-structured +// documents using state-of-the-art Google AI such as natural language, +// computer vision, and translation. +service DocumentUnderstandingService { + option (google.api.default_host) = "documentai.googleapis.com"; + option (google.api.oauth_scopes) = + "https://www.googleapis.com/auth/cloud-platform"; + + // LRO endpoint to batch process many documents. 
+ rpc BatchProcessDocuments(BatchProcessDocumentsRequest) + returns (google.longrunning.Operation) { + option (google.api.http) = { + post: "/v1beta1/{parent=projects/*/locations/*}/documents:batchProcess" + body: "*" + additional_bindings { + post: "/v1beta1/{parent=projects/*}/documents:batchProcess" + body: "*" + } + }; + option (google.api.method_signature) = "requests"; + option (google.longrunning.operation_info) = { + response_type: "BatchProcessDocumentsResponse" + metadata_type: "OperationMetadata" + }; + } +} + +// Request to batch process documents as an asynchronous operation. +message BatchProcessDocumentsRequest { + // Required. Individual requests for each document. + repeated ProcessDocumentRequest requests = 1 + [(google.api.field_behavior) = REQUIRED]; + + // Target project and location to make a call. + // + // Format: `projects/{project-id}/locations/{location-id}`. + // + // If no location is specified, a region will be chosen automatically. + string parent = 2; +} + +// Request to process one document. +message ProcessDocumentRequest { + // Required. Information about the input file. + InputConfig input_config = 1 [(google.api.field_behavior) = REQUIRED]; + + // Required. The desired output location. + OutputConfig output_config = 2 [(google.api.field_behavior) = REQUIRED]; + + // Specifies a known document type for deeper structure detection. Valid + // values are currently "general" and "invoice". If not provided, "general" + // is used as default. If any other value is given, the request is rejected. + string document_type = 3; + + // Controls table extraction behavior. If not specified, the system will + // decide reasonable defaults. + TableExtractionParams table_extraction_params = 4; + + // Controls form extraction behavior. If not specified, the system will + // decide reasonable defaults. + FormExtractionParams form_extraction_params = 5; + + // Controls entity extraction behavior.
If not specified, the system will + // decide reasonable defaults. + EntityExtractionParams entity_extraction_params = 6; + + // Controls OCR behavior. If not specified, the system will decide reasonable + // defaults. + OcrParams ocr_params = 7; +} + +// Response to a batch document processing request. This is returned in +// the LRO Operation after the operation is complete. +message BatchProcessDocumentsResponse { + // Responses for each individual document. + repeated ProcessDocumentResponse responses = 1; +} + +// Response to a single document processing request. +message ProcessDocumentResponse { + // Information about the input file. This is the same as the corresponding + // input config in the request. + InputConfig input_config = 1; + + // The output location of the parsed responses. The responses are written to + // this location as JSON-serialized `Document` objects. + OutputConfig output_config = 2; +} + +// Parameters to control Optical Character Recognition (OCR) behavior. +message OcrParams { + // List of languages to use for OCR. In most cases, an empty value + // yields the best results since it enables automatic language detection. For + // languages based on the Latin alphabet, setting `language_hints` is not + // needed. In rare cases, when the language of the text in the image is known, + // setting a hint will help get better results (although it will be a + // significant hindrance if the hint is wrong). Document processing returns an + // error if one or more of the specified languages is not one of the + // supported languages. + repeated string language_hints = 1; +} + +// Parameters to control table extraction behavior. +message TableExtractionParams { + // Whether to enable table extraction. + bool enabled = 1; + + // Optional. Table bounding box hints that can be provided for complex cases + // in which our algorithm cannot locate the table(s).
+ repeated TableBoundHint table_bound_hints = 2 + [(google.api.field_behavior) = OPTIONAL]; + + // Optional. Table header hints. The extraction will bias towards producing + // these terms as table headers, which may improve accuracy. + repeated string header_hints = 3 [(google.api.field_behavior) = OPTIONAL]; + + // Model version of the table extraction system. Default is "builtin/stable". + // Specify "builtin/latest" for the latest model. + string model_version = 4; +} + +// A hint for a table bounding box on the page for table parsing. +message TableBoundHint { + // Optional. Page number for multi-paged inputs this hint applies to. If not + // provided, this hint will apply to all pages by default. This value is + // 1-based. + int32 page_number = 1 [(google.api.field_behavior) = OPTIONAL]; + + // Bounding box hint for a table on this page. The coordinates must be + // normalized to [0,1] and the bounding box must be an axis-aligned rectangle. + BoundingPoly bounding_box = 2; +} + +// Parameters to control form extraction behavior. +message FormExtractionParams { + // Whether to enable form extraction. + bool enabled = 1; + + // User can provide pairs of (key text, value type) to improve the parsing + // result. + // + // For example, if a document has a field called "Date" that holds a date + // value and a field called "Amount" that may hold either a currency value + // (e.g., "$500.00") or a simple number value (e.g., "20"), you could use the + // following hints: [ {"key": "Date", "value_types": [ "DATE" ]}, {"key": + // "Amount", "value_types": [ "PRICE", "NUMBER" ]} ] + // + // If the value type is unknown, but you want to provide hints for the keys, + // you can leave the value_types field blank. e.g. {"key": "Date", + // "value_types": []} + repeated KeyValuePairHint key_value_pair_hints = 2; + + // Model version of the form extraction system. Default is + // "builtin/stable". Specify "builtin/latest" for the latest model.
+ string model_version = 3; +} + +// User-provided hint for key value pair. +message KeyValuePairHint { + // The key text for the hint. + string key = 1; + + // Type of the value. This is case-insensitive, and could be one of: + // ADDRESS, LOCATION, ORGANIZATION, PERSON, PHONE_NUMBER, + // ID, NUMBER, EMAIL, PRICE, TERMS, DATE, NAME. Types not in this list will + // be ignored. + repeated string value_types = 2; +} + +// Parameters to control entity extraction behavior. +message EntityExtractionParams { + // Whether to enable entity extraction. + bool enabled = 1; + + // Model version of the entity extraction. Default is + // "builtin/stable". Specify "builtin/latest" for the latest model. + string model_version = 2; +} + +// The desired input location and metadata. +message InputConfig { + // Required. + oneof source { + // The Google Cloud Storage location to read the input from. This must be a + // single file. + GcsSource gcs_source = 1; + } + + // Required. Mimetype of the input. Current supported mimetypes are + // application/pdf, image/tiff, and image/gif. + string mime_type = 2 [(google.api.field_behavior) = REQUIRED]; +} + +// The desired output location and metadata. +message OutputConfig { + // Required. + oneof destination { + // The Google Cloud Storage location to write the output to. + GcsDestination gcs_destination = 1; + } + + // The max number of pages to include into each output Document shard JSON on + // Google Cloud Storage. + // + // The valid range is [1, 100]. If not specified, the default value is 20. + // + // For example, for one pdf file with 100 pages, 100 parsed pages will be + // produced. If `pages_per_shard` = 20, then 5 Document shard JSON files each + // containing 20 parsed pages will be written under the prefix + // [OutputConfig.gcs_destination.uri][] and suffix pages-x-to-y.json where + // x and y are 1-indexed page numbers. 
+ // + // Example GCS outputs with 157 pages and pages_per_shard = 50: + // + // pages-001-to-050.json + // pages-051-to-100.json + // pages-101-to-150.json + // pages-151-to-157.json + int32 pages_per_shard = 2; +} + +// The Google Cloud Storage location where the input file will be read from. +message GcsSource { + string uri = 1 [(google.api.field_behavior) = REQUIRED]; +} + +// The Google Cloud Storage location where the output file will be written to. +message GcsDestination { + string uri = 1 [(google.api.field_behavior) = REQUIRED]; +} + +// Contains metadata for the BatchProcessDocuments operation. +message OperationMetadata { + enum State { + // The default value. This value is used if the state is omitted. + STATE_UNSPECIFIED = 0; + + // Request is received. + ACCEPTED = 1; + + // Request operation is waiting for scheduling. + WAITING = 2; + + // Request is being processed. + RUNNING = 3; + + // The batch processing completed successfully. + SUCCEEDED = 4; + + // The batch processing was cancelled. + CANCELLED = 5; + + // The batch processing has failed. + FAILED = 6; + } + + // The state of the current batch processing. + State state = 1; + + // A message providing more details about the current state of processing. + string state_message = 2; + + // The creation time of the operation. + google.protobuf.Timestamp create_time = 3; + + // The last update time of the operation. + google.protobuf.Timestamp update_time = 4; +} diff --git a/google/cloud/documentai_v1beta1/proto/document_understanding_pb2.py b/google/cloud/documentai_v1beta1/proto/document_understanding_pb2.py new file mode 100644 index 00000000..9c978420 --- /dev/null +++ b/google/cloud/documentai_v1beta1/proto/document_understanding_pb2.py @@ -0,0 +1,1554 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: google/cloud/documentai_v1beta1/proto/document_understanding.proto + +import sys + +_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1")) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database + +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 +from google.api import client_pb2 as google_dot_api_dot_client__pb2 +from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2 +from google.cloud.documentai_v1beta1.proto import ( + geometry_pb2 as google_dot_cloud_dot_documentai__v1beta1_dot_proto_dot_geometry__pb2, +) +from google.longrunning import ( + operations_pb2 as google_dot_longrunning_dot_operations__pb2, +) +from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 + + +DESCRIPTOR = _descriptor.FileDescriptor( + name="google/cloud/documentai_v1beta1/proto/document_understanding.proto", + package="google.cloud.documentai.v1beta1", + syntax="proto3", + serialized_options=_b( + "\n#com.google.cloud.documentai.v1beta1B\017DocumentAiProtoP\001ZIgoogle.golang.org/genproto/googleapis/cloud/documentai/v1beta1;documentai" + ), + serialized_pb=_b( + '\nBgoogle/cloud/documentai_v1beta1/proto/document_understanding.proto\x12\x1fgoogle.cloud.documentai.v1beta1\x1a\x1cgoogle/api/annotations.proto\x1a\x17google/api/client.proto\x1a\x1fgoogle/api/field_behavior.proto\x1a\x34google/cloud/documentai_v1beta1/proto/geometry.proto\x1a#google/longrunning/operations.proto\x1a\x1fgoogle/protobuf/timestamp.proto"~\n\x1c\x42\x61tchProcessDocumentsRequest\x12N\n\x08requests\x18\x01 \x03(\x0b\x32\x37.google.cloud.documentai.v1beta1.ProcessDocumentRequestB\x03\xe0\x41\x02\x12\x0e\n\x06parent\x18\x02 
\x01(\t"\x8e\x04\n\x16ProcessDocumentRequest\x12G\n\x0cinput_config\x18\x01 \x01(\x0b\x32,.google.cloud.documentai.v1beta1.InputConfigB\x03\xe0\x41\x02\x12I\n\routput_config\x18\x02 \x01(\x0b\x32-.google.cloud.documentai.v1beta1.OutputConfigB\x03\xe0\x41\x02\x12\x15\n\rdocument_type\x18\x03 \x01(\t\x12W\n\x17table_extraction_params\x18\x04 \x01(\x0b\x32\x36.google.cloud.documentai.v1beta1.TableExtractionParams\x12U\n\x16\x66orm_extraction_params\x18\x05 \x01(\x0b\x32\x35.google.cloud.documentai.v1beta1.FormExtractionParams\x12Y\n\x18\x65ntity_extraction_params\x18\x06 \x01(\x0b\x32\x37.google.cloud.documentai.v1beta1.EntityExtractionParams\x12>\n\nocr_params\x18\x07 \x01(\x0b\x32*.google.cloud.documentai.v1beta1.OcrParams"l\n\x1d\x42\x61tchProcessDocumentsResponse\x12K\n\tresponses\x18\x01 \x03(\x0b\x32\x38.google.cloud.documentai.v1beta1.ProcessDocumentResponse"\xa3\x01\n\x17ProcessDocumentResponse\x12\x42\n\x0cinput_config\x18\x01 \x01(\x0b\x32,.google.cloud.documentai.v1beta1.InputConfig\x12\x44\n\routput_config\x18\x02 \x01(\x0b\x32-.google.cloud.documentai.v1beta1.OutputConfig"#\n\tOcrParams\x12\x16\n\x0elanguage_hints\x18\x01 \x03(\t"\xab\x01\n\x15TableExtractionParams\x12\x0f\n\x07\x65nabled\x18\x01 \x01(\x08\x12O\n\x11table_bound_hints\x18\x02 \x03(\x0b\x32/.google.cloud.documentai.v1beta1.TableBoundHintB\x03\xe0\x41\x01\x12\x19\n\x0cheader_hints\x18\x03 \x03(\tB\x03\xe0\x41\x01\x12\x15\n\rmodel_version\x18\x04 \x01(\t"o\n\x0eTableBoundHint\x12\x18\n\x0bpage_number\x18\x01 \x01(\x05\x42\x03\xe0\x41\x01\x12\x43\n\x0c\x62ounding_box\x18\x02 \x01(\x0b\x32-.google.cloud.documentai.v1beta1.BoundingPoly"\x8f\x01\n\x14\x46ormExtractionParams\x12\x0f\n\x07\x65nabled\x18\x01 \x01(\x08\x12O\n\x14key_value_pair_hints\x18\x02 \x03(\x0b\x32\x31.google.cloud.documentai.v1beta1.KeyValuePairHint\x12\x15\n\rmodel_version\x18\x03 \x01(\t"4\n\x10KeyValuePairHint\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x13\n\x0bvalue_types\x18\x02 
\x03(\t"@\n\x16\x45ntityExtractionParams\x12\x0f\n\x07\x65nabled\x18\x01 \x01(\x08\x12\x15\n\rmodel_version\x18\x02 \x01(\t"q\n\x0bInputConfig\x12@\n\ngcs_source\x18\x01 \x01(\x0b\x32*.google.cloud.documentai.v1beta1.GcsSourceH\x00\x12\x16\n\tmime_type\x18\x02 \x01(\tB\x03\xe0\x41\x02\x42\x08\n\x06source"\x82\x01\n\x0cOutputConfig\x12J\n\x0fgcs_destination\x18\x01 \x01(\x0b\x32/.google.cloud.documentai.v1beta1.GcsDestinationH\x00\x12\x17\n\x0fpages_per_shard\x18\x02 \x01(\x05\x42\r\n\x0b\x64\x65stination"\x1d\n\tGcsSource\x12\x10\n\x03uri\x18\x01 \x01(\tB\x03\xe0\x41\x02""\n\x0eGcsDestination\x12\x10\n\x03uri\x18\x01 \x01(\tB\x03\xe0\x41\x02"\xc7\x02\n\x11OperationMetadata\x12G\n\x05state\x18\x01 \x01(\x0e\x32\x38.google.cloud.documentai.v1beta1.OperationMetadata.State\x12\x15\n\rstate_message\x18\x02 \x01(\t\x12/\n\x0b\x63reate_time\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12/\n\x0bupdate_time\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.Timestamp"p\n\x05State\x12\x15\n\x11STATE_UNSPECIFIED\x10\x00\x12\x0c\n\x08\x41\x43\x43\x45PTED\x10\x01\x12\x0b\n\x07WAITING\x10\x02\x12\x0b\n\x07RUNNING\x10\x03\x12\r\n\tSUCCEEDED\x10\x04\x12\r\n\tCANCELLED\x10\x05\x12\n\n\x06\x46\x41ILED\x10\x06\x32\xac\x03\n\x1c\x44ocumentUnderstandingService\x12\xbc\x02\n\x15\x42\x61tchProcessDocuments\x12=.google.cloud.documentai.v1beta1.BatchProcessDocumentsRequest\x1a\x1d.google.longrunning.Operation"\xc4\x01\x82\xd3\xe4\x93\x02~"?/v1beta1/{parent=projects/*/locations/*}/documents:batchProcess:\x01*Z8"3/v1beta1/{parent=projects/*}/documents:batchProcess:\x01*\xda\x41\x08requests\xca\x41\x32\n\x1d\x42\x61tchProcessDocumentsResponse\x12\x11OperationMetadata\x1aM\xca\x41\x19\x64ocumentai.googleapis.com\xd2\x41.https://www.googleapis.com/auth/cloud-platformB\x83\x01\n#com.google.cloud.documentai.v1beta1B\x0f\x44ocumentAiProtoP\x01ZIgoogle.golang.org/genproto/googleapis/cloud/documentai/v1beta1;documentaib\x06proto3' + ), + dependencies=[ + 
google_dot_api_dot_annotations__pb2.DESCRIPTOR, + google_dot_api_dot_client__pb2.DESCRIPTOR, + google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, + google_dot_cloud_dot_documentai__v1beta1_dot_proto_dot_geometry__pb2.DESCRIPTOR, + google_dot_longrunning_dot_operations__pb2.DESCRIPTOR, + google_dot_protobuf_dot_timestamp__pb2.DESCRIPTOR, + ], +) + + +_OPERATIONMETADATA_STATE = _descriptor.EnumDescriptor( + name="State", + full_name="google.cloud.documentai.v1beta1.OperationMetadata.State", + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name="STATE_UNSPECIFIED", + index=0, + number=0, + serialized_options=None, + type=None, + ), + _descriptor.EnumValueDescriptor( + name="ACCEPTED", index=1, number=1, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="WAITING", index=2, number=2, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="RUNNING", index=3, number=3, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="SUCCEEDED", index=4, number=4, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="CANCELLED", index=5, number=5, serialized_options=None, type=None + ), + _descriptor.EnumValueDescriptor( + name="FAILED", index=6, number=6, serialized_options=None, type=None + ), + ], + containing_type=None, + serialized_options=None, + serialized_start=2369, + serialized_end=2481, +) +_sym_db.RegisterEnumDescriptor(_OPERATIONMETADATA_STATE) + + +_BATCHPROCESSDOCUMENTSREQUEST = _descriptor.Descriptor( + name="BatchProcessDocumentsRequest", + full_name="google.cloud.documentai.v1beta1.BatchProcessDocumentsRequest", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="requests", + full_name="google.cloud.documentai.v1beta1.BatchProcessDocumentsRequest.requests", + index=0, + number=1, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + 
default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=_b("\340A\002"), + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="parent", + full_name="google.cloud.documentai.v1beta1.BatchProcessDocumentsRequest.parent", + index=1, + number=2, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=315, + serialized_end=441, +) + + +_PROCESSDOCUMENTREQUEST = _descriptor.Descriptor( + name="ProcessDocumentRequest", + full_name="google.cloud.documentai.v1beta1.ProcessDocumentRequest", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="input_config", + full_name="google.cloud.documentai.v1beta1.ProcessDocumentRequest.input_config", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=_b("\340A\002"), + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="output_config", + full_name="google.cloud.documentai.v1beta1.ProcessDocumentRequest.output_config", + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=_b("\340A\002"), + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="document_type", + 
full_name="google.cloud.documentai.v1beta1.ProcessDocumentRequest.document_type", + index=2, + number=3, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="table_extraction_params", + full_name="google.cloud.documentai.v1beta1.ProcessDocumentRequest.table_extraction_params", + index=3, + number=4, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="form_extraction_params", + full_name="google.cloud.documentai.v1beta1.ProcessDocumentRequest.form_extraction_params", + index=4, + number=5, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="entity_extraction_params", + full_name="google.cloud.documentai.v1beta1.ProcessDocumentRequest.entity_extraction_params", + index=5, + number=6, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="ocr_params", + full_name="google.cloud.documentai.v1beta1.ProcessDocumentRequest.ocr_params", + index=6, + number=7, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + 
extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=444, + serialized_end=970, +) + + +_BATCHPROCESSDOCUMENTSRESPONSE = _descriptor.Descriptor( + name="BatchProcessDocumentsResponse", + full_name="google.cloud.documentai.v1beta1.BatchProcessDocumentsResponse", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="responses", + full_name="google.cloud.documentai.v1beta1.BatchProcessDocumentsResponse.responses", + index=0, + number=1, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ) + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=972, + serialized_end=1080, +) + + +_PROCESSDOCUMENTRESPONSE = _descriptor.Descriptor( + name="ProcessDocumentResponse", + full_name="google.cloud.documentai.v1beta1.ProcessDocumentResponse", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="input_config", + full_name="google.cloud.documentai.v1beta1.ProcessDocumentResponse.input_config", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="output_config", + full_name="google.cloud.documentai.v1beta1.ProcessDocumentResponse.output_config", + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + 
has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=1083, + serialized_end=1246, +) + + +_OCRPARAMS = _descriptor.Descriptor( + name="OcrParams", + full_name="google.cloud.documentai.v1beta1.OcrParams", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="language_hints", + full_name="google.cloud.documentai.v1beta1.OcrParams.language_hints", + index=0, + number=1, + type=9, + cpp_type=9, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ) + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=1248, + serialized_end=1283, +) + + +_TABLEEXTRACTIONPARAMS = _descriptor.Descriptor( + name="TableExtractionParams", + full_name="google.cloud.documentai.v1beta1.TableExtractionParams", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="enabled", + full_name="google.cloud.documentai.v1beta1.TableExtractionParams.enabled", + index=0, + number=1, + type=8, + cpp_type=7, + label=1, + has_default_value=False, + default_value=False, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="table_bound_hints", + full_name="google.cloud.documentai.v1beta1.TableExtractionParams.table_bound_hints", + index=1, + 
number=2, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=_b("\340A\001"), + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="header_hints", + full_name="google.cloud.documentai.v1beta1.TableExtractionParams.header_hints", + index=2, + number=3, + type=9, + cpp_type=9, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=_b("\340A\001"), + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="model_version", + full_name="google.cloud.documentai.v1beta1.TableExtractionParams.model_version", + index=3, + number=4, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=1286, + serialized_end=1457, +) + + +_TABLEBOUNDHINT = _descriptor.Descriptor( + name="TableBoundHint", + full_name="google.cloud.documentai.v1beta1.TableBoundHint", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="page_number", + full_name="google.cloud.documentai.v1beta1.TableBoundHint.page_number", + index=0, + number=1, + type=5, + cpp_type=1, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=_b("\340A\001"), + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="bounding_box", + 
full_name="google.cloud.documentai.v1beta1.TableBoundHint.bounding_box", + index=1, + number=2, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=1459, + serialized_end=1570, +) + + +_FORMEXTRACTIONPARAMS = _descriptor.Descriptor( + name="FormExtractionParams", + full_name="google.cloud.documentai.v1beta1.FormExtractionParams", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="enabled", + full_name="google.cloud.documentai.v1beta1.FormExtractionParams.enabled", + index=0, + number=1, + type=8, + cpp_type=7, + label=1, + has_default_value=False, + default_value=False, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="key_value_pair_hints", + full_name="google.cloud.documentai.v1beta1.FormExtractionParams.key_value_pair_hints", + index=1, + number=2, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="model_version", + full_name="google.cloud.documentai.v1beta1.FormExtractionParams.model_version", + index=2, + number=3, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + 
file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=1573, + serialized_end=1716, +) + + +_KEYVALUEPAIRHINT = _descriptor.Descriptor( + name="KeyValuePairHint", + full_name="google.cloud.documentai.v1beta1.KeyValuePairHint", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="key", + full_name="google.cloud.documentai.v1beta1.KeyValuePairHint.key", + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="value_types", + full_name="google.cloud.documentai.v1beta1.KeyValuePairHint.value_types", + index=1, + number=2, + type=9, + cpp_type=9, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=1718, + serialized_end=1770, +) + + +_ENTITYEXTRACTIONPARAMS = _descriptor.Descriptor( + name="EntityExtractionParams", + full_name="google.cloud.documentai.v1beta1.EntityExtractionParams", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="enabled", + full_name="google.cloud.documentai.v1beta1.EntityExtractionParams.enabled", + index=0, + number=1, + type=8, + cpp_type=7, + label=1, + has_default_value=False, + default_value=False, + message_type=None, + enum_type=None, + containing_type=None, + 
is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="model_version", + full_name="google.cloud.documentai.v1beta1.EntityExtractionParams.model_version", + index=1, + number=2, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=1772, + serialized_end=1836, +) + + +_INPUTCONFIG = _descriptor.Descriptor( + name="InputConfig", + full_name="google.cloud.documentai.v1beta1.InputConfig", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="gcs_source", + full_name="google.cloud.documentai.v1beta1.InputConfig.gcs_source", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="mime_type", + full_name="google.cloud.documentai.v1beta1.InputConfig.mime_type", + index=1, + number=2, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=_b("\340A\002"), + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[ + _descriptor.OneofDescriptor( + name="source", + 
full_name="google.cloud.documentai.v1beta1.InputConfig.source", + index=0, + containing_type=None, + fields=[], + ) + ], + serialized_start=1838, + serialized_end=1951, +) + + +_OUTPUTCONFIG = _descriptor.Descriptor( + name="OutputConfig", + full_name="google.cloud.documentai.v1beta1.OutputConfig", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="gcs_destination", + full_name="google.cloud.documentai.v1beta1.OutputConfig.gcs_destination", + index=0, + number=1, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="pages_per_shard", + full_name="google.cloud.documentai.v1beta1.OutputConfig.pages_per_shard", + index=1, + number=2, + type=5, + cpp_type=1, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[ + _descriptor.OneofDescriptor( + name="destination", + full_name="google.cloud.documentai.v1beta1.OutputConfig.destination", + index=0, + containing_type=None, + fields=[], + ) + ], + serialized_start=1954, + serialized_end=2084, +) + + +_GCSSOURCE = _descriptor.Descriptor( + name="GcsSource", + full_name="google.cloud.documentai.v1beta1.GcsSource", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="uri", + full_name="google.cloud.documentai.v1beta1.GcsSource.uri", + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + 
message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=_b("\340A\002"), + file=DESCRIPTOR, + ) + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=2086, + serialized_end=2115, +) + + +_GCSDESTINATION = _descriptor.Descriptor( + name="GcsDestination", + full_name="google.cloud.documentai.v1beta1.GcsDestination", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="uri", + full_name="google.cloud.documentai.v1beta1.GcsDestination.uri", + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=_b("\340A\002"), + file=DESCRIPTOR, + ) + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=2117, + serialized_end=2151, +) + + +_OPERATIONMETADATA = _descriptor.Descriptor( + name="OperationMetadata", + full_name="google.cloud.documentai.v1beta1.OperationMetadata", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="state", + full_name="google.cloud.documentai.v1beta1.OperationMetadata.state", + index=0, + number=1, + type=14, + cpp_type=8, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="state_message", + full_name="google.cloud.documentai.v1beta1.OperationMetadata.state_message", + index=1, + number=2, + type=9, + cpp_type=9, + label=1, + 
has_default_value=False, + default_value=_b("").decode("utf-8"), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="create_time", + full_name="google.cloud.documentai.v1beta1.OperationMetadata.create_time", + index=2, + number=3, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="update_time", + full_name="google.cloud.documentai.v1beta1.OperationMetadata.update_time", + index=3, + number=4, + type=11, + cpp_type=10, + label=1, + has_default_value=False, + default_value=None, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[_OPERATIONMETADATA_STATE], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=2154, + serialized_end=2481, +) + +_BATCHPROCESSDOCUMENTSREQUEST.fields_by_name[ + "requests" +].message_type = _PROCESSDOCUMENTREQUEST +_PROCESSDOCUMENTREQUEST.fields_by_name["input_config"].message_type = _INPUTCONFIG +_PROCESSDOCUMENTREQUEST.fields_by_name["output_config"].message_type = _OUTPUTCONFIG +_PROCESSDOCUMENTREQUEST.fields_by_name[ + "table_extraction_params" +].message_type = _TABLEEXTRACTIONPARAMS +_PROCESSDOCUMENTREQUEST.fields_by_name[ + "form_extraction_params" +].message_type = _FORMEXTRACTIONPARAMS +_PROCESSDOCUMENTREQUEST.fields_by_name[ + "entity_extraction_params" +].message_type = _ENTITYEXTRACTIONPARAMS +_PROCESSDOCUMENTREQUEST.fields_by_name["ocr_params"].message_type = _OCRPARAMS +_BATCHPROCESSDOCUMENTSRESPONSE.fields_by_name[ + 
"responses" +].message_type = _PROCESSDOCUMENTRESPONSE +_PROCESSDOCUMENTRESPONSE.fields_by_name["input_config"].message_type = _INPUTCONFIG +_PROCESSDOCUMENTRESPONSE.fields_by_name["output_config"].message_type = _OUTPUTCONFIG +_TABLEEXTRACTIONPARAMS.fields_by_name[ + "table_bound_hints" +].message_type = _TABLEBOUNDHINT +_TABLEBOUNDHINT.fields_by_name[ + "bounding_box" +].message_type = ( + google_dot_cloud_dot_documentai__v1beta1_dot_proto_dot_geometry__pb2._BOUNDINGPOLY +) +_FORMEXTRACTIONPARAMS.fields_by_name[ + "key_value_pair_hints" +].message_type = _KEYVALUEPAIRHINT +_INPUTCONFIG.fields_by_name["gcs_source"].message_type = _GCSSOURCE +_INPUTCONFIG.oneofs_by_name["source"].fields.append( + _INPUTCONFIG.fields_by_name["gcs_source"] +) +_INPUTCONFIG.fields_by_name[ + "gcs_source" +].containing_oneof = _INPUTCONFIG.oneofs_by_name["source"] +_OUTPUTCONFIG.fields_by_name["gcs_destination"].message_type = _GCSDESTINATION +_OUTPUTCONFIG.oneofs_by_name["destination"].fields.append( + _OUTPUTCONFIG.fields_by_name["gcs_destination"] +) +_OUTPUTCONFIG.fields_by_name[ + "gcs_destination" +].containing_oneof = _OUTPUTCONFIG.oneofs_by_name["destination"] +_OPERATIONMETADATA.fields_by_name["state"].enum_type = _OPERATIONMETADATA_STATE +_OPERATIONMETADATA.fields_by_name[ + "create_time" +].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP +_OPERATIONMETADATA.fields_by_name[ + "update_time" +].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP +_OPERATIONMETADATA_STATE.containing_type = _OPERATIONMETADATA +DESCRIPTOR.message_types_by_name[ + "BatchProcessDocumentsRequest" +] = _BATCHPROCESSDOCUMENTSREQUEST +DESCRIPTOR.message_types_by_name["ProcessDocumentRequest"] = _PROCESSDOCUMENTREQUEST +DESCRIPTOR.message_types_by_name[ + "BatchProcessDocumentsResponse" +] = _BATCHPROCESSDOCUMENTSRESPONSE +DESCRIPTOR.message_types_by_name["ProcessDocumentResponse"] = _PROCESSDOCUMENTRESPONSE +DESCRIPTOR.message_types_by_name["OcrParams"] = _OCRPARAMS 
+DESCRIPTOR.message_types_by_name["TableExtractionParams"] = _TABLEEXTRACTIONPARAMS +DESCRIPTOR.message_types_by_name["TableBoundHint"] = _TABLEBOUNDHINT +DESCRIPTOR.message_types_by_name["FormExtractionParams"] = _FORMEXTRACTIONPARAMS +DESCRIPTOR.message_types_by_name["KeyValuePairHint"] = _KEYVALUEPAIRHINT +DESCRIPTOR.message_types_by_name["EntityExtractionParams"] = _ENTITYEXTRACTIONPARAMS +DESCRIPTOR.message_types_by_name["InputConfig"] = _INPUTCONFIG +DESCRIPTOR.message_types_by_name["OutputConfig"] = _OUTPUTCONFIG +DESCRIPTOR.message_types_by_name["GcsSource"] = _GCSSOURCE +DESCRIPTOR.message_types_by_name["GcsDestination"] = _GCSDESTINATION +DESCRIPTOR.message_types_by_name["OperationMetadata"] = _OPERATIONMETADATA +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +BatchProcessDocumentsRequest = _reflection.GeneratedProtocolMessageType( + "BatchProcessDocumentsRequest", + (_message.Message,), + dict( + DESCRIPTOR=_BATCHPROCESSDOCUMENTSREQUEST, + __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", + __doc__="""Request to batch process documents as an asynchronous operation. + + + Attributes: + requests: + Required. Individual requests for each document. + parent: + Target project and location to make a call. Format: + ``projects/{project-id}/locations/{location-id}``. If no + location is specified, a region will be chosen automatically. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.BatchProcessDocumentsRequest) + ), +) +_sym_db.RegisterMessage(BatchProcessDocumentsRequest) + +ProcessDocumentRequest = _reflection.GeneratedProtocolMessageType( + "ProcessDocumentRequest", + (_message.Message,), + dict( + DESCRIPTOR=_PROCESSDOCUMENTREQUEST, + __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", + __doc__="""Request to process one document. + + + Attributes: + input_config: + Required. Information about the input file. + output_config: + Required. The desired output location. 
+ document_type: + Specifies a known document type for deeper structure + detection. Valid values are currently "general" and + "invoice". If not provided, "general" is used as default. + If any other value is given, the request is rejected. + table_extraction_params: + Controls table extraction behavior. If not specified, the + system will decide reasonable defaults. + form_extraction_params: + Controls form extraction behavior. If not specified, the + system will decide reasonable defaults. + entity_extraction_params: + Controls entity extraction behavior. If not specified, the + system will decide reasonable defaults. + ocr_params: + Controls OCR behavior. If not specified, the system will + decide reasonable defaults. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.ProcessDocumentRequest) + ), +) +_sym_db.RegisterMessage(ProcessDocumentRequest) + +BatchProcessDocumentsResponse = _reflection.GeneratedProtocolMessageType( + "BatchProcessDocumentsResponse", + (_message.Message,), + dict( + DESCRIPTOR=_BATCHPROCESSDOCUMENTSRESPONSE, + __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", + __doc__="""Response to an batch document processing request. This is returned in + the LRO Operation after the operation is complete. + + + Attributes: + responses: + Responses for each individual document. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.BatchProcessDocumentsResponse) + ), +) +_sym_db.RegisterMessage(BatchProcessDocumentsResponse) + +ProcessDocumentResponse = _reflection.GeneratedProtocolMessageType( + "ProcessDocumentResponse", + (_message.Message,), + dict( + DESCRIPTOR=_PROCESSDOCUMENTRESPONSE, + __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", + __doc__="""Response to a single document processing request. + + + Attributes: + input_config: + Information about the input file. This is the same as the + corresponding input config in the request. 
+ output_config: + The output location of the parsed responses. The responses are + written to this location as JSON-serialized ``Document`` + objects. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.ProcessDocumentResponse) + ), +) +_sym_db.RegisterMessage(ProcessDocumentResponse) + +OcrParams = _reflection.GeneratedProtocolMessageType( + "OcrParams", + (_message.Message,), + dict( + DESCRIPTOR=_OCRPARAMS, + __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", + __doc__="""Parameters to control Optical Character Recognition (OCR) behavior. + + + Attributes: + language_hints: + List of languages to use for OCR. In most cases, an empty + value yields the best results since it enables automatic + language detection. For languages based on the Latin alphabet, + setting ``language_hints`` is not needed. In rare cases, when + the language of the text in the image is known, setting a hint + will help get better results (although it will be a + significant hindrance if the hint is wrong). Document + processing returns an error if one or more of the specified + languages is not one of the supported languages. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.OcrParams) + ), +) +_sym_db.RegisterMessage(OcrParams) + +TableExtractionParams = _reflection.GeneratedProtocolMessageType( + "TableExtractionParams", + (_message.Message,), + dict( + DESCRIPTOR=_TABLEEXTRACTIONPARAMS, + __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", + __doc__="""Parameters to control table extraction behavior. + + + Attributes: + enabled: + Whether to enable table extraction. + table_bound_hints: + Optional. Table bounding box hints that can be provided to + complex cases which our algorithm cannot locate the table(s) + in. + header_hints: + Optional. Table header hints. The extraction will bias towards + producing these terms as table headers, which may improve + accuracy. 
+ model_version: + Model version of the table extraction system. Default is + "builtin/stable". Specify "builtin/latest" for the latest + model. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.TableExtractionParams) + ), +) +_sym_db.RegisterMessage(TableExtractionParams) + +TableBoundHint = _reflection.GeneratedProtocolMessageType( + "TableBoundHint", + (_message.Message,), + dict( + DESCRIPTOR=_TABLEBOUNDHINT, + __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", + __doc__="""A hint for a table bounding box on the page for table parsing. + + + Attributes: + page_number: + Optional. Page number for multi-paged inputs this hint applies + to. If not provided, this hint will apply to all pages by + default. This value is 1-based. + bounding_box: + Bounding box hint for a table on this page. The coordinates + must be normalized to [0,1] and the bounding box must be an + axis-aligned rectangle. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.TableBoundHint) + ), +) +_sym_db.RegisterMessage(TableBoundHint) + +FormExtractionParams = _reflection.GeneratedProtocolMessageType( + "FormExtractionParams", + (_message.Message,), + dict( + DESCRIPTOR=_FORMEXTRACTIONPARAMS, + __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", + __doc__="""Parameters to control form extraction behavior. + + + Attributes: + enabled: + Whether to enable form extraction. + key_value_pair_hints: + User can provide pairs of (key text, value type) to improve + the parsing result. 
For example, if a document has a field + called "Date" that holds a date value and a field called + "Amount" that may hold either a currency value (e.g., + "$500.00") or a simple number value (e.g., "20"), you could + use the following hints: [ {"key": "Date", value\_types: [ + "DATE"]}, {"key": "Amount", "value\_types": [ "PRICE", + "NUMBER" ]} ] If the value type is unknown, but you want to + provide hints for the keys, you can leave the value\_types + field blank. e.g. {"key": "Date", "value\_types": []} + model_version: + Model version of the form extraction system. Default is + "builtin/stable". Specify "builtin/latest" for the latest + model. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.FormExtractionParams) + ), +) +_sym_db.RegisterMessage(FormExtractionParams) + +KeyValuePairHint = _reflection.GeneratedProtocolMessageType( + "KeyValuePairHint", + (_message.Message,), + dict( + DESCRIPTOR=_KEYVALUEPAIRHINT, + __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", + __doc__="""User-provided hint for key value pair. + + + Attributes: + key: + The key text for the hint. + value_types: + Type of the value. This is case-insensitive, and could be one + of: ADDRESS, LOCATION, ORGANIZATION, PERSON, PHONE\_NUMBER, + ID, NUMBER, EMAIL, PRICE, TERMS, DATE, NAME. Types not in this + list will be ignored. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.KeyValuePairHint) + ), +) +_sym_db.RegisterMessage(KeyValuePairHint) + +EntityExtractionParams = _reflection.GeneratedProtocolMessageType( + "EntityExtractionParams", + (_message.Message,), + dict( + DESCRIPTOR=_ENTITYEXTRACTIONPARAMS, + __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", + __doc__="""Parameters to control entity extraction behavior. + + + Attributes: + enabled: + Whether to enable entity extraction. + model_version: + Model version of the entity extraction. Default is + "builtin/stable". 
Specify "builtin/latest" for the latest + model. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.EntityExtractionParams) + ), +) +_sym_db.RegisterMessage(EntityExtractionParams) + +InputConfig = _reflection.GeneratedProtocolMessageType( + "InputConfig", + (_message.Message,), + dict( + DESCRIPTOR=_INPUTCONFIG, + __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", + __doc__="""The desired input location and metadata. + + + Attributes: + source: + Required. + gcs_source: + The Google Cloud Storage location to read the input from. This + must be a single file. + mime_type: + Required. Mimetype of the input. Current supported mimetypes + are application/pdf, image/tiff, and image/gif. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.InputConfig) + ), +) +_sym_db.RegisterMessage(InputConfig) + +OutputConfig = _reflection.GeneratedProtocolMessageType( + "OutputConfig", + (_message.Message,), + dict( + DESCRIPTOR=_OUTPUTCONFIG, + __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", + __doc__="""The desired output location and metadata. + + + Attributes: + destination: + Required. + gcs_destination: + The Google Cloud Storage location to write the output to. + pages_per_shard: + The max number of pages to include into each output Document + shard JSON on Google Cloud Storage. The valid range is [1, + 100]. If not specified, the default value is 20. For example, + for one pdf file with 100 pages, 100 parsed pages will be + produced. If ``pages_per_shard`` = 20, then 5 Document shard + JSON files each containing 20 parsed pages will be written + under the prefix [OutputConfig.gcs\_destination.uri][] and + suffix pages-x-to-y.json where x and y are 1-indexed page + numbers. 
Example GCS outputs with 157 pages and + pages\_per\_shard = 50: pages-001-to-050.json + pages-051-to-100.json pages-101-to-150.json + pages-151-to-157.json + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.OutputConfig) + ), +) +_sym_db.RegisterMessage(OutputConfig) + +GcsSource = _reflection.GeneratedProtocolMessageType( + "GcsSource", + (_message.Message,), + dict( + DESCRIPTOR=_GCSSOURCE, + __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", + __doc__="""The Google Cloud Storage location where the input file will be read + from. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.GcsSource) + ), +) +_sym_db.RegisterMessage(GcsSource) + +GcsDestination = _reflection.GeneratedProtocolMessageType( + "GcsDestination", + (_message.Message,), + dict( + DESCRIPTOR=_GCSDESTINATION, + __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", + __doc__="""The Google Cloud Storage location where the output file will be written + to. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.GcsDestination) + ), +) +_sym_db.RegisterMessage(GcsDestination) + +OperationMetadata = _reflection.GeneratedProtocolMessageType( + "OperationMetadata", + (_message.Message,), + dict( + DESCRIPTOR=_OPERATIONMETADATA, + __module__="google.cloud.documentai_v1beta1.proto.document_understanding_pb2", + __doc__="""Contains metadata for the BatchProcessDocuments operation. + + + Attributes: + state: + The state of the current batch processing. + state_message: + A message providing more details about the current state of + processing. + create_time: + The creation time of the operation. + update_time: + The last update time of the operation. 
+ """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.OperationMetadata) + ), +) +_sym_db.RegisterMessage(OperationMetadata) + + +DESCRIPTOR._options = None +_BATCHPROCESSDOCUMENTSREQUEST.fields_by_name["requests"]._options = None +_PROCESSDOCUMENTREQUEST.fields_by_name["input_config"]._options = None +_PROCESSDOCUMENTREQUEST.fields_by_name["output_config"]._options = None +_TABLEEXTRACTIONPARAMS.fields_by_name["table_bound_hints"]._options = None +_TABLEEXTRACTIONPARAMS.fields_by_name["header_hints"]._options = None +_TABLEBOUNDHINT.fields_by_name["page_number"]._options = None +_INPUTCONFIG.fields_by_name["mime_type"]._options = None +_GCSSOURCE.fields_by_name["uri"]._options = None +_GCSDESTINATION.fields_by_name["uri"]._options = None + +_DOCUMENTUNDERSTANDINGSERVICE = _descriptor.ServiceDescriptor( + name="DocumentUnderstandingService", + full_name="google.cloud.documentai.v1beta1.DocumentUnderstandingService", + file=DESCRIPTOR, + index=0, + serialized_options=_b( + "\312A\031documentai.googleapis.com\322A.https://www.googleapis.com/auth/cloud-platform" + ), + serialized_start=2484, + serialized_end=2912, + methods=[ + _descriptor.MethodDescriptor( + name="BatchProcessDocuments", + full_name="google.cloud.documentai.v1beta1.DocumentUnderstandingService.BatchProcessDocuments", + index=0, + containing_service=None, + input_type=_BATCHPROCESSDOCUMENTSREQUEST, + output_type=google_dot_longrunning_dot_operations__pb2._OPERATION, + serialized_options=_b( + '\202\323\344\223\002~"?/v1beta1/{parent=projects/*/locations/*}/documents:batchProcess:\001*Z8"3/v1beta1/{parent=projects/*}/documents:batchProcess:\001*\332A\010requests\312A2\n\035BatchProcessDocumentsResponse\022\021OperationMetadata' + ), + ) + ], +) +_sym_db.RegisterServiceDescriptor(_DOCUMENTUNDERSTANDINGSERVICE) + +DESCRIPTOR.services_by_name[ + "DocumentUnderstandingService" +] = _DOCUMENTUNDERSTANDINGSERVICE + +# @@protoc_insertion_point(module_scope) diff --git 
a/google/cloud/documentai_v1beta1/proto/document_understanding_pb2_grpc.py b/google/cloud/documentai_v1beta1/proto/document_understanding_pb2_grpc.py new file mode 100644 index 00000000..6e6d2308 --- /dev/null +++ b/google/cloud/documentai_v1beta1/proto/document_understanding_pb2_grpc.py @@ -0,0 +1,57 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +import grpc + +from google.cloud.documentai_v1beta1.proto import ( + document_understanding_pb2 as google_dot_cloud_dot_documentai__v1beta1_dot_proto_dot_document__understanding__pb2, +) +from google.longrunning import ( + operations_pb2 as google_dot_longrunning_dot_operations__pb2, +) + + +class DocumentUnderstandingServiceStub(object): + """Service to parse structured information from unstructured or semi-structured + documents using state-of-the-art Google AI such as natural language, + computer vision, and translation. + """ + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. + """ + self.BatchProcessDocuments = channel.unary_unary( + "/google.cloud.documentai.v1beta1.DocumentUnderstandingService/BatchProcessDocuments", + request_serializer=google_dot_cloud_dot_documentai__v1beta1_dot_proto_dot_document__understanding__pb2.BatchProcessDocumentsRequest.SerializeToString, + response_deserializer=google_dot_longrunning_dot_operations__pb2.Operation.FromString, + ) + + +class DocumentUnderstandingServiceServicer(object): + """Service to parse structured information from unstructured or semi-structured + documents using state-of-the-art Google AI such as natural language, + computer vision, and translation. + """ + + def BatchProcessDocuments(self, request, context): + """LRO endpoint to batch process many documents. 
+ """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") + + +def add_DocumentUnderstandingServiceServicer_to_server(servicer, server): + rpc_method_handlers = { + "BatchProcessDocuments": grpc.unary_unary_rpc_method_handler( + servicer.BatchProcessDocuments, + request_deserializer=google_dot_cloud_dot_documentai__v1beta1_dot_proto_dot_document__understanding__pb2.BatchProcessDocumentsRequest.FromString, + response_serializer=google_dot_longrunning_dot_operations__pb2.Operation.SerializeToString, + ) + } + generic_handler = grpc.method_handlers_generic_handler( + "google.cloud.documentai.v1beta1.DocumentUnderstandingService", + rpc_method_handlers, + ) + server.add_generic_rpc_handlers((generic_handler,)) diff --git a/google/cloud/documentai_v1beta1/proto/geometry.proto b/google/cloud/documentai_v1beta1/proto/geometry.proto new file mode 100644 index 00000000..9dbe2b78 --- /dev/null +++ b/google/cloud/documentai_v1beta1/proto/geometry.proto @@ -0,0 +1,55 @@ +// Copyright 2019 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +syntax = "proto3"; + +package google.cloud.documentai.v1beta1; + +import "google/api/annotations.proto"; + +option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta1;documentai"; +option java_multiple_files = true; +option java_outer_classname = "GeometryProto"; +option java_package = "com.google.cloud.documentai.v1beta1"; + +// A vertex represents a 2D point in the image. +// NOTE: the vertex coordinates are in the same scale as the original image. +message Vertex { + // X coordinate. + int32 x = 1; + + // Y coordinate. + int32 y = 2; +} + +// A vertex represents a 2D point in the image. +// NOTE: the normalized vertex coordinates are relative to the original image +// and range from 0 to 1. +message NormalizedVertex { + // X coordinate. + float x = 1; + + // Y coordinate. + float y = 2; +} + +// A bounding polygon for the detected image annotation. +message BoundingPoly { + // The bounding polygon vertices. + repeated Vertex vertices = 1; + + // The bounding polygon normalized vertices. + repeated NormalizedVertex normalized_vertices = 2; +} diff --git a/google/cloud/documentai_v1beta1/proto/geometry_pb2.py b/google/cloud/documentai_v1beta1/proto/geometry_pb2.py new file mode 100644 index 00000000..2e4217e3 --- /dev/null +++ b/google/cloud/documentai_v1beta1/proto/geometry_pb2.py @@ -0,0 +1,270 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: google/cloud/documentai_v1beta1/proto/geometry.proto + +import sys + +_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1")) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database + +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 + + +DESCRIPTOR = _descriptor.FileDescriptor( + name="google/cloud/documentai_v1beta1/proto/geometry.proto", + package="google.cloud.documentai.v1beta1", + syntax="proto3", + serialized_options=_b( + "\n#com.google.cloud.documentai.v1beta1B\rGeometryProtoP\001ZIgoogle.golang.org/genproto/googleapis/cloud/documentai/v1beta1;documentai" + ), + serialized_pb=_b( + '\n4google/cloud/documentai_v1beta1/proto/geometry.proto\x12\x1fgoogle.cloud.documentai.v1beta1\x1a\x1cgoogle/api/annotations.proto"\x1e\n\x06Vertex\x12\t\n\x01x\x18\x01 \x01(\x05\x12\t\n\x01y\x18\x02 \x01(\x05"(\n\x10NormalizedVertex\x12\t\n\x01x\x18\x01 \x01(\x02\x12\t\n\x01y\x18\x02 \x01(\x02"\x99\x01\n\x0c\x42oundingPoly\x12\x39\n\x08vertices\x18\x01 \x03(\x0b\x32\'.google.cloud.documentai.v1beta1.Vertex\x12N\n\x13normalized_vertices\x18\x02 \x03(\x0b\x32\x31.google.cloud.documentai.v1beta1.NormalizedVertexB\x81\x01\n#com.google.cloud.documentai.v1beta1B\rGeometryProtoP\x01ZIgoogle.golang.org/genproto/googleapis/cloud/documentai/v1beta1;documentaib\x06proto3' + ), + dependencies=[google_dot_api_dot_annotations__pb2.DESCRIPTOR], +) + + +_VERTEX = _descriptor.Descriptor( + name="Vertex", + full_name="google.cloud.documentai.v1beta1.Vertex", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="x", + full_name="google.cloud.documentai.v1beta1.Vertex.x", + index=0, + number=1, + type=5, + cpp_type=1, + label=1, + 
has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="y", + full_name="google.cloud.documentai.v1beta1.Vertex.y", + index=1, + number=2, + type=5, + cpp_type=1, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=119, + serialized_end=149, +) + + +_NORMALIZEDVERTEX = _descriptor.Descriptor( + name="NormalizedVertex", + full_name="google.cloud.documentai.v1beta1.NormalizedVertex", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="x", + full_name="google.cloud.documentai.v1beta1.NormalizedVertex.x", + index=0, + number=1, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="y", + full_name="google.cloud.documentai.v1beta1.NormalizedVertex.y", + index=1, + number=2, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=151, + serialized_end=191, +) + + +_BOUNDINGPOLY = 
_descriptor.Descriptor( + name="BoundingPoly", + full_name="google.cloud.documentai.v1beta1.BoundingPoly", + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name="vertices", + full_name="google.cloud.documentai.v1beta1.BoundingPoly.vertices", + index=0, + number=1, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + _descriptor.FieldDescriptor( + name="normalized_vertices", + full_name="google.cloud.documentai.v1beta1.BoundingPoly.normalized_vertices", + index=1, + number=2, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + ), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax="proto3", + extension_ranges=[], + oneofs=[], + serialized_start=194, + serialized_end=347, +) + +_BOUNDINGPOLY.fields_by_name["vertices"].message_type = _VERTEX +_BOUNDINGPOLY.fields_by_name["normalized_vertices"].message_type = _NORMALIZEDVERTEX +DESCRIPTOR.message_types_by_name["Vertex"] = _VERTEX +DESCRIPTOR.message_types_by_name["NormalizedVertex"] = _NORMALIZEDVERTEX +DESCRIPTOR.message_types_by_name["BoundingPoly"] = _BOUNDINGPOLY +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +Vertex = _reflection.GeneratedProtocolMessageType( + "Vertex", + (_message.Message,), + dict( + DESCRIPTOR=_VERTEX, + __module__="google.cloud.documentai_v1beta1.proto.geometry_pb2", + __doc__="""A vertex represents a 2D point in the image. NOTE: the vertex + coordinates are in the same scale as the original image. + + + Attributes: + x: + X coordinate. + y: + Y coordinate.
+ """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.Vertex) + ), +) +_sym_db.RegisterMessage(Vertex) + +NormalizedVertex = _reflection.GeneratedProtocolMessageType( + "NormalizedVertex", + (_message.Message,), + dict( + DESCRIPTOR=_NORMALIZEDVERTEX, + __module__="google.cloud.documentai_v1beta1.proto.geometry_pb2", + __doc__="""A vertex represents a 2D point in the image. NOTE: the normalized + vertex coordinates are relative to the original image and range from 0 to 1. + + + Attributes: + x: + X coordinate. + y: + Y coordinate. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.NormalizedVertex) + ), +) +_sym_db.RegisterMessage(NormalizedVertex) + +BoundingPoly = _reflection.GeneratedProtocolMessageType( + "BoundingPoly", + (_message.Message,), + dict( + DESCRIPTOR=_BOUNDINGPOLY, + __module__="google.cloud.documentai_v1beta1.proto.geometry_pb2", + __doc__="""A bounding polygon for the detected image annotation. + + + Attributes: + vertices: + The bounding polygon vertices. + normalized_vertices: + The bounding polygon normalized vertices. + """, + # @@protoc_insertion_point(class_scope:google.cloud.documentai.v1beta1.BoundingPoly) + ), +) +_sym_db.RegisterMessage(BoundingPoly) + + +DESCRIPTOR._options = None +# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/documentai_v1beta1/proto/geometry_pb2_grpc.py b/google/cloud/documentai_v1beta1/proto/geometry_pb2_grpc.py new file mode 100644 index 00000000..07cb78fe --- /dev/null +++ b/google/cloud/documentai_v1beta1/proto/geometry_pb2_grpc.py @@ -0,0 +1,2 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +import grpc diff --git a/google/cloud/documentai_v1beta1/types.py b/google/cloud/documentai_v1beta1/types.py new file mode 100644 index 00000000..d3a4fe12 --- /dev/null +++ b/google/cloud/documentai_v1beta1/types.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from __future__ import absolute_import +import sys + +from google.api_core.protobuf_helpers import get_messages + +from google.cloud.documentai_v1beta1.proto import document_pb2 +from google.cloud.documentai_v1beta1.proto import document_understanding_pb2 +from google.cloud.documentai_v1beta1.proto import geometry_pb2 +from google.longrunning import operations_pb2 +from google.protobuf import any_pb2 +from google.protobuf import timestamp_pb2 +from google.protobuf import wrappers_pb2 +from google.rpc import status_pb2 +from google.type import color_pb2 + + +_shared_modules = [ + operations_pb2, + any_pb2, + timestamp_pb2, + wrappers_pb2, + status_pb2, + color_pb2, +] + +_local_modules = [document_pb2, document_understanding_pb2, geometry_pb2] + +names = [] + +for module in _shared_modules: # pragma: NO COVER + for name, message in get_messages(module).items(): + setattr(sys.modules[__name__], name, message) + names.append(name) +for module in _local_modules: + for name, message in get_messages(module).items(): + message.__module__ = "google.cloud.documentai_v1beta1.types" + setattr(sys.modules[__name__], name, message) + names.append(name) + + +__all__ = tuple(sorted(names)) diff --git a/noxfile.py b/noxfile.py new file mode 100644 index 00000000..a2eefbb6 --- /dev/null +++ b/noxfile.py @@ -0,0 +1,160 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Generated by synthtool. DO NOT EDIT! + +from __future__ import absolute_import +import os +import shutil + +import nox + + +LOCAL_DEPS = (os.path.join("..", "api_core"), os.path.join("..", "core")) +BLACK_VERSION = "black==19.3b0" +BLACK_PATHS = ["docs", "google", "tests", "noxfile.py", "setup.py"] + +if os.path.exists("samples"): + BLACK_PATHS.append("samples") + + +@nox.session(python="3.7") +def lint(session): + """Run linters. + + Returns a failure if the linters find linting errors or sufficiently + serious code quality issues. + """ + session.install("flake8", BLACK_VERSION, *LOCAL_DEPS) + session.run("black", "--check", *BLACK_PATHS) + session.run("flake8", "google", "tests") + + +@nox.session(python="3.6") +def blacken(session): + """Run black. + + Format code to uniform standard. + + This currently uses Python 3.6 due to the automated Kokoro run of synthtool. + That run uses an image that doesn't have 3.7 installed. Before updating this + check the state of the `gcp_ubuntu_config` we use for that Kokoro run. + """ + session.install(BLACK_VERSION) + session.run("black", *BLACK_PATHS) + + +@nox.session(python="3.7") +def lint_setup_py(session): + """Verify that setup.py is valid (including RST check).""" + session.install("docutils", "pygments") + session.run("python", "setup.py", "check", "--restructuredtext", "--strict") + + +def default(session): + # Install all test dependencies, then install this package in-place.
+ session.install("mock", "pytest", "pytest-cov") + for local_dep in LOCAL_DEPS: + session.install("-e", local_dep) + session.install("-e", ".") + + # Run py.test against the unit tests. + session.run( + "py.test", + "--quiet", + "--cov=google.cloud", + "--cov=tests.unit", + "--cov-append", + "--cov-config=.coveragerc", + "--cov-report=", + "--cov-fail-under=0", + os.path.join("tests", "unit"), + *session.posargs, + ) + + +@nox.session(python=["2.7", "3.5", "3.6", "3.7"]) +def unit(session): + """Run the unit test suite.""" + default(session) + + +@nox.session(python=["2.7", "3.7"]) +def system(session): + """Run the system test suite.""" + system_test_path = os.path.join("tests", "system.py") + system_test_folder_path = os.path.join("tests", "system") + # Sanity check: Only run tests if the environment variable is set. + if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""): + session.skip("Credentials must be set via environment variable") + + system_test_exists = os.path.exists(system_test_path) + system_test_folder_exists = os.path.exists(system_test_folder_path) + # Sanity check: only run tests if found. + if not system_test_exists and not system_test_folder_exists: + session.skip("System tests were not found") + + # Use pre-release gRPC for system tests. + session.install("--pre", "grpcio") + + # Install all test dependencies, then install this package into the + # virtualenv's dist-packages. + session.install("mock", "pytest") + for local_dep in LOCAL_DEPS: + session.install("-e", local_dep) + session.install("-e", "../test_utils/") + session.install("-e", ".") + + # Run py.test against the system tests. + if system_test_exists: + session.run("py.test", "--quiet", system_test_path, *session.posargs) + if system_test_folder_exists: + session.run("py.test", "--quiet", system_test_folder_path, *session.posargs) + + +@nox.session(python="3.7") +def cover(session): + """Run the final coverage report. 
+ + This outputs the coverage report aggregating coverage from the unit + test runs (not system test runs), and then erases coverage data. + """ + session.install("coverage", "pytest-cov") + session.run("coverage", "report", "--show-missing", "--fail-under=100") + + session.run("coverage", "erase") + + +@nox.session(python="3.7") +def docs(session): + """Build the docs for this library.""" + + session.install("-e", ".") + session.install("sphinx", "alabaster", "recommonmark") + + shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) + session.run( + "sphinx-build", + "-W", # warnings as errors + "-T", # show full traceback on exception + "-N", # no colors + "-b", + "html", + "-d", + os.path.join("docs", "_build", "doctrees", ""), + os.path.join("docs", ""), + os.path.join("docs", "_build", "html", ""), + ) diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..3bd55550 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,3 @@ +# Generated by synthtool. DO NOT EDIT! +[bdist_wheel] +universal = 1 diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..25a7befc --- /dev/null +++ b/setup.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import io +import os + +import setuptools + +name = "google-cloud-documentai" +description = "Cloud Document AI API API client library" +version = "0.1.0" +release_status = "Development Status :: 3 - Alpha" +dependencies = [ + "google-api-core[grpc] >= 1.14.0, < 2.0.0dev", + 'enum34; python_version < "3.4"', +] + +package_root = os.path.abspath(os.path.dirname(__file__)) + +readme_filename = os.path.join(package_root, "README.rst") +with io.open(readme_filename, encoding="utf-8") as readme_file: + readme = readme_file.read() + +packages = [ + package for package in setuptools.find_packages() if package.startswith("google") +] + +namespaces = ["google"] +if "google.cloud" in packages: + namespaces.append("google.cloud") + +setuptools.setup( + name=name, + version=version, + description=description, + long_description=readme, + author="Google LLC", + author_email="googleapis-packages@google.com", + license="Apache 2.0", + url="https://github.com/GoogleCloudPlatform/google-cloud-python", + classifiers=[ + release_status, + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Operating System :: OS Independent", + "Topic :: Internet", + ], + platforms="Posix; MacOS X; Windows", + packages=packages, + namespace_packages=namespaces, + install_requires=dependencies, + python_requires=">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*", + include_package_data=True, + zip_safe=False, +) diff --git a/synth.metadata b/synth.metadata new file mode 100644 index 00000000..43598f0c --- /dev/null +++ b/synth.metadata @@ -0,0 +1,39 @@ +{ + "updateTime": "2019-11-06T22:44:32.981143Z", + "sources": [ + { + "generator": { + "name": "artman", + "version": "0.41.0", + "dockerImage": 
"googleapis/artman@sha256:75b38a3b073a7b243545f2332463096624c802bb1e56b8cb6f22ba1ecd325fa9" + } + }, + { + "git": { + "name": "googleapis", + "remote": "https://github.com/googleapis/googleapis.git", + "sha": "2275670a746ab2bc03ebba0d914b45320ea15af4", + "internalRef": "278922329" + } + }, + { + "template": { + "name": "python_library", + "origin": "synthtool.gcp", + "version": "2019.10.17" + } + } + ], + "destinations": [ + { + "client": { + "source": "googleapis", + "apiName": "documentai", + "apiVersion": "v1beta1", + "language": "python", + "generator": "gapic", + "config": "google/cloud/documentai/artman_documentai_v1beta1.yaml" + } + } + ] +} \ No newline at end of file diff --git a/synth.py b/synth.py new file mode 100644 index 00000000..9284c3cb --- /dev/null +++ b/synth.py @@ -0,0 +1,53 @@ +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""This script is used to synthesize generated parts of this library.""" + +import synthtool as s +import synthtool.gcp as gcp +import logging + +logging.basicConfig(level=logging.DEBUG) + +gapic = gcp.GAPICGenerator() +common = gcp.CommonTemplates() + +# ---------------------------------------------------------------------------- +# Generate document AI GAPIC layer +# ---------------------------------------------------------------------------- +library = gapic.py_library("documentai", "v1beta1", include_protos=True) + +excludes = ["README.rst", "nox.py", "setup.py", "docs/index.rst"] +s.move(library, excludes=excludes) + +# Fix bad docstring with stray pipe characters +s.replace( + "google/cloud/**/document_understanding_pb2.py", + """\| Specifies a known document type for deeper structure + detection\. Valid values are currently "general" and + "invoice"\. If not provided, "general" \| is used as default. + If any other value is given, the request is rejected\.""", + """Specifies a known document type for deeper structure + detection. Valid values are currently "general" and + "invoice". If not provided, "general" is used as default. 
+ If any other value is given, the request is rejected.""", +) + +# ---------------------------------------------------------------------------- +# Add templated files +# ---------------------------------------------------------------------------- +templated_files = common.py_library(unit_cov_level=97, cov_level=100) +s.move(templated_files) + +s.shell.run(["nox", "-s", "blacken"], hide_output=False) diff --git a/tests/unit/gapic/v1beta1/test_document_understanding_service_client_v1beta1.py b/tests/unit/gapic/v1beta1/test_document_understanding_service_client_v1beta1.py new file mode 100644 index 00000000..eb075a52 --- /dev/null +++ b/tests/unit/gapic/v1beta1/test_document_understanding_service_client_v1beta1.py @@ -0,0 +1,118 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Unit tests."""

import mock
import pytest

from google.rpc import status_pb2

from google.cloud import documentai_v1beta1
from google.cloud.documentai_v1beta1.proto import document_understanding_pb2
from google.longrunning import operations_pb2


class MultiCallableStub(object):
    """Stub for the grpc.UnaryUnaryMultiCallable interface.

    Each call is recorded on the owning channel stub and answered with the
    next canned response held by that channel stub, if any.
    """

    def __init__(self, method, channel_stub):
        self.method = method
        self.channel_stub = channel_stub

    def __call__(self, request, timeout=None, metadata=None, credentials=None):
        # Record the call so tests can assert on what was sent.
        self.channel_stub.requests.append((self.method, request))

        response = None
        if self.channel_stub.responses:
            # Canned responses are consumed from the end of the list.
            response = self.channel_stub.responses.pop()

        # A canned exception instance is raised rather than returned.
        if isinstance(response, Exception):
            raise response

        if response:
            return response


class ChannelStub(object):
    """Stub for the grpc.Channel interface.

    Args:
        responses: Optional list of canned responses (or exception
            instances) handed out by the callables this channel creates.
            The list is stored by reference and consumed in place. When
            omitted, each instance gets its own empty list.
    """

    def __init__(self, responses=None):
        # A ``[]`` default would be created once and shared by every
        # instance built without arguments; use a ``None`` sentinel so that
        # state cannot leak between tests.
        self.responses = [] if responses is None else responses
        self.requests = []

    def unary_unary(self, method, request_serializer=None, response_deserializer=None):
        """Return a callable stub for ``method`` bound to this channel."""
        return MultiCallableStub(method, self)


class CustomException(Exception):
    pass


class TestDocumentUnderstandingServiceClient(object):
    def test_batch_process_documents(self):
        """A completed operation yields the packed response as its result."""
        # Setup Expected Response
        expected_response = document_understanding_pb2.BatchProcessDocumentsResponse()
        operation = operations_pb2.Operation(
            name="operations/test_batch_process_documents", done=True
        )
        operation.response.Pack(expected_response)

        # Mock the API response
        channel = ChannelStub(responses=[operation])
        patch = mock.patch("google.api_core.grpc_helpers.create_channel")
        with patch as create_channel:
            create_channel.return_value = channel
            client = documentai_v1beta1.DocumentUnderstandingServiceClient()

        # Setup Request
        requests = []

        response = client.batch_process_documents(requests)
        result = response.result()
        assert expected_response == result

        # Exactly one RPC must have been issued, carrying the request built
        # from the arguments above.
        assert len(channel.requests) == 1
        expected_request = document_understanding_pb2.BatchProcessDocumentsRequest(
            requests=requests
        )
        actual_request = channel.requests[0][1]
        assert expected_request == actual_request

    def test_batch_process_documents_exception(self):
        """A completed operation carrying an error surfaces it as an exception."""
        # Setup Response
        error = status_pb2.Status()
        operation = operations_pb2.Operation(
            name="operations/test_batch_process_documents_exception", done=True
        )
        operation.error.CopyFrom(error)

        # Mock the API response
        channel = ChannelStub(responses=[operation])
        patch = mock.patch("google.api_core.grpc_helpers.create_channel")
        with patch as create_channel:
            create_channel.return_value = channel
            client = documentai_v1beta1.DocumentUnderstandingServiceClient()

        # Setup Request
        requests = []

        response = client.batch_process_documents(requests)
        exception = response.exception()
        assert exception.errors[0] == error