Skip to content
This repository has been archived by the owner on Sep 20, 2023. It is now read-only.

fix: added if statement to filter out dir blob files #63

Merged
merged 5 commits into from Dec 2, 2020
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
35 changes: 18 additions & 17 deletions samples/snippets/batch_process_documents_sample_v1beta3.py
Expand Up @@ -78,23 +78,24 @@ def batch_process_documents(

for i, blob in enumerate(blob_list):
# Download the contents of this blob as a bytes object.
blob_as_bytes = blob.download_as_bytes()
document = documentai.types.Document.from_json(blob_as_bytes)

print(f"Fetched file {i + 1}")

# For a full list of Document object attributes, please reference this page: https://googleapis.dev/python/documentai/latest/_modules/google/cloud/documentai_v1beta3/types/document.html#Document

# Read the text recognition output from the processor
for page in document.pages:
for form_field in page.form_fields:
field_name = get_text(form_field.field_name, document)
field_value = get_text(form_field.field_value, document)
print("Extracted key value pair:")
print(f"\t{field_name}, {field_value}")
for paragraph in document.pages:
paragraph_text = get_text(paragraph.layout, document)
print(f"Paragraph text:\n{paragraph_text}")
if ".json" in blob.name:
munkhuushmgl marked this conversation as resolved.
Show resolved Hide resolved
blob_as_bytes = blob.download_as_bytes()

document = documentai.types.Document.from_json(blob_as_bytes)
print(f"Fetched file {i + 1}")

# For a full list of Document object attributes, please reference this page: https://googleapis.dev/python/documentai/latest/_modules/google/cloud/documentai_v1beta3/types/document.html#Document

# Read the text recognition output from the processor
for page in document.pages:
for form_field in page.form_fields:
field_name = get_text(form_field.field_name, document)
field_value = get_text(form_field.field_value, document)
print("Extracted key value pair:")
print(f"\t{field_name}, {field_value}")
for paragraph in document.pages:
paragraph_text = get_text(paragraph.layout, document)
print(f"Paragraph text:\n{paragraph_text}")


# Extract shards from the text field
Expand Down