From 7f7f541bcf4d2f42b2f619c2ceb45f53c5d0e9eb Mon Sep 17 00:00:00 2001 From: Mike <45373284+munkhuushmgl@users.noreply.github.com> Date: Wed, 2 Dec 2020 14:26:12 -0800 Subject: [PATCH] fix: added if statement to filter out dir blob files (#63) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #62 🦕 The current version of the sample doesn't check whether a blob is a directory or a .json file. It then downloads the blob as bytes and tries to parse JSON from the directory blob, which causes an error. --- samples/snippets/batch_process_documents_sample_v1beta3.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/samples/snippets/batch_process_documents_sample_v1beta3.py b/samples/snippets/batch_process_documents_sample_v1beta3.py index 6e22e0ea..ea6c01e3 100644 --- a/samples/snippets/batch_process_documents_sample_v1beta3.py +++ b/samples/snippets/batch_process_documents_sample_v1beta3.py @@ -78,9 +78,12 @@ def batch_process_documents( for i, blob in enumerate(blob_list): # Download the contents of this blob as a bytes object. + if ".json" not in blob.name: + return + # Only parses JSON files blob_as_bytes = blob.download_as_bytes() - document = documentai.types.Document.from_json(blob_as_bytes) + document = documentai.types.Document.from_json(blob_as_bytes) print(f"Fetched file {i + 1}") # For a full list of Document object attributes, please reference this page: https://googleapis.dev/python/documentai/latest/_modules/google/cloud/documentai_v1beta3/types/document.html#Document