From b804373277c1c1baa3370ebfb4783503b7ff360f Mon Sep 17 00:00:00 2001
From: Jim Fulton
Date: Thu, 22 Jul 2021 14:36:30 -0400
Subject: [PATCH] fix: use a larger chunk size when loading data (#799)

The chunk size used for data uploads was too small (1 MB). Now it's 100 MB.

---
 google/cloud/bigquery/client.py |  2 +-
 tests/unit/test_client.py       | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py
index 273cf5f77..742ecac2e 100644
--- a/google/cloud/bigquery/client.py
+++ b/google/cloud/bigquery/client.py
@@ -98,7 +98,7 @@
 from google.cloud.bigquery.table import RowIterator
 
 
-_DEFAULT_CHUNKSIZE = 1048576  # 1024 * 1024 B = 1 MB
+_DEFAULT_CHUNKSIZE = 100 * 1024 * 1024  # 100 MB
 _MAX_MULTIPART_SIZE = 5 * 1024 * 1024
 _DEFAULT_NUM_RETRIES = 6
 _BASE_UPLOAD_TEMPLATE = "{host}/upload/bigquery/v2/projects/{project}/jobs?uploadType="
diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py
index c1aba9b67..535685511 100644
--- a/tests/unit/test_client.py
+++ b/tests/unit/test_client.py
@@ -8076,3 +8076,23 @@ def test_schema_to_json_with_file_object(self):
         client.schema_to_json(schema_list, fake_file)
 
         assert file_content == json.loads(fake_file.getvalue())
+
+
+def test_upload_chunksize(client):
+    with mock.patch("google.cloud.bigquery.client.ResumableUpload") as RU:
+        upload = RU.return_value
+
+        upload.finished = False
+
+        def transmit_next_chunk(transport):
+            upload.finished = True
+            result = mock.MagicMock()
+            result.json.return_value = {}
+            return result
+
+        upload.transmit_next_chunk = transmit_next_chunk
+        f = io.BytesIO()
+        client.load_table_from_file(f, "foo.bar")
+
+        chunk_size = RU.call_args_list[0][0][1]
+        assert chunk_size == 100 * (1 << 20)
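
Why the constant matters: the test pins the second positional argument
passed to ResumableUpload (RU.call_args_list[0][0][1]), which is the chunk
size the client hands to google-resumable-media, and each call to
transmit_next_chunk costs one HTTP round trip. A minimal standalone sketch
of the savings, assuming roughly one request per chunk (the file sizes are
made up for illustration; this snippet is not part of the patch):

    import math

    # Request count for a resumable upload is roughly
    # ceil(total_bytes / chunk_size): one round trip per chunk.
    OLD_CHUNKSIZE = 1024 * 1024          # 1 MB, the old default
    NEW_CHUNKSIZE = 100 * 1024 * 1024    # 100 MB, the new default

    for total_bytes in (10 * 1024 ** 2, 1024 ** 3, 10 * 1024 ** 3):
        old = math.ceil(total_bytes / OLD_CHUNKSIZE)
        new = math.ceil(total_bytes / NEW_CHUNKSIZE)
        print(f"{total_bytes // 1024 ** 2:>6} MB upload: "
              f"{old} chunk requests before vs {new} after")

For a 1 GB load job that is 1024 chunk requests down to 11, which is the
per-chunk overhead the commit message is targeting.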