fix: use a larger chunk size when loading data (#799)
* The chunk size used for data uploads was too small (1 MB); it is now 100 MB.
jimfulton committed Jul 22, 2021
1 parent f0990f2 commit b804373
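
Each chunk of a resumable upload travels in its own HTTP request, so this constant directly sets the number of round trips a load job makes. A minimal sketch of the arithmetic, using a hypothetical 1 GiB upload (the file size is illustrative, not from the commit):

    import math

    ONE_MB = 1024 * 1024
    file_size = 1024 * ONE_MB  # hypothetical 1 GiB payload

    # Requests needed at the old default vs. the new one.
    old_requests = math.ceil(file_size / ONE_MB)          # 1024
    new_requests = math.ceil(file_size / (100 * ONE_MB))  # 11

    print(old_requests, new_requests)  # 1024 11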
Showing 2 changed files with 21 additions and 1 deletion.
google/cloud/bigquery/client.py (2 changes: 1 addition & 1 deletion)
@@ -98,7 +98,7 @@
 from google.cloud.bigquery.table import RowIterator
 
 
-_DEFAULT_CHUNKSIZE = 1048576  # 1024 * 1024 B = 1 MB
+_DEFAULT_CHUNKSIZE = 100 * 1024 * 1024  # 100 MB
 _MAX_MULTIPART_SIZE = 5 * 1024 * 1024
 _DEFAULT_NUM_RETRIES = 6
 _BASE_UPLOAD_TEMPLATE = "{host}/upload/bigquery/v2/projects/{project}/jobs?uploadType="
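
For context on where the constant is consumed: the client hands it to google-resumable-media's ResumableUpload and then calls transmit_next_chunk until the upload reports itself finished. A simplified sketch of that flow, assuming a transport and upload_url that the real client derives internally (this is not the library's exact code):

    from google.resumable_media.requests import ResumableUpload

    _DEFAULT_CHUNKSIZE = 100 * 1024 * 1024  # 100 MB, per this commit

    def upload_stream(transport, upload_url, stream, metadata):
        # Sketch only: the real client also wires in retries, timeouts,
        # and request headers.
        upload = ResumableUpload(upload_url, _DEFAULT_CHUNKSIZE, headers={})
        upload.initiate(
            transport, stream, metadata, "application/octet-stream", stream_final=False
        )
        response = None
        while not upload.finished:
            # One HTTP request per chunk of _DEFAULT_CHUNKSIZE bytes.
            response = upload.transmit_next_chunk(transport)
        return response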
tests/unit/test_client.py (20 changes: 20 additions & 0 deletions)
@@ -8076,3 +8076,23 @@ def test_schema_to_json_with_file_object(self):
 
     client.schema_to_json(schema_list, fake_file)
     assert file_content == json.loads(fake_file.getvalue())
+
+
+def test_upload_chunksize(client):
+    with mock.patch("google.cloud.bigquery.client.ResumableUpload") as RU:
+        upload = RU.return_value
+
+        upload.finished = False
+
+        def transmit_next_chunk(transport):
+            upload.finished = True
+            result = mock.MagicMock()
+            result.json.return_value = {}
+            return result
+
+        upload.transmit_next_chunk = transmit_next_chunk
+        f = io.BytesIO()
+        client.load_table_from_file(f, "foo.bar")
+
+        chunk_size = RU.call_args_list[0][0][1]
+        assert chunk_size == 100 * (1 << 20)
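The test patches ResumableUpload so nothing touches the network: the fake transmit_next_chunk marks the upload finished after a single call, and the assertion inspects RU.call_args_list[0][0][1], the second positional argument passed to the ResumableUpload constructor. Since 100 * (1 << 20) equals 100 * 1024 * 1024, the test pins the new 100 MB default.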
