Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

perf: add size parameter for load table from dataframe and json methods #280

Merged
8 commits merged on Oct 14, 2020
7 changes: 5 additions & 2 deletions google/cloud/bigquery/client.py
Expand Up @@ -2237,11 +2237,13 @@ def load_table_from_dataframe(
dataframe.to_parquet(tmppath, compression=parquet_compression)

with open(tmppath, "rb") as parquet_file:
file_size = os.path.getsize(tmppath)
return self.load_table_from_file(
parquet_file,
destination,
num_retries=num_retries,
rewind=True,
size=file_size,
job_id=job_id,
job_id_prefix=job_id_prefix,
location=location,
Expand Down Expand Up @@ -2343,11 +2345,12 @@ def load_table_from_json(
destination = _table_arg_to_table_ref(destination, default_project=self.project)

data_str = u"\n".join(json.dumps(item) for item in json_rows)
data_file = io.BytesIO(data_str.encode())

encoded_str = data_str.encode()
data_file = io.BytesIO(encoded_str)
return self.load_table_from_file(
data_file,
destination,
size=len(encoded_str),
num_retries=num_retries,
job_id=job_id,
job_id_prefix=job_id_prefix,
Expand Down
15 changes: 15 additions & 0 deletions tests/unit/test_client.py
Expand Up @@ -7482,6 +7482,7 @@ def test_load_table_from_dataframe(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=None,
Expand Down Expand Up @@ -7525,6 +7526,7 @@ def test_load_table_from_dataframe_w_client_location(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -7577,6 +7579,7 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -7631,6 +7634,7 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -7723,6 +7727,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -7782,6 +7787,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -7827,6 +7833,7 @@ def test_load_table_from_dataframe_unknown_table(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=None,
Expand Down Expand Up @@ -7867,6 +7874,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -7913,6 +7921,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -7973,6 +7982,7 @@ def test_load_table_from_dataframe_struct_fields(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -8046,6 +8056,7 @@ def test_load_table_from_dataframe_w_partial_schema(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -8139,6 +8150,7 @@ def test_load_table_from_dataframe_w_partial_schema_missing_types(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -8251,6 +8263,7 @@ def test_load_table_from_dataframe_w_nulls(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -8302,6 +8315,7 @@ def test_load_table_from_json_basic_use(self):
client,
mock.ANY,
self.TABLE_REF,
size=mock.ANY,
num_retries=_DEFAULT_NUM_RETRIES,
job_id=mock.ANY,
job_id_prefix=None,
Expand Down Expand Up @@ -8353,6 +8367,7 @@ def test_load_table_from_json_non_default_args(self):
client,
mock.ANY,
self.TABLE_REF,
size=mock.ANY,
num_retries=_DEFAULT_NUM_RETRIES,
job_id=mock.ANY,
job_id_prefix=None,
Expand Down