From 3be78b737add7111e24e912cd02fc6df75a07de6 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Wed, 14 Oct 2020 01:42:40 -0400 Subject: [PATCH] perf: add size parameter for load table from dataframe and json methods (#280) * feat: add size parameter for load from dataframe and json * perf: calculate length of encoded string --- google/cloud/bigquery/client.py | 7 +++++-- tests/unit/test_client.py | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 2afffab80..b7e082daa 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -2237,11 +2237,13 @@ def load_table_from_dataframe( dataframe.to_parquet(tmppath, compression=parquet_compression) with open(tmppath, "rb") as parquet_file: + file_size = os.path.getsize(tmppath) return self.load_table_from_file( parquet_file, destination, num_retries=num_retries, rewind=True, + size=file_size, job_id=job_id, job_id_prefix=job_id_prefix, location=location, @@ -2343,11 +2345,12 @@ def load_table_from_json( destination = _table_arg_to_table_ref(destination, default_project=self.project) data_str = u"\n".join(json.dumps(item) for item in json_rows) - data_file = io.BytesIO(data_str.encode()) - + encoded_str = data_str.encode() + data_file = io.BytesIO(encoded_str) return self.load_table_from_file( data_file, destination, + size=len(encoded_str), num_retries=num_retries, job_id=job_id, job_id_prefix=job_id_prefix, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 737c1aef7..52e00d7c7 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -7482,6 +7482,7 @@ def test_load_table_from_dataframe(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=None, @@ -7525,6 +7526,7 @@ def test_load_table_from_dataframe_w_client_location(self): 
self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7577,6 +7579,7 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7631,6 +7634,7 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7723,6 +7727,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7782,6 +7787,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7827,6 +7833,7 @@ def test_load_table_from_dataframe_unknown_table(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=None, @@ -7867,6 +7874,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7913,6 +7921,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7973,6 +7982,7 @@ def test_load_table_from_dataframe_struct_fields(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -8046,6 
+8056,7 @@ def test_load_table_from_dataframe_w_partial_schema(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -8139,6 +8150,7 @@ def test_load_table_from_dataframe_w_partial_schema_missing_types(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -8251,6 +8263,7 @@ def test_load_table_from_dataframe_w_nulls(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -8302,6 +8315,7 @@ def test_load_table_from_json_basic_use(self): client, mock.ANY, self.TABLE_REF, + size=mock.ANY, num_retries=_DEFAULT_NUM_RETRIES, job_id=mock.ANY, job_id_prefix=None, @@ -8353,6 +8367,7 @@ def test_load_table_from_json_non_default_args(self): client, mock.ANY, self.TABLE_REF, + size=mock.ANY, num_retries=_DEFAULT_NUM_RETRIES, job_id=mock.ANY, job_id_prefix=None,