Skip to content

Commit

Permalink
perf: add size parameter for load table from dataframe and json metho…
Browse files Browse the repository at this point in the history
…ds (#280)

* feat: add size parameter for load from dataframe and json

* pref: calculate length of encoded string
  • Loading branch information
HemangChothani committed Oct 14, 2020
1 parent c69cd50 commit 3be78b7
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 2 deletions.
7 changes: 5 additions & 2 deletions google/cloud/bigquery/client.py
Expand Up @@ -2237,11 +2237,13 @@ def load_table_from_dataframe(
dataframe.to_parquet(tmppath, compression=parquet_compression)

with open(tmppath, "rb") as parquet_file:
file_size = os.path.getsize(tmppath)
return self.load_table_from_file(
parquet_file,
destination,
num_retries=num_retries,
rewind=True,
size=file_size,
job_id=job_id,
job_id_prefix=job_id_prefix,
location=location,
Expand Down Expand Up @@ -2343,11 +2345,12 @@ def load_table_from_json(
destination = _table_arg_to_table_ref(destination, default_project=self.project)

data_str = u"\n".join(json.dumps(item) for item in json_rows)
data_file = io.BytesIO(data_str.encode())

encoded_str = data_str.encode()
data_file = io.BytesIO(encoded_str)
return self.load_table_from_file(
data_file,
destination,
size=len(encoded_str),
num_retries=num_retries,
job_id=job_id,
job_id_prefix=job_id_prefix,
Expand Down
15 changes: 15 additions & 0 deletions tests/unit/test_client.py
Expand Up @@ -7482,6 +7482,7 @@ def test_load_table_from_dataframe(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=None,
Expand Down Expand Up @@ -7525,6 +7526,7 @@ def test_load_table_from_dataframe_w_client_location(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -7577,6 +7579,7 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -7631,6 +7634,7 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -7723,6 +7727,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -7782,6 +7787,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -7827,6 +7833,7 @@ def test_load_table_from_dataframe_unknown_table(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=None,
Expand Down Expand Up @@ -7867,6 +7874,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -7913,6 +7921,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -7973,6 +7982,7 @@ def test_load_table_from_dataframe_struct_fields(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -8046,6 +8056,7 @@ def test_load_table_from_dataframe_w_partial_schema(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -8139,6 +8150,7 @@ def test_load_table_from_dataframe_w_partial_schema_missing_types(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -8251,6 +8263,7 @@ def test_load_table_from_dataframe_w_nulls(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -8302,6 +8315,7 @@ def test_load_table_from_json_basic_use(self):
client,
mock.ANY,
self.TABLE_REF,
size=mock.ANY,
num_retries=_DEFAULT_NUM_RETRIES,
job_id=mock.ANY,
job_id_prefix=None,
Expand Down Expand Up @@ -8353,6 +8367,7 @@ def test_load_table_from_json_non_default_args(self):
client,
mock.ANY,
self.TABLE_REF,
size=mock.ANY,
num_retries=_DEFAULT_NUM_RETRIES,
job_id=mock.ANY,
job_id_prefix=None,
Expand Down

0 comments on commit 3be78b7

Please sign in to comment.