fix: load table from json #1248

Closed

Commits (39)
46c54eb
bug: load table from json
abecerrilsalas May 11, 2022
c237d2b
Merge branch 'main' into load_table_from_json_bug
parthea May 12, 2022
1975087
bug: test added
abecerrilsalas May 13, 2022
1d06dcd
bug: trying
abecerrilsalas May 13, 2022
74efb3a
Merge branch 'load_table_from_json_bug' of https://github.com/abecerr…
abecerrilsalas May 13, 2022
92915d7
bug: fixing test
abecerrilsalas May 16, 2022
04654c0
Merge branch 'main' into load_table_from_json_bug
abecerrilsalas May 16, 2022
aa1eba7
fix: testing check
abecerrilsalas May 17, 2022
7a6b5c4
Merge branch 'load_table_from_json_bug' of https://github.com/abecerr…
abecerrilsalas May 17, 2022
f21b6cf
fix: test
abecerrilsalas May 17, 2022
d6cf378
fix: check
abecerrilsalas May 17, 2022
8f4d034
fix: nox check
abecerrilsalas May 18, 2022
8d735e0
feat: fixing imports
abecerrilsalas May 18, 2022
6afe300
fix: two tests added
abecerrilsalas May 19, 2022
66d4581
fix: test failure
abecerrilsalas May 20, 2022
9a5d71f
fix: fixed tests
abecerrilsalas May 20, 2022
dc9838d
Merge branch 'main' into load_table_from_json_bug
abecerrilsalas May 20, 2022
1d64518
fix: tests with nox
abecerrilsalas May 20, 2022
d883af3
Merge branch 'load_table_from_json_bug' of https://github.com/abecerr…
abecerrilsalas May 20, 2022
0535084
Merge branch 'main' into load_table_from_json_bug
abecerrilsalas May 20, 2022
68511b2
Merge branch 'main' into load_table_from_json_bug
abecerrilsalas May 24, 2022
b3faa95
fix: trying
abecerrilsalas May 25, 2022
3ca4dc8
Merge branch 'load_table_from_json_bug' of https://github.com/abecerr…
abecerrilsalas May 25, 2022
e1f9187
fix: getting rid of unused imports
abecerrilsalas May 25, 2022
1c3d6a8
fix: fixed tests
abecerrilsalas May 27, 2022
287a282
Merge branch 'main' into load_table_from_json_bug
abecerrilsalas May 27, 2022
0fb771e
Merge branch 'main' into load_table_from_json_bug
abecerrilsalas May 31, 2022
efdd87b
fix: code update
abecerrilsalas Jun 1, 2022
51e79b8
Merge branch 'load_table_from_json_bug' of https://github.com/abecerr…
abecerrilsalas Jun 1, 2022
04c9fc8
fix: test fix
abecerrilsalas Jun 1, 2022
6f8bcc1
fix: blacken
abecerrilsalas Jun 1, 2022
ebe3348
Merge branch 'main' into load_table_from_json_bug
abecerrilsalas Jun 1, 2022
57b371a
fix: updated test name
abecerrilsalas Jun 2, 2022
28f728e
fix: fixed one test
abecerrilsalas Jun 3, 2022
e83d3bf
fix: table_schema
abecerrilsalas Jun 3, 2022
0d075b3
fix: test and client fix
abecerrilsalas Jun 6, 2022
81d5345
Merge branch 'main' into load_table_from_json_bug
abecerrilsalas Jun 6, 2022
2dc5795
Merge branch 'main' into load_table_from_json_bug
chalmerlowe May 22, 2023
5c3e2b2
Merge branch 'main' into load_table_from_json_bug
chalmerlowe Jul 20, 2023
14 changes: 11 additions & 3 deletions google/cloud/bigquery/client.py
@@ -44,12 +44,14 @@
import uuid
import warnings


from google import resumable_media # type: ignore
from google.resumable_media.requests import MultipartUpload # type: ignore
from google.resumable_media.requests import ResumableUpload

import google.api_core.client_options
import google.api_core.exceptions as core_exceptions
from google.api_core.exceptions import NotFound
from google.api_core.iam import Policy
from google.api_core import page_iterator
from google.api_core import retry as retries
@@ -108,6 +110,7 @@
from google.cloud.bigquery.format_options import ParquetOptions
from google.cloud.bigquery import _helpers


TimeoutType = Union[float, None]
ResumableTimeoutType = Union[
    None, float, Tuple[float, float]
@@ -2757,7 +2760,14 @@ def load_table_from_json(

        job_config.source_format = job.SourceFormat.NEWLINE_DELIMITED_JSON

        if job_config.schema is None:
        # make table id
        # table_id = "your-project.your_dataset.your_table"
        destination = _table_arg_to_table_ref(destination, default_project=self.project)
Reviewer comment (Contributor):
Why are we calling _table_arg_to_table_ref() here?
The very first line of get_table() calls that function.

def get_table(
    self,
    table: Union[Table, TableReference, TableListItem, str],
    retry: retries.Retry = DEFAULT_RETRY,
    timeout: TimeoutType = DEFAULT_TIMEOUT,
) -> Table:
    """ REMOVED DOCSTRING FOR SPACE """
    
    table_ref = _table_arg_to_table_ref(table, default_project=self.project)
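
A quick illustration of that point, as a standalone sketch (the table id below is a placeholder, not from this PR): get_table() accepts either a plain "project.dataset.table" string or a TableReference and normalizes it itself, so an explicit conversion before the call is not needed for the lookup.

from google.cloud import bigquery

client = bigquery.Client()

# get_table() normalizes its argument internally, so both forms work.
table_a = client.get_table("your-project.your_dataset.your_table")

table_ref = bigquery.TableReference.from_string(
    "your-project.your_dataset.your_table", default_project=client.project
)
table_b = client.get_table(table_ref)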

        # check if table exists
        try:
            self.get_table(destination)  # Make an API request.
            job_config.autodetect = False
        except NotFound:
            job_config.autodetect = True

        if project is None:
@@ -2766,8 +2776,6 @@ def load_table_from_json(
        if location is None:
            location = self.location

        destination = _table_arg_to_table_ref(destination, default_project=self.project)

        data_str = "\n".join(json.dumps(item, ensure_ascii=False) for item in json_rows)
        encoded_str = data_str.encode()
        data_file = io.BytesIO(encoded_str)
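For context, a minimal usage sketch of the behavior this hunk aims at (the project, dataset, and table names below are placeholders): when the destination table already exists, the client should skip schema autodetection and rely on the table's existing schema; when it does not exist, autodetection is enabled.

from google.cloud import bigquery

client = bigquery.Client()
table_id = "your-project.your_dataset.your_table"  # assumed to already exist

json_rows = [{"age": "18"}, {"age": "79"}]

# No schema and no LoadJobConfig passed: with this change the load should keep
# the existing table's schema instead of autodetecting one from the JSON rows.
load_job = client.load_table_from_json(json_rows, table_id)
load_job.result()

table = client.get_table(table_id)
print(table.schema, table.num_rows)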
72 changes: 70 additions & 2 deletions tests/system/test_client.py
@@ -832,8 +832,18 @@ def test_load_table_from_json_basic_use(self):
        )

        json_rows = [
            {"name": "John", "age": 18, "birthday": "2001-10-15", "is_awesome": False},
            {"name": "Chuck", "age": 79, "birthday": "1940-03-10", "is_awesome": True},
            {
                "name": "John",
                "age": "18",
                "birthday": "2001-10-15",
                "is_awesome": False,
            },
            {
                "name": "Chuck",
                "age": "79",
                "birthday": "1940-03-10",
                "is_awesome": True,
            },
        ]

        dataset_id = _make_dataset_id("bq_system_test")
@@ -859,6 +869,64 @@ def test_load_table_from_json_basic_use(self):
        self.assertEqual(tuple(table.schema), table_schema)
        self.assertEqual(table.num_rows, 2)

    def test_load_table_from_json_table_exists(self):
        table_schema = (bigquery.SchemaField("age", "STRING", mode="REQUIRED"),)
        json_rows = [
            {
                "age": "18",
            },
            {
                "age": "79",
            },
        ]

        dataset_id = _make_dataset_id("bq_system_test")
        self.temp_dataset(dataset_id)
        table_id = "{}.{}.load_table_from_json_bug_check_with_schema".format(
            Config.CLIENT.project, dataset_id
        )

        # create an empty table
        table = helpers.retry_403(Config.CLIENT.create_table)(Table(table_id))
Reviewer comment (Contributor):
The test name and comment say this table should be created with a schema, but I don't see the schema here.
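
What the reviewer appears to be asking for, as a hedged sketch reusing the names from this test (Config, helpers, Table, table_id, table_schema): attach the schema when creating the table, so the pre-existing table actually carries it.

# Hypothetical adjustment matching the test name: create the table with a schema.
table = helpers.retry_403(Config.CLIENT.create_table)(
    Table(table_id, schema=table_schema)
)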

        self.to_delete.insert(0, table)

        job_config = bigquery.LoadJobConfig(schema=table_schema)
Reviewer comment (Contributor):
Do we need to provide schema here?

        load_job = Config.CLIENT.load_table_from_json(
            json_rows, table_id, job_config=job_config
        )
        load_job.result()

        table = Config.CLIENT.get_table(table)
        fetched = self._fetch_single_page(table)
        row_tuples = [r.values() for r in fetched]
        assert type(table.schema[0].field_type) is str
(abecerrilsalas marked this conversation as resolved.)
        assert type(row_tuples[0][0]) is str

    def test_load_table_from_json_bug_table_not_exists(self):
        table_schema = (bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),)
        json_rows = [
            {
                "age": "18",
            },
            {
                "age": "79",
            },
        ]
        dataset_id = _make_dataset_id("bq_system_test")
        self.temp_dataset(dataset_id)
        table_id = "{}.{}.load_table_from_json_bug_check".format(
            Config.CLIENT.project, dataset_id
        )

        job_config = bigquery.LoadJobConfig()
Reviewer comment (Contributor):
Add table as destination so it will be created during the load job

        job_config.autodetect = True
Reviewer comment (Contributor):
I'm not following this portion. What is the job_config here doing?
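
If the intent is for this config to affect the load, it would presumably need to be passed through; a sketch reusing the surrounding test's names:

# Hypothetical: pass the LoadJobConfig built above, otherwise its autodetect
# flag has no effect on the load job.
load_job = Config.CLIENT.load_table_from_json(
    json_rows, table_id, job_config=job_config
)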

        load_job = Config.CLIENT.load_table_from_json(json_rows, table_id)
        load_job.result()

        table = Config.CLIENT.get_table(table_id)
        self.assertTrue(table.schema)
        self.assertEqual(tuple(table.schema), table_schema)

    def test_load_table_from_json_schema_autodetect(self):
        json_rows = [
            {"name": "John", "age": 18, "birthday": "2001-10-15", "is_awesome": False},
4 changes: 2 additions & 2 deletions tests/unit/test_client.py
@@ -8100,8 +8100,8 @@ def test_load_table_from_json_basic_use(self):

        sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
        assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON
        assert sent_config.schema is None
        assert sent_config.autodetect
        # assert sent_config.schema is None
        # assert sent_config.autodetect

    def test_load_table_from_json_non_default_args(self):
        from google.cloud.bigquery import job