Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

docs: add loading data from Firestore backup sample #737

Merged
merged 8 commits into from Jul 16, 2021
39 changes: 15 additions & 24 deletions samples/snippets/conftest.py
Expand Up @@ -12,38 +12,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import datetime
import random

from google.cloud import bigquery
import pytest
import test_utils.prefixer


RESOURCE_PREFIX = "python_bigquery_samples_snippets"
RESOURCE_DATE_FORMAT = "%Y%m%d_%H%M%S"
RESOURCE_DATE_LENGTH = 4 + 2 + 2 + 1 + 2 + 2 + 2


def resource_prefix() -> str:
timestamp = datetime.datetime.utcnow().strftime(RESOURCE_DATE_FORMAT)
random_string = hex(random.randrange(1000000))[2:]
return f"{RESOURCE_PREFIX}_{timestamp}_{random_string}"


def resource_name_to_date(resource_name: str):
start_date = len(RESOURCE_PREFIX) + 1
date_string = resource_name[start_date : start_date + RESOURCE_DATE_LENGTH]
return datetime.datetime.strptime(date_string, RESOURCE_DATE_FORMAT)
# Shared helper that generates unique, timestamped resource IDs for this
# sample suite and recognizes (via should_cleanup) stale IDs it created in
# earlier runs so they can be deleted.
prefixer = test_utils.prefixer.Prefixer("python-bigquery", "samples/snippets")


@pytest.fixture(scope="session", autouse=True)
def cleanup_datasets(bigquery_client: bigquery.Client):
yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1)
for dataset in bigquery_client.list_datasets():
if (
dataset.dataset_id.startswith(RESOURCE_PREFIX)
and resource_name_to_date(dataset.dataset_id) < yesterday
):
if prefixer.should_cleanup(dataset.dataset_id):
bigquery_client.delete_dataset(
dataset, delete_contents=True, not_found_ok=True
)
Expand All @@ -62,14 +42,25 @@ def project_id(bigquery_client):

@pytest.fixture(scope="session")
def dataset_id(bigquery_client: bigquery.Client, project_id: str):
dataset_id = resource_prefix()
dataset_id = prefixer.create_prefix()
full_dataset_id = f"{project_id}.{dataset_id}"
dataset = bigquery.Dataset(full_dataset_id)
bigquery_client.create_dataset(dataset)
yield dataset_id
bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True)


@pytest.fixture
def random_table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str):
    """Create a new table ID each time, so random_table_id can be used as
    target for load jobs.

    Yields the fully-qualified ID ("project.dataset.table"); the table
    itself is not created here — load jobs create it. Deleted afterwards.
    """
    # NOTE(review): the "_random_table" suffix was dropped per the review
    # thread — prefixer.create_prefix() alone is already unique.
    random_table_id = prefixer.create_prefix()
    full_table_id = f"{project_id}.{dataset_id}.{random_table_id}"
    yield full_table_id
    # Best-effort cleanup: the table may never have been created if the
    # load job failed, hence not_found_ok.
    bigquery_client.delete_table(full_table_id, not_found_ok=True)


@pytest.fixture
def bigquery_client_patch(monkeypatch, bigquery_client):
    """Make ``bigquery.Client()`` return the shared test client.

    Lets samples that construct their own client reuse the session-scoped
    one for the duration of a test.
    """
    def _shared_client():
        return bigquery_client

    monkeypatch.setattr(bigquery, "Client", _shared_client)
55 changes: 55 additions & 0 deletions samples/snippets/load_table_uri_firestore.py
@@ -0,0 +1,55 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def load_table_uri_firestore(table_id):
    """Load a Firestore/Datastore export from GCS into a BigQuery table.

    Args:
        table_id: Fully-qualified ID ("project.dataset.table") of the
            destination table the load job writes to.
    """
    # Remember the caller's table ID: the docs region below overwrites it
    # with a placeholder so the published snippet shows example text.
    orig_table_id = table_id
    # [START bigquery_load_table_gcs_firestore]
    # TODO(developer): Set table_id to the ID of the table to create.
    table_id = "your-project.your_dataset.your_table_name"

    # TODO(developer): Set uri to the path of the kind export metadata
    uri = (
        "gs://cloud-samples-data/bigquery/us-states"
        "/2021-07-02T16:04:48_70344/all_namespaces/kind_us-states"
        "/all_namespaces_kind_us-states.export_metadata"
    )

    # TODO(developer): Set projection_fields to a list of document properties
    # to import. Leave unset or set to `None` for all fields.
    projection_fields = ["name", "post_abbr"]

    # [END bigquery_load_table_gcs_firestore]
    # Restore the real table ID (outside the docs region) so tests exercise
    # an actual load rather than the placeholder.
    table_id = orig_table_id

    # [START bigquery_load_table_gcs_firestore]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    job_config = bigquery.LoadJobConfig(
        source_format=bigquery.SourceFormat.DATASTORE_BACKUP,
        projection_fields=projection_fields,
    )

    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.

    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_firestore]
21 changes: 21 additions & 0 deletions samples/snippets/load_table_uri_firestore_test.py
@@ -0,0 +1,21 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import load_table_uri_firestore


def test_load_table_uri_firestore(capsys, random_table_id):
    """Smoke-test the sample: the us-states export loads 50 rows."""
    load_table_uri_firestore.load_table_uri_firestore(random_table_id)
    captured, _ = capsys.readouterr()
    assert "Loaded 50 rows." in captured
1 change: 1 addition & 0 deletions samples/snippets/requirements-test.txt
@@ -1,2 +1,3 @@
google-cloud-testutils==0.3.0
pytest==6.2.4
mock==4.0.3
4 changes: 2 additions & 2 deletions samples/snippets/test_update_with_dml.py
Expand Up @@ -15,13 +15,13 @@
from google.cloud import bigquery
import pytest

from conftest import resource_prefix
from conftest import prefixer
import update_with_dml


@pytest.fixture
def table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str):
    """Yield a unique table ID for the DML sample; drop the table afterwards."""
    # The diff showed both the old resource_prefix() call and its
    # replacement; resource_prefix is no longer imported, so only the
    # prefixer version is valid here.
    table_id = f"{prefixer.create_prefix()}_update_with_dml"
    yield table_id
    full_table_id = f"{project_id}.{dataset_id}.{table_id}"
    bigquery_client.delete_table(full_table_id, not_found_ok=True)
Expand Down