feat: add Client.delete_job_metadata method to remove job metadata (#610)

Note: this only removes job metadata. Use `Client.cancel_job` to stop
a running job. Also, this feature is in preview and has not yet rolled out
to all regions.
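
For illustration, a minimal usage sketch of the new method (assuming default credentials/project and a region where the preview is available):

```python
from google.cloud import bigquery

client = bigquery.Client()

# Run a trivial query job to completion so there is some job history to delete.
job = client.query("SELECT 1")
job.result()

# Removes only the job's entry from job history; it does not stop a
# running job (use Client.cancel_job / job.cancel() for that).
client.delete_job_metadata(job, not_found_ok=True)
```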

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Towards internal issue 176186229 🦕
tswast committed Apr 26, 2021
1 parent f8d4aaa commit 0abb566
Showing 3 changed files with 151 additions and 2 deletions.
71 changes: 71 additions & 0 deletions google/cloud/bigquery/client.py
@@ -1545,6 +1545,77 @@ def delete_model(
            if not not_found_ok:
                raise

    def delete_job_metadata(
        self,
        job_id,
        project=None,
        location=None,
        retry=DEFAULT_RETRY,
        timeout=None,
        not_found_ok=False,
    ):
        """[Beta] Delete job metadata from job history.

        Note: This does not stop a running job. Use
        :func:`~google.cloud.bigquery.client.Client.cancel_job` instead.

        Args:
            job_id (Union[ \
                str, \
                google.cloud.bigquery.job.LoadJob, \
                google.cloud.bigquery.job.CopyJob, \
                google.cloud.bigquery.job.ExtractJob, \
                google.cloud.bigquery.job.QueryJob \
            ]): Job identifier.

        Keyword Arguments:
            project (Optional[str]):
                ID of the project which owns the job (defaults to the client's project).
            location (Optional[str]):
                Location where the job was run. Ignored if ``job_id`` is a job
                object.
            retry (Optional[google.api_core.retry.Retry]):
                How to retry the RPC.
            timeout (Optional[float]):
                The number of seconds to wait for the underlying HTTP transport
                before using ``retry``.
            not_found_ok (Optional[bool]):
                Defaults to ``False``. If ``True``, ignore "not found" errors
                when deleting the job.
        """
        extra_params = {}

        project, location, job_id = _extract_job_reference(
            job_id, project=project, location=location
        )

        if project is None:
            project = self.project

        if location is None:
            location = self.location

        # Location is always required for jobs.delete()
        extra_params["location"] = location

        path = f"/projects/{project}/jobs/{job_id}/delete"

        span_attributes = {"path": path, "job_id": job_id, "location": location}

        try:
            self._call_api(
                retry,
                span_name="BigQuery.deleteJob",
                span_attributes=span_attributes,
                method="DELETE",
                path=path,
                query_params=extra_params,
                timeout=timeout,
            )
        except google.api_core.exceptions.NotFound:
            if not not_found_ok:
                raise

    def delete_routine(
        self,
        routine: Union[Routine, RoutineReference, str],
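
As the implementation above shows, the job reference may be passed either as a plain job ID string (with optional `project`/`location` overrides, falling back to the client's defaults) or as a job object, whose job reference supplies the project, location, and job ID. A short sketch of both call styles (reusing the `client` from the earlier sketch; the project ID, location, and job ID values are placeholders):

```python
# Both calls issue DELETE /projects/<project>/jobs/<job_id>/delete.

# 1. Plain job ID with explicit overrides; anything left unset falls back
#    to client.project / client.location.
client.delete_job_metadata("my-job", project="my-project", location="us-east1")

# 2. A job object; its jobReference provides project, location, and job ID.
job = client.get_job("my-job", project="my-project", location="us-east1")
client.delete_job_metadata(job)
```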
22 changes: 20 additions & 2 deletions tests/system/test_client.py
@@ -25,6 +25,7 @@
import time
import unittest
import uuid
from typing import Optional

import psutil
import pytest
@@ -62,6 +63,7 @@
from google.cloud import bigquery_v2
from google.cloud.bigquery.dataset import Dataset
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.table import Table
from google.cloud._helpers import UTC
from google.cloud.bigquery import dbapi, enums
@@ -123,7 +125,7 @@ def _has_rows(result):


def _make_dataset_id(prefix):
    return "%s%s" % (prefix, unique_resource_id())
    return f"python_bigquery_tests_system_{prefix}{unique_resource_id()}"


def _load_json_schema(filename="schema.json"):
@@ -142,7 +144,7 @@ class Config(object):
    global state.
    """

    CLIENT = None
    CLIENT: Optional[bigquery.Client] = None
    CURSOR = None
    DATASET = None

@@ -430,6 +432,22 @@ def test_delete_dataset_delete_contents_false(self):
        with self.assertRaises(exceptions.BadRequest):
            Config.CLIENT.delete_dataset(dataset)

    def test_delete_job_metadata(self):
        dataset_id = _make_dataset_id("us_east1")
        self.temp_dataset(dataset_id, location="us-east1")
        full_table_id = f"{Config.CLIENT.project}.{dataset_id}.test_delete_job_metadata"
        table = Table(full_table_id, schema=[SchemaField("col", "STRING")])
        Config.CLIENT.create_table(table)
        query_job: bigquery.QueryJob = Config.CLIENT.query(
            f"SELECT COUNT(*) FROM `{full_table_id}`", location="us-east1",
        )
        query_job.result()
        self.assertIsNotNone(Config.CLIENT.get_job(query_job))

        Config.CLIENT.delete_job_metadata(query_job)
        with self.assertRaises(NotFound):
            Config.CLIENT.get_job(query_job)

    def test_get_table_w_public_dataset(self):
        public = "bigquery-public-data"
        dataset_id = "samples"
60 changes: 60 additions & 0 deletions tests/unit/test_client.py
@@ -2498,6 +2498,66 @@ def test_update_table_delete_property(self):
self.assertEqual(req[1]["data"], sent)
self.assertIsNone(table3.description)

def test_delete_job_metadata_not_found(self):
creds = _make_credentials()
client = self._make_one("client-proj", creds, location="client-loc")
conn = client._connection = make_connection(
google.api_core.exceptions.NotFound("job not found"),
google.api_core.exceptions.NotFound("job not found"),
)

with self.assertRaises(google.api_core.exceptions.NotFound):
client.delete_job_metadata("my-job")

conn.api_request.reset_mock()
client.delete_job_metadata("my-job", not_found_ok=True)

conn.api_request.assert_called_once_with(
method="DELETE",
path="/projects/client-proj/jobs/my-job/delete",
query_params={"location": "client-loc"},
timeout=None,
)

def test_delete_job_metadata_with_id(self):
creds = _make_credentials()
client = self._make_one(self.PROJECT, creds)
conn = client._connection = make_connection({})

client.delete_job_metadata("my-job", project="param-proj", location="param-loc")

conn.api_request.assert_called_once_with(
method="DELETE",
path="/projects/param-proj/jobs/my-job/delete",
query_params={"location": "param-loc"},
timeout=None,
)

def test_delete_job_metadata_with_resource(self):
from google.cloud.bigquery.job import QueryJob

query_resource = {
"jobReference": {
"projectId": "job-based-proj",
"jobId": "query_job",
"location": "us-east1",
},
"configuration": {"query": {}},
}
creds = _make_credentials()
client = self._make_one(self.PROJECT, creds)
conn = client._connection = make_connection(query_resource)
job_from_resource = QueryJob.from_api_repr(query_resource, client)

client.delete_job_metadata(job_from_resource)

conn.api_request.assert_called_once_with(
method="DELETE",
path="/projects/job-based-proj/jobs/query_job/delete",
query_params={"location": "us-east1"},
timeout=None,
)

def test_delete_model(self):
from google.cloud.bigquery.model import Model

