Skip to content

Commit

Permalink
feat: add Client.delete_job method to remove job metadata
Browse files Browse the repository at this point in the history
Note: this only removes job metadata. Use `Client.cancel_job` to stop
a running job. Also, this feature is in preview and has not rolled out
to all regions yet.

Location is required, so always pass in location. To keep the method
signature consistent with the other methods, location is not a
positional argument. The location from the job object is preferred.
  • Loading branch information
tswast committed Apr 20, 2021
1 parent 6502a60 commit 6f1ce48
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 2 deletions.
71 changes: 71 additions & 0 deletions google/cloud/bigquery/client.py
Expand Up @@ -1453,6 +1453,77 @@ def delete_model(
if not not_found_ok:
raise

def delete_job(
    self,
    job_id,
    project=None,
    location=None,
    retry=DEFAULT_RETRY,
    timeout=None,
    not_found_ok=False,
):
    """[Beta] Remove a job's metadata from the job history.

    Note: A running job is not stopped by this call. Use
    :func:`~google.cloud.bigquery.client.Client.cancel_job` for that.

    Args:
        job_id (Union[ \
            str, \
            google.cloud.bigquery.job.LoadJob, \
            google.cloud.bigquery.job.CopyJob, \
            google.cloud.bigquery.job.ExtractJob, \
            google.cloud.bigquery.job.QueryJob \
        ]): Job identifier.

    Keyword Arguments:
        project (Optional[str]):
            ID of the project which owns the job (defaults to the client's
            project).
        location (Optional[str]):
            Location where the job was run (defaults to the client's
            location). Ignored if ``job_id`` is a job object.
        retry (Optional[google.api_core.retry.Retry]):
            How to retry the RPC.
        timeout (Optional[float]):
            The number of seconds to wait for the underlying HTTP transport
            before using ``retry``.
        not_found_ok (Optional[bool]):
            Defaults to ``False``. If ``True``, ignore "not found" errors
            when deleting the job.

    Raises:
        google.api_core.exceptions.NotFound:
            If the request fails with "not found" and ``not_found_ok`` is
            false.
    """
    project, location, job_id = _extract_job_reference(
        job_id, project=project, location=location
    )

    # Anything the reference did not supply falls back to the client default.
    project = self.project if project is None else project
    location = self.location if location is None else location

    path = "/projects/{}/jobs/{}/delete".format(project, job_id)

    try:
        self._call_api(
            retry,
            span_name="BigQuery.deleteJob",
            span_attributes={"path": path, "job_id": job_id, "location": location},
            method="DELETE",
            path=path,
            # Location is always required for jobs.delete(), so it is sent
            # unconditionally.
            query_params={"location": location},
            timeout=timeout,
        )
    except google.api_core.exceptions.NotFound:
        if not_found_ok:
            return
        raise

def delete_routine(
self, routine, retry=DEFAULT_RETRY, timeout=None, not_found_ok=False
):
Expand Down
24 changes: 22 additions & 2 deletions tests/system/test_client.py
Expand Up @@ -25,6 +25,7 @@
import time
import unittest
import uuid
from typing import Optional

import psutil
import pytest
Expand Down Expand Up @@ -62,6 +63,7 @@
from google.cloud import bigquery_v2
from google.cloud.bigquery.dataset import Dataset
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.table import Table
from google.cloud._helpers import UTC
from google.cloud.bigquery import dbapi, enums
Expand Down Expand Up @@ -123,7 +125,7 @@ def _has_rows(result):


def _make_dataset_id(prefix):
    """Build a dataset ID for a system test.

    The ID is a fixed ``python_bigquery_tests_system_`` marker, followed by
    ``prefix``, followed by whatever ``unique_resource_id()`` returns.

    Args:
        prefix (str): Test-specific part of the dataset ID.

    Returns:
        str: The assembled dataset ID.
    """
    # Only the post-commit return is kept; the pre-commit ``"%s%s"`` line that
    # preceded it made this statement unreachable.
    return "python_bigquery_tests_system_%s%s" % (prefix, unique_resource_id())


def _load_json_schema(filename="schema.json"):
Expand All @@ -142,7 +144,7 @@ class Config(object):
global state.
"""

CLIENT = None
CLIENT: Optional[bigquery.Client] = None
CURSOR = None
DATASET = None

Expand Down Expand Up @@ -430,6 +432,24 @@ def test_delete_dataset_delete_contents_false(self):
with self.assertRaises(exceptions.BadRequest):
Config.CLIENT.delete_dataset(dataset)

def test_delete_job(self):
    """A finished query job can be fetched, and raises NotFound once deleted."""
    client = Config.CLIENT
    location = "us-east1"

    # The dataset, table and query all use the same explicit location.
    dataset_id = _make_dataset_id("us_east1")
    self.temp_dataset(dataset_id, location=location)

    full_table_id = (
        f"{client.project}.{dataset_id}.test_delete_job_explicit_location"
    )
    client.create_table(Table(full_table_id, schema=[SchemaField("col", "STRING")]))

    query_job: bigquery.QueryJob = client.query(
        f"SELECT COUNT(*) FROM `{full_table_id}`", location=location,
    )
    query_job.result()

    # The job is retrievable before the delete ...
    self.assertIsNotNone(client.get_job(query_job))

    # ... and looking it up afterwards must fail.
    client.delete_job(query_job)
    self.assertRaises(NotFound, client.get_job, query_job)

def test_get_table_w_public_dataset(self):
public = "bigquery-public-data"
dataset_id = "samples"
Expand Down
60 changes: 60 additions & 0 deletions tests/unit/test_client.py
Expand Up @@ -2493,6 +2493,66 @@ def test_update_table_delete_property(self):
self.assertEqual(req[1]["data"], sent)
self.assertIsNone(table3.description)

def test_delete_job_not_found(self):
    """NotFound propagates by default and is suppressed with not_found_ok."""
    not_found = google.api_core.exceptions.NotFound

    client = self._make_one(
        "client-proj", _make_credentials(), location="client-loc"
    )
    # One queued error per delete_job call made below.
    conn = make_connection(not_found("job not found"), not_found("job not found"))
    client._connection = conn

    self.assertRaises(not_found, client.delete_job, "my-job")

    # Forget the first request so only the second call is asserted on.
    conn.api_request.reset_mock()
    client.delete_job("my-job", not_found_ok=True)

    expected_request = dict(
        method="DELETE",
        path="/projects/client-proj/jobs/my-job/delete",
        query_params={"location": "client-loc"},
        timeout=None,
    )
    conn.api_request.assert_called_once_with(**expected_request)

def test_delete_job_with_id(self):
    """Explicit project and location arguments appear in the DELETE request."""
    client = self._make_one(self.PROJECT, _make_credentials())
    conn = make_connection({})
    client._connection = conn

    client.delete_job("my-job", project="param-proj", location="param-loc")

    expected_request = dict(
        method="DELETE",
        path="/projects/param-proj/jobs/my-job/delete",
        query_params={"location": "param-loc"},
        timeout=None,
    )
    conn.api_request.assert_called_once_with(**expected_request)

def test_delete_job_with_resource(self):
    """Passing a job object uses the project, ID and location it carries."""
    from google.cloud.bigquery.job import QueryJob

    job_reference = {
        "projectId": "job-based-proj",
        "jobId": "query_job",
        "location": "us-east1",
    }
    query_resource = {
        "jobReference": job_reference,
        "configuration": {"query": {}},
    }

    client = self._make_one(self.PROJECT, _make_credentials())
    conn = make_connection(query_resource)
    client._connection = conn

    client.delete_job(QueryJob.from_api_repr(query_resource, client))

    expected_request = dict(
        method="DELETE",
        path="/projects/job-based-proj/jobs/query_job/delete",
        query_params={"location": "us-east1"},
        timeout=None,
    )
    conn.api_request.assert_called_once_with(**expected_request)

def test_delete_model(self):
from google.cloud.bigquery.model import Model

Expand Down

0 comments on commit 6f1ce48

Please sign in to comment.