From f3348f98bf91a88a28bf61b12b95e391cc3be1ff Mon Sep 17 00:00:00 2001 From: Dmitry Frenkel Date: Wed, 15 Jul 2020 13:05:58 -0700 Subject: [PATCH] feat: discovery supports retries (#967) Adding ability for discovery to retry on eligible http errors and connection problems with randomized exponential backoff. Also: * DRYing discovery tests to avoid warnings when reading test data. * Updating .gitignore Fixes: #848 --- .gitignore | 4 + googleapiclient/discovery.py | 22 +++-- googleapiclient/http.py | 16 ++-- tests/data/500.json | 13 +++ tests/data/503.json | 13 +++ tests/test_discovery.py | 164 ++++++++++++++++++++++++++--------- tests/test_http.py | 5 ++ 7 files changed, 186 insertions(+), 51 deletions(-) create mode 100644 tests/data/500.json create mode 100644 tests/data/503.json diff --git a/.gitignore b/.gitignore index 1637b1d3b1e..59b756c5193 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,7 @@ dist/ .coverage coverage.xml nosetests.xml + +# IDE files +.idea +.vscode \ No newline at end of file diff --git a/googleapiclient/discovery.py b/googleapiclient/discovery.py index 3fc6e6a66f5..c7981157f22 100644 --- a/googleapiclient/discovery.py +++ b/googleapiclient/discovery.py @@ -187,6 +187,7 @@ def build( client_options=None, adc_cert_path=None, adc_key_path=None, + num_retries=1, ): """Construct a Resource for interacting with an API. @@ -223,6 +224,8 @@ def build( adc_key_path: str, client encrypted private key file path to save the application default client encrypted private key for mTLS. This field is required if you want to use the default client certificate. + num_retries: Integer, number of times to retry discovery with + randomized exponential backoff in case of intermittent/connection issues. Returns: A Resource object with methods for interacting with the service. @@ -243,7 +246,8 @@ def build( try: content = _retrieve_discovery_doc( - requested_url, discovery_http, cache_discovery, cache, developerKey + requested_url, discovery_http, cache_discovery, cache, + developerKey, num_retries=num_retries ) return build_from_document( content, @@ -266,7 +270,8 @@ def build( raise UnknownApiNameOrVersion("name: %s version: %s" % (serviceName, version)) -def _retrieve_discovery_doc(url, http, cache_discovery, cache=None, developerKey=None): +def _retrieve_discovery_doc(url, http, cache_discovery, + cache=None, developerKey=None, num_retries=1): """Retrieves the discovery_doc from cache or the internet. Args: @@ -276,13 +281,16 @@ def _retrieve_discovery_doc(url, http, cache_discovery, cache=None, developerKey cache_discovery: Boolean, whether or not to cache the discovery doc. cache: googleapiclient.discovery_cache.base.Cache, an optional cache object for the discovery documents. + developerKey: string, Key for controlling API usage, generated + from the API Console. + num_retries: Integer, number of times to retry discovery with + randomized exponential backoff in case of intermittent/connection issues. Returns: A unicode string representation of the discovery document. """ if cache_discovery: from . import discovery_cache - from .discovery_cache import base if cache is None: cache = discovery_cache.autodetect() @@ -302,10 +310,10 @@ def _retrieve_discovery_doc(url, http, cache_discovery, cache=None, developerKey actual_url = _add_query_parameter(url, "key", developerKey) logger.debug("URL being requested: GET %s", actual_url) - resp, content = http.request(actual_url) - - if resp.status >= 400: - raise HttpError(resp, content, uri=actual_url) + # Execute this request with retries build into HttpRequest + # Note that it will already raise an error if we don't get a 2xx response + req = HttpRequest(http, HttpRequest.null_postproc, actual_url) + resp, content = req.execute(num_retries=num_retries) try: content = content.decode("utf-8") diff --git a/googleapiclient/http.py b/googleapiclient/http.py index 2b35fde4c66..d9c3d2ae19a 100644 --- a/googleapiclient/http.py +++ b/googleapiclient/http.py @@ -1118,6 +1118,10 @@ def from_json(s, http, postproc): resumable=d["resumable"], ) + @staticmethod + def null_postproc(resp, contents): + return resp, contents + class BatchHttpRequest(object): """Batches multiple HttpRequest objects into a single HTTP request. @@ -1168,7 +1172,7 @@ def __init__(self, callback=None, batch_uri=None): batch_uri = _LEGACY_BATCH_URI if batch_uri == _LEGACY_BATCH_URI: - LOGGER.warn( + LOGGER.warning( "You have constructed a BatchHttpRequest using the legacy batch " "endpoint %s. This endpoint will be turned down on August 12, 2020. " "Please provide the API-specific endpoint or use " @@ -1416,7 +1420,7 @@ def _execute(self, http, order, requests): http: httplib2.Http, an http object to be used to make the request with. order: list, list of request ids in the order they were added to the batch. - request: list, list of request objects to send. + requests: list, list of request objects to send. Raises: httplib2.HttpLib2Error if a transport error has occurred. @@ -1690,9 +1694,8 @@ def __init__(self, filename=None, headers=None): if headers is None: headers = {"status": "200"} if filename: - f = open(filename, "rb") - self.data = f.read() - f.close() + with open(filename, "rb") as f: + self.data = f.read() else: self.data = None self.response_headers = headers @@ -1749,6 +1752,7 @@ def __init__(self, iterable): """ self._iterable = iterable self.follow_redirects = True + self.request_sequence = list() def request( self, @@ -1759,6 +1763,8 @@ def request( redirections=1, connection_type=None, ): + # Remember the request so after the fact this mock can be examined + self.request_sequence.append((uri, method, body, headers)) resp, content = self._iterable.pop(0) content = six.ensure_binary(content) diff --git a/tests/data/500.json b/tests/data/500.json new file mode 100644 index 00000000000..0e5cfab99a4 --- /dev/null +++ b/tests/data/500.json @@ -0,0 +1,13 @@ +{ + "error": { + "errors": [ + { + "domain": "global", + "reason": "internalError", + "message": "We encountered an internal error. Please try again using truncated exponential backoff." + } + ], + "code": 500, + "message": "Internal Server Error" + } +} diff --git a/tests/data/503.json b/tests/data/503.json new file mode 100644 index 00000000000..5deb433f4fa --- /dev/null +++ b/tests/data/503.json @@ -0,0 +1,13 @@ +{ + "error": { + "errors": [ + { + "domain": "global", + "reason": "backendError", + "message": "We encountered an internal error. Please try again using truncated exponential backoff." + } + ], + "code": 503, + "message": "Service Unavailable" + } +} diff --git a/tests/test_discovery.py b/tests/test_discovery.py index 31033e8a74f..f59ea151df5 100644 --- a/tests/test_discovery.py +++ b/tests/test_discovery.py @@ -60,6 +60,8 @@ from googleapiclient.discovery import ResourceMethodParameters from googleapiclient.discovery import STACK_QUERY_PARAMETERS from googleapiclient.discovery import STACK_QUERY_PARAMETER_DEFAULT_VALUE +from googleapiclient.discovery import V1_DISCOVERY_URI +from googleapiclient.discovery import V2_DISCOVERY_URI from googleapiclient.discovery_cache import DISCOVERY_DOC_MAX_AGE from googleapiclient.discovery_cache.base import Cache from googleapiclient.errors import HttpError @@ -108,10 +110,35 @@ def assertUrisEqual(testcase, expected, actual): testcase.assertEqual(expected_query[name], actual_query[name]) +def assert_discovery_uri(testcase, actual, service_name, version, discovery): + """Assert that discovery URI used was the one that was expected + for a given service and version.""" + params = {"api": service_name, "apiVersion": version} + expanded_requested_uri = uritemplate.expand(discovery, params) + assertUrisEqual(testcase, expanded_requested_uri, actual) + + +def validate_discovery_requests(testcase, http_mock, service_name, + version, discovery): + """Validates that there have > 0 calls to Http Discovery + and that LAST discovery URI used was the one that was expected + for a given service and version.""" + testcase.assertTrue(len(http_mock.request_sequence) > 0) + if len(http_mock.request_sequence) > 0: + actual_uri = http_mock.request_sequence[-1][0] + assert_discovery_uri(testcase, + actual_uri, service_name, version, discovery) + + def datafile(filename): return os.path.join(DATA_DIR, filename) +def read_datafile(filename, mode='r'): + with open(datafile(filename), mode=mode) as f: + return f.read() + + class SetupHttplib2(unittest.TestCase): def test_retries(self): # Merely loading googleapiclient.discovery should set the RETRIES to 1. @@ -120,8 +147,7 @@ def test_retries(self): class Utilities(unittest.TestCase): def setUp(self): - with open(datafile("zoo.json"), "r") as fh: - self.zoo_root_desc = json.loads(fh.read()) + self.zoo_root_desc = json.loads(read_datafile("zoo.json", "r")) self.zoo_get_method_desc = self.zoo_root_desc["methods"]["query"] self.zoo_animals_resource = self.zoo_root_desc["resources"]["animals"] self.zoo_insert_method_desc = self.zoo_animals_resource["methods"]["insert"] @@ -430,8 +456,8 @@ def test_failed_to_parse_discovery_json(self): def test_unknown_api_name_or_version(self): http = HttpMockSequence( [ - ({"status": "404"}, open(datafile("zoo.json"), "rb").read()), - ({"status": "404"}, open(datafile("zoo.json"), "rb").read()), + ({"status": "404"}, read_datafile("zoo.json", "rb")), + ({"status": "404"}, read_datafile("zoo.json", "rb")), ] ) with self.assertRaises(UnknownApiNameOrVersion): @@ -447,7 +473,7 @@ class DiscoveryFromDocument(unittest.TestCase): MOCK_CREDENTIALS = mock.Mock(spec=google.auth.credentials.Credentials) def test_can_build_from_local_document(self): - discovery = open(datafile("plus.json")).read() + discovery = read_datafile("plus.json") plus = build_from_document( discovery, base="https://www.googleapis.com/", @@ -457,7 +483,7 @@ def test_can_build_from_local_document(self): self.assertTrue(hasattr(plus, "activities")) def test_can_build_from_local_deserialized_document(self): - discovery = open(datafile("plus.json")).read() + discovery = read_datafile("plus.json") discovery = json.loads(discovery) plus = build_from_document( discovery, @@ -468,7 +494,7 @@ def test_can_build_from_local_deserialized_document(self): self.assertTrue(hasattr(plus, "activities")) def test_building_with_base_remembers_base(self): - discovery = open(datafile("plus.json")).read() + discovery = read_datafile("plus.json") base = "https://www.example.com/" plus = build_from_document( @@ -477,7 +503,7 @@ def test_building_with_base_remembers_base(self): self.assertEqual("https://www.googleapis.com/plus/v1/", plus._baseUrl) def test_building_with_optional_http_with_authorization(self): - discovery = open(datafile("plus.json")).read() + discovery = read_datafile("plus.json") plus = build_from_document( discovery, base="https://www.googleapis.com/", @@ -491,7 +517,7 @@ def test_building_with_optional_http_with_authorization(self): self.assertGreater(plus._http.http.timeout, 0) def test_building_with_optional_http_with_no_authorization(self): - discovery = open(datafile("plus.json")).read() + discovery = read_datafile("plus.json") # Cleanup auth field, so we would use plain http client discovery = json.loads(discovery) discovery["auth"] = {} @@ -507,14 +533,14 @@ def test_building_with_optional_http_with_no_authorization(self): def test_building_with_explicit_http(self): http = HttpMock() - discovery = open(datafile("plus.json")).read() + discovery = read_datafile("plus.json") plus = build_from_document( discovery, base="https://www.googleapis.com/", http=http ) self.assertEqual(plus._http, http) def test_building_with_developer_key_skips_adc(self): - discovery = open(datafile("plus.json")).read() + discovery = read_datafile("plus.json") plus = build_from_document( discovery, base="https://www.googleapis.com/", developerKey="123" ) @@ -524,7 +550,7 @@ def test_building_with_developer_key_skips_adc(self): self.assertNotIsInstance(plus._http, google_auth_httplib2.AuthorizedHttp) def test_api_endpoint_override_from_client_options(self): - discovery = open(datafile("plus.json")).read() + discovery = read_datafile("plus.json") api_endpoint = "https://foo.googleapis.com/" options = google.api_core.client_options.ClientOptions( api_endpoint=api_endpoint @@ -537,7 +563,7 @@ def test_api_endpoint_override_from_client_options(self): def test_api_endpoint_override_from_client_options_mapping_object(self): - discovery = open(datafile("plus.json")).read() + discovery = read_datafile("plus.json") api_endpoint = "https://foo.googleapis.com/" mapping_object = defaultdict(str) mapping_object['api_endpoint'] = api_endpoint @@ -548,7 +574,7 @@ def test_api_endpoint_override_from_client_options_mapping_object(self): self.assertEqual(plus._baseUrl, api_endpoint) def test_api_endpoint_override_from_client_options_dict(self): - discovery = open(datafile("plus.json")).read() + discovery = read_datafile("plus.json") api_endpoint = "https://foo.googleapis.com/" plus = build_from_document( discovery, @@ -586,14 +612,14 @@ def client_encrypted_cert_source(self): return self.ADC_CERT_PATH, self.ADC_KEY_PATH, self.ADC_PASSPHRASE def test_mtls_not_trigger_if_http_provided(self): - discovery = open(datafile("plus.json")).read() + discovery = read_datafile("plus.json") plus = build_from_document(discovery, http=httplib2.Http()) self.assertIsNotNone(plus) self.assertEqual(plus._baseUrl, REGULAR_ENDPOINT) self.check_http_client_cert(plus, has_client_cert=False) def test_exception_with_client_cert_source(self): - discovery = open(datafile("plus.json")).read() + discovery = read_datafile("plus.json") with self.assertRaises(MutualTLSChannelError): build_from_document( discovery, @@ -609,7 +635,7 @@ def test_exception_with_client_cert_source(self): ] ) def test_mtls_with_provided_client_cert(self, use_mtls_env, base_url): - discovery = open(datafile("plus.json")).read() + discovery = read_datafile("plus.json") with mock.patch.dict("os.environ", {"GOOGLE_API_USE_MTLS": use_mtls_env}): plus = build_from_document( @@ -626,7 +652,7 @@ def test_mtls_with_provided_client_cert(self, use_mtls_env, base_url): @parameterized.expand(["never", "auto", "always"]) def test_endpoint_not_switch(self, use_mtls_env): # Test endpoint is not switched if user provided api endpoint - discovery = open(datafile("plus.json")).read() + discovery = read_datafile("plus.json") with mock.patch.dict("os.environ", {"GOOGLE_API_USE_MTLS": use_mtls_env}): plus = build_from_document( @@ -665,7 +691,7 @@ def test_mtls_with_default_client_cert( default_client_encrypted_cert_source.return_value = ( self.client_encrypted_cert_source ) - discovery = open(datafile("plus.json")).read() + discovery = read_datafile("plus.json") with mock.patch.dict("os.environ", {"GOOGLE_API_USE_MTLS": use_mtls_env}): plus = build_from_document( @@ -692,7 +718,7 @@ def test_mtls_with_no_client_cert( self, use_mtls_env, base_url, has_default_client_cert_source ): has_default_client_cert_source.return_value = False - discovery = open(datafile("plus.json")).read() + discovery = read_datafile("plus.json") with mock.patch.dict("os.environ", {"GOOGLE_API_USE_MTLS": use_mtls_env}): plus = build_from_document( @@ -719,7 +745,7 @@ def test_userip_is_added_to_discovery_uri(self): os.environ["REMOTE_ADDR"] = "10.0.0.1" try: http = HttpMockSequence( - [({"status": "400"}, open(datafile("zoo.json"), "rb").read())] + [({"status": "400"}, read_datafile("zoo.json", "rb"))] ) zoo = build( "zoo", @@ -737,7 +763,7 @@ def test_userip_missing_is_not_added_to_discovery_uri(self): # out of the raised exception. try: http = HttpMockSequence( - [({"status": "400"}, open(datafile("zoo.json"), "rb").read())] + [({"status": "400"}, read_datafile("zoo.json", "rb"))] ) zoo = build( "zoo", @@ -755,7 +781,7 @@ def test_key_is_added_to_discovery_uri(self): # out of the raised exception. try: http = HttpMockSequence( - [({"status": "400"}, open(datafile("zoo.json"), "rb").read())] + [({"status": "400"}, read_datafile("zoo.json", "rb"))] ) zoo = build( "zoo", @@ -772,7 +798,7 @@ def test_discovery_loading_from_v2_discovery_uri(self): http = HttpMockSequence( [ ({"status": "404"}, "Not found"), - ({"status": "200"}, open(datafile("zoo.json"), "rb").read()), + ({"status": "200"}, read_datafile("zoo.json", "rb")), ] ) zoo = build("zoo", "v1", http=http, cache_discovery=False) @@ -782,7 +808,7 @@ def test_api_endpoint_override_from_client_options(self): http = HttpMockSequence( [ ({"status": "404"}, "Not found"), - ({"status": "200"}, open(datafile("zoo.json"), "rb").read()), + ({"status": "200"}, read_datafile("zoo.json", "rb")), ] ) api_endpoint = "https://foo.googleapis.com/" @@ -798,7 +824,7 @@ def test_api_endpoint_override_from_client_options_dict(self): http = HttpMockSequence( [ ({"status": "404"}, "Not found"), - ({"status": "200"}, open(datafile("zoo.json"), "rb").read()), + ({"status": "200"}, read_datafile("zoo.json", "rb")), ] ) api_endpoint = "https://foo.googleapis.com/" @@ -812,6 +838,70 @@ def test_api_endpoint_override_from_client_options_dict(self): self.assertEqual(zoo._baseUrl, api_endpoint) +class DiscoveryRetryFromHttp(unittest.TestCase): + def test_repeated_500_retries_and_fails(self): + http = HttpMockSequence( + [ + ({"status": "500"}, read_datafile("500.json", "rb")), + ({"status": "503"}, read_datafile("503.json", "rb")), + ] + ) + with self.assertRaises(HttpError): + with mock.patch("time.sleep") as mocked_sleep: + build("zoo", "v1", http=http, cache_discovery=False) + + mocked_sleep.assert_called_once() + # We also want to verify that we stayed with v1 discovery + validate_discovery_requests(self, http, "zoo", "v1", V1_DISCOVERY_URI) + + def test_v2_repeated_500_retries_and_fails(self): + http = HttpMockSequence( + [ + ({"status": "404"}, "Not found"), # last v1 discovery call + ({"status": "500"}, read_datafile("500.json", "rb")), + ({"status": "503"}, read_datafile("503.json", "rb")), + ] + ) + with self.assertRaises(HttpError): + with mock.patch("time.sleep") as mocked_sleep: + build("zoo", "v1", http=http, cache_discovery=False) + + mocked_sleep.assert_called_once() + # We also want to verify that we switched to v2 discovery + validate_discovery_requests(self, http, "zoo", "v1", V2_DISCOVERY_URI) + + def test_single_500_retries_and_succeeds(self): + http = HttpMockSequence( + [ + ({"status": "500"}, read_datafile("500.json", "rb")), + ({"status": "200"}, read_datafile("zoo.json", "rb")), + ] + ) + with mock.patch("time.sleep") as mocked_sleep: + zoo = build("zoo", "v1", http=http, cache_discovery=False) + + self.assertTrue(hasattr(zoo, "animals")) + mocked_sleep.assert_called_once() + # We also want to verify that we stayed with v1 discovery + validate_discovery_requests(self, http, "zoo", "v1", V1_DISCOVERY_URI) + + def test_single_500_then_404_retries_and_succeeds(self): + http = HttpMockSequence( + [ + ({"status": "500"}, read_datafile("500.json", "rb")), + ({"status": "404"}, "Not found"), # last v1 discovery call + ({"status": "200"}, read_datafile("zoo.json", "rb")), + ] + ) + with mock.patch("time.sleep") as mocked_sleep: + zoo = build("zoo", "v1", http=http, cache_discovery=False) + + self.assertTrue(hasattr(zoo, "animals")) + mocked_sleep.assert_called_once() + # We also want to verify that we switched to v2 discovery + validate_discovery_requests(self, http, "zoo", "v1", V2_DISCOVERY_URI) + + class DiscoveryFromAppEngineCache(unittest.TestCase): def setUp(self): self.old_environ = os.environ.copy() @@ -849,8 +939,7 @@ def import_mock(name, *args, **kwargs): ) # memcache.set is called once - with open(datafile("plus.json")) as f: - content = f.read() + content = read_datafile("plus.json") self.mocked_api.memcache.set.assert_called_once_with( url, content, time=DISCOVERY_DOC_MAX_AGE, namespace=namespace ) @@ -907,8 +996,7 @@ def test_file_based_cache(self): cache.get.assert_called_once_with(url) # cache.set is called once - with open(datafile("plus.json")) as f: - content = f.read() + content = read_datafile("plus.json") cache.set.assert_called_once_with(url, content) # Make sure there is a cache entry for the plus v1 discovery doc. @@ -1069,7 +1157,7 @@ def test_batch_request_from_default(self): def test_tunnel_patch(self): http = HttpMockSequence( [ - ({"status": "200"}, open(datafile("zoo.json"), "rb").read()), + ({"status": "200"}, read_datafile("zoo.json", "rb")), ({"status": "200"}, "echo_request_headers_as_json"), ] ) @@ -1089,13 +1177,13 @@ def test_oauth2client_credentials(self): credentials = mock.Mock(spec=GoogleCredentials) credentials.create_scoped_required.return_value = False - discovery = open(datafile("plus.json")).read() + discovery = read_datafile("plus.json") service = build_from_document(discovery, credentials=credentials) self.assertEqual(service._http, credentials.authorize.return_value) def test_google_auth_credentials(self): credentials = mock.Mock(spec=google.auth.credentials.Credentials) - discovery = open(datafile("plus.json")).read() + discovery = read_datafile("plus.json") service = build_from_document(discovery, credentials=credentials) self.assertIsInstance(service._http, google_auth_httplib2.AuthorizedHttp) @@ -1103,7 +1191,7 @@ def test_google_auth_credentials(self): def test_no_scopes_no_credentials(self): # Zoo doesn't have scopes - discovery = open(datafile("zoo.json")).read() + discovery = read_datafile("zoo.json") service = build_from_document(discovery) # Should be an ordinary httplib2.Http instance and not AuthorizedHttp. self.assertIsInstance(service._http, httplib2.Http) @@ -1233,8 +1321,7 @@ def test_multipart_media_good_upload(self): request = zoo.animals().insert(media_body=datafile("small.png"), body={}) self.assertTrue(request.headers["content-type"].startswith("multipart/related")) - with open(datafile("small.png"), "rb") as f: - contents = f.read() + contents = read_datafile("small.png", "rb") boundary = re.match(b"--=+([^=]+)", request.body).group(1) self.assertEqual( request.body.rstrip(b"\n"), # Python 2.6 does not add a trailing \n @@ -1280,7 +1367,7 @@ def test_resumable_multipart_media_good_upload(self): self.assertEqual("image/png", request.resumable.mimetype()) - self.assertNotEquals(request.body, None) + self.assertNotEqual(request.body, None) self.assertEqual(request.resumable_uri, None) http = HttpMockSequence( @@ -1744,8 +1831,7 @@ def test_pickle(self): # instances upon un-pickling def _dummy_zoo_request(self): - with open(os.path.join(DATA_DIR, "zoo.json"), "rU") as fh: - zoo_contents = fh.read() + zoo_contents = read_datafile("zoo.json") zoo_uri = uritemplate.expand(DISCOVERY_URI, {"api": "zoo", "apiVersion": "v1"}) if "REMOTE_ADDR" in os.environ: diff --git a/tests/test_http.py b/tests/test_http.py index 2c0756e6c53..88b9d5925f1 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -1097,6 +1097,11 @@ def test_no_retry_403_list_fails(self): request.execute() request._sleep.assert_not_called() + def test_null_postproc(self): + resp, content = HttpRequest.null_postproc("foo", "bar") + self.assertEqual(resp, "foo") + self.assertEqual(content, "bar") + class TestBatch(unittest.TestCase): def setUp(self):