From fcd26eb0ff4f97c097ca33b2d212d8f83e56686e Mon Sep 17 00:00:00 2001 From: Daniel Sanche Date: Mon, 3 May 2021 15:25:45 -0700 Subject: [PATCH] feat: support span inference (#267) --- google/cloud/logging_v2/handlers/_helpers.py | 72 ++++++---- .../cloud/logging_v2/handlers/app_engine.py | 4 +- google/cloud/logging_v2/handlers/handlers.py | 3 +- .../logging_v2/handlers/structured_log.py | 1 + tests/environment | 2 +- tests/unit/handlers/test__helpers.py | 124 ++++++++++++++---- tests/unit/handlers/test_app_engine.py | 6 +- tests/unit/handlers/test_structured_log.py | 15 ++- 8 files changed, 166 insertions(+), 61 deletions(-) diff --git a/google/cloud/logging_v2/handlers/_helpers.py b/google/cloud/logging_v2/handlers/_helpers.py index 88eba07a..ba853edf 100644 --- a/google/cloud/logging_v2/handlers/_helpers.py +++ b/google/cloud/logging_v2/handlers/_helpers.py @@ -16,6 +16,7 @@ import math import json +import re try: import flask @@ -55,12 +56,13 @@ def get_request_data_from_flask(): """Get http_request and trace data from flask request headers. Returns: - Tuple[Optional[dict], Optional[str]]: - Data related to the current http request and the trace_id for the - request. Both fields will be None if a flask request isn't found. + Tuple[Optional[dict], Optional[str], Optional[str]]: + Data related to the current http request, trace_id, and span_id for + the request. All fields will be None if a django request isn't + found. """ if flask is None or not flask.request: - return None, None + return None, None, None # build http_request http_request = { @@ -73,27 +75,26 @@ def get_request_data_from_flask(): "protocol": flask.request.environ.get(_PROTOCOL_HEADER), } - # find trace id - trace_id = None + # find trace id and span id header = flask.request.headers.get(_FLASK_TRACE_HEADER) - if header: - trace_id = header.split("/", 1)[0] + trace_id, span_id = _parse_trace_span(header) - return http_request, trace_id + return http_request, trace_id, span_id def get_request_data_from_django(): """Get http_request and trace data from django request headers. Returns: - Tuple[Optional[dict], Optional[str]]: - Data related to the current http request and the trace_id for the - request. Both fields will be None if a django request isn't found. + Tuple[Optional[dict], Optional[str], Optional[str]]: + Data related to the current http request, trace_id, and span_id for + the request. All fields will be None if a django request isn't + found. """ request = _get_django_request() if request is None: - return None, None + return None, None, None # convert content_length to int if it exists content_length = None @@ -112,13 +113,35 @@ def get_request_data_from_django(): "protocol": request.META.get(_PROTOCOL_HEADER), } - # find trace id - trace_id = None + # find trace id and span id header = request.META.get(_DJANGO_TRACE_HEADER) - if header: - trace_id = header.split("/", 1)[0] + trace_id, span_id = _parse_trace_span(header) - return http_request, trace_id + return http_request, trace_id, span_id + + +def _parse_trace_span(header): + """Given an X_CLOUD_TRACE header, extract the trace and span ids. + + Args: + header (str): the string extracted from the X_CLOUD_TRACE header + Returns: + Tuple[Optional[dict], Optional[str]]: + The trace_id and span_id extracted from the header + Each field will be None if not found. + """ + trace_id = None + span_id = None + if header: + try: + split_header = header.split("/", 1) + trace_id = split_header[0] + header_suffix = split_header[1] + # the span is the set of alphanumeric characters after the / + span_id = re.findall(r"^\w+", header_suffix)[0] + except IndexError: + pass + return trace_id, span_id def get_request_data(): @@ -126,9 +149,10 @@ def get_request_data(): frameworks (currently supported: Flask and Django). Returns: - Tuple[Optional[dict], Optional[str]]: - Data related to the current http request and the trace_id for the - request. Both fields will be None if a supported web request isn't found. + Tuple[Optional[dict], Optional[str], Optional[str]]: + Data related to the current http request, trace_id, and span_id for + the request. All fields will be None if a django request isn't + found. """ checkers = ( get_request_data_from_django, @@ -136,8 +160,8 @@ def get_request_data(): ) for checker in checkers: - http_request, trace_id = checker() + http_request, trace_id, span_id = checker() if http_request is not None: - return http_request, trace_id + return http_request, trace_id, span_id - return None, None + return None, None, None diff --git a/google/cloud/logging_v2/handlers/app_engine.py b/google/cloud/logging_v2/handlers/app_engine.py index bc7daa9d..874a9d60 100644 --- a/google/cloud/logging_v2/handlers/app_engine.py +++ b/google/cloud/logging_v2/handlers/app_engine.py @@ -90,7 +90,7 @@ def get_gae_labels(self): """ gae_labels = {} - _, trace_id = get_request_data() + _, trace_id, _ = get_request_data() if trace_id is not None: gae_labels[_TRACE_ID_LABEL] = trace_id @@ -107,7 +107,7 @@ def emit(self, record): record (logging.LogRecord): The record to be logged. """ message = super(AppEngineHandler, self).format(record) - inferred_http, inferred_trace = get_request_data() + inferred_http, inferred_trace, _ = get_request_data() if inferred_trace is not None: inferred_trace = f"projects/{self.project_id}/traces/{inferred_trace}" # allow user overrides diff --git a/google/cloud/logging_v2/handlers/handlers.py b/google/cloud/logging_v2/handlers/handlers.py index b9cc53a9..3a5599aa 100644 --- a/google/cloud/logging_v2/handlers/handlers.py +++ b/google/cloud/logging_v2/handlers/handlers.py @@ -59,7 +59,7 @@ def filter(self, record): } record.msg = "" if record.msg is None else record.msg # find http request data - inferred_http, inferred_trace = get_request_data() + inferred_http, inferred_trace, inferred_span = get_request_data() if inferred_trace is not None and self.project is not None: inferred_trace = f"projects/{self.project}/traces/{inferred_trace}" # set labels @@ -70,6 +70,7 @@ def filter(self, record): ) record.trace = getattr(record, "trace", inferred_trace) or "" + record.span_id = getattr(record, "span_id", inferred_span) or "" record.http_request = getattr(record, "http_request", inferred_http) or {} record.request_method = record.http_request.get("requestMethod", "") record.request_url = record.http_request.get("requestUrl", "") diff --git a/google/cloud/logging_v2/handlers/structured_log.py b/google/cloud/logging_v2/handlers/structured_log.py index e9d03642..76a56053 100644 --- a/google/cloud/logging_v2/handlers/structured_log.py +++ b/google/cloud/logging_v2/handlers/structured_log.py @@ -24,6 +24,7 @@ '"severity": "%(levelname)s", ' '"logging.googleapis.com/labels": { %(total_labels_str)s }, ' '"logging.googleapis.com/trace": "%(trace)s", ' + '"logging.googleapis.com/spanId": "%(span_id)s", ' '"logging.googleapis.com/sourceLocation": { "file": "%(file)s", "line": "%(line)d", "function": "%(function)s"}, ' '"httpRequest": {"requestMethod": "%(request_method)s", "requestUrl": "%(request_url)s", "userAgent": "%(user_agent)s", "protocol": "%(protocol)s"} }' ) diff --git a/tests/environment b/tests/environment index 94ff6858..df1b7c13 160000 --- a/tests/environment +++ b/tests/environment @@ -1 +1 @@ -Subproject commit 94ff685805510ad8d78c170603798cbe44050bce +Subproject commit df1b7c131575f8eb59120cef75709496602b7665 diff --git a/tests/unit/handlers/test__helpers.py b/tests/unit/handlers/test__helpers.py index fd17f6ff..b2c822e7 100644 --- a/tests/unit/handlers/test__helpers.py +++ b/tests/unit/handlers/test__helpers.py @@ -17,8 +17,10 @@ import mock _FLASK_TRACE_ID = "flask-id" +_FLASK_SPAN_ID = "span0flask" _FLASK_HTTP_REQUEST = {"requestUrl": "https://flask.palletsprojects.com/en/1.1.x/"} _DJANGO_TRACE_ID = "django-id" +_DJANGO_SPAN_ID = "span0django" _DJANGO_HTTP_REQUEST = {"requestUrl": "https://www.djangoproject.com/"} @@ -44,15 +46,17 @@ def index(): def test_no_context_header(self): app = self.create_app() with app.test_request_context(path="/", headers={}): - http_request, trace_id = self._call_fut() + http_request, trace_id, span_id = self._call_fut() self.assertIsNone(trace_id) + self.assertIsNone(span_id) self.assertEqual(http_request["requestMethod"], "GET") def test_valid_context_header(self): flask_trace_header = "X_CLOUD_TRACE_CONTEXT" expected_trace_id = _FLASK_TRACE_ID - flask_trace_id = expected_trace_id + "/testspanid" + expected_span_id = _FLASK_SPAN_ID + flask_trace_id = f"{expected_trace_id}/{expected_span_id}" app = self.create_app() context = app.test_request_context( @@ -60,9 +64,10 @@ def test_valid_context_header(self): ) with context: - http_request, trace_id = self._call_fut() + http_request, trace_id, span_id = self._call_fut() self.assertEqual(trace_id, expected_trace_id) + self.assertEqual(span_id, expected_span_id) self.assertEqual(http_request["requestMethod"], "GET") def test_http_request_populated(self): @@ -84,7 +89,7 @@ def test_http_request_populated(self): environ_base={"REMOTE_ADDR": expected_ip}, headers=headers, ) - http_request, trace_id = self._call_fut() + http_request, *_ = self._call_fut() self.assertEqual(http_request["requestMethod"], "PUT") self.assertEqual(http_request["requestUrl"], expected_path) @@ -99,7 +104,7 @@ def test_http_request_sparse(self): app = self.create_app() with app.test_client() as c: c.put(path=expected_path) - http_request, trace_id = self._call_fut() + http_request, *_ = self._call_fut() self.assertEqual(http_request["requestMethod"], "PUT") self.assertEqual(http_request["requestUrl"], expected_path) self.assertEqual(http_request["protocol"], "HTTP/1.1") @@ -135,17 +140,20 @@ def test_no_context_header(self): middleware = request.RequestMiddleware(None) middleware.process_request(django_request) - http_request, trace_id = self._call_fut() + http_request, trace_id, span_id = self._call_fut() + self.assertEqual(http_request["requestMethod"], "GET") self.assertIsNone(trace_id) + self.assertIsNone(span_id) def test_valid_context_header(self): from django.test import RequestFactory from google.cloud.logging_v2.handlers.middleware import request django_trace_header = "HTTP_X_CLOUD_TRACE_CONTEXT" - expected_trace_id = "testtraceiddjango" - django_trace_id = expected_trace_id + "/testspanid" + expected_span_id = _DJANGO_SPAN_ID + expected_trace_id = _DJANGO_TRACE_ID + django_trace_id = f"{expected_trace_id}/{expected_span_id}" django_request = RequestFactory().get( "/", **{django_trace_header: django_trace_id} @@ -153,9 +161,10 @@ def test_valid_context_header(self): middleware = request.RequestMiddleware(None) middleware.process_request(django_request) - http_request, trace_id = self._call_fut() + http_request, trace_id, span_id = self._call_fut() self.assertEqual(trace_id, expected_trace_id) + self.assertEqual(span_id, expected_span_id) self.assertEqual(http_request["requestMethod"], "GET") def test_http_request_populated(self): @@ -178,7 +187,7 @@ def test_http_request_populated(self): middleware = request.RequestMiddleware(None) middleware.process_request(django_request) - http_request, trace_id = self._call_fut() + http_request, *_ = self._call_fut() self.assertEqual(http_request["requestMethod"], "PUT") self.assertEqual(http_request["requestUrl"], expected_path) self.assertEqual(http_request["userAgent"], expected_agent) @@ -195,7 +204,7 @@ def test_http_request_sparse(self): django_request = RequestFactory().put(expected_path) middleware = request.RequestMiddleware(None) middleware.process_request(django_request) - http_request, trace_id = self._call_fut() + http_request, *_ = self._call_fut() self.assertEqual(http_request["requestMethod"], "PUT") self.assertEqual(http_request["requestUrl"], expected_path) self.assertEqual(http_request["remoteIp"], "127.0.0.1") @@ -226,8 +235,8 @@ def _helper(self, django_return, flask_return): return django_mock, flask_mock, result def test_from_django(self): - django_expected = (_DJANGO_HTTP_REQUEST, _DJANGO_TRACE_ID) - flask_expected = (None, None) + django_expected = (_DJANGO_HTTP_REQUEST, _DJANGO_TRACE_ID, _DJANGO_SPAN_ID) + flask_expected = (None, None, None) django_mock, flask_mock, output = self._helper(django_expected, flask_expected) self.assertEqual(output, django_expected) @@ -235,8 +244,8 @@ def test_from_django(self): flask_mock.assert_not_called() def test_from_flask(self): - django_expected = (None, None) - flask_expected = (_FLASK_HTTP_REQUEST, _FLASK_TRACE_ID) + django_expected = (None, None, None) + flask_expected = (_FLASK_HTTP_REQUEST, _FLASK_TRACE_ID, _FLASK_SPAN_ID) django_mock, flask_mock, output = self._helper(django_expected, flask_expected) self.assertEqual(output, flask_expected) @@ -245,8 +254,8 @@ def test_from_flask(self): flask_mock.assert_called_once_with() def test_from_django_and_flask(self): - django_expected = (_DJANGO_HTTP_REQUEST, _DJANGO_TRACE_ID) - flask_expected = (_FLASK_HTTP_REQUEST, _FLASK_TRACE_ID) + django_expected = (_DJANGO_HTTP_REQUEST, _DJANGO_TRACE_ID, _DJANGO_SPAN_ID) + flask_expected = (_FLASK_HTTP_REQUEST, _FLASK_TRACE_ID, _FLASK_SPAN_ID) django_mock, flask_mock, output = self._helper(django_expected, flask_expected) @@ -257,19 +266,19 @@ def test_from_django_and_flask(self): flask_mock.assert_not_called() def test_missing_http_request(self): - flask_expected = (None, _FLASK_TRACE_ID) - django_expected = (None, _DJANGO_TRACE_ID) + flask_expected = (None, _FLASK_TRACE_ID, _FLASK_SPAN_ID) + django_expected = (None, _DJANGO_TRACE_ID, _DJANGO_TRACE_ID) django_mock, flask_mock, output = self._helper(django_expected, flask_expected) # function only returns trace if http_request data is present - self.assertEqual(output, (None, None)) + self.assertEqual(output, (None, None, None)) django_mock.assert_called_once_with() flask_mock.assert_called_once_with() def test_missing_trace_id(self): - flask_expected = (_FLASK_HTTP_REQUEST, None) - django_expected = (None, _DJANGO_TRACE_ID) + flask_expected = (_FLASK_HTTP_REQUEST, None, None) + django_expected = (None, _DJANGO_TRACE_ID, _DJANGO_SPAN_ID) django_mock, flask_mock, output = self._helper(django_expected, flask_expected) # trace_id is optional @@ -279,14 +288,77 @@ def test_missing_trace_id(self): flask_mock.assert_called_once_with() def test_missing_both(self): - flask_expected = (None, None) - django_expected = (None, None) + flask_expected = (None, None, None) + django_expected = (None, None, None) django_mock, flask_mock, output = self._helper(django_expected, flask_expected) - self.assertEqual(output, (None, None)) + self.assertEqual(output, (None, None, None)) django_mock.assert_called_once_with() flask_mock.assert_called_once_with() def test_wo_libraries(self): output = self._call_fut() - self.assertEqual(output, (None, None)) + self.assertEqual(output, (None, None, None)) + + +class Test__parse_trace_span(unittest.TestCase): + @staticmethod + def _call_fut(header): + from google.cloud.logging_v2.handlers import _helpers + + return _helpers._parse_trace_span(header) + + def test_empty_header(self): + header = "" + trace_id, span_id = self._call_fut(header) + self.assertEqual(trace_id, None) + self.assertEqual(span_id, None) + + def test_no_span(self): + header = "12345" + trace_id, span_id = self._call_fut(header) + self.assertEqual(trace_id, header) + self.assertEqual(span_id, None) + + def test_no_trace(self): + header = "/12345" + trace_id, span_id = self._call_fut(header) + self.assertEqual(trace_id, "") + self.assertEqual(span_id, "12345") + + def test_with_span(self): + expected_trace = "12345" + expected_span = "67890" + header = f"{expected_trace}/{expected_span}" + trace_id, span_id = self._call_fut(header) + self.assertEqual(trace_id, expected_trace) + self.assertEqual(span_id, expected_span) + + def test_with_extra_characters(self): + expected_trace = "12345" + expected_span = "67890" + header = f"{expected_trace}/{expected_span};o=0" + trace_id, span_id = self._call_fut(header) + self.assertEqual(trace_id, expected_trace) + self.assertEqual(span_id, expected_span) + + def test_with_unicode_span(self): + """ + Spans are expected to be alphanumeric + """ + expected_trace = "12345" + header = f"{expected_trace}/😀123" + trace_id, span_id = self._call_fut(header) + self.assertEqual(trace_id, expected_trace) + self.assertEqual(span_id, None) + + def test_with_unicode_trace(self): + """ + Spans are expected to be alphanumeric + """ + expected_trace = "12😀345" + expected_span = "67890" + header = f"{expected_trace}/{expected_span}" + trace_id, span_id = self._call_fut(header) + self.assertEqual(trace_id, expected_trace) + self.assertEqual(span_id, expected_span) diff --git a/tests/unit/handlers/test_app_engine.py b/tests/unit/handlers/test_app_engine.py index 65e80457..c726c849 100644 --- a/tests/unit/handlers/test_app_engine.py +++ b/tests/unit/handlers/test_app_engine.py @@ -97,7 +97,7 @@ def test_emit(self): expected_trace_id = f"projects/{self.PROJECT}/traces/{trace_id}" get_request_patch = mock.patch( "google.cloud.logging_v2.handlers.app_engine.get_request_data", - return_value=(expected_http_request, trace_id), + return_value=(expected_http_request, trace_id, None), ) with get_request_patch: # library integrations mocked to return test data @@ -135,7 +135,7 @@ def test_emit_manual_field_override(self): inferred_trace_id = "trace-test" get_request_patch = mock.patch( "google.cloud.logging_v2.handlers.app_engine.get_request_data", - return_value=(inferred_http_request, inferred_trace_id), + return_value=(inferred_http_request, inferred_trace_id, None), ) with get_request_patch: # library integrations mocked to return test data @@ -180,7 +180,7 @@ def test_emit_manual_field_override(self): def _get_gae_labels_helper(self, trace_id): get_request_patch = mock.patch( "google.cloud.logging_v2.handlers.app_engine.get_request_data", - return_value=(None, trace_id), + return_value=(None, trace_id, None), ) client = mock.Mock(project=self.PROJECT, spec=["project"]) diff --git a/tests/unit/handlers/test_structured_log.py b/tests/unit/handlers/test_structured_log.py index 13719bf5..66822d74 100644 --- a/tests/unit/handlers/test_structured_log.py +++ b/tests/unit/handlers/test_structured_log.py @@ -64,6 +64,7 @@ def test_format(self): "message": message, "severity": record.levelname, "logging.googleapis.com/trace": "", + "logging.googleapis.com/spanId": "", "logging.googleapis.com/sourceLocation": { "file": pathname, "line": str(lineno), @@ -128,8 +129,11 @@ def test_format_with_request(self): expected_path = "http://testserver/123" expected_agent = "Mozilla/5.0" expected_trace = "123" + expected_span = "456" + trace_header = f"{expected_trace}/{expected_span};o=0" expected_payload = { "logging.googleapis.com/trace": expected_trace, + "logging.googleapis.com/spanId": expected_span, "httpRequest": { "requestMethod": "PUT", "requestUrl": expected_path, @@ -145,7 +149,7 @@ def test_format_with_request(self): data="body", headers={ "User-Agent": expected_agent, - "X_CLOUD_TRACE_CONTEXT": expected_trace, + "X_CLOUD_TRACE_CONTEXT": trace_header, }, ) handler.filter(record) @@ -173,16 +177,19 @@ def test_format_overrides(self): record = logging.LogRecord(logname, logging.INFO, "", 0, message, None, None) overwrite_path = "http://overwrite" inferred_path = "http://testserver/123" - overwrite_trace = "456" - inferred_trace = "123" + overwrite_trace = "abc" + overwrite_span = "def" + inferred_trace_span = "123/456;" overwrite_file = "test-file" record.http_request = {"requestUrl": overwrite_path} record.source_location = {"file": overwrite_file} record.trace = overwrite_trace + record.span_id = overwrite_span added_labels = {"added_key": "added_value", "overwritten_key": "new_value"} record.labels = added_labels expected_payload = { "logging.googleapis.com/trace": overwrite_trace, + "logging.googleapis.com/spanId": overwrite_span, "logging.googleapis.com/sourceLocation": { "file": overwrite_file, "function": "", @@ -206,7 +213,7 @@ def test_format_overrides(self): c.put( path=inferred_path, data="body", - headers={"X_CLOUD_TRACE_CONTEXT": inferred_trace}, + headers={"X_CLOUD_TRACE_CONTEXT": inferred_trace_span}, ) handler.filter(record) result = json.loads(handler.format(record))