This repository has been archived by the owner on Dec 31, 2023. It is now read-only.
/
_dataframe.py
153 lines (122 loc) · 5.44 KB
/
_dataframe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# Copyright 2016 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Time series as :mod:`pandas` dataframes."""
import itertools
try:
import pandas
except ImportError: # pragma: NO COVER
pandas = None
from google.cloud import monitoring_v3
# Well-known resource label names. _sorted_resource_labels places whichever
# of these are present first, in exactly this order, ahead of the remaining
# (alphabetically sorted) resource labels.
TOP_RESOURCE_LABELS = ("project_id", "aws_account", "location", "region", "zone")
def _extract_header(time_series):
    """Return a new ``TimeSeries`` carrying only the descriptive fields.

    The metric, resource, metric kind and value type are copied over from
    ``time_series``; its points are deliberately not passed along.
    """
    header_fields = ("metric", "resource", "metric_kind", "value_type")
    header_kwargs = {name: getattr(time_series, name) for name in header_fields}
    return monitoring_v3.TimeSeries(**header_kwargs)
def _extract_labels(time_series):
    """Merge the resource type, resource labels and metric labels.

    The result is a new dict that starts with a ``"resource_type"`` entry,
    followed by the resource labels and then the metric labels. When the
    same key appears in more than one source, the later source wins.
    """
    merged = dict(resource_type=time_series.resource.type)
    for source in (time_series.resource.labels, time_series.metric.labels):
        merged.update(source)
    return merged
def _extract_value(typed_value):
    """Return whichever member of the ``value`` oneof is set on a TypedValue."""
    # proto-plus has no counterpart to WhichOneof, so the underlying protobuf
    # message is consulted for the name of the populated field. The lookup on
    # the proto-plus object may break if proto-plus exposes that field under
    # a different name than the raw protobuf does.
    # https://github.com/googleapis/proto-plus-python/issues/137
    raw_message = monitoring_v3.TypedValue.pb(typed_value)
    populated_field = raw_message.WhichOneof("value")
    return getattr(typed_value, populated_field)
def _build_dataframe(time_series_iterable, label=None, labels=None):  # pragma: NO COVER
    """Build a :mod:`pandas` dataframe out of time series.

    :type time_series_iterable:
        iterable over :class:`~google.cloud.monitoring_v3.TimeSeries`
    :param time_series_iterable:
        An iterable (e.g., a query object) yielding time series.

    :type label: str
    :param label:
        (Optional) The label name to use for the dataframe header. This can be
        the name of a resource label or metric label (e.g.,
        ``"instance_name"``), or the string ``"resource_type"``.

    :type labels: list of strings, or None
    :param labels:
        A list or tuple of label names to use for the dataframe header.
        If more than one label name is provided, the resulting dataframe
        will have a multi-level column header.

        Specifying neither ``label`` nor ``labels`` results in a dataframe
        with a multi-level column header including the resource type and
        all available resource and metric labels.

        Specifying both ``label`` and ``labels`` is an error.

    :rtype: :class:`pandas.DataFrame`
    :returns: A dataframe where each column represents one time series.

    :raises: :exc:`RuntimeError` if `pandas` is not installed.
    :raises: :exc:`ValueError` if both ``label`` and ``labels`` are
        specified, or if ``labels`` contains no label names.
    """
    if pandas is None:
        raise RuntimeError("This method requires `pandas` to be installed.")

    # Decide once, with a single None test, whether the caller asked for the
    # single-label form; the same flag later controls the level names.
    single_label = label is not None
    if single_label:
        if labels:
            raise ValueError("Cannot specify both `label` and `labels`.")
        labels = (label,)

    columns = []
    headers = []
    for time_series in time_series_iterable:
        points = time_series.points
        pandas_series = pandas.Series(
            data=[_extract_value(point.value) for point in points],
            # Index each value by the end of its interval, expressed as
            # integer nanoseconds.
            index=[
                point.interval.end_time.timestamp_pb().ToNanoseconds()
                for point in points
            ],
        )
        columns.append(pandas_series)
        headers.append(_extract_header(time_series))

    # Implement a smart default of using all available labels.
    if labels is None:
        resource_labels = set(
            itertools.chain.from_iterable(header.resource.labels for header in headers)
        )
        metric_labels = set(
            itertools.chain.from_iterable(header.metric.labels for header in headers)
        )
        labels = (
            ["resource_type"]
            + _sorted_resource_labels(resource_labels)
            + sorted(metric_labels)
        )

    # Build a multi-level stack of column headers. Some labels may be
    # undefined for some time series, in which case "" is used. The merged
    # label mapping is computed once per series rather than once per
    # (label, series) pair.
    label_maps = [_extract_labels(header) for header in headers]
    levels = [[label_map.get(key, "") for label_map in label_maps] for key in labels]
    if not levels:
        # pandas cannot build a MultiIndex with zero levels; fail here with
        # a message that names the offending argument.
        raise ValueError("`labels` must contain at least one label name.")

    # Assemble the columns into a DataFrame: one row per series, transposed
    # so that each series becomes a column.
    dataframe = pandas.DataFrame.from_records(columns).T

    # Convert the integer nanosecond timestamps into a DatetimeIndex.
    dataframe.index = pandas.to_datetime(dataframe.index)

    # Build the column MultiIndex. Do not include level names in the column
    # header if the user requested a single-level header by specifying
    # "label".
    dataframe.columns = pandas.MultiIndex.from_arrays(
        levels, names=None if single_label else labels
    )

    # Sort the rows just in case (since the API doesn't guarantee the
    # ordering), and sort the columns lexicographically.
    return dataframe.sort_index(axis=0).sort_index(axis=1)
def _sorted_resource_labels(labels):
    """Order label names with the well-known resource labels leading.

    Names listed in ``TOP_RESOURCE_LABELS`` that occur in ``labels`` come
    first, in the order that constant defines; every other name follows in
    ascending sorted order.
    """
    leading = []
    for name in TOP_RESOURCE_LABELS:
        if name in labels:
            leading.append(name)

    remainder = [name for name in labels if name not in TOP_RESOURCE_LABELS]
    remainder.sort()
    return leading + remainder