Skip to content

Commit

Permalink
Add method to compute (weighted) quantiles (#686)
Browse files Browse the repository at this point in the history
  • Loading branch information
gidden committed Dec 15, 2022
1 parent e0534d9 commit 4cd4c7d
Show file tree
Hide file tree
Showing 6 changed files with 717 additions and 2 deletions.
1 change: 1 addition & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Bump minimum version of **pandas** to v1.2.0 to support automatic engine selecti
- [#713](https://github.com/IAMconsortium/pyam/pull/713) Informative error when using lists for filter by level, `level` now a forbidden column.
- [#709](https://github.com/IAMconsortium/pyam/pull/709) Hotfix ops to support `fillna=0`
- [#708](https://github.com/IAMconsortium/pyam/pull/708) Remove 'xls' as by-default-supported file format
- [#686](https://github.com/IAMconsortium/pyam/pull/686) Add support for (weighted) quantile timeseries as `df.compute.quantiles()` with a [tutorial](https://pyam-iamc.readthedocs.io/en/stable/tutorials/quantiles.html)

# Release v1.6.0

Expand Down
5 changes: 3 additions & 2 deletions doc/source/tutorials.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,13 @@ The source code is available in the folder

tutorials/pyam_first_steps.ipynb
tutorials/data_table_formats.ipynb
tutorials/unit_conversion.ipynb
tutorials/algebraic_operations.ipynb
tutorials/quantiles.ipynb
tutorials/iiasa_dbs.ipynb
tutorials/unfccc.ipynb
tutorials/GAMS_to_pyam.ipynb
tutorials/unit_conversion.ipynb
tutorials/aggregating_downscaling_consistency.ipynb
tutorials/algebraic_operations.ipynb
tutorials/subannual_time_resolution.ipynb
tutorials/ipcc_colors.ipynb
tutorials/legends.ipynb
Expand Down
590 changes: 590 additions & 0 deletions doc/source/tutorials/quantiles.ipynb

Large diffs are not rendered by default.

87 changes: 87 additions & 0 deletions pyam/compute.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import itertools
import math
import wquantiles
import pandas as pd
from pyam.index import replace_index_values
from pyam.timeseries import growth_rate
Expand All @@ -22,6 +24,91 @@ class IamComputeAccessor:
def __init__(self, df):
    """Bind the accessor to the object whose data the compute methods use.

    Parameters
    ----------
    df : object
        Stored as ``self._df``; presumably an :class:`IamDataFrame`
        (the methods of this accessor call ``variable``, ``timeseries()``
        and ``_finalize()`` on it) - TODO confirm against the caller.
    """
    self._df = df

def quantiles(
    self, quantiles, weights=None, level=["model", "scenario"], append=False
):
    """Compute the optionally weighted quantiles of data grouped by `level`.

    For example, the following will provide the interquartile range and median value
    of CO2 emissions across all models and scenarios in a given dataset:

    .. code-block:: python

        df.filter(variable='Emissions|CO2').compute.quantiles([0.25, 0.5, 0.75])

    Parameters
    ----------
    quantiles : collection
        Group of quantile values to compute
    weights : pd.Series, optional
        Series named 'weight' and indexed by `level`. Timeseries without a
        matching weight are dropped (inner join). If omitted, every
        timeseries gets the same weight of 1.
    level : str or collection, optional
        The index columns to compute quantiles over
    append : bool, optional
        Whether to append computed timeseries data to this instance.

    Returns
    -------
    :class:`IamDataFrame` or **None**
        Computed data or None if `append=True`.

    Raises
    ------
    ValueError
        If more than one variable provided or if argument `weights` is malformed.

    Notes
    -----
    The computed timeseries use the model name "Quantiles" (or
    "Weighted Quantiles" if `weights` is given) and the string form of each
    quantile as scenario name. Quantiles are computed separately for every
    combination of the index levels that remain after removing `level`.
    """
    from pyam.core import (
        IamDataFrame,
        concat,
    )  # here because of circular import issue

    self_df = self._df
    n_variables = len(self_df.variable)
    if n_variables > 1:
        raise ValueError(
            "quantiles() currently supports only 1 variable, and this "
            f"dataframe has {n_variables}"
        )
    if weights is not None and weights.name != "weight":
        raise ValueError("weights pd.Series must have name 'weight'")

    # work on a private list: never pass the shared default object on to
    # pandas, and accept a single level name as well as any collection
    level = [level] if isinstance(level, str) else list(level)

    df = self_df.timeseries()
    model = (
        "Quantiles" if weights is None else "Weighted Quantiles"
    )  # can make this a kwarg

    # get weights aligned with model/scenario in data
    if weights is None:
        df["weight"] = 1.0
    else:
        df = df.join(weights, how="inner")
    # keep the weights as a positional array so that they can be sliced with
    # the same row positions as the data below
    w = df["weight"].values
    df = df.drop("weight", axis="columns")

    # prep data for processing
    df = df.reset_index(level=level).drop(columns=level)
    values = df.values
    names = df.index.names
    time_index = pd.Series(df.columns, name="year")

    # row positions of every remaining index key (by default: region,
    # variable and unit), in order of first appearance
    rows_by_key = {}
    for position, key in enumerate(df.index):
        rows_by_key.setdefault(key, []).append(position)

    dfs = []
    for (key, rows), q in itertools.product(rows_by_key.items(), quantiles):
        # select data AND weights of this group only; passing the weights of
        # all rows fails as soon as there is more than one group
        data = pd.Series(
            wquantiles.quantile(values[rows].T, w[rows], q),
            index=time_index,
            name="value",
        )
        # a single remaining index level yields scalar keys instead of tuples
        labels = key if isinstance(key, tuple) else (key,)
        dfs.append(
            IamDataFrame(
                data,
                model=model,
                scenario=str(q),  # can make this a kwarg
                **dict(zip(names, labels)),
            )
        )

    # append to `self` or return as `IamDataFrame`
    return self_df._finalize(concat(dfs), append=append)

def growth_rate(self, mapping, append=False):
"""Compute the annualized growth rate of a timeseries along the time dimension
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ install_requires =
six
setuptools >= 41
setuptools_scm
wquantiles
# required explicitly for Python 3.7
importlib_metadata
xlsxwriter
Expand Down
35 changes: 35 additions & 0 deletions tests/test_feature_quantiles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from pyam import IamDataFrame
import pytest
from pyam.testing import assert_iamframe_equal
import pandas as pd


def test_quantile_one_variable(test_pd_df):
    """Check the 0.25/0.5/0.75 quantiles of 'Primary Energy' in the test data.

    The expected lower and upper quantiles are the values 1 and 2 (2005)
    resp. 6 and 7 (2010), so the only 'new' number is the median, i.e. the
    midpoint between them.
    """
    quantiles = (0.25, 0.5, 0.75)

    primary_energy = IamDataFrame(test_pd_df).filter(variable="Primary Energy")
    obs = primary_energy.compute.quantiles(quantiles)

    # one row per quantile, named after the quantile value
    expected_data = pd.DataFrame(
        {
            "scenario": list(map(str, quantiles)),
            "2005": [1, 1.5, 2],
            "2010": [6, 6.5, 7],
        }
    )
    meta_columns = dict(
        model="Quantiles",
        region="World",
        variable="Primary Energy",
        unit="EJ/yr",
    )
    exp = IamDataFrame(expected_data, **meta_columns)

    assert_iamframe_equal(exp, obs)


def test_quantile_missing_variable(test_pd_df):
    """Computing quantiles without selecting a single variable must fail.

    The unfiltered test data is expected to hold more than one variable
    (TODO confirm against the `test_pd_df` fixture), which `quantiles()`
    rejects with a ValueError.
    """
    df = IamDataFrame(test_pd_df)
    # pin the message so that an unrelated ValueError cannot satisfy the test
    with pytest.raises(ValueError, match="supports only 1 variable"):
        df.compute.quantiles((0.25, 0.5))

0 comments on commit 4cd4c7d

Please sign in to comment.