Release candidate 2.0 (#779)

danielhuppmann committed Sep 12, 2023
1 parent 4759802 commit fd6d855
Showing 14 changed files with 48 additions and 167 deletions.
1 change: 1 addition & 0 deletions .github/workflows/pytest.yml
@@ -19,6 +19,7 @@ jobs:
- windows-latest
python-version:
- '3.10'
+- '3.11'

fail-fast: false

13 changes: 8 additions & 5 deletions RELEASE_NOTES.md
@@ -1,7 +1,10 @@
-# Next Release
+# Release v2.0.0

-The next release must bump the major version number.
-Reactivate tests for Python 3.11 once ixmp4 0.3 is released.
+## Highlights
+
+- Use **ixmp4** as dependency for better integration with the IIASA Scenario Explorer database infrastructure
+- Refactor the "exclude"-column for validation as own attribute (instead of a column in `meta`)
+- Implement a cleaner package structure and reduce exposure of internal methods/modules

## Dependency changes

@@ -20,9 +23,9 @@ The column *exclude* of the `meta` indicators was moved to a new attribute `exclude`
All validation methods are refactored such that the argument `exclude_on_fail` changes
this new attribute (see PR [#759](https://github.com/IAMconsortium/pyam/pull/759)).

-The term "exclude" is now an illegal column name for (timeseries) data and meta tables.
+The term "exclude" is now an illegal column for (timeseries) data and meta indicators.
When importing an xlsx file created with pyam < 2.0, which has an "exclude" column in
-"meta", that column is moved to the new exclude attribute with a log message.
+the "meta" sheet, the column is moved to the new `exclude` attribute with a log message.

PR [#764](https://github.com/IAMconsortium/pyam/pull/764) implemented a more restrictive
approach to exposing pyam-internals at the package level, requiring a more explicit
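The refactored behavior in a nutshell: a minimal sketch (with made-up data) of the new `exclude` attribute and its interaction with `exclude_on_fail`, assuming the pyam 2.0 API shown in this diff.

```python
import pandas as pd
from pyam import IamDataFrame

# toy IAMC-format data (values made up for illustration)
df = IamDataFrame(
    pd.DataFrame(
        [["model_a", "scen_a", "World", "Primary Energy", "EJ/yr", 1.0, 6.0]],
        columns=["model", "scenario", "region", "variable", "unit", 2005, 2010],
    )
)

# pyam >= 2.0: exclusion flags live in their own attribute, a boolean
# pd.Series on the model-scenario index, instead of a column in `meta`
print(df.exclude)

# validation methods update that attribute when `exclude_on_fail` is set
df.validate(criteria={"Primary Energy": {"up": 5.0}}, exclude_on_fail=True)
print(df.exclude)  # scen_a is now flagged: its 2010 value exceeds the bound
```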
2 changes: 1 addition & 1 deletion docs/R_tutorials/pyam_R_tutorial.ipynb
@@ -518,7 +518,7 @@
"id": "a0eb9b43",
"metadata": {},
"source": [
"See the [pyam-IIASA-database tutorial](https://pyam-iamc.readthedocs.io/en/stable/tutorials/iiasa_dbs.html)\n",
"See the [pyam-IIASA-database tutorial](https://pyam-iamc.readthedocs.io/en/stable/tutorials/iiasa.html)\n",
"or the [API documentation](https://pyam-iamc.readthedocs.io/en/stable/api/iiasa.html)\n",
"for more information and a complete list of features!"
]
2 changes: 1 addition & 1 deletion docs/api/database.rst
@@ -13,7 +13,7 @@ See https://software.ece.iiasa.ac.at/ixmp-server for more information.
The |pyam| package uses this interface to read timeseries data as well as
categorization and quantitative indicators.
The data is returned as an :class:`IamDataFrame`.
-See `this tutorial <../tutorials/iiasa_dbs.html>`_ for more information.
+See `this tutorial <../tutorials/iiasa.html>`_ for more information.

.. autofunction:: read_iiasa

2 changes: 1 addition & 1 deletion docs/api/iiasa.rst
@@ -46,7 +46,7 @@ Coming soon...
The *Scenario Explorer* infrastructure developed by the Scenario Services and Scientific
Software team was developed and used for projects from 2018 until 2023.

-See `this tutorial <../tutorials/iiasa_dbs.html>`_ for more information.
+See `this tutorial <../tutorials/iiasa.html>`_ for more information.

.. autoclass:: Connection
:members:
2 changes: 1 addition & 1 deletion docs/tutorials.rst
@@ -24,7 +24,7 @@ The source code is available in the folder
tutorials/unit_conversion.ipynb
tutorials/algebraic_operations.ipynb
tutorials/quantiles.ipynb
-tutorials/iiasa_dbs.ipynb
+tutorials/iiasa.ipynb
tutorials/unfccc.ipynb
tutorials/GAMS_to_pyam.ipynb
tutorials/aggregating_downscaling_consistency.ipynb
File renamed without changes.
89 changes: 12 additions & 77 deletions pyam/core.py
@@ -181,8 +181,7 @@ def _init(self, data, meta=None, index=DEFAULT_META_INDEX, **kwargs):
self._data, index, self.time_col, self.extra_cols = _data

# define `meta` dataframe for categorization & quantitative indicators
-_index = make_index(self._data, cols=index)
-self.meta = pd.DataFrame(index=_index)
+self.meta = pd.DataFrame(index=make_index(self._data, cols=index))
self.exclude = False

# if given explicitly, merge meta dataframe after downselecting
@@ -1039,52 +1038,10 @@ def require_data(
_exclude_on_fail(self, missing_required.droplevel(list(required)))
return missing_required.to_frame(index=False)

-def require_variable(self, variable, unit=None, year=None, exclude_on_fail=False):
-    """Check whether all scenarios have a required variable
-    Parameters
-    ----------
-    variable : str
-        Required variable.
-    unit : str, optional
-        Name of unit (optional).
-    year : int or list, optional
-        Check whether the variable exists for ANY of the years (if a list).
-    exclude_on_fail : bool, optional
-        If True, set :attr:`exclude` = True for all scenarios that do not satisfy
-        the criteria.
-    """
-    # TODO: deprecated, remove for release >= 2.0
-    deprecation_warning("Use `df.require_data()` instead.")
-
-    criteria = {"variable": variable}
-    if unit:
-        criteria.update({"unit": unit})
-    if year:
-        criteria.update({"year": year})
-
-    keep = self._apply_filters(**criteria)
-    idx = self.meta.index.difference(_meta_idx(self.data[keep]))
-
-    n = len(idx)
-    if n == 0:
-        logger.info(
-            "All scenarios have the required variable `{}`".format(variable)
-        )
-        return
-
-    msg = (
-        "{} scenario does not include required variable `{}`"
-        if n == 1
-        else "{} scenarios do not include required variable `{}`"
-    )
-
-    if exclude_on_fail:
-        _exclude_on_fail(self, idx)
-
-    logger.info(msg.format(n, variable))
-    return pd.DataFrame(index=idx).reset_index()
+def require_variable(self, *args, **kwargs):
+    """This method is deprecated, use `df.require_data()` instead."""
+    # TODO: deprecated, remove for release >= 2.1
+    raise DeprecationWarning("Use `df.require_data()` instead.")

def validate(self, criteria={}, exclude_on_fail=False):
"""Validate scenarios using criteria on timeseries values
@@ -1901,6 +1858,8 @@ def filter(self, keep=True, inplace=False, **kwargs):
msg = "Only yearly data after filtering, time-domain changed to 'year'."
logger.info(msg)

+ret._data.sort_index(inplace=True)
+
# downselect `meta` dataframe
idx = make_index(ret._data, cols=self.index.names)
if len(idx) == 0:
@@ -2562,16 +2521,7 @@ def load_meta(self, path, sheet_name="meta", ignore_conflict=False, **kwargs):
# merge imported meta indicators
self.meta = merge_meta(meta, self.meta, ignore_conflict=ignore_conflict)

-def map_regions(
-    self,
-    map_col,
-    agg=None,
-    copy_col=None,
-    fname=None,
-    region_col=None,
-    remove_duplicates=False,
-    inplace=False,
-):
+def map_regions(self, map_col, **kwargs):
# TODO: deprecated, remove for release >= 2.1
raise DeprecationWarning(
"This method was removed. Please use `aggregate_region()` instead."
@@ -2671,25 +2621,10 @@ def validate(df, criteria={}, exclude_on_fail=False, **kwargs):
return vdf


-def require_variable(
-    df, variable, unit=None, year=None, exclude_on_fail=False, **kwargs
-):
-    """Check whether all scenarios have a required variable
-    Parameters
-    ----------
-    df : IamDataFrame
-    args : passed to :meth:`IamDataFrame.require_variable`
-    kwargs : used for downselecting IamDataFrame
-        passed to :meth:`IamDataFrame.filter`
-    """
-    fdf = df.filter(**kwargs)
-    if len(fdf.data) > 0:
-        vdf = fdf.require_variable(
-            variable=variable, unit=unit, year=year, exclude_on_fail=exclude_on_fail
-        )
-        df._exclude |= fdf._exclude  # update if any excluded
-        return vdf
+def require_variable(*args, **kwargs):
+    """This method is deprecated, use `df.require_data()` instead."""
+    # TODO: deprecated, remove for release >= 2.1
+    raise DeprecationWarning("Use `df.require_data()` instead.")


def categorize(
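For code that called the removed method, a hedged migration sketch (continuing with `df` from the sketch above; it assumes `require_data()` accepts dimension filters such as `variable` and `year`, as the hunk shown earlier suggests):

```python
# pyam < 2.0 (removed in this commit):
# df.require_variable(variable="Primary Energy", year=2010, exclude_on_fail=True)

# pyam >= 2.0, per the DeprecationWarning above (sketch, assumed signature):
missing = df.require_data(variable="Primary Energy", year=2010, exclude_on_fail=True)
# returns the model-scenario combinations lacking the data (None if complete)
# and flags them via the new `exclude` attribute when `exclude_on_fail=True`
print(missing)
```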
17 changes: 6 additions & 11 deletions pyam/iiasa.py
@@ -1,3 +1,4 @@
+from io import StringIO
from pathlib import Path
import json
import logging
@@ -242,7 +243,7 @@ def meta_columns(self):
url = "/".join([self._base_url, "metadata/types"])
r = requests.get(url, headers=self.auth())
_check_response(r)
-return pd.read_json(r.text, orient="records")["name"]
+return pd.read_json(StringIO(r.text), orient="records")["name"]

def _query_index(self, default_only=True, meta=False, cols=[], **kwargs):
# TODO: at present this reads in all data for all scenarios,
@@ -255,7 +256,7 @@ def _query_index(self, default_only=True, meta=False, cols=[], **kwargs):
_check_response(r)

# cast response to dataframe, apply filter by kwargs, and return
-runs = pd.read_json(r.text, orient="records")
+runs = pd.read_json(StringIO(r.text), orient="records")
if runs.empty:
logger.warning("No permission to view model(s) or no scenarios exist.")
return pd.DataFrame([], columns=META_IDX + ["version", "run_id"] + cols)
@@ -360,7 +361,7 @@ def variables(self):
url = "/".join([self._base_url, "ts"])
r = requests.get(url, headers=self.auth())
_check_response(r)
-df = pd.read_json(r.text, orient="records")
+df = pd.read_json(StringIO(r.text), orient="records")
return pd.Series(df["variable"].unique(), name="variable")

@lru_cache()
@@ -382,7 +383,7 @@ def regions(self, include_synonyms=False):

@staticmethod
def convert_regions_payload(response, include_synonyms):
-df = pd.read_json(response, orient="records")
+df = pd.read_json(StringIO(response), orient="records")
if df.empty:
return df
if "synonyms" not in df.columns:
@@ -449,10 +450,6 @@ def _match(data, patterns):
# pass empty list to API if all regions selected
if len(regions) == len(self.regions()):
regions = []
-logger.debug(
-    f"Prepared filter for {len(regions)} region(s), "
-    f"{len(variables)} variables and {len(runs)} runs"
-)
data = {
"filters": {
"regions": list(regions),
@@ -523,7 +520,6 @@ def query(self, default_only=True, meta=True, **kwargs):
# retrieve data
_args = json.dumps(self._query_post(_meta, default_only=default_only, **kwargs))
url = "/".join([self._base_url, "runs/bulk/ts"])
-logger.debug(f"Query timeseries data from {url} with data {_args}")
r = requests.post(url, headers=headers, data=_args)
_check_response(r)
# refactor returned json object to be castable to an IamDataFrame
@@ -537,8 +533,7 @@ def query(self, default_only=True, meta=True, **kwargs):
value=float,
version=int,
)
-data = pd.read_json(r.text, orient="records", dtype=dtype)
-logger.debug(f"Response: {len(r.text)} bytes, {len(data)} records")
+data = pd.read_json(StringIO(r.text), orient="records", dtype=dtype)
cols = IAMC_IDX + ["year", "value", "subannual", "version"]
# keep only known columns or init empty df
data = pd.DataFrame(data=data, columns=cols)
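The recurring change in this file wraps response text in `StringIO`, since pandas 2.1+ deprecates passing a literal JSON string to `read_json()`. A minimal sketch of the pattern:

```python
from io import StringIO

import pandas as pd

# stand-in for an HTTP response body (`r.text` in the diff above)
raw = '[{"name": "model"}, {"name": "scenario"}]'

# pandas >= 2.1 warns on literal JSON strings in read_json(),
# so the text is wrapped in a file-like buffer first
df = pd.read_json(StringIO(raw), orient="records")
print(df["name"].tolist())  # ['model', 'scenario']
```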
4 changes: 2 additions & 2 deletions pyam/str.py
@@ -106,8 +106,8 @@ def reduce_hierarchy(x, depth):
----------
x : str
Uses ``|`` to separate the components of the variable.
-level : int or list of int
-    Position of the component.s
+depth : int or list of int
+    Position of the components.
"""
_x = x.split("|")
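A short usage sketch of `reduce_hierarchy`, whose docstring is corrected above; the behavior, in particular for negative `depth`, is inferred from the pyam source and should be treated as an assumption:

```python
from pyam.str import reduce_hierarchy

# truncate an IAMC-style variable at a given component position
print(reduce_hierarchy("Primary Energy|Coal|Lignite", 0))   # Primary Energy
print(reduce_hierarchy("Primary Energy|Coal|Lignite", 1))   # Primary Energy|Coal
# negative depth counts from the end (assumption): drop the last component
print(reduce_hierarchy("Primary Energy|Coal|Lignite", -1))  # Primary Energy|Coal
```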
10 changes: 5 additions & 5 deletions setup.cfg
@@ -27,18 +27,18 @@ python_requires = >=3.10, <3.12
# Please also add a section "Dependency changes" to the release notes
install_requires =
iam-units >= 2020.4.21
-ixmp4 >= 0.2.0
+ixmp4 >= 0.4.0
numpy >= 1.23.0, < 1.24
-requests
+# requests included via ixmp4
pyjwt
-httpx[http2]
+# httpx[http2] included via ixmp4
openpyxl
pandas >= 2.0.0
-scipy
pint >= 0.13
PyYAML
matplotlib >= 3.6.0, < 3.7.1
-seaborn
+scipy >= 1.10.0
+seaborn >= 0.11
six
setuptools >= 41
setuptools_scm
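With requests and httpx[http2] now expected to arrive transitively via ixmp4 >= 0.4.0, a quick environment sanity check might look like this (stdlib-only sketch):

```python
from importlib.metadata import version

# requests and httpx should still be importable after installing pyam,
# now as transitive dependencies of ixmp4 rather than direct requirements
for pkg in ("ixmp4", "requests", "httpx", "scipy", "seaborn"):
    print(pkg, version(pkg))
```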
13 changes: 6 additions & 7 deletions tests/conftest.py
@@ -3,14 +3,13 @@

matplotlib.use("agg")

-from pathlib import Path
-import os
-from requests.exceptions import ConnectionError
-import pytest
+from datetime import datetime
+from httpx import ConnectError
import numpy as np
import pandas as pd
+import pytest
+from pathlib import Path

-from datetime import datetime
from pyam import IamDataFrame, iiasa
from pyam.utils import META_IDX, IAMC_IDX

@@ -19,7 +18,7 @@
try:
iiasa.Connection()
IIASA_UNAVAILABLE = False
-except ConnectionError:  # pragma: no cover
+except ConnectError:  # pragma: no cover
IIASA_UNAVAILABLE = True

TEST_API = "integration-test"
@@ -237,7 +236,7 @@ def reg_df():

@pytest.fixture(scope="session")
def plot_df():
-df = IamDataFrame(data=os.path.join(TEST_DATA_DIR, "plot_data.csv"))
+df = IamDataFrame(data=TEST_DATA_DIR / "plot_data.csv")
yield df


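A hypothetical illustration of how the `IIASA_UNAVAILABLE` flag defined above is typically consumed in a test module; the import path and test name are assumptions, not part of this commit:

```python
import pytest

from conftest import IIASA_UNAVAILABLE  # assumed import path within the test suite


@pytest.mark.skipif(IIASA_UNAVAILABLE, reason="IIASA test API not reachable")
def test_iiasa_query():
    ...  # would exercise the "integration-test" API
```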
