Release candidate 2.0 (#779)

danielhuppmann committed Sep 12, 2023
1 parent 4759802 commit fd6d855
Showing 14 changed files with 48 additions and 167 deletions.
1 change: 1 addition & 0 deletions .github/workflows/pytest.yml
@@ -19,6 +19,7 @@ jobs:
- windows-latest
python-version:
- '3.10'
+- '3.11'

fail-fast: false

13 changes: 8 additions & 5 deletions RELEASE_NOTES.md
@@ -1,7 +1,10 @@
-# Next Release
+# Release v2.0.0

-The next release must bump the major version number.
-Reactivate tests for Python 3.11 once ixmp4 0.3 is released.
+## Highlights
+
+- Use **ixmp4** as dependency for better integration with the IIASA Scenario Explorer database infrastructure
+- Refactor the "exclude"-column for validation as own attribute (instead of a column in `meta`)
+- Implement a cleaner package structure and reduce exposure of internal methods/modules

## Dependency changes

@@ -20,9 +23,9 @@ The column *exclude* of the `meta` indicators was moved to a new attribute `exclude`
All validation methods are refactored such that the argument `exclude_on_fail` changes
this new attribute (see PR [#759](https://github.com/IAMconsortium/pyam/pull/759)).

-The term "exclude" is now an illegal column name for (timeseries) data and meta tables.
+The term "exclude" is now an illegal column for (timeseries) data and meta indicators.
When importing an xlsx file created with pyam < 2.0, which has an "exclude" column in
-"meta", that column is moved to the new exclude attribute with a log message.
+the "meta" sheet, the column is moved to the new `exclude` attribute with a log message.

PR [#764](https://github.com/IAMconsortium/pyam/pull/764) implemented a more restrictive
approach to exposing pyam-internals at the package level, requiring a more explicit
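The refactored behavior in a nutshell: a minimal sketch (with made-up data) of the new `exclude` attribute and its interaction with `exclude_on_fail`, assuming the pyam 2.0 API shown in this diff.

```python
import pandas as pd
from pyam import IamDataFrame

# toy IAMC-format data (values made up for illustration)
df = IamDataFrame(
    pd.DataFrame(
        [["model_a", "scen_a", "World", "Primary Energy", "EJ/yr", 1.0, 6.0]],
        columns=["model", "scenario", "region", "variable", "unit", 2005, 2010],
    )
)

# pyam >= 2.0: exclusion flags live in their own attribute, a boolean
# pd.Series on the model-scenario index, instead of a column in `meta`
print(df.exclude)

# validation methods update that attribute when `exclude_on_fail` is set
df.validate(criteria={"Primary Energy": {"up": 5.0}}, exclude_on_fail=True)
print(df.exclude)  # scen_a is now flagged: its 2010 value exceeds the bound
```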
2 changes: 1 addition & 1 deletion docs/R_tutorials/pyam_R_tutorial.ipynb
@@ -518,7 +518,7 @@
"id": "a0eb9b43",
"metadata": {},
"source": [
"See the [pyam-IIASA-database tutorial](https://pyam-iamc.readthedocs.io/en/stable/tutorials/iiasa_dbs.html)\n",
"See the [pyam-IIASA-database tutorial](https://pyam-iamc.readthedocs.io/en/stable/tutorials/iiasa.html)\n",
"or the [API documentation](https://pyam-iamc.readthedocs.io/en/stable/api/iiasa.html)\n",
"for more information and a complete list of features!"
]
2 changes: 1 addition & 1 deletion docs/api/database.rst
@@ -13,7 +13,7 @@ See https://software.ece.iiasa.ac.at/ixmp-server for more information.
The |pyam| package uses this interface to read timeseries data as well as
categorization and quantitative indicators.
The data is returned as an :class:`IamDataFrame`.
-See `this tutorial <../tutorials/iiasa_dbs.html>`_ for more information.
+See `this tutorial <../tutorials/iiasa.html>`_ for more information.

.. autofunction:: read_iiasa

2 changes: 1 addition & 1 deletion docs/api/iiasa.rst
@@ -46,7 +46,7 @@ Coming soon...
The *Scenario Explorer* infrastructure developed by the Scenario Services and Scientific
Software team was developed and used for projects from 2018 until 2023.

-See `this tutorial <../tutorials/iiasa_dbs.html>`_ for more information.
+See `this tutorial <../tutorials/iiasa.html>`_ for more information.

.. autoclass:: Connection
:members:
2 changes: 1 addition & 1 deletion docs/tutorials.rst
@@ -24,7 +24,7 @@ The source code is available in the folder
tutorials/unit_conversion.ipynb
tutorials/algebraic_operations.ipynb
tutorials/quantiles.ipynb
-tutorials/iiasa_dbs.ipynb
+tutorials/iiasa.ipynb
tutorials/unfccc.ipynb
tutorials/GAMS_to_pyam.ipynb
tutorials/aggregating_downscaling_consistency.ipynb
File renamed without changes.
89 changes: 12 additions & 77 deletions pyam/core.py
@@ -181,8 +181,7 @@ def _init(self, data, meta=None, index=DEFAULT_META_INDEX, **kwargs):
self._data, index, self.time_col, self.extra_cols = _data

# define `meta` dataframe for categorization & quantitative indicators
-_index = make_index(self._data, cols=index)
-self.meta = pd.DataFrame(index=_index)
+self.meta = pd.DataFrame(index=make_index(self._data, cols=index))
self.exclude = False

# if given explicitly, merge meta dataframe after downselecting
@@ -1039,52 +1038,10 @@ def require_data(
_exclude_on_fail(self, missing_required.droplevel(list(required)))
return missing_required.to_frame(index=False)

-def require_variable(self, variable, unit=None, year=None, exclude_on_fail=False):
-    """Check whether all scenarios have a required variable
-    Parameters
-    ----------
-    variable : str
-        Required variable.
-    unit : str, optional
-        Name of unit (optional).
-    year : int or list, optional
-        Check whether the variable exists for ANY of the years (if a list).
-    exclude_on_fail : bool, optional
-        If True, set :attr:`exclude` = True for all scenarios that do not satisfy
-        the criteria.
-    """
-    # TODO: deprecated, remove for release >= 2.0
-    deprecation_warning("Use `df.require_data()` instead.")
-
-    criteria = {"variable": variable}
-    if unit:
-        criteria.update({"unit": unit})
-    if year:
-        criteria.update({"year": year})
-
-    keep = self._apply_filters(**criteria)
-    idx = self.meta.index.difference(_meta_idx(self.data[keep]))
-
-    n = len(idx)
-    if n == 0:
-        logger.info(
-            "All scenarios have the required variable `{}`".format(variable)
-        )
-        return
-
-    msg = (
-        "{} scenario does not include required variable `{}`"
-        if n == 1
-        else "{} scenarios do not include required variable `{}`"
-    )
-
-    if exclude_on_fail:
-        _exclude_on_fail(self, idx)
-
-    logger.info(msg.format(n, variable))
-    return pd.DataFrame(index=idx).reset_index()
+def require_variable(self, *args, **kwargs):
+    """This method is deprecated, use `df.require_data()` instead."""
+    # TODO: deprecated, remove for release >= 2.1
+    raise DeprecationWarning("Use `df.require_data()` instead.")

def validate(self, criteria={}, exclude_on_fail=False):
"""Validate scenarios using criteria on timeseries values
@@ -1901,6 +1858,8 @@ def filter(self, keep=True, inplace=False, **kwargs):
msg = "Only yearly data after filtering, time-domain changed to 'year'."
logger.info(msg)

+ret._data.sort_index(inplace=True)
+
# downselect `meta` dataframe
idx = make_index(ret._data, cols=self.index.names)
if len(idx) == 0:
@@ -2562,16 +2521,7 @@ def load_meta(self, path, sheet_name="meta", ignore_conflict=False, **kwargs):
# merge imported meta indicators
self.meta = merge_meta(meta, self.meta, ignore_conflict=ignore_conflict)

-def map_regions(
-    self,
-    map_col,
-    agg=None,
-    copy_col=None,
-    fname=None,
-    region_col=None,
-    remove_duplicates=False,
-    inplace=False,
-):
+def map_regions(self, map_col, **kwargs):
# TODO: deprecated, remove for release >= 2.1
raise DeprecationWarning(
"This method was removed. Please use `aggregate_region()` instead."
@@ -2671,25 +2621,10 @@ def validate(df, criteria={}, exclude_on_fail=False, **kwargs):
return vdf


-def require_variable(
-    df, variable, unit=None, year=None, exclude_on_fail=False, **kwargs
-):
-    """Check whether all scenarios have a required variable
-    Parameters
-    ----------
-    df : IamDataFrame
-    args : passed to :meth:`IamDataFrame.require_variable`
-    kwargs : used for downselecting IamDataFrame
-        passed to :meth:`IamDataFrame.filter`
-    """
-    fdf = df.filter(**kwargs)
-    if len(fdf.data) > 0:
-        vdf = fdf.require_variable(
-            variable=variable, unit=unit, year=year, exclude_on_fail=exclude_on_fail
-        )
-        df._exclude |= fdf._exclude  # update if any excluded
-        return vdf
+def require_variable(*args, **kwargs):
+    """This method is deprecated, use `df.require_data()` instead."""
+    # TODO: deprecated, remove for release >= 2.1
+    raise DeprecationWarning("Use `df.require_data()` instead.")


def categorize(
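For code that called the removed method, a hedged migration sketch (continuing with `df` from the sketch above; it assumes `require_data()` accepts dimension filters such as `variable` and `year`, as the hunk shown earlier suggests):

```python
# pyam < 2.0 (removed in this commit):
# df.require_variable(variable="Primary Energy", year=2010, exclude_on_fail=True)

# pyam >= 2.0, per the DeprecationWarning above (sketch, assumed signature):
missing = df.require_data(variable="Primary Energy", year=2010, exclude_on_fail=True)
# returns the model-scenario combinations lacking the data (None if complete)
# and flags them via the new `exclude` attribute when `exclude_on_fail=True`
print(missing)
```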
17 changes: 6 additions & 11 deletions pyam/iiasa.py
@@ -1,3 +1,4 @@
+from io import StringIO
from pathlib import Path
import json
import logging
@@ -242,7 +243,7 @@ def meta_columns(self):
url = "/".join([self._base_url, "metadata/types"])
r = requests.get(url, headers=self.auth())
_check_response(r)
-return pd.read_json(r.text, orient="records")["name"]
+return pd.read_json(StringIO(r.text), orient="records")["name"]

def _query_index(self, default_only=True, meta=False, cols=[], **kwargs):
# TODO: at present this reads in all data for all scenarios,
@@ -255,7 +256,7 @@ def _query_index(self, default_only=True, meta=False, cols=[], **kwargs):
_check_response(r)

# cast response to dataframe, apply filter by kwargs, and return
-runs = pd.read_json(r.text, orient="records")
+runs = pd.read_json(StringIO(r.text), orient="records")
if runs.empty:
logger.warning("No permission to view model(s) or no scenarios exist.")
return pd.DataFrame([], columns=META_IDX + ["version", "run_id"] + cols)
@@ -360,7 +361,7 @@ def variables(self):
url = "/".join([self._base_url, "ts"])
r = requests.get(url, headers=self.auth())
_check_response(r)
-df = pd.read_json(r.text, orient="records")
+df = pd.read_json(StringIO(r.text), orient="records")
return pd.Series(df["variable"].unique(), name="variable")

@lru_cache()
@@ -382,7 +383,7 @@ def regions(self, include_synonyms=False):

@staticmethod
def convert_regions_payload(response, include_synonyms):
-df = pd.read_json(response, orient="records")
+df = pd.read_json(StringIO(response), orient="records")
if df.empty:
return df
if "synonyms" not in df.columns:
@@ -449,10 +450,6 @@ def _match(data, patterns):
# pass empty list to API if all regions selected
if len(regions) == len(self.regions()):
regions = []
-logger.debug(
-    f"Prepared filter for {len(regions)} region(s), "
-    f"{len(variables)} variables and {len(runs)} runs"
-)
data = {
"filters": {
"regions": list(regions),
@@ -523,7 +520,6 @@ def query(self, default_only=True, meta=True, **kwargs):
# retrieve data
_args = json.dumps(self._query_post(_meta, default_only=default_only, **kwargs))
url = "/".join([self._base_url, "runs/bulk/ts"])
-logger.debug(f"Query timeseries data from {url} with data {_args}")
r = requests.post(url, headers=headers, data=_args)
_check_response(r)
# refactor returned json object to be castable to an IamDataFrame
@@ -537,8 +533,7 @@ def query(self, default_only=True, meta=True, **kwargs):
value=float,
version=int,
)
-data = pd.read_json(r.text, orient="records", dtype=dtype)
-logger.debug(f"Response: {len(r.text)} bytes, {len(data)} records")
+data = pd.read_json(StringIO(r.text), orient="records", dtype=dtype)
cols = IAMC_IDX + ["year", "value", "subannual", "version"]
# keep only known columns or init empty df
data = pd.DataFrame(data=data, columns=cols)
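The recurring change in this file wraps response text in `StringIO`, since pandas 2.1+ deprecates passing a literal JSON string to `read_json()`. A minimal sketch of the pattern:

```python
from io import StringIO

import pandas as pd

# stand-in for an HTTP response body (`r.text` in the diff above)
raw = '[{"name": "model"}, {"name": "scenario"}]'

# pandas >= 2.1 warns on literal JSON strings in read_json(),
# so the text is wrapped in a file-like buffer first
df = pd.read_json(StringIO(raw), orient="records")
print(df["name"].tolist())  # ['model', 'scenario']
```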
4 changes: 2 additions & 2 deletions pyam/str.py
@@ -106,8 +106,8 @@ def reduce_hierarchy(x, depth):
----------
x : str
Uses ``|`` to separate the components of the variable.
-level : int or list of int
-    Position of the component.s
+depth : int or list of int
+    Position of the components.
"""
_x = x.split("|")
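A short usage sketch of `reduce_hierarchy`, whose docstring is corrected above; the behavior, in particular for negative `depth`, is inferred from the pyam source and should be treated as an assumption:

```python
from pyam.str import reduce_hierarchy

# truncate an IAMC-style variable at a given component position
print(reduce_hierarchy("Primary Energy|Coal|Lignite", 0))   # Primary Energy
print(reduce_hierarchy("Primary Energy|Coal|Lignite", 1))   # Primary Energy|Coal
# negative depth counts from the end (assumption): drop the last component
print(reduce_hierarchy("Primary Energy|Coal|Lignite", -1))  # Primary Energy|Coal
```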
10 changes: 5 additions & 5 deletions setup.cfg
@@ -27,18 +27,18 @@ python_requires = >=3.10, <3.12
# Please also add a section "Dependency changes" to the release notes
install_requires =
iam-units >= 2020.4.21
-ixmp4 >= 0.2.0
+ixmp4 >= 0.4.0
numpy >= 1.23.0, < 1.24
-requests
+# requests included via ixmp4
pyjwt
-httpx[http2]
+# httpx[http2] included via ixmp4
openpyxl
pandas >= 2.0.0
-scipy
pint >= 0.13
PyYAML
matplotlib >= 3.6.0, < 3.7.1
-seaborn
+scipy >= 1.10.0
+seaborn >= 0.11
six
setuptools >= 41
setuptools_scm
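With requests and httpx[http2] now expected to arrive transitively via ixmp4 >= 0.4.0, a quick environment sanity check might look like this (stdlib-only sketch):

```python
from importlib.metadata import version

# requests and httpx should still be importable after installing pyam,
# now as transitive dependencies of ixmp4 rather than direct requirements
for pkg in ("ixmp4", "requests", "httpx", "scipy", "seaborn"):
    print(pkg, version(pkg))
```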
13 changes: 6 additions & 7 deletions tests/conftest.py
@@ -3,14 +3,13 @@

matplotlib.use("agg")

-from pathlib import Path
-import os
-from requests.exceptions import ConnectionError
-import pytest
+from datetime import datetime
+from httpx import ConnectError
import numpy as np
import pandas as pd
+import pytest
+from pathlib import Path

-from datetime import datetime
from pyam import IamDataFrame, iiasa
from pyam.utils import META_IDX, IAMC_IDX

@@ -19,7 +18,7 @@
try:
iiasa.Connection()
IIASA_UNAVAILABLE = False
-except ConnectionError:  # pragma: no cover
+except ConnectError:  # pragma: no cover
IIASA_UNAVAILABLE = True

TEST_API = "integration-test"
@@ -237,7 +236,7 @@ def reg_df():

@pytest.fixture(scope="session")
def plot_df():
-df = IamDataFrame(data=os.path.join(TEST_DATA_DIR, "plot_data.csv"))
+df = IamDataFrame(data=TEST_DATA_DIR / "plot_data.csv")
yield df


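A hypothetical illustration of how the `IIASA_UNAVAILABLE` flag defined above is typically consumed in a test module; the import path and test name are assumptions, not part of this commit:

```python
import pytest

from conftest import IIASA_UNAVAILABLE  # assumed import path within the test suite


@pytest.mark.skipif(IIASA_UNAVAILABLE, reason="IIASA test API not reachable")
def test_iiasa_query():
    ...  # would exercise the "integration-test" API
```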
