diff --git a/.github/workflows/tests-coverage.yml b/.github/workflows/tests-coverage.yml new file mode 100644 index 00000000..c6efca13 --- /dev/null +++ b/.github/workflows/tests-coverage.yml @@ -0,0 +1,70 @@ +# Tests the package with pytest, monitors the coverage and sends it to coveralls.io +# Coverage is only sent to coveralls.io when no pytest tests fail +name: "Tests & Coverage" + +on: [push] + +# Cancel jobs on new push +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: true + +jobs: + build: + name: "${{ matrix.name-suffix }} at py${{ matrix.python-version }} on ${{ matrix.os }}" + runs-on: ${{ matrix.os }} + + strategy: + fail-fast: false + matrix: + include: + - name-suffix: "coverage" + os: ubuntu-latest + python-version: "3.11" + - name-suffix: "basic" + os: ubuntu-latest + python-version: "3.10" + - name-suffix: "basic" + os: ubuntu-latest + python-version: "3.12" + - name-suffix: "basic" + os: windows-latest + python-version: "3.11" + + steps: + - name: Checkout repo + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Set up Conda + if: runner.os == 'Windows' + uses: conda-incubator/setup-miniconda@v2 + with: + miniconda-version: "latest" + python-version: ${{ matrix.python-version }} + activate-environment: testenv + + - name: Install dependencies + run: | + python -m pip install --upgrade pip wheel setuptools + python -m pip install .[dev] + + - name: Run tests + if: ${{ !(runner.os == 'Linux' && matrix.name-suffix == 'coverage') }} + run: | + python -m pytest --disable-warnings --color=yes -v + + - name: Run tests, coverage and send to coveralls + # Selected by matrix name only, so it keeps working when the Python version of the coverage job changes + if: runner.os == 'Linux' && matrix.name-suffix == 'coverage' + run: | + python -m pip install coverage coveralls + coverage run --source=windpowerlib -m pytest --disable-warnings --color=yes -v + coveralls + env: + GITHUB_TOKEN: ${{ 
secrets.GITHUB_TOKEN }} + COVERALLS_SERVICE_NAME: github diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 00000000..edd0a241 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,28 @@ +# .readthedocs.yml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Build documentation in the doc/ directory with Sphinx +sphinx: + configuration: doc/conf.py + +# Build documentation with MkDocs +#mkdocs: +# configuration: mkdocs.yml + +# Optionally build your docs in additional formats such as PDF and ePub +formats: all + +# Optionally set the version of Python and requirements required to build your docs +python: + install: + - requirements: doc/requirements.txt + +# Set the version of Python +build: + os: ubuntu-22.04 + tools: + python: "3.11" diff --git a/.travis.yml b/.travis.yml index 0f515fb0..9b4e348c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,9 +2,9 @@ language: python matrix: include: - - python: 3.6 - - python: 3.7 - - python: 3.8 + - python: "3.10" + - python: "3.11" + - python: "3.12" # command to install dependencies #before_install: diff --git a/README.rst b/README.rst index fad1a1bb..a0cad423 100644 --- a/README.rst +++ b/README.rst @@ -33,13 +33,13 @@ Go to the `download page = 3.6) environment, use pypi to install the latest windpowerlib version: +If you have a working Python 3 environment, use pypi to install the latest windpowerlib version: :: pip install windpowerlib -The windpowerlib is designed for Python 3 and tested on Python >= 3.5. We highly recommend to use virtual environments. +The windpowerlib is designed for Python 3 and tested on Python >= 3.10. We highly recommend to use virtual environments. Please see the `installation page `_ of the oemof documentation for complete instructions on how to install python and a virtual environment on your operating system. 
Optional Packages diff --git a/doc/getting_started.rst b/doc/getting_started.rst index d2a0d83b..169f3360 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -43,7 +43,7 @@ If you have a working Python 3 environment, use pypi to install the latest windp pip install windpowerlib -The windpowerlib is designed for Python 3 and tested on Python >= 3.5. We highly recommend to use virtual environments. +The windpowerlib is designed for Python 3 and tested on Python >= 3.10. We highly recommend to use virtual environments. Please see the `installation page `_ of the oemof documentation for complete instructions on how to install python and a virtual environment on your operating system. Optional Packages diff --git a/doc/requirements.txt b/doc/requirements.txt index a8f73dd3..05a03dfa 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -1,4 +1,5 @@ sphinx>=1.4 +sphinx_rtd_theme ipykernel nbsphinx pandas diff --git a/doc/whatsnew/v0-2-2.rst b/doc/whatsnew/v0-2-2.rst index 89551de5..be840c13 100644 --- a/doc/whatsnew/v0-2-2.rst +++ b/doc/whatsnew/v0-2-2.rst @@ -1,6 +1,11 @@ v0.2.2 () ++++++++++++++++++++++++++++++ +* Updated the code base to work for newer versions of python (support for python 3.6 to + python 3.9 is discontinued, supported python versions are now >= python 3.10) and added + github actions to run tests automatically when changes are pushed to github + (`PR 136 `_). 
+ Contributors ############ - * Author \ No newline at end of file + * Birgit Schachler \ No newline at end of file diff --git a/example/modelchain_example.ipynb b/example/modelchain_example.ipynb index eed91268..6aadba7f 100644 --- a/example/modelchain_example.ipynb +++ b/example/modelchain_example.ipynb @@ -144,7 +144,8 @@ " file,\n", " index_col=0,\n", " header=[0, 1],\n", - " date_parser=lambda idx: pd.to_datetime(idx, utc=True))\n", + " )\n", + " weather_df.index = pd.to_datetime(weather_df.index, utc=True)\n", " \n", " # change time zone\n", " weather_df.index = weather_df.index.tz_convert(\n", diff --git a/example/modelchain_example.py b/example/modelchain_example.py index ec88c487..bcca0f3e 100644 --- a/example/modelchain_example.py +++ b/example/modelchain_example.py @@ -85,8 +85,8 @@ def get_weather_data(filename="weather.csv", **kwargs): file, index_col=0, header=[0, 1], - date_parser=lambda idx: pd.to_datetime(idx, utc=True), ) + weather_df.index = pd.to_datetime(weather_df.index, utc=True) # change time zone weather_df.index = weather_df.index.tz_convert("Europe/Berlin") diff --git a/example/simple_example.py b/example/simple_example.py index d42dad06..0023fe1f 100644 --- a/example/simple_example.py +++ b/example/simple_example.py @@ -79,8 +79,8 @@ def get_weather_data(filename="weather.csv", **kwargs): file, index_col=0, header=[0, 1], - date_parser=lambda idx: pd.to_datetime(idx, utc=True), ) + weather_df.index = pd.to_datetime(weather_df.index, utc=True) # change time zone weather_df.index = weather_df.index.tz_convert("Europe/Berlin") diff --git a/example/test_examples.py b/example/test_examples.py index 8be5e9d0..ea492f9c 100644 --- a/example/test_examples.py +++ b/example/test_examples.py @@ -4,14 +4,10 @@ """ import os -import subprocess -import tempfile -import nbformat -import sys from example import modelchain_example as mc_e from example import turbine_cluster_modelchain_example as tc_mc_e from numpy.testing import assert_allclose -import 
pytest +import pytest_notebook class TestExamples: @@ -42,7 +38,7 @@ def test_turbine_cluster_modelchain_example_flh(self): ) tc_mc_e.calculate_power_output(weather, example_farm, example_cluster) assert_allclose( - 1956.164053, + 2004.84125, (example_farm.power_output.sum() / example_farm.nominal_power), 0.01, ) @@ -57,50 +53,27 @@ def test_turbine_cluster_modelchain_example_flh(self): def _notebook_run(self, path): """ - Execute a notebook via nbconvert and collect output. - Returns (parsed nb object, execution errors) + Execute a notebook and collect output. + Returns execution errors. """ - dirname, __ = os.path.split(path) - os.chdir(dirname) - with tempfile.NamedTemporaryFile(suffix=".ipynb") as fout: - args = [ - "jupyter", - "nbconvert", - "--to", - "notebook", - "--execute", - "--ExecutePreprocessor.timeout=60", - "--output", - fout.name, - path, - ] - subprocess.check_call(args) - - fout.seek(0) - nb = nbformat.read(fout, nbformat.current_nbformat) - - errors = [ - output - for cell in nb.cells - if "outputs" in cell - for output in cell["outputs"] - if output.output_type == "error" - ] - - return nb, errors + notebook = pytest_notebook.notebook.load_notebook(path=path) + result = pytest_notebook.execution.execute_notebook( + notebook, + with_coverage=False, + timeout=600, + ) + return result.exec_error - @pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6") def test_modelchain_example_ipynb(self): dir_path = os.path.dirname(os.path.realpath(__file__)) - nb, errors = self._notebook_run( + errors = self._notebook_run( os.path.join(dir_path, "modelchain_example.ipynb") ) - assert errors == [] + assert errors is None - @pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6") def test_turbine_cluster_modelchain_example_ipynb(self): dir_path = os.path.dirname(os.path.realpath(__file__)) - nb, errors = self._notebook_run( + errors = self._notebook_run( os.path.join(dir_path, "turbine_cluster_modelchain_example.ipynb") ) 
- assert errors == [] + assert errors is None diff --git a/example/turbine_cluster_modelchain_example.ipynb b/example/turbine_cluster_modelchain_example.ipynb index 634744ab..a512d580 100644 --- a/example/turbine_cluster_modelchain_example.ipynb +++ b/example/turbine_cluster_modelchain_example.ipynb @@ -28,7 +28,9 @@ "source": [ "### Imports and initialization of wind turbines\n", "\n", - "The import of weather data and the initialization of wind turbines is done as in the ``modelchain_example``. Be aware that currently for wind farm and wind cluster calculations wind turbines need to have a power curve as some calculations do not work with the power coefficient curve." + "The import of weather data and the initialization of wind turbines are taken from the ``modelchain_example``. See there for more information.\n", + "\n", + "Also, be aware that currently for wind farm and wind cluster calculations wind turbines need to have a power curve as some calculations do not work with the power coefficient curve." 
] }, { @@ -37,10 +39,11 @@ "metadata": {}, "outputs": [], "source": [ + "import os\n", "import pandas as pd\n", + "import requests\n", "\n", - "import modelchain_example as mc_e\n", - "from windpowerlib import TurbineClusterModelChain, WindTurbineCluster, WindFarm\n", + "from windpowerlib import create_power_curve, TurbineClusterModelChain, WindFarm, WindTurbine, WindTurbineCluster\n", "\n", "import logging\n", "logging.getLogger().setLevel(logging.DEBUG)" @@ -59,21 +62,106 @@ "height 10 80 2 10 0\n", "2010-01-01 00:00:00+01:00 5.32697 7.80697 267.60 267.57 98405.7\n", "2010-01-01 01:00:00+01:00 5.46199 7.86199 267.60 267.55 98382.7\n", - "2010-01-01 02:00:00+01:00 5.67899 8.59899 267.61 267.54 98362.9\n", - "\n", - "nominal power of my_turbine: 3000000.0\n" + "2010-01-01 02:00:00+01:00 5.67899 8.59899 267.61 267.54 98362.9\n" + ] + } + ], + "source": [ + "def get_weather_data(filename='weather.csv', **kwargs):\n", + " r\"\"\"\n", + " Imports weather data from a file.\n", + "\n", + " \"\"\"\n", + "\n", + " if 'datapath' not in kwargs:\n", + " kwargs['datapath'] = os.path.dirname(__file__)\n", + " \n", + " file = os.path.join(kwargs['datapath'], filename)\n", + " \n", + " # download example weather data file in case it does not yet exist\n", + " if not os.path.isfile(file):\n", + " logging.debug(\"Download weather data for example.\")\n", + " req = requests.get(\"https://osf.io/59bqn/download\")\n", + " with open(file, \"wb\") as fout:\n", + " fout.write(req.content)\n", + " \n", + " # read csv file \n", + " weather_df = pd.read_csv(\n", + " file,\n", + " index_col=0,\n", + " header=[0, 1],\n", + " )\n", + " weather_df.index = pd.to_datetime(weather_df.index, utc=True)\n", + " \n", + " # change time zone\n", + " weather_df.index = weather_df.index.tz_convert(\n", + " 'Europe/Berlin')\n", + " \n", + " return weather_df\n", + "\n", + "# Read weather data from csv\n", + "weather = get_weather_data(filename='weather.csv', datapath='')\n", + "print(weather[['wind_speed', 
'temperature', 'pressure']][0:3])" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Nominal power of my_turbine: 3000000.0\n" ] } ], "source": [ - "# Get weather data\n", - "weather = mc_e.get_weather_data('weather.csv')\n", - "print(weather[['wind_speed', 'temperature', 'pressure']][0:3])\n", + "def initialize_wind_turbines():\n", + " r\"\"\"\n", + " Initializes three WindTurbine objects.\n", + "\n", + " \"\"\"\n", + " enercon_e126 = {\n", + " \"turbine_type\": \"E-126/4200\", # turbine type as in register\n", + " \"hub_height\": 135, # in m\n", + " }\n", + " e126 = WindTurbine(**enercon_e126)\n", + "\n", + " my_turbine = {\n", + " \"nominal_power\": 3e6, # in W\n", + " \"hub_height\": 105, # in m\n", + " \"power_curve\": pd.DataFrame(\n", + " data={\n", + " \"value\": [\n", + " p * 1000\n", + " for p in [0.0, 26.0, 180.0, 1500.0, 3000.0, 3000.0]\n", + " ], # in W\n", + " \"wind_speed\": [0.0, 3.0, 5.0, 10.0, 15.0, 25.0],\n", + " }\n", + " ), # in m/s\n", + " }\n", + " my_turbine = WindTurbine(**my_turbine)\n", + "\n", + " my_power = pd.Series(\n", + " [0.0, 39000.0, 270000.0, 2250000.0, 4500000.0, 4500000.0]\n", + " )\n", + " my_wind_speed = (0.0, 3.0, 5.0, 10.0, 15.0, 25.0)\n", + "\n", + " my_turbine2 = {\n", + " \"nominal_power\": 6e6, # in W\n", + " \"hub_height\": 115, # in m\n", + " \"power_curve\": create_power_curve(\n", + " wind_speed=my_wind_speed, power=my_power\n", + " ),\n", + " }\n", + " my_turbine2 = WindTurbine(**my_turbine2)\n", + "\n", + " return my_turbine, e126, my_turbine2\n", "\n", - "# Initialize wind turbines\n", - "my_turbine, e126, my_turbine2 = mc_e.initialize_wind_turbines()\n", - "print()\n", - "print('nominal power of my_turbine: {}'.format(my_turbine.nominal_power))" + "my_turbine, e126, my_turbine2 = initialize_wind_turbines()\n", + "print('Nominal power of my_turbine: {}'.format(my_turbine.nominal_power))" ] }, { diff --git 
a/pyproject.toml b/pyproject.toml index 3dbb3802..a11e2514 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.black] line-length = 79 -target-version = ['py36', 'py37', 'py38'] +target-version = ['py310', 'py311', 'py312'] include = '\.pyi?$' exclude = ''' /( diff --git a/setup.py b/setup.py index e588c933..83f8cca3 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ def read(fname): packages=["windpowerlib"], package_data={ "windpowerlib": [ - os.path.join("data", "**.csv"), + os.path.join("data", "*.csv"), os.path.join("data", "default_turbine_data", "*.csv"), os.path.join("oedb", "*.csv"), ] @@ -25,15 +25,15 @@ def read(fname): long_description=read("README.rst"), long_description_content_type="text/x-rst", zip_safe=False, - install_requires=["pandas >= 0.20.0", "requests"], + install_requires=["pandas", "requests"], extras_require={ "dev": [ "pytest", "jupyter", "sphinx_rtd_theme", - "nbformat", "numpy", "matplotlib", + "pytest-notebook", ] }, ) diff --git a/tests/test_data_handling.py b/tests/test_data_handling.py index bf31bf37..b53d2bca 100644 --- a/tests/test_data_handling.py +++ b/tests/test_data_handling.py @@ -4,6 +4,7 @@ """ import filecmp +import logging import os from shutil import copyfile @@ -16,6 +17,7 @@ get_turbine_types, restore_default_turbine_data, store_turbine_data_from_oedb, + _process_and_save_oedb_data, ) @@ -77,7 +79,7 @@ def test_broken_pwr_curve(self): def test_get_turbine_types(self, capsys): """Test the `get_turbine_types` function.""" - get_turbine_types() + get_turbine_types(turbine_library="oedb") captured = capsys.readouterr() assert "Enercon" in captured.out get_turbine_types("oedb", print_out=False, filter_=False) @@ -85,14 +87,63 @@ def test_get_turbine_types(self, capsys): with pytest.raises(ValueError, match=msg): get_turbine_types("wrong") - def test_store_turbine_data_from_oedb(self): + def test_store_turbine_data_from_oedb(self, caplog): """Test `store_turbine_data_from_oedb` function.""" t = {} for 
fn in os.listdir(self.orig_path): t[fn] = os.path.getmtime(os.path.join(self.orig_path, fn)) - store_turbine_data_from_oedb() + with caplog.at_level(logging.WARNING): + store_turbine_data_from_oedb() for fn in os.listdir(self.orig_path): assert t[fn] < os.path.getmtime(os.path.join(self.orig_path, fn)) + assert "The turbine library data contains too many faulty" not in caplog.text + assert "No cp-curve but has_cp_curve=True" not in caplog.text + assert "No power curve but has_power_curve=True" not in caplog.text + + def test__prepare_and_save_oedb_turbine_curve_data(self, caplog): + """Test `_process_and_save_oedb_data` function.""" + # prepare dummy turbine data + # turbine 0 everything okay, turbine 1 duplicated wind speeds, turbine 2 + # power curve values broken + turbine_data = pd.DataFrame( + data={ + "id": [0, 1, 2], + "turbine_type": ["turbine 0", "turbine 1", "turbine 2"], + "has_power_curve": [True, True, True], + "has_cp_curve": [True, True, True], + "power_curve_wind_speeds": ["[15, 20, 25]", "[15, 15, 25]", "[15, 20, 25]"], + "power_curve_values": ["[15, 20, 25]", "[15, 20, 25]", "[15, 20, [25]"], + "power_coefficient_curve_wind_speeds": ["[15, 20, 25]", "[15, 20, 25]", "[15, 20, 25]"], + "power_coefficient_curve_values": ["[15, 20, 25]", "[15, 20, 25]", "[15, 20, 25]"], + "thrust_coefficient_curve_wind_speeds": [0, 1, 2], + "thrust_coefficient_curve_values": [0, 1, 2], + "nominal_power": [0, 1, 2], + }, + index=[0, 1, 2] + ) + + # run test with low / default threshold - data is not overwritten + t = {} + for fn in os.listdir(self.orig_path): + t[fn] = os.path.getmtime(os.path.join(self.orig_path, fn)) + with caplog.at_level(logging.WARNING): + _process_and_save_oedb_data(turbine_data) + for fn in os.listdir(self.orig_path): + assert t[fn] == os.path.getmtime(os.path.join(self.orig_path, fn)) + assert "The turbine library data contains too many faulty " in caplog.text + + # run test with high threshold + for fn in 
os.listdir(self.orig_path): + t[fn] = os.path.getmtime(os.path.join(self.orig_path, fn)) + with caplog.at_level(logging.WARNING): + _process_and_save_oedb_data(turbine_data, threshold=0.95) + for fn in os.listdir(self.orig_path): + assert t[fn] < os.path.getmtime(os.path.join(self.orig_path, fn)) + assert "The turbine library data contains faulty power_curves" in caplog.text + assert not turbine_data.at[2, "has_power_curve"] + assert not turbine_data.at[1, "has_power_curve"] + assert turbine_data.at[1, "has_cp_curve"] + assert turbine_data.at[0, "has_power_curve"] def test_wrong_url_load_turbine_data(self): """Load turbine data from oedb with a wrong schema.""" diff --git a/tests/test_density.py b/tests/test_density.py index 68556f4d..1f6964d4 100644 --- a/tests/test_density.py +++ b/tests/test_density.py @@ -5,7 +5,7 @@ import pandas as pd import numpy as np -from pandas.util.testing import assert_series_equal +from pandas.testing import assert_series_equal from numpy.testing import assert_allclose from windpowerlib.density import barometric, ideal_gas diff --git a/tests/test_modelchain.py b/tests/test_modelchain.py index 5b2c8b2f..483f0b41 100644 --- a/tests/test_modelchain.py +++ b/tests/test_modelchain.py @@ -7,7 +7,7 @@ import pandas as pd import numpy as np import pytest -from pandas.util.testing import assert_series_equal +from pandas.testing import assert_series_equal import windpowerlib.wind_turbine as wt import windpowerlib.modelchain as mc @@ -102,7 +102,11 @@ def test_temperature_hub(self): np.array([100, 10]), ] temp_exp = pd.Series(data=[267, 268], name=100) - assert_series_equal(test_mc.temperature_hub(weather_df), temp_exp) + assert_series_equal( + test_mc.temperature_hub(weather_df), + temp_exp, + check_dtype=False, + ) def test_density_hub(self): # Test modelchain with density_model='barometric' @@ -452,7 +456,7 @@ def test_heigths_as_string(self): # Heights in the original DataFrame are of type np.int64 assert isinstance( - 
self.weather_df.columns.get_level_values(1)[0], np.int64 + self.weather_df.columns.get_level_values(1)[0], np.int_ ) assert isinstance(string_weather.columns.get_level_values(1)[0], str) diff --git a/tests/test_power_curves.py b/tests/test_power_curves.py index ccf296ab..8ed59f7f 100644 --- a/tests/test_power_curves.py +++ b/tests/test_power_curves.py @@ -6,7 +6,7 @@ import pandas as pd import numpy as np import pytest -from pandas.util.testing import assert_frame_equal +from pandas.testing import assert_frame_equal from windpowerlib.power_curves import ( smooth_power_curve, @@ -53,7 +53,10 @@ def test_smooth_power_curve(self): ) smoothed_curve_exp.index = np.arange(5, 10, 1) assert_frame_equal( - smooth_power_curve(**parameters)[5:10], smoothed_curve_exp + smooth_power_curve(**parameters)[5:10], + smoothed_curve_exp, + check_dtype=False, + check_index_type=False, ) # Test Staffel_Pfenninger method @@ -73,7 +76,10 @@ def test_smooth_power_curve(self): ) smoothed_curve_exp.index = np.arange(5, 10, 1) assert_frame_equal( - smooth_power_curve(**parameters)[5:10], smoothed_curve_exp + smooth_power_curve(**parameters)[5:10], + smoothed_curve_exp, + check_dtype=False, + check_index_type=False, ) # Raise ValueError - misspelling diff --git a/tests/test_power_output.py b/tests/test_power_output.py index d8fef326..b9dd72f7 100644 --- a/tests/test_power_output.py +++ b/tests/test_power_output.py @@ -8,7 +8,7 @@ import pandas as pd import pytest from numpy.testing import assert_allclose -from pandas.util.testing import assert_series_equal +from pandas.testing import assert_series_equal from windpowerlib.power_output import ( power_coefficient_curve, power_curve, diff --git a/tests/test_temperature.py b/tests/test_temperature.py index d0e2ba4b..3b86bc58 100644 --- a/tests/test_temperature.py +++ b/tests/test_temperature.py @@ -5,7 +5,7 @@ import pandas as pd import numpy as np -from pandas.util.testing import assert_series_equal +from pandas.testing import assert_series_equal 
from numpy.testing import assert_array_equal from windpowerlib.temperature import linear_gradient diff --git a/tests/test_tools.py b/tests/test_tools.py index 2392d89d..308e8580 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -4,7 +4,7 @@ """ import pandas as pd -from pandas.util.testing import assert_series_equal +from pandas.testing import assert_series_equal from windpowerlib.tools import ( linear_interpolation_extrapolation, diff --git a/tests/test_turbine_cluster_modelchain.py b/tests/test_turbine_cluster_modelchain.py index 463328b5..2e3342c2 100644 --- a/tests/test_turbine_cluster_modelchain.py +++ b/tests/test_turbine_cluster_modelchain.py @@ -6,7 +6,7 @@ import pytest import pandas as pd import numpy as np -from pandas.util.testing import assert_series_equal +from pandas.testing import assert_series_equal import windpowerlib.wind_farm as wf import windpowerlib.wind_turbine as wt @@ -373,7 +373,7 @@ def test_heigths_as_string(self): # Heights in the original DataFrame are of type np.int64 assert isinstance( - self.weather_df.columns.get_level_values(1)[0], np.int64 + self.weather_df.columns.get_level_values(1)[0], np.int_ ) assert isinstance(string_weather.columns.get_level_values(1)[0], str) diff --git a/tests/test_wake_losses.py b/tests/test_wake_losses.py index d27fd4a1..1d02d128 100644 --- a/tests/test_wake_losses.py +++ b/tests/test_wake_losses.py @@ -6,7 +6,7 @@ import pandas as pd import numpy as np import pytest -from pandas.util.testing import assert_series_equal +from pandas.testing import assert_series_equal from windpowerlib.wake_losses import ( reduce_wind_speed, diff --git a/tests/test_wind_speed.py b/tests/test_wind_speed.py index f4af4299..f92dff99 100644 --- a/tests/test_wind_speed.py +++ b/tests/test_wind_speed.py @@ -6,7 +6,7 @@ import pandas as pd import numpy as np import pytest -from pandas.util.testing import assert_series_equal +from pandas.testing import assert_series_equal from numpy.testing import assert_allclose 
from windpowerlib.wind_speed import logarithmic_profile, hellman diff --git a/windpowerlib/data.py b/windpowerlib/data.py index 309bcd7d..3562817e 100644 --- a/windpowerlib/data.py +++ b/windpowerlib/data.py @@ -92,10 +92,10 @@ def get_turbine_types(turbine_library="local", print_out=True, filter_=True): + "but must be 'local' or 'oedb'." ) if filter_: - cp_curves_df = df.loc[df["has_cp_curve"]][ + cp_curves_df = df.loc[df["has_cp_curve"].fillna(False)][ ["manufacturer", "turbine_type", "has_cp_curve"] ] - p_curves_df = df.loc[df["has_power_curve"]][ + p_curves_df = df.loc[df["has_power_curve"].fillna(False)][ ["manufacturer", "turbine_type", "has_power_curve"] ] curves_df = pd.merge( @@ -158,7 +158,7 @@ def load_turbine_data_from_oedb(schema="supply", table="wind_turbine_library"): def store_turbine_data_from_oedb( - schema="supply", table="wind_turbine_library" + schema="supply", table="wind_turbine_library", threshold=0.2 ): r""" Loads turbine library from the OpenEnergy database (oedb). @@ -166,6 +166,9 @@ def store_turbine_data_from_oedb( Turbine data is saved to csv files ('oedb_power_curves.csv', 'oedb_power_coefficient_curves.csv' and 'oedb_nominal_power') for offline usage of the windpowerlib. If the files already exist they are overwritten. + In case the turbine library on the oedb contains too many faulty turbines, + the already existing files are not overwritten. The accepted share of faulty + turbines can be set through the parameter `threshold`. Parameters ---------- @@ -173,6 +176,16 @@ def store_turbine_data_from_oedb( Database schema of the turbine library. table : str Table name of the turbine library. + threshold : float + In case there are turbines in the turbine library with faulty data (e.g. + duplicate wind speed entries in the power (coefficient) curve data), the + threshold defines the share of accepted faulty turbine data up to which the + existing turbine data is overwritten by the newly downloaded data. 
+ For example, a threshold of 0.1 means that more than 10% of the + turbines would need to have invalid data in order to discard the downloaded + data. This is to make sure that in the rare case the oedb data is too buggy, + the turbine data that is by default provided with the windpowerlib is not + overwritten by poor data. Returns ------- @@ -182,68 +195,151 @@ def store_turbine_data_from_oedb( """ turbine_data = fetch_turbine_data_from_oedb(schema=schema, table=table) - # standard file name for saving data - filename = os.path.join(os.path.dirname(__file__), "oedb", "{0}.csv") + turbine_data = _process_and_save_oedb_data( + turbine_data, threshold=threshold + ) + check_turbine_data( + filename = os.path.join(os.path.dirname(__file__), "oedb", "{0}.csv") + ) + return turbine_data + + +def _process_and_save_oedb_data(turbine_data, threshold=0.2): + """ + Helper function to extract power (coefficient) curve data from the turbine library. - # get all power (coefficient) curves and save them to file - for curve_type in ["power_curve", "power_coefficient_curve"]: + Parameters + ----------- + turbine_data : :pandas:`pandas.DataFrame` + Raw turbine data downloaded from the oedb with + :func:`fetch_turbine_data_from_oedb`. + threshold : float + See parameter `threshold` in func:`store_turbine_data_from_oedb` + for more information. + + Returns + -------- + :pandas:`pandas.DataFrame` + Turbine data of different turbines such as 'manufacturer', + 'turbine_type', 'nominal_power'. 
+ + """ + curve_types = ["power_curve", "power_coefficient_curve"] + # get all power (coefficient) curves + curve_dict = {} + broken_turbines_dict = {} + for curve_type in curve_types: + broken_turbine_data = [] curves_df = pd.DataFrame(columns=["wind_speed"]) for index in turbine_data.index: if ( turbine_data["{}_wind_speeds".format(curve_type)][index] and turbine_data["{}_values".format(curve_type)][index] ): - df = ( - pd.DataFrame( - data=[ - eval( - turbine_data[ - "{}_wind_speeds".format(curve_type) - ][index] - ), - eval( - turbine_data["{}_values".format(curve_type)][ - index - ] - ), - ] + try: + df = ( + pd.DataFrame( + data=[ + eval( + turbine_data[ + "{}_wind_speeds".format(curve_type) + ][index] + ), + eval( + turbine_data["{}_values".format(curve_type)][ + index + ] + ), + ] + ) + .transpose() + .rename( + columns={ + 0: "wind_speed", + 1: turbine_data["turbine_type"][index], + } + ) ) - .transpose() - .rename( - columns={ - 0: "wind_speed", - 1: turbine_data["turbine_type"][index], - } + if not df.wind_speed.duplicated().any(): + curves_df = pd.merge( + left=curves_df, right=df, how="outer", on="wind_speed" + ) + else: + broken_turbine_data.append( + turbine_data.loc[index, "turbine_type"]) + except Exception: + broken_turbine_data.append(turbine_data.loc[index, "turbine_type"]) + curve_dict[curve_type] = curves_df + broken_turbines_dict[curve_type] = broken_turbine_data + + # check if there are faulty turbines and if so, raise warning + # if there are too many, don't save downloaded data to disk but keep existing data + if any(len(_) > 0 for _ in broken_turbines_dict.values()): + issue_link = ("https://github.com/OpenEnergyPlatform/data-preprocessing" + "/issues/28") + # in case only some data is faulty, only give out warning + if all(len(_) < threshold * len(turbine_data) + for _ in broken_turbines_dict.values()): + save_turbine_data = True + for curve_type in curve_types: + if len(broken_turbines_dict[curve_type]) > 0: + logging.warning( + f"The turbine 
library data contains faulty {curve_type}s. The " + f"{curve_type} data can therefore not be loaded for the " + f"following turbines: {broken_turbines_dict[curve_type]}. " + f"Please report this in the following issue, in case it hasn't " + f"already been reported: {issue_link}" ) - ) - curves_df = pd.merge( - left=curves_df, right=df, how="outer", on="wind_speed" - ) - curves_df = curves_df.set_index("wind_speed").sort_index().transpose() - # power curve values in W - if curve_type == "power_curve": - curves_df *= 1000 - curves_df.index.name = "turbine_type" - curves_df.sort_index(inplace=True) - curves_df.to_csv(filename.format("{}s".format(curve_type))) - - # get turbine data and save to file (excl. curves) - turbine_data_df = turbine_data.drop( - [ - "power_curve_wind_speeds", - "power_curve_values", - "power_coefficient_curve_wind_speeds", - "power_coefficient_curve_values", - "thrust_coefficient_curve_wind_speeds", - "thrust_coefficient_curve_values", - ], - axis=1, - ).set_index("turbine_type") - # nominal power in W - turbine_data_df["nominal_power"] *= 1000 - turbine_data_df.sort_index(inplace=True) - turbine_data_df.to_csv(filename.format("turbine_data")) - check_turbine_data(filename) + # set has_power_(coefficient)_curve to False for faulty turbines + for turb in broken_turbines_dict[curve_type]: + ind = turbine_data[turbine_data.turbine_type == turb].index[0] + col = ("has_power_curve" if curve_type == "power_curve" + else "has_cp_curve") + turbine_data.at[ind, col] = False + # in case most data is faulty, do not store downloaded data + else: + logging.warning( + f"The turbine library data contains too many faulty turbine datasets " + f"wherefore it is not loaded from the oedb. " + f"In case you want to circumvent this behaviour, you can specify a " + f"higher tolerance through the parameter 'threshold'. " 
+ f"Please report this in the following issue, in case it hasn't " + f"already been reported: {issue_link}" + ) + save_turbine_data = False + else: + save_turbine_data = True + + if save_turbine_data: + # standard file name for saving data + filename = os.path.join(os.path.dirname(__file__), "oedb", "{0}.csv") + # save curve data to csv + for curve_type in curve_types: + curves_df = curve_dict[curve_type].set_index( + "wind_speed").sort_index().transpose() + # power curve values in W + if curve_type == "power_curve": + curves_df *= 1000 + curves_df.index.name = "turbine_type" + curves_df.sort_index(inplace=True) + curves_df.to_csv(filename.format("{}s".format(curve_type))) + + # save turbine data to file (excl. curves) + turbine_data_df = turbine_data.drop( + [ + "power_curve_wind_speeds", + "power_curve_values", + "power_coefficient_curve_wind_speeds", + "power_coefficient_curve_values", + "thrust_coefficient_curve_wind_speeds", + "thrust_coefficient_curve_values", + ], + axis=1, + ).set_index("turbine_type") + # nominal power in W + turbine_data_df["nominal_power"] *= 1000 + turbine_data_df.sort_index(inplace=True) + turbine_data_df.to_csv(filename.format("turbine_data")) return turbine_data diff --git a/windpowerlib/power_curves.py b/windpowerlib/power_curves.py index 43df0071..2a693e0f 100644 --- a/windpowerlib/power_curves.py +++ b/windpowerlib/power_curves.py @@ -140,18 +140,26 @@ def smooth_power_curve( # Append wind speeds to `power_curve_wind_speeds` maximum_value = power_curve_wind_speeds.iloc[-1] + wind_speed_range while power_curve_wind_speeds.values[-1] < maximum_value: - power_curve_wind_speeds = power_curve_wind_speeds.append( - pd.Series( - power_curve_wind_speeds.iloc[-1] - + ( - power_curve_wind_speeds.iloc[5] - - power_curve_wind_speeds.iloc[4] - ), - index=[power_curve_wind_speeds.index[-1] + 1], - ) + power_curve_wind_speeds = pd.concat( + [ + power_curve_wind_speeds, + pd.Series( + power_curve_wind_speeds.iloc[-1] + + ( + 
power_curve_wind_speeds.iloc[5] + - power_curve_wind_speeds.iloc[4] + ), + index=[power_curve_wind_speeds.index[-1] + 1], + ) + ], + sort=True, ) - power_curve_values = power_curve_values.append( - pd.Series(0.0, index=[power_curve_values.index[-1] + 1]) + power_curve_values = pd.concat( + [ + power_curve_values, + pd.Series(0.0, index=[power_curve_values.index[-1] + 1]) + ], + sort=True, ) for power_curve_wind_speed in power_curve_wind_speeds: # Create array of wind speeds for the sum @@ -249,6 +257,7 @@ def wake_losses_to_power_curve( wind_farm_efficiency.set_index("wind_speed"), ], axis=1, + sort=True, ) # Add column with reduced power (nan values of efficiency are # interpolated) diff --git a/windpowerlib/wind_farm.py b/windpowerlib/wind_farm.py index c20c704a..7c9b8f97 100644 --- a/windpowerlib/wind_farm.py +++ b/windpowerlib/wind_farm.py @@ -446,6 +446,7 @@ def assign_power_curve( ), ], axis=1, + sort=True, ) # Aggregate all power curves wind_farm_power_curve = pd.DataFrame( diff --git a/windpowerlib/wind_turbine_cluster.py b/windpowerlib/wind_turbine_cluster.py index 166ea64f..da540ac3 100644 --- a/windpowerlib/wind_turbine_cluster.py +++ b/windpowerlib/wind_turbine_cluster.py @@ -211,6 +211,7 @@ def assign_power_curve( ) ], axis=1, + sort=True ) # Sum up power curves cluster_power_curve = pd.DataFrame(