Skip to content

Commit

Permalink
Handle faulty turbine library data
Browse files Browse the repository at this point in the history
  • Loading branch information
birgits committed Jan 30, 2024
1 parent d274ce6 commit 296e58e
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 34 deletions.
9 changes: 7 additions & 2 deletions tests/test_data_handling.py
Expand Up @@ -4,6 +4,7 @@
"""

import filecmp
import logging
import os
from shutil import copyfile

Expand Down Expand Up @@ -85,14 +86,18 @@ def test_get_turbine_types(self, capsys):
with pytest.raises(ValueError, match=msg):
get_turbine_types("wrong")

def test_store_turbine_data_from_oedb(self):
def test_store_turbine_data_from_oedb(self, caplog):
"""Test `store_turbine_data_from_oedb` function."""
t = {}
for fn in os.listdir(self.orig_path):
t[fn] = os.path.getmtime(os.path.join(self.orig_path, fn))
store_turbine_data_from_oedb()
with caplog.at_level(logging.WARNING):
store_turbine_data_from_oedb()
for fn in os.listdir(self.orig_path):
assert t[fn] < os.path.getmtime(os.path.join(self.orig_path, fn))
assert "The turbine library data contains too many faulty" not in caplog.text
assert "No cp-curve but has_cp_curve=True" not in caplog.text
assert "No power curve but has_power_curve=True" not in caplog.text

def test_wrong_url_load_turbine_data(self):
"""Load turbine data from oedb with a wrong schema."""
Expand Down
102 changes: 70 additions & 32 deletions windpowerlib/data.py
Expand Up @@ -187,46 +187,84 @@ def store_turbine_data_from_oedb(

# get all power (coefficient) curves and save them to file
for curve_type in ["power_curve", "power_coefficient_curve"]:
broken_turbine_data = []
curves_df = pd.DataFrame(columns=["wind_speed"])
for index in turbine_data.index:
if (
turbine_data["{}_wind_speeds".format(curve_type)][index]
and turbine_data["{}_values".format(curve_type)][index]
):
df = (
pd.DataFrame(
data=[
eval(
turbine_data[
"{}_wind_speeds".format(curve_type)
][index]
),
eval(
turbine_data["{}_values".format(curve_type)][
index
]
),
]
)
.transpose()
.rename(
columns={
0: "wind_speed",
1: turbine_data["turbine_type"][index],
}
try:
df = (
pd.DataFrame(
data=[
eval(
turbine_data[
"{}_wind_speeds".format(curve_type)
][index]
),
eval(
turbine_data["{}_values".format(curve_type)][
index
]
),
]
)
.transpose()
.rename(
columns={
0: "wind_speed",
1: turbine_data["turbine_type"][index],
}
)
)
if not df.wind_speed.duplicated().any():
curves_df = pd.merge(
left=curves_df, right=df, how="outer", on="wind_speed"
)
except:
broken_turbine_data.append(turbine_data.loc[index, "turbine_type"])

# warning in case of broken turbine data
if len(broken_turbine_data) > 0:
issue_link = ("https://github.com/OpenEnergyPlatform/data-preprocessing"
"/issues/28")
# if fewer than 20 % of the entries are faulty, only issue a warning
if len(broken_turbine_data) < 0.2 * len(turbine_data):
logging.warning(
f"The turbine library data contains faulty {curve_type}s. The "
f"{curve_type} data can therefore not be loaded for the following "
f"turbines: {broken_turbine_data}. "
f"Please report this in the following issue, in case it hasn't "
f"already been reported: {issue_link}"
)
if not df.wind_speed.duplicated().any():
curves_df = pd.merge(
left=curves_df, right=df, how="outer", on="wind_speed"
)
curves_df = curves_df.set_index("wind_speed").sort_index().transpose()
# power curve values in W
if curve_type == "power_curve":
curves_df *= 1000
curves_df.index.name = "turbine_type"
curves_df.sort_index(inplace=True)
curves_df.to_csv(filename.format("{}s".format(curve_type)))
save_turbine_data = True
# set has_power_curve / has_cp_curve to False for faulty turbines
for turb in broken_turbine_data:
ind = turbine_data[turbine_data.turbine_type == turb].index[0]
col = ("has_power_curve" if curve_type == "power_curve"
else "has_cp_curve")
turbine_data.at[ind, col] = False
# if 20 % or more of the entries are faulty, do not store the downloaded data
else:
logging.warning(
f"The turbine library data contains too many faulty {curve_type}s,"
f"wherefore {curve_type} data is not loaded from the oedb. "
f"Please report this in the following issue, in case it hasn't "
f"already been reported: {issue_link}"
)
save_turbine_data = False
else:
save_turbine_data = True

if save_turbine_data:
curves_df = curves_df.set_index("wind_speed").sort_index().transpose()
# power curve values in W
if curve_type == "power_curve":
curves_df *= 1000
curves_df.index.name = "turbine_type"
curves_df.sort_index(inplace=True)
curves_df.to_csv(filename.format("{}s".format(curve_type)))

# get turbine data and save to file (excl. curves)
turbine_data_df = turbine_data.drop(
Expand Down

0 comments on commit 296e58e

Please sign in to comment.