Read csv with naive data, column filters and datetime/timedelta units #521

Merged
merged 26 commits on Nov 10, 2022
3f950d3
Support unit conversion for python datetime and timedelta objects
Flix6x Oct 9, 2022
57a050a
Allow to filter by column when reading in beliefs from CSV
Flix6x Oct 9, 2022
e0040e7
Allow to set a timezone for reading in timezone naive data
Flix6x Oct 9, 2022
d1e3e32
Allow throwing out NaN values when reading in beliefs
Flix6x Oct 9, 2022
2e2580a
Support datetime unit conversion for aware datetimes with mixed offset
Flix6x Oct 14, 2022
d9ddd0a
Raise instead of assume UTC when reading in timezone naive data witho…
Flix6x Oct 21, 2022
7b7fdf8
Bump timely-beliefs dependency for read_csv
Flix6x Oct 14, 2022
26b2131
Refactor: flake8
Flix6x Oct 31, 2022
6ccb2dc
CLI changelog entry
Flix6x Oct 31, 2022
e2eeb08
changelog entry
Flix6x Oct 31, 2022
994af8a
mypy
Flix6x Oct 31, 2022
401bc1d
Use sensor id field validation
Flix6x Oct 31, 2022
6e14acf
Querying for a source with a given id no longer requires knowing the …
Flix6x Oct 9, 2022
9b018dc
make freeze-deps
Flix6x Oct 31, 2022
b7ad2d6
add optional dependency: timely-beliefs[forecast]
Flix6x Oct 31, 2022
2df7658
Clarify help
Flix6x Nov 10, 2022
c65910f
Merge remote-tracking branch 'origin/main' into read-csv-with-naive-d…
Flix6x Nov 10, 2022
d1acfe5
Mention data conversion from 'datetime' or 'timedelta' units
Flix6x Nov 10, 2022
74751fd
Allow converting 'datetime' values to a duration other than seconds (…
Flix6x Nov 10, 2022
d1a8392
Refactor and make convert_time_units a private function
Flix6x Nov 10, 2022
e77803e
Refactor and add inline comment explaining why we check to_unit for a…
Flix6x Nov 10, 2022
b3f700a
mypy: PEP 484 prohibits implicit Optional
Flix6x Nov 10, 2022
6041f4f
Attempt to revert bugs introduced in merge with main
Flix6x Nov 10, 2022
35618ee
black and flake8
Flix6x Nov 10, 2022
6e1510a
A few more reverts
Flix6x Nov 10, 2022
100f05c
Fix typos
Flix6x Nov 10, 2022
2 changes: 1 addition & 1 deletion documentation/changelog.rst
@@ -12,7 +12,7 @@ New features
* Ability to provide your own custom scheduling function [see `PR #505 <http://www.github.com/FlexMeasures/flexmeasures/pull/505>`_]
* Visually distinguish forecasts/schedules (dashed lines) from measurements (solid lines), and expand the tooltip with timing info regarding the forecast/schedule horizon or measurement lag [see `PR #503 <http://www.github.com/FlexMeasures/flexmeasures/pull/503>`_]
* The asset page also allows to show sensor data from other assets that belong to the same account [see `PR #500 <http://www.github.com/FlexMeasures/flexmeasures/pull/500>`_]
* Improved import of time series data from CSV file: 1) drop duplicate records with warning, and 2) allow configuring which column contains explicit recording times for each data point (use case: import forecasts) [see `PR #501 <http://www.github.com/FlexMeasures/flexmeasures/pull/501>`_]
* Improved import of time series data from CSV file: 1) drop duplicate records with warning, 2) allow configuring which column contains explicit recording times for each data point (use case: import forecasts) [see `PR #501 <http://www.github.com/FlexMeasures/flexmeasures/pull/501>`_], 3) localize timezone naive data, 4) support reading in datetime and timedelta values, 5) remove rows with NaN values, and 6) filter by values in specific columns [see `PR #521 <http://www.github.com/FlexMeasures/flexmeasures/pull/521>`_]

Bugfixes
-----------
2 changes: 2 additions & 0 deletions documentation/cli/change_log.rst
@@ -4,6 +4,8 @@
FlexMeasures CLI Changelog
**************************

* Add options to ``flexmeasures add beliefs`` to 1) read CSV data with timezone naive datetimes (use ``--timezone`` to localize the data), 2) read CSV data with datetime/timedelta units (use ``--unit datetime`` or ``--unit timedelta``), 3) remove rows with NaN values, and 4) filter read-in data by matching values in specific columns (use ``--filter-column`` and ``--filter-value`` together).
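A hypothetical invocation combining the new options (the file name, sensor id, source name, and column/value choices are made up for illustration; only the flags themselves come from this PR):

```shell
flexmeasures add beliefs data.csv \
  --sensor-id 2 \
  --source my-script \
  --delimiter "," \
  --timezone Europe/Amsterdam \
  --filter-column 3 --filter-value solar
```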

since v0.11.0 | August 28, 2022
==============================

67 changes: 54 additions & 13 deletions flexmeasures/cli/data_add.py
@@ -279,8 +279,9 @@ def add_initial_structure():
@click.argument("file", type=click.Path(exists=True))
@click.option(
"--sensor-id",
"sensor",
required=True,
type=click.IntRange(min=1),
type=SensorIdField(),
help="Sensor to which the beliefs pertain.",
)
@click.option(
@@ -334,6 +335,12 @@ def add_initial_structure():
multiple=True,
help="Additional strings to recognize as NaN values. This argument can be given multiple times.",
)
@click.option(
"--keep-default-na",
default=False,
type=bool,
help="Whether or not to keep NaN values in the data.",
)
@click.option(
"--nrows",
required=False,
@@ -360,6 +367,24 @@ def add_initial_structure():
type=int,
help="Column number with datetimes",
)
@click.option(
"--timezone",
required=False,
default=None,
help="timezone as string, e.g. 'UTC' or 'Europe/Amsterdam'",
)
@click.option(
"--filter-column",
"filter_columns",
multiple=True,
help="Set a column number to filter data. Use together with --filter-value.",
)
@click.option(
"--filter-value",
"filter_values",
multiple=True,
help="Set a column value to filter data. Use together with --filter-column.",
)
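The new --timezone option localizes naive datetimes while the CSV is parsed (the actual localization happens inside timely-beliefs); a minimal pandas sketch of that step, with made-up timestamps:

```python
import pandas as pd

# Naive timestamps as they might appear in a CSV file
naive = pd.Series(pd.to_datetime(["2022-10-09 00:00", "2022-10-09 01:00"]))

# Localize to a given IANA timezone, as --timezone does for read-in data
aware = naive.dt.tz_localize("Europe/Amsterdam")
print(aware.dt.tz)  # Europe/Amsterdam
```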
@click.option(
"--delimiter",
required=True,
@@ -389,19 +414,23 @@ def add_initial_structure():
)
def add_beliefs(
file: str,
sensor_id: int,
sensor: Sensor,
source: str,
filter_columns: List[int],
filter_values: List[int],
unit: Optional[str] = None,
horizon: Optional[int] = None,
cp: Optional[float] = None,
resample: bool = True,
allow_overwrite: bool = False,
skiprows: int = 1,
na_values: List[str] = None,
keep_default_na: bool = False,
nrows: Optional[int] = None,
datecol: int = 0,
valuecol: int = 1,
beliefcol: Optional[int] = None,
timezone: Optional[str] = None,
delimiter: str = ",",
decimal: str = ".",
thousands: Optional[str] = None,
@@ -426,17 +455,7 @@ def add_beliefs(
In case no --horizon is specified and no beliefcol is specified,
the moment of executing this CLI command is taken as the time at which the beliefs were recorded.
"""
sensor = Sensor.query.filter(Sensor.id == sensor_id).one_or_none()
if sensor is None:
print(f"Failed to create beliefs: no sensor found with ID {sensor_id}.")
return
if source.isdigit():
_source = get_source_or_none(int(source), source_type="CLI script")
if not _source:
print(f"Failed to find source {source}.")
return
else:
_source = get_or_create_source(source, source_type="CLI script")
_source = parse_source(source)

# Set up optional parameters for read_csv
if file.split(".")[-1].lower() == "csv":
@@ -451,6 +470,14 @@
elif beliefcol is None:
kwargs["belief_time"] = server_now().astimezone(pytz.timezone(sensor.timezone))

# Set up optional filters:
if len(filter_columns) != len(filter_values):
raise ValueError(
"The number of filter columns and filter values should be the same."
)
filter_by_column = (
dict(zip(filter_columns, filter_values)) if filter_columns else None
)
bdf = tb.read_csv(
file,
sensor,
@@ -465,6 +492,9 @@
else [datecol, beliefcol, valuecol],
parse_dates=True,
na_values=na_values,
keep_default_na=keep_default_na,
timezone=timezone,
filter_by_column=filter_by_column,
**kwargs,
)
duplicate_rows = bdf.index.duplicated(keep="first")
@@ -1092,3 +1122,14 @@ def check_errors(errors: Dict[str, List[str]]):
f"Please correct the following errors:\n{errors}.\n Use the --help flag to learn more."
)
raise click.Abort


def parse_source(source):
if source.isdigit():
_source = get_source_or_none(int(source))
if not _source:
print(f"Failed to find source {source}.")
return
else:
_source = get_or_create_source(source, source_type="CLI script")
return _source
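The CLI zips the repeated --filter-column and --filter-value options into a dict that is handed to timely-beliefs' read_csv. A self-contained pandas sketch of the pairing and the equivalent row filtering (column numbers and values are made up):

```python
import pandas as pd

filter_columns = [2]        # from repeated --filter-column options
filter_values = ["solar"]   # from repeated --filter-value options

# Mirror the CLI's sanity check and dict construction
if len(filter_columns) != len(filter_values):
    raise ValueError(
        "The number of filter columns and filter values should be the same."
    )
filter_by_column = dict(zip(filter_columns, filter_values)) if filter_columns else None

df = pd.DataFrame({0: [1, 2, 3], 1: [10.0, 20.0, 30.0], 2: ["solar", "wind", "solar"]})

# Keep only rows whose value in each filter column matches the given filter value
for col, val in (filter_by_column or {}).items():
    df = df[df[col] == val]
print(len(df))  # 2
```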
12 changes: 10 additions & 2 deletions flexmeasures/data/queries/data_sources.py
@@ -42,7 +42,15 @@ def get_or_create_source(
return _source


def get_source_or_none(source: int, source_type: str) -> Optional[DataSource]:
query = DataSource.query.filter(DataSource.type == source_type)
def get_source_or_none(
source: int | str, source_type: str | None = None
) -> DataSource | None:
"""
:param source: source id
:param source_type: optionally, filter by source type
"""
query = DataSource.query
if source_type is not None:
query = query.filter(DataSource.type == source_type)
query = query.filter(DataSource.id == int(source))
return query.one_or_none()
22 changes: 22 additions & 0 deletions flexmeasures/utils/unit_utils.py
@@ -8,6 +8,7 @@
Time series with fixed resolution can be converted from units of flow to units of stock (such as 'kW' to 'kWh'), and vice versa.
Percentages can be converted to units of some physical capacity if a capacity is known (such as '%' to 'kWh').
"""
from __future__ import annotations

from datetime import timedelta
from typing import List, Optional, Union
@@ -207,6 +208,25 @@ def is_energy_price_unit(unit: str) -> bool:
return False


def convert_time_units(
data: Union[tb.BeliefsSeries, pd.Series, List[Union[int, float]], int, float],
from_unit: str,
to_unit: str,
):
"""Convert data with datetime or timedelta dtypes to float values.

Use Unix epoch or the requested time unit, respectively.
"""
if from_unit == "datetime":
return (
pd.to_datetime(data, utc=True) - pd.Timestamp("1970-01-01", tz="utc")
) // pd.Timedelta("1s")
if from_unit == "timedelta":
if to_unit[0].isdigit():
return data / pd.Timedelta(to_unit)
return data / pd.Timedelta(1, to_unit)
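The arithmetic in convert_time_units can be checked directly with pandas; a quick sketch of both branches (the sample values are made up):

```python
import pandas as pd

# 'datetime' values become seconds since the Unix epoch
data = pd.Series(["1970-01-02 00:00:00"])
seconds = (
    pd.to_datetime(data, utc=True) - pd.Timestamp("1970-01-01", tz="utc")
) // pd.Timedelta("1s")
print(seconds.iloc[0])  # 86400

# 'timedelta' values become floats in the requested unit
td = pd.Series([pd.Timedelta(hours=1)])
print((td / pd.Timedelta(1, "s")).iloc[0])   # 3600.0 (to_unit does not start with a digit)
print((td / pd.Timedelta("15min")).iloc[0])  # 4.0    (to_unit starts with a digit)
```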


def convert_units(
data: Union[tb.BeliefsSeries, pd.Series, List[Union[int, float]], int, float],
from_unit: str,
@@ -215,6 +235,8 @@ def convert_units(
capacity: Optional[str] = None,
) -> Union[pd.Series, List[Union[int, float]], int, float]:
"""Updates data values to reflect the given unit conversion."""
if from_unit in ("datetime", "timedelta"):
return convert_time_units(data, from_unit, to_unit)

if from_unit != to_unit:
from_magnitudes = (
2 changes: 1 addition & 1 deletion requirements/app.in
@@ -28,7 +28,7 @@ tldextract
pyomo>=5.6
tabulate
timetomodel>=0.7.1
timely-beliefs>=1.12
timely-beliefs[forecast]>=1.13
python-dotenv
# a backport, not needed in Python3.8
importlib_metadata