FlexMeasures · nhoening · Jun 19, 2021 · Jun 11, 2021 · Jun 15, 2021 · Jun 15, 2021
diff --git a/documentation/changelog.rst b/documentation/changelog.rst
@@ -14,6 +14,8 @@ Bugfixes
 Infrastructure / Support
 ----------------------
 
+* Add possibility to send errors to Sentry [see `PR #143 <http://www.github.com/SeitaBV/flexmeasures/pull/143>`_]
+
 
 v0.5.0 | June 7, 2021
 ===========================

diff --git a/documentation/configuration.rst b/documentation/configuration.rst
@@ -160,6 +160,15 @@ Token which external services can use to check on the status of recurring tasks
 Default: ``None``
 
 
+SENTRY_DSN
+^^^^^^^^^^^^
+
+Set the tokenized URL (DSN) of your Sentry project, so errors will be sent to Sentry when ``app.env`` is neither ``development`` nor ``testing``.
+E.g.: ``https://<examplePublicKey>@o<something>.ingest.sentry.io/<project-Id>``
+
+Default: ``None``
+
+
 SQLAlchemy
 ----------
 
@@ -329,6 +338,14 @@ Password of mail system user.
 Default: ``None``
 
 
+MAIL_MONITORING_RECIPIENTS
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Mail addresses (comma-separated) to send monitoring alerts to. For example: ``fred@one.com,wilma@two.com``
+
+Default: ``None``
+
+
 .. _redis-config:
 
 Redis

diff --git a/flexmeasures/app.py b/flexmeasures/app.py
@@ -26,7 +26,7 @@ def create(env: Optional[str] = None, path_to_config: Optional[str] = None) -> F
 
     from flexmeasures.utils import config_defaults
     from flexmeasures.utils.config_utils import read_config, configure_logging
-    from flexmeasures.utils.app_utils import set_secret_key
+    from flexmeasures.utils.app_utils import set_secret_key, init_sentry
     from flexmeasures.utils.error_utils import add_basic_error_handlers
 
     # Create app
@@ -36,17 +36,20 @@ def create(env: Optional[str] = None, path_to_config: Optional[str] = None) -> F
     # as we need to know the ENV now (for it to be recognised by Flask()).
     load_dotenv()
     app = Flask("flexmeasures")
+
     if env is not None:  # overwrite
         app.env = env
-        if env == "testing":
-            app.testing = True
-        if env == "development":
-            app.debug = config_defaults.DevelopmentConfig.DEBUG
+    if app.env == "testing":
+        app.testing = True
+    if app.env == "development":
+        app.debug = config_defaults.DevelopmentConfig.DEBUG
 
     # App configuration
 
-    read_config(app, path_to_config=path_to_config)
+    read_config(app, custom_path_to_config=path_to_config)
     add_basic_error_handlers(app)
+    if not app.env == "development" and not app.testing:
+        init_sentry(app)
 
     app.mail = Mail(app)
     FlaskJSON(app)

diff --git a/flexmeasures/data/__init__.py b/flexmeasures/data/__init__.py
@@ -26,6 +26,7 @@ def register_at(app: Flask):
         # Register some useful custom scripts with the flask cli
         with app.app_context():
             import flexmeasures.data.scripts.cli_tasks.jobs
+            import flexmeasures.data.scripts.cli_tasks.monitor
             import flexmeasures.data.scripts.cli_tasks.data_add
             import flexmeasures.data.scripts.cli_tasks.data_delete
             import flexmeasures.data.scripts.cli_tasks.db_ops

diff --git a/flexmeasures/data/scripts/cli_tasks/data_add.py b/flexmeasures/data/scripts/cli_tasks/data_add.py
@@ -481,8 +481,8 @@ def create_forecasts(
                 asset_id=asset_id,
                 timed_value_type=value_type,
                 horizons=[horizon],
-                start_of_roll=from_date - timedelta(hours=horizon),
-                end_of_roll=to_date - timedelta(hours=horizon),
+                start_of_roll=from_date - horizon,
+                end_of_roll=to_date - horizon,
             )
     else:
         from flexmeasures.data.scripts.data_gen import populate_time_series_forecasts

diff --git a/flexmeasures/data/scripts/cli_tasks/monitor.py b/flexmeasures/data/scripts/cli_tasks/monitor.py
@@ -0,0 +1,91 @@
+from datetime import timedelta
+from typing import Optional
+
+import click
+from flask import current_app as app
+from flask.cli import with_appcontext
+from flask_mail import Message
+from sentry_sdk import (
+    capture_message as capture_message_for_sentry,
+    set_context as set_sentry_context,
+)
+
+from flexmeasures.data.models.task_runs import LatestTaskRun
+from flexmeasures.utils.time_utils import server_now
+
+
+@click.group("monitor")  # command group; its sub-commands are defined further down in this module
+def fm_monitor():
+    """FlexMeasures: Monitor tasks."""
+
+
+def send_monitoring_alert(
+    task_name: str, msg: str, latest_run: Optional[LatestTaskRun] = None
+):
+    """
+    Send a monitoring message to Sentry and by email (to the non-blank addresses in
+    MAIL_MONITORING_RECIPIENTS, if any). Also log an error. Uses latest_run, if given.
+    """
+    latest_run_txt = ""
+    if latest_run:
+        set_sentry_context(
+            "latest_run", {"time": latest_run.datetime, "status": latest_run.status}
+        )
+        latest_run_txt = (
+            f"Last run was at {latest_run.datetime}, status was: {latest_run.status}"
+        )
+    capture_message_for_sentry(msg)
+
+    recipients = app.config.get("MAIL_MONITORING_RECIPIENTS") or ""  # default is None
+    email_recipients = [r.strip() for r in recipients.split(",") if r.strip()]
+    if email_recipients:
+        email = Message(subject=f"Problem with task {task_name}", bcc=email_recipients)
+        email.body = f"{msg}\n\n{latest_run_txt}\nWe suggest to check the logs."
+        app.mail.send(email)
+    app.logger.error(f"{msg} {latest_run_txt}")
+
+
+@fm_monitor.command("tasks")
+@with_appcontext
+@click.option(
+    "--task",
+    type=(str, int),
+    multiple=True,
+    required=True,
+    help="The name of the task and the maximal allowed minutes between successful runs. Use multiple times if needed.",
+)
+def monitor_tasks(task):
+    """
+    Check if the given task's last successful execution happened less than the allowed time ago.
+    If not, alert someone, via email or sentry.
+    """
+    for t in task:  # each t is a (task name, allowed minutes) tuple
+        task_name = t[0]
+        app.logger.info(f"Checking latest run of task {task_name} ...")
+        latest_run: LatestTaskRun = LatestTaskRun.query.get(task_name)
+        if latest_run is None:
+            msg = f"Task {task_name} has no last run and thus cannot be monitored. Is it configured properly?"
+            send_monitoring_alert(task_name, msg)
+            continue  # one unknown task should not stop us from checking the others
+        now = server_now()
+        acceptable_interval = timedelta(minutes=t[1])
+        if (
+            now - acceptable_interval
+            <= latest_run.datetime
+            <= now + acceptable_interval  # run times too far in the future are rejected, too
+        ):
+            # last time is okay, let's check the status
+            if latest_run.status is False:
+                msg = f"A failure has been reported on task {task_name}."
+                send_monitoring_alert(task_name, msg, latest_run)
+        else:
+            msg = (
+                f"Task {task_name}'s latest run time is outside of the acceptable range "
+                f"({acceptable_interval})."
+            )
+            app.logger.error(msg)
+            send_monitoring_alert(task_name, msg, latest_run)
+    app.logger.info("Done checking task runs ...")
+
+
+app.cli.add_command(fm_monitor)  # app is flask.current_app, so this module must be imported within an app context
diff --git a/flexmeasures/utils/app_utils.py b/flexmeasures/utils/app_utils.py
@@ -4,17 +4,56 @@
 
 import click
 from flask import Flask
-from flask.cli import FlaskGroup
+from flask.cli import FlaskGroup, with_appcontext
+import sentry_sdk
+from sentry_sdk.integrations.flask import FlaskIntegration
+from sentry_sdk.integrations.rq import RqIntegration
+from pkg_resources import get_distribution
 
 from flexmeasures.app import create as create_app
 
 
 @click.group(cls=FlaskGroup, create_app=create_app)
+@with_appcontext
 def flexmeasures_cli():
-    """Management scripts for the FlexMeasures platform."""
+    """
+    Management scripts for the FlexMeasures platform.
+    We use @with_appcontext here so things from the app setup are initialised
+    only once. This is crucial for Sentry, for example.
+    """
     pass
 
 
+def init_sentry(app: Flask):
+    """
+    Set up error reporting to Sentry for this app.
+    The app provides the Sentry DSN (read from its configuration) as well as
+    the meta information (release, environment, tags) which we send along.
+    """
+    dsn = app.config.get("SENTRY_DSN")
+    if not dsn:
+        app.logger.info(
+            "[FLEXMEASURES] No SENTRY_DSN setting found, so initialising Sentry cannot happen ..."
+        )
+        return
+    app.logger.info("[FLEXMEASURES] Initialising Sentry ...")
+    release = f"flexmeasures@{get_distribution('flexmeasures').version}"
+    sentry_sdk.init(
+        dsn=dsn,
+        integrations=[FlaskIntegration(), RqIntegration()],
+        debug=app.debug,
+        release=release,
+        send_default_pii=True,  # user data (current user id, email address, username) is attached to the event.
+        environment=app.env,
+        # traces_sample_rate is the share of transactions captured for
+        # performance monitoring (1.0 would capture 100% of them).
+        # TODO: Decide if we need this and if to configure it.
+        traces_sample_rate=0.33,
+    )
+    sentry_sdk.set_tag("mode", app.config.get("FLEXMEASURES_MODE"))
+    sentry_sdk.set_tag("platform-name", app.config.get("FLEXMEASURES_PLATFORM_NAME"))
+
+
 def set_secret_key(app, filename="secret_key"):
     """Set the SECRET_KEY or exit.
 
@@ -104,3 +143,4 @@ def register_plugins(app: Flask):
         plugin_version = getattr(plugin_blueprint, "__version__", "0.1")
         app.config["LOADED_PLUGINS"][plugin_name] = plugin_version
     app.logger.info(f"Loaded plugins: {app.config['LOADED_PLUGINS']}")
+    sentry_sdk.set_context("plugins", app.config.get("LOADED_PLUGINS", {}))
diff --git a/flexmeasures/utils/config_defaults.py b/flexmeasures/utils/config_defaults.py
@@ -41,6 +41,7 @@ class Config(object):
         "no-reply@example.com",
     )  # tuple of name and email address
     MAIL_PASSWORD: Optional[str] = None
+    MAIL_MONITORING_RECIPIENTS = None
 
     SECURITY_REGISTERABLE = False
     SECURITY_LOGIN_USER_TEMPLATE = "admin/login_user.html"
@@ -75,6 +76,8 @@ class Config(object):
         3000  # Web interface poll period for updates in ms
     )
 
+    SENTRY_DSN: Optional[str] = None
+
     FLEXMEASURES_PLATFORM_NAME: str = "FlexMeasures"
     FLEXMEASURES_MODE: str = ""
     FLEXMEASURES_TIMEZONE: str = "Asia/Seoul"

diff --git a/flexmeasures/utils/config_utils.py b/flexmeasures/utils/config_utils.py
@@ -50,7 +50,7 @@ def configure_logging():
     loggingDictConfig(flexmeasures_logging_config)
 
 
-def read_config(app: Flask, path_to_config: Optional[str]):
+def read_config(app: Flask, custom_path_to_config: Optional[str]):
     """Read configuration from various expected sources, complain if not setup correctly. """
 
     if app.env not in (
@@ -65,21 +65,22 @@ def read_config(app: Flask, path_to_config: Optional[str]):
         )
         sys.exit(2)
 
-    # Load default config settings
+    # First, load default config settings
     app.config.from_object(
         "flexmeasures.utils.config_defaults.%sConfig" % camelize(app.env)
     )
 
-    # Now read user config, if possible. If no explicit path is given, try home dir first, then instance dir
+    # Now, potentially overwrite those from config file
+    # These two locations are possible (besides the custom path)
     path_to_config_home = str(Path.home().joinpath(".flexmeasures.cfg"))
     path_to_config_instance = os.path.join(app.instance_path, "flexmeasures.cfg")
-    if not app.testing:
+    if not app.testing:  # testing runs completely on defaults
+        # If no custom path is given, this will try home dir first, then instance dir
         used_path_to_config = read_custom_config(
-            app, path_to_config, path_to_config_home, path_to_config_instance
+            app, custom_path_to_config, path_to_config_home, path_to_config_instance
         )
 
     # Check for missing values.
-    # Testing might affect only specific functionality (-> dev's responsibility)
     # Documentation runs fine without them.
     if not app.testing and app.env != "documentation":
         if not are_required_settings_complete(app):
@@ -126,7 +127,7 @@ def read_custom_config(
         app.config.from_pyfile(path_to_config)
     except FileNotFoundError:
         pass
-    # Finally, all required varaiables can be set as env var:
+    # Finally, all required variables can be set as env var:
     for req_var in required:
         app.config[req_var] = os.getenv(req_var, app.config.get(req_var, None))
     return path_to_config

diff --git a/flexmeasures/utils/error_utils.py b/flexmeasures/utils/error_utils.py
@@ -23,7 +23,7 @@ def log_error(exc: Exception, error_msg: str):
     extra = dict(url=request.path, **get_err_source_info(last_traceback))
 
     msg = (
-        '{error_name}:"{message}" [occured at {src_module}({src_func}):{src_linenr},'
+        '{error_name}:"{message}" [occurred at {src_module}({src_func}):{src_linenr},'
         "URL was: {url}]".format(
             error_name=exc.__class__.__name__, message=error_msg, **extra
         )

diff --git a/requirements/app.in b/requirements/app.in
@@ -48,6 +48,7 @@ Flask-Security-Too>=4.0
 Flask-Classful
 Flask-Marshmallow
 Flask-Cors
+sentry-sdk[flask]
 marshmallow-sqlalchemy>=0.23.1
 webargs
 # flask should be after all the flask plugins, because setup might find they ARE flask

diff --git a/requirements/app.txt b/requirements/app.txt
@@ -22,12 +22,15 @@ blinker==1.4
     # via
     #   flask-mail
     #   flask-principal
+    #   sentry-sdk
 bokeh==1.0.4
     # via
     #   -r requirements/app.in
     #   pandas-bokeh
 certifi==2020.12.5
-    # via requests
+    # via
+    #   requests
+    #   sentry-sdk
 cffi==1.14.5
     # via bcrypt
 cftime==1.4.1
@@ -99,6 +102,7 @@ flask==1.1.2
     #   flask-sslify
     #   flask-wtf
     #   rq-dashboard
+    #   sentry-sdk
 greenlet==1.0.0
     # via sqlalchemy
 humanize==3.3.0
@@ -280,6 +284,8 @@ scipy==1.6.2
     #   timetomodel
 selenium==3.141.0
     # via timely-beliefs
+sentry-sdk[flask]==1.1.0
+    # via -r requirements/app.in
 siphon==0.9
     # via -r requirements/app.in
 six==1.15.0
@@ -331,6 +337,7 @@ urllib3==1.26.4
     # via
     #   requests
     #   selenium
+    #   sentry-sdk
 webargs==7.0.1
     # via -r requirements/app.in
 werkzeug==1.0.1