openclimatefix · peterdudfield · May 19, 2023 · Jun 7, 2023 · Jun 9, 2023 · Jun 9, 2023
diff --git a/gradboost_pv/models/training.py b/gradboost_pv/models/training.py
@@ -117,26 +117,35 @@ def run_experiment(
                 [
                     (y_test.values - y_pred_test.reshape(-1, 1)) ** 2,
                     np.abs(y_test.values - y_pred_test.reshape(-1, 1)),
+                    y_test.values,  # ground truth -> "target" column
+                    y_pred_test.reshape(-1, 1),  # model output -> "prediction" column
                 ],
                 axis=1,
             ),
-            columns=["test_mse", "test_mae"],
+            columns=["test_mse", "test_mae", "target", "prediction"],  # same order as data
             index=y_test.index,
         )
         errors_train = pd.DataFrame(
             data=np.concatenate(
                 [
                     (y_train.values - y_pred_train.reshape(-1, 1)) ** 2,
                     np.abs(y_train.values - y_pred_train.reshape(-1, 1)),
+                    y_train.values,  # ground truth -> "target" column
+                    y_pred_train.reshape(-1, 1),  # model output -> "prediction" column
                 ],
                 axis=1,
             ),
-            columns=["train_mse", "train_mae"],
+            columns=["train_mse", "train_mae", "target", "prediction"],  # same order as data
             index=y_train.index,
         )
 
-        errors = pd.concat([errors_train, errors_test], axis=1)
-        errors.to_pickle(errors_local_save_file)
+        # Dispatch on the real suffix (not a substring); non-.csv paths are pickled as before.
+        if errors_local_save_file.endswith(".csv"):
+            csv_stem = errors_local_save_file[: -len(".csv")]
+            errors_train.to_csv(f"{csv_stem}_train.csv")
+            errors_test.to_csv(f"{csv_stem}_test.csv")
+        else:
+            pd.concat([errors_train, errors_test], axis=1).to_pickle(errors_local_save_file)
 
     return ExperimentSummary(
         train_mse,

diff --git a/results/readme.md b/results/readme.md
@@ -0,0 +1,3 @@
+# Results
+
+Folder to store the results of the backtest, e.g. the per-horizon `errors_*.csv` files written by the training scripts.
diff --git a/scripts/models/train/merge_results.py b/scripts/models/train/merge_results.py
@@ -0,0 +1,37 @@
+import pandas as pd
+
+# forecast horizons (in hours) that have a saved error file: 0..36 inclusive
+forecast_horizon_hours = range(0, 37)
+
+# which split's per-horizon error files to merge: "test" or "train"
+test_or_train = "test"
+
+
+# Merge the per-horizon CSVs into one wide frame: the horizon-0 target
+# plus one prediction column per forecast horizon, joined on the index.
+all_df = None
+for forecast_horizon_hour in forecast_horizon_hours:
+    print(forecast_horizon_hour)
+
+    csv_path = f"./results/errors_{forecast_horizon_hour}_{test_or_train}.csv"
+    df = pd.read_csv(csv_path, index_col=0)
+
+    # the target column is only taken from the first horizon's file
+    wanted = ["prediction"] if forecast_horizon_hour > 0 else ["target", "prediction"]
+    df = df[wanted].rename(
+        columns={
+            "target": f"target_{forecast_horizon_hour}",
+            "prediction": f"+{forecast_horizon_hour}hours",
+        }
+    )
+
+    # The index is the init time. To index by target time instead, shift each
+    # horizon's rows (the factor 2 presumably means half-hourly rows - TODO confirm):
+    # df = df.shift(forecast_horizon_hour*2, axis=0)
+
+    # left-join on the index, so the first horizon's rows define the output rows
+    all_df = df if all_df is None else all_df.join(df)
+
+
+# write the merged frame alongside the per-horizon input files
+all_df.to_csv(f"./results/errors_all_{test_or_train}.csv")
diff --git a/scripts/models/train/region_filtered_model.py b/scripts/models/train/region_filtered_model.py
@@ -87,7 +87,15 @@ def main(path_to_processed_nwp: Path, nwp_variables: list[str]) -> Dict[Hour, Ex
         X, y = build_datasets_from_local(
             processed_nwp, gsp_data, np.timedelta64(forecast_horizon_hour, "h")
         )
-        training_results = run_experiment(X, y, DEFFAULT_HYPARAM_CONFIG)
+
+        errors_local_save_file = f"./results/errors_{forecast_horizon_hour}.csv"
+        training_results = run_experiment(
+            X,
+            y,
+            DEFFAULT_HYPARAM_CONFIG,
+            save_errors_locally=True,
+            errors_local_save_file=errors_local_save_file,
+        )
 
         results[forecast_horizon_hour] = training_results