performance

#!/usr/bin/env python2
#
# Copyright (c) 2018 Jonathan Weyn <jweyn@uw.edu>
#
# See the file LICENSE for your rights.
#

"""
Evaluate performance metrics for a MOS-X model. These functions should only be used after the data files in 'build' and
'validate' have been created.
"""

import mosx
import numpy as np
import pandas as pd
import os
import sys
import pickle
import string
from optparse import OptionParser
from datetime import datetime
import warnings
from sklearn.metrics import explained_variance_score, mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import LabelBinarizer
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gs
from matplotlib import rc
try:
    from properscoring import crps_ensemble
    crps = True
except ImportError:
    crps = False


# Set matplotlib rc parameters
rc('font', **{'family': 'Times New Roman'})
matplotlib.rcParams['mathtext.fontset'] = 'custom'
matplotlib.rcParams['mathtext.rm'] = 'Times New Roman'
matplotlib.rcParams['mathtext.it'] = 'Times New Roman:italic'
matplotlib.rcParams['mathtext.bf'] = 'Times New Roman:bold'
matplotlib.rcParams.update({'font.size': 10})

# Suppress warnings
# warnings.filterwarnings("ignore")


def get_command_options():
    """
    Parse the command line of the performance script with optparse.

    Every option is a boolean flag that defaults to False. Anything on the command line that is not an option is
    returned as a positional argument.

    :return: opts, args: the parsed option values and the list of leftover positional arguments
    """
    # Each entry is (short flag, long flag, destination attribute, help text)
    flag_options = [
        ('-t', '--naive-rain-correction', 'tune_rain',
         'Use the raw precipitation from GFS/NAM to override or average with MOS-X'),
        ('-e', '--ensemble', 'ensemble',
         'Calculate ensemble statistics (for a valid ensemble estimator)'),
        # The trailing space before the line break keeps 'ensemble' and 'with' from running together
        ('-r', '--rain-post-average', 'avg_rain',
         'If using a RainTuningEstimator, this will average the raw estimation from an ensemble '
         'with that of the rain tuning post-processor'),
        ('-L', '--learning-curve', 'learning',
         'Generate a learning curve by re-training the model (time consuming)'),
        ('-S', '--spread-skill', 'spread_skill',
         'Plot spread-skill relationship for an ensemble (if -e is enabled)'),
        ('-H', '--rank-histogram', 'histogram',
         'Plot rank histograms for an ensemble (if -e is enabled)'),
        ('-E', '--error-distribution', 'error_plot',
         'Plot error distributions'),
    ]

    parser = OptionParser()
    for short_flag, long_flag, dest, help_text in flag_options:
        parser.add_option(short_flag, long_flag, dest=dest, action='store_true', default=False, help=help_text)
    opts, args = parser.parse_args()
    return opts, args


def brier_score(y, x):
    """
    Compute a multi-category Brier-type score between two 2-D arrays.

    Whichever array has fewer columns is padded on the right with columns of zeros until both have the same number
    of columns. The squared differences are summed over the columns of each row, divided by the number of columns,
    and averaged over the rows.

    :param y: ndarray: 2-D array with one row per sample and one column per category
    :param x: ndarray: 2-D array with the same number of rows as y
    :return: the mean over rows of the per-row score
    """
    n_categories = max(x.shape[1], y.shape[1])

    def _zero_pad(array):
        # Append as many all-zero columns as are needed to reach n_categories
        missing = n_categories - array.shape[1]
        if missing <= 0:
            return array
        return np.hstack((array, np.zeros((array.shape[0], missing))))

    x = _zero_pad(x)
    y = _zero_pad(y)
    squared_difference = (x - y) ** 2
    per_sample = np.sum(squared_difference, axis=1)
    return np.mean(per_sample / n_categories)


def mean_variance(X):
    """
    Average, over the first axis, the variance taken along the last axis of X.

    :param X: array-like with at least one dimension
    :return: the variance along the last axis, averaged along axis 0
    """
    variance_along_last_axis = np.var(X, axis=-1)
    return np.mean(variance_along_last_axis, axis=0)


def plot_spread_skill(X, y, grid=False, one_to_one=True, width=6, height=6):
    """
    Draw a 2x2 panel of scatter plots of ensemble variance ('spread') against the squared error of the ensemble
    mean ('error'), one panel each for 'High', 'Low', 'Wind', and 'Rain'.

    :param X: ndarray: ensemble predictions; the variance and mean are taken along the last axis
    :param y: ndarray: verification values, subtracted from the mean of X along its last axis
    :param grid: bool: if True, draw a dashed grey grid in every panel
    :param one_to_one: bool: if True, draw a black diagonal line across the axis limits of every panel
    :param width: figure width in inches
    :param height: figure height in inches
    :return: the matplotlib Figure holding the four panels
    """
    n_rows, n_cols = 2, 2
    panel_names = ['High', 'Low', 'Wind', 'Rain']
    axis_maxima = [20., 20., 10., 2.]
    panel_letters = list(string.ascii_lowercase)

    fig = plt.figure()
    fig.set_size_inches(width, height)
    layout = gs.GridSpec(n_rows, n_cols)
    layout.update(wspace=0.18, hspace=0.18)
    palette = [entry['color'] for entry in list(matplotlib.rcParams['axes.prop_cycle'])]

    spread = np.var(X, axis=-1)
    squared_error = (np.mean(X, axis=-1) - y) ** 2
    marker_size = max(width, height)/2.

    for index, (name, axis_max) in enumerate(zip(panel_names, axis_maxima)):
        ax = plt.subplot(layout[index])
        plt.scatter(squared_error[:, index], spread[:, index], c=palette[index], s=marker_size)
        plt.xlim([0., axis_max])
        plt.ylim([0., axis_max])
        if grid:
            plt.grid(grid, linestyle='--', color=[0.8, 0.8, 0.8])
        if one_to_one:
            # The diagonal runs from the smallest to the largest of the current x and y limits
            limits = list(ax.get_xlim()) + list(ax.get_ylim())
            low, high = min(limits), max(limits)
            plt.plot([low, high], [low, high], 'k')
        # Only the left column gets a y label and only the bottom row gets an x label
        if index % n_cols == 0:
            plt.ylabel('spread')
        if index >= n_cols * (n_rows - 1):
            plt.xlabel('error')
        ax.annotate('%s) %s' % (panel_letters[index], name), xycoords='axes fraction', xy=(0.05, 0.95),
                    horizontalalignment='left', verticalalignment='top')

    return fig


def plot_rank_histogram(X, y, width=6, height=6):
    """
    Plot a 2x2 panel of rank histograms, one panel each for 'High', 'Low', 'Wind', and 'Rain'.

    The rank of a verification value is the number of entries along the last axis of X that are strictly smaller
    than it, so it lies between 0 and the length of that axis.

    :param X: ndarray: ensemble predictions indexed as X[sample, variable, member]; the first four variables are
        plotted
    :param y: ndarray: verification values indexed as y[sample, variable]
    :param width: figure width in inches
    :param height: figure height in inches
    :return: the matplotlib Figure holding the four panels
    """
    # Imported locally so this function does not depend on a module-level import of matplotlib.ticker
    from matplotlib.ticker import FuncFormatter

    fig = plt.figure()
    fig.set_size_inches(width, height)
    gs1 = gs.GridSpec(2, 2)
    gs1.update(wspace=0.18, hspace=0.18)
    colors = [c['color'] for c in list(matplotlib.rcParams['axes.prop_cycle'])]
    params = ['High', 'Low', 'Wind', 'Rain']
    letters = string.ascii_lowercase

    # One unit-wide bin per possible rank (0 .. number of members). The default of 10 equal-width bins does not line
    # up with integer ranks, which puts two ranks into some bins and only one into others.
    rank_bins = np.arange(np.shape(X)[-1] + 2)

    def plot_histogram(subplot_spec, x, panel_index, unit=None, facecolor='b', bins=None, align='left'):
        # Draw one normalized histogram panel; panel_index selects the letter, the name, and the y label
        ax = plt.subplot(subplot_spec)
        if bins is None and unit is not None:
            bins = max(int(np.nanmax(x) / unit - np.nanmin(x) / unit), 1)
        # 'density' replaces the 'normed' keyword, which was removed from hist() in matplotlib 3.1
        n, bins, patches = plt.hist(x, bins=bins, facecolor=facecolor, density=True, align=align)
        # Leave a little headroom so the tallest bar does not touch the top of the axes
        ylim = ax.get_ylim()
        if ylim[1] - np.nanmax(n) < 0.005:
            ax.set_ylim([ylim[0], ylim[1] + 0.005])
        if unit is None:
            unit = 1.
        # Show density * unit as a percentage; a formatter keeps the labels right if the ticks are recomputed
        ax.yaxis.set_major_formatter(FuncFormatter(lambda value, position: '{:.1f}'.format(100. * value * unit)))
        if panel_index % 2 == 0:
            ax.set_ylabel('frequency (%)')
        panel_label = '%s) %s' % (letters[panel_index], params[panel_index])
        ax.annotate(panel_label, xycoords='axes fraction', xy=(0.50, 0.95), horizontalalignment='center',
                    verticalalignment='top')

    for plot_num in range(4):
        # Count, for every sample, the members that fall strictly below the verification value
        rank = np.sum(X[:, plot_num, :].T < y[:, plot_num], axis=0).T
        plot_histogram(gs1[plot_num], rank, plot_num, facecolor=colors[plot_num], bins=rank_bins)

    return fig


def plot_error(X, y, width=6, height=6):
    """
    Plot a 2x2 panel of histograms of the error X - y, one panel each for 'High', 'Low', 'Wind', and 'Rain'.

    :param X: ndarray: predictions indexed as X[sample, variable]; the first four variables are plotted
    :param y: ndarray: verification values indexed as y[sample, variable]
    :param width: figure width in inches
    :param height: figure height in inches
    :return: the matplotlib Figure holding the four panels
    """
    # Imported locally so this function does not depend on a module-level import of matplotlib.ticker
    from matplotlib.ticker import FuncFormatter

    fig = plt.figure()
    fig.set_size_inches(width, height)
    gs1 = gs.GridSpec(2, 2)
    gs1.update(wspace=0.18, hspace=0.18)
    colors = [c['color'] for c in list(matplotlib.rcParams['axes.prop_cycle'])]
    params = ['High', 'Low', 'Wind', 'Rain']
    letters = string.ascii_lowercase

    def plot_histogram(subplot_spec, x, panel_index, unit=None, facecolor='b', bins=None, align='left'):
        # Draw one normalized histogram panel; panel_index selects the letter, the name, and the y label
        ax = plt.subplot(subplot_spec)
        if bins is None and unit is not None:
            # Number of bins is the data range measured in whole units, but never fewer than one
            bins = max(int(np.nanmax(x) / unit - np.nanmin(x) / unit), 1)
        # 'density' replaces the 'normed' keyword, which was removed from hist() in matplotlib 3.1
        n, bins, patches = plt.hist(x, bins=bins, facecolor=facecolor, density=True, align=align)
        # Leave a little headroom so the tallest bar does not touch the top of the axes
        ylim = ax.get_ylim()
        if ylim[1] - np.nanmax(n) < 0.005:
            ax.set_ylim([ylim[0], ylim[1] + 0.005])
        if unit is None:
            unit = 1.
        # Show density * unit as a percentage; a formatter keeps the labels right if the ticks are recomputed
        ax.yaxis.set_major_formatter(FuncFormatter(lambda value, position: '{:.1f}'.format(100. * value * unit)))
        if panel_index % 2 == 0:
            ax.set_ylabel('frequency (%)')
        panel_label = '%s) %s' % (letters[panel_index], params[panel_index])
        ax.annotate(panel_label, xycoords='axes fraction', xy=(0.05, 0.95), horizontalalignment='left',
                    verticalalignment='top')

    for plot_num in range(4):
        error = X[:, plot_num] - y[:, plot_num]
        plot_histogram(gs1[plot_num], error, plot_num, facecolor=colors[plot_num], unit=1)

    return fig


# Read the command line options and the configuration file

options, arguments = get_command_options()

# The first positional argument is the configuration file; without it there is nothing to do
if not arguments:
    print('Required argument (config file) not provided.')
    sys.exit(1)
config_file = arguments[0]
config = mosx.util.get_config(config_file)

# Keep the configured value, then switch the time series option off in the config used from here on
predict_timeseries = config['Model']['predict_timeseries']
if predict_timeseries:
    config['Model']['predict_timeseries'] = False

predictor_file = '%s/%s_CV_%s_predictors.pkl' % (
    config['SITE_ROOT'], config['station_id'], config['Validate']['end_date'])
if not os.path.isfile(predictor_file):
    print("Cannot find validation predictors file '%s'. Please run 'validate' first." % predictor_file)
    sys.exit(1)


# Load the predictors written by 'validate'
# NOTE(review): pickle.load can execute arbitrary code stored in the file; only load predictor files you trust.

with open(predictor_file, 'rb') as handle:
    predictors = pickle.load(handle)
bufkit_predictors = predictors['BUFKIT']
obs_predictors = predictors['OBS']
predictor_array = np.concatenate((bufkit_predictors, obs_predictors), axis=1)
true_array = predictors['VERIF']


# Run the model on the validation predictors
print('\n--- MOS-X performance: making the forecast...\n')
forecast = mosx.model.predict_all(config, predictor_file, ensemble=options.ensemble,
                                  naive_rain_correction=options.tune_rain)
predicted, all_predicted, ps = forecast
if options.avg_rain:
    print('Using average of raw and rain-tuned precipitation forecasts')
    # Predict a second time with rain tuning disabled and average element-wise with the first prediction
    no_tuned_predictions = mosx.model.predict_all(config, predictor_file, rain_tuning=False,
                                                  naive_rain_correction=options.tune_rain)
    untuned_predicted = no_tuned_predictions[0]
    predicted = np.mean([predicted, untuned_predicted], axis=0)


# Calculate general performance scores

print('\n--- MOS-X performance: generating global performance scores...\n')
multi = 'raw_values'
score_names = ['Explained variance score', 'Mean absolute bias', 'Mean squared bias',
               'R^2 coefficient of determination', 'Brier score', 'Mean ensemble variance', 'CRPS']
score_columns = ['High', 'Low', 'Wind', 'Rain']

# One row per entry of score_names and one column per entry of score_columns; scores that are not computed stay NaN
scores = np.full((len(score_names), len(score_columns)), np.nan)
scores[0] = explained_variance_score(true_array[:, :4], predicted[:, :4], multioutput=multi)
scores[1] = mean_absolute_error(true_array[:, :4], predicted[:, :4], multioutput=multi)
scores[2] = mean_squared_error(true_array[:, :4], predicted[:, :4], multioutput=multi)
scores[3] = r2_score(true_array[:, :4], predicted[:, :4], multioutput=multi)

if config['Model']['rain_forecast_type'] in ['pop', 'categorical']:
    # Try to get probabilities for each category of rain
    print('MOS-X performance: making probabilistic rain predictions...')
    rain_prob = mosx.model.predict_rain_proba(config, predictor_file)
    lb = LabelBinarizer()
    rain_categories = lb.fit_transform(true_array[:, 3])
    # LabelBinarizer returns a single indicator column (for the second class) when the target has exactly two
    # classes. brier_score would pad that with a zero column on the right, pairing the indicator with the first
    # probability column. Expand it to one indicator column per class instead.
    # NOTE(review): this assumes the columns of rain_prob follow the sorted class order that LabelBinarizer uses,
    # and that the validation data contains the same classes as the model -- confirm against predict_rain_proba.
    if len(lb.classes_) == 2 and rain_categories.shape[1] == 1 and rain_prob.shape[1] == 2:
        rain_categories = np.hstack((1 - rain_categories, rain_categories))
    scores[4, 3] = brier_score(rain_categories, rain_prob)

if options.ensemble:
    scores[5] = mean_variance(all_predicted[:, :4, :])
    # CRPS needs the optional properscoring package; the row stays NaN when it is not installed
    if crps:
        scores[6] = np.mean(crps_ensemble(true_array[:, :4], all_predicted[:, :4, :], axis=-1), axis=0)

scores_df = pd.DataFrame(scores)
scores_df.index = score_names
scores_df.columns = score_columns

print('\n')
print(scores_df)


# Optional learning curve figure (re-trains the model)

if options.learning:
    print("\n--- MOS-X performance: exporting learning curve figure to 'learning_curve.pdf'...\n")
    train_file = '%s/%s_predictors_train.pkl' % (config['SITE_ROOT'], config['station_id'])
    predictor_file = '%s/%s_CV_%s_predictors.pkl' % (
        config['SITE_ROOT'], config['station_id'], config['Validate']['end_date'])
    combined = mosx.model.combine_train_test(config, train_file, predictor_file, return_count_test=True)
    predictors, targets, n_samples_test = combined
    cv = mosx.model.SplitConsecutive(first=False, n_samples=n_samples_test)
    scorer = mosx.model.wxchallenge_scorer(no_rain=True)
    estimator = mosx.model.build_estimator(config)
    # NOTE(review): this calls an underscore-prefixed helper of mosx.model -- confirm that is the intended entry point
    figure = mosx.model._plot_learning_curve(estimator, predictors, targets, cv=cv, scoring=scorer)
    plt.savefig('learning_curve.pdf', bbox_inches='tight')


# Optional spread-skill figure (needs ensemble predictions)

if options.spread_skill:
    if options.ensemble:
        print("\n--- MOS-X performance: exporting spread-skill figure to 'spread_skill.pdf'...\n")
        figure = plot_spread_skill(all_predicted, true_array)
        plt.savefig('spread_skill.pdf', bbox_inches='tight')
    else:
        print("warning: '--spread-skill' option enabled but no ensemble predictions (--ensemble) were enabled!")


# Optional rank histogram figure (needs ensemble predictions)

if options.histogram:
    if options.ensemble:
        print("\n--- MOS-X performance: exporting rank histogram figure to 'rank_histogram.pdf'...\n")
        figure = plot_rank_histogram(all_predicted, true_array)
        plt.savefig('rank_histogram.pdf', bbox_inches='tight')
    else:
        print("warning: '--rank-histogram' option enabled but no ensemble predictions (--ensemble) were enabled!")


# Optional error distribution figure

if options.error_plot:
    print("\n--- MOS-X performance: exporting error distribution figure to 'error_distribution.pdf'...\n")
    figure = plot_error(predicted, true_array)
    plt.savefig('error_distribution.pdf', bbox_inches='tight')