Source code for transparentai.models.regression.regression_plots

import matplotlib.pyplot as plt
import numpy as np

from ..regression import metrics as regression
from transparentai import plots


def plot_error_distribution(errors):
    r"""Plots the error distribution with standard deviation,
    mean and median.

    The error is calculated by the following formula:

    .. math::

        error = y - \hat{y}

    Parameters
    ----------
    errors: array like
        Errors of a regressor
    """
    mean = np.mean(errors)
    median = np.median(errors)
    std = np.std(errors)

    # Histogram of the errors, with mean and median drawn as vertical lines
    y, x, _ = plt.hist(errors, bins=50, color='#3498db',
                       label='std = %.4f' % std)
    plt.vlines(mean, ymin=0, ymax=y.max(), color='#e74c3c',
               linewidths=3, label='mean = %.4f' % mean)
    plt.vlines(median, ymin=0, ymax=y.max(), color='#e67e22',
               linewidths=3, label='median = %.4f' % median)
    plt.legend()
    plt.title('Error distribution (bins=50)')
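
# Usage sketch (added for illustration, not part of the original module):
# residuals from any fitted regressor can be passed directly. scikit-learn
# and the synthetic data below are assumptions; any array of errors works.
#
#     import numpy as np
#     from sklearn.linear_model import LinearRegression
#
#     rng = np.random.RandomState(0)
#     X = rng.rand(200, 3)
#     y = X @ np.array([1.5, -2.0, 0.7]) + rng.normal(0, 0.1, 200)
#     reg = LinearRegression().fit(X, y)
#
#     plot_error_distribution(y - reg.predict(X))
#     plt.show()
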
def plot_performance(y_true, y_pred, y_true_valid=None,
                     y_pred_valid=None, metrics=None, **kwargs):
    """Plots the performance of a regressor.
    You can choose which metrics to display with the metrics argument.

    Can compare train and validation sets.

    Parameters
    ----------
    y_true: array like
        True target values
    y_pred: array like
        Predicted values
    y_true_valid: array like (default None)
        True target values for validation set
    y_pred_valid: array like (1D or 2D) (default None)
        Predicted values for validation set
    metrics: list (default None)
        List of metrics to plot

    Raises
    ------
    TypeError:
        if metrics is set, it must be a list
    """
    validation = (y_true_valid is not None) & (y_pred_valid is not None)

    if metrics is None:
        metrics = ['MAE', 'mean_squared_error',
                   'root_mean_squared_error', 'r2']
    elif type(metrics) != list:
        raise TypeError('metrics must be a list')

    # 1. Compute metrics and errors (train, and validation if provided)
    perf = regression.compute_metrics(y_true, y_pred, metrics)
    errors = y_true - y_pred

    if validation:
        perf_valid = regression.compute_metrics(
            y_true_valid, y_pred_valid, metrics)
        errors_valid = y_true_valid - y_pred_valid

    # 2. Plot figure with score values and error distribution
    # Init figure: one column for train only, two when a validation set
    # is given; the top row holds the score table, the bottom row the
    # error distribution
    fig = plt.figure(figsize=(15, 8))

    n_cols = int(validation) + 1
    widths = [1] * n_cols
    heights = [1, 2]
    gs = fig.add_gridspec(ncols=n_cols, nrows=2, wspace=0.2,
                          width_ratios=widths, height_ratios=heights)

    ax = fig.add_subplot(gs[0, 0])
    plots.plot_table_score(perf)

    ax = fig.add_subplot(gs[1, 0])
    plot_error_distribution(errors)

    if validation:
        ax = fig.add_subplot(gs[0, 1])
        plots.plot_table_score(perf_valid)

        ax = fig.add_subplot(gs[1, 1])
        plot_error_distribution(errors_valid)

    title = 'Model performance plot'
    if validation:
        title += ' train set (left) and test set (right)'
    fig.suptitle(title, fontsize=18)

    # plt.show()
    return plots.plot_or_figure(fig, **kwargs)
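
# Usage sketch (added for illustration, not part of the original module):
# compare train and validation performance with a custom metric list.
# The split, estimator and variable names are placeholders; the metric
# strings reuse the defaults above, which compute_metrics is assumed to
# accept. Extra keyword arguments are forwarded to plots.plot_or_figure.
#
#     import numpy as np
#     from sklearn.linear_model import LinearRegression
#     from sklearn.model_selection import train_test_split
#
#     rng = np.random.RandomState(0)
#     X = rng.rand(200, 3)
#     y = X @ np.array([1.5, -2.0, 0.7]) + rng.normal(0, 0.1, 200)
#
#     X_train, X_valid, y_train, y_valid = train_test_split(
#         X, y, test_size=0.25, random_state=0)
#     reg = LinearRegression().fit(X_train, y_train)
#
#     plot_performance(y_train, reg.predict(X_train),
#                      y_true_valid=y_valid,
#                      y_pred_valid=reg.predict(X_valid),
#                      metrics=['MAE', 'r2'])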