# Source code for transparentai.models.regression.regression_plots
import matplotlib.pyplot as plt
import numpy as np
from ..regression import metrics as regression
from transparentai import plots
def plot_error_distribution(errors):
    r"""Plots the error distribution with standard deviation,
    mean and median.

    The error is calculated by the following formula :

    .. math::

        error = y - \hat{y}

    Parameters
    ----------
    errors: array like
        Errors of a regressor
    """
    # Coerce to ndarray so list inputs behave like arrays throughout.
    errors = np.asarray(errors)

    mean = np.mean(errors)
    median = np.median(errors)
    std = np.std(errors)

    # Histogram counts (y) provide the vertical extent for the marker lines.
    y, x, _ = plt.hist(errors, bins=50, color='#3498db',
                       label='std = %.4f' % std)

    plt.vlines(mean, ymin=0, ymax=y.max(), color='#e74c3c', linewidths=3,
               label='mean = %.4f' % mean)
    plt.vlines(median, ymin=0, ymax=y.max(), color='#e67e22', linewidths=3,
               label='median = %.4f' % median)

    plt.legend()
    plt.title('Error distribution (bins=50)')
def plot_performance(y_true, y_pred, y_true_valid=None, y_pred_valid=None, metrics=None, **kwargs):
    """Plots the performance of a regressor.

    You can use the metrics of your choice with the metrics argument.
    Can compare train and validation set.

    Parameters
    ----------
    y_true: array like
        True target values
    y_pred: array like
        Predicted values
    y_true_valid: array like (default None)
        True target values for validation set
    y_pred_valid: array like (1D or 2D) (default None)
        Predicted values for validation set
    metrics: list (default None)
        List of metrics to plots; defaults to MAE, MSE, RMSE and R2

    Raises
    ------
    TypeError:
        if metrics is set it must be a list
    """
    # Validation panel is drawn only when both validation arrays are given.
    validation = (y_true_valid is not None) and (y_pred_valid is not None)

    if metrics is None:
        metrics = ['MAE', 'mean_squared_error',
                   'root_mean_squared_error', 'r2']
    elif not isinstance(metrics, list):
        raise TypeError('metrics must be a list')

    # 1. compute metrics and raw errors (y - y_hat)
    perf = regression.compute_metrics(y_true, y_pred, metrics)
    errors = y_true - y_pred

    if validation:
        perf_valid = regression.compute_metrics(
            y_true_valid, y_pred_valid, metrics)
        errors_valid = y_true_valid - y_pred_valid

    # 2. Plot figure with score values and error distribution.
    # One column for train only, two columns when validation is shown;
    # top row (score table) is half the height of the bottom row (histogram).
    fig = plt.figure(figsize=(15, 8))
    n_cols = int(validation) + 1
    widths = [1] * n_cols
    heights = [1, 2]
    gs = fig.add_gridspec(ncols=n_cols, nrows=2, wspace=0.2,
                          width_ratios=widths,
                          height_ratios=heights)

    ax = fig.add_subplot(gs[0, 0])
    plots.plot_table_score(perf)

    ax = fig.add_subplot(gs[1, 0])
    plot_error_distribution(errors)

    if validation:
        ax = fig.add_subplot(gs[0, 1])
        plots.plot_table_score(perf_valid)

        ax = fig.add_subplot(gs[1, 1])
        plot_error_distribution(errors_valid)

    title = 'Model performance plot'
    if validation:
        title += ' train set (left) and test set (right)'
    fig.suptitle(title, fontsize=18)

    return plots.plot_or_figure(fig, **kwargs)