Source code for transparentai.utils.reports

from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt

from transparentai.datasets import variable
from transparentai.models import classification, explainers, regression
from transparentai import utils
from datetime import datetime


def generate_head_page(document_title):
    """Build the cover page of a report as a matplotlib figure.

    The figure is 8.27 x 11.69 inches (A4 portrait). It shows the title
    inside a rounded box and, below it, today's date formatted as
    ``YYYY-MM-DD``. The axes are hidden so only the two texts are visible.

    Parameters
    ----------
    document_title: str
        Title to print on the page

    Returns
    -------
    matplotlib.figure.Figure:
        The cover page figure
    """
    page_size = (8.27, 11.69)
    title_box = {
        'facecolor': 'none',
        'edgecolor': 'black',
        'boxstyle': 'round,pad=1',
    }
    today = datetime.today().strftime('%Y-%m-%d')

    # plt.figure makes the new figure current, so the pyplot calls below
    # all draw on it.
    page = plt.figure(figsize=page_size)
    plt.text(0.5, 0.75, document_title,
             fontsize=23, ha='center', va='center', bbox=title_box)
    plt.text(0.5, 0.65, today,
             fontsize=15, ha='center', va='center')
    plt.axis('off')

    return page
def generate_validation_report(model, X, y_true, X_valid=None,
                               y_true_valid=None, metrics=None,
                               model_type='classification',
                               out='validation_report.pdf'):
    """Generate a pdf report on the model performance.

    The report contains one page per figure, in this order:

    - First page with the report title
    - A histogram of the y_true distribution
    - Model performance plot
    - Model feature importance plot

    This function is useful to keep a proof of the validation.

    Predictions are computed with ``model.predict_proba`` when the model
    exposes it (checked with ``utils.object_has_function``), otherwise
    with ``model.predict``.

    Parameters
    ----------
    model:
        Model to analyse
    X: array like
        Features
    y_true: array like
        True labels
    X_valid: array like (default None)
        Features for validation set
    y_true_valid: array like (default None)
        True labels for validation set
    metrics: list (default None)
        List of metrics to plot.
        NOTE(review): this argument is accepted but never read by this
        function - confirm whether it should be forwarded to
        ``plot_performance``.
    model_type: str (default 'classification')
        'classification' or 'regression'
    out: str (default 'validation_report.pdf')
        path where to save the report

    Raises
    ------
    ValueError:
        model_type must be 'classification' or 'regression'
    """
    if model_type not in ('classification', 'regression'):
        raise ValueError(
            'model_type must be \'classification\' or \'regression\'')

    # Prefer probabilities when the model can provide them.
    if utils.object_has_function(model, 'predict_proba'):
        fn = model.predict_proba
    else:
        fn = model.predict

    y_pred = fn(X)
    y_pred_valid = fn(X_valid) if X_valid is not None else None

    figs = []

    document_title = 'Validation report'
    figs.append(generate_head_page(document_title))

    # Plot y_true variable
    print('Generating y_true distribution')
    figs.append(variable.plot_variable(y_true, plot=False))

    # Both modules are called through the same plot_performance signature.
    module = classification if model_type == 'classification' else regression

    print('Generating model performance')
    figs.append(module.plot_performance(
        y_true, y_pred, y_true_valid, y_pred_valid, plot=False))

    nsamples = 1000
    print('Generating model feature influence (over %i samples)' % nsamples)
    explainer = explainers.ModelExplainer(model, X)
    figs.append(explainer.plot_global_explain(
        X, nsamples=nsamples, plot=False))

    # The context manager closes the PDF even if saving a page fails,
    # so the file handle is never leaked.
    with PdfPages(out) as pp:
        for fig in figs:
            pp.savefig(fig)

    print('report generated at %s' % out)