Source code for transparentai.fairness.fairness_plots

import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import textwrap

from ..fairness import fairness
from transparentai import plots


[docs]def get_protected_attr_values(attr, df, privileged_group, privileged=True):
    """Retrieves all values given the privileged_group argument.

    If privileged is True and privileged_group[attr] is a list then it returns
    the list, if it's a function then values of df[attr] for which the function
    returns True. 

    If privileged is False and privileged_group[attr] is a list then it returns
    values of df[attr] not in the list else if it's a function returns values 
    of df[attr] for which the function returns False. 

    Parameters
    ----------
    attr: str
        Protected attribute which is a key of the privileged_group
        dictionnary
    df: pd.DataFrame
        Dataframe to extract privilieged group from.
    privileged_group: dict
        Dictionnary with protected attribute as key (e.g. age or gender)
        and a list of favorable value (like ['Male']) or a function
        returning a boolean corresponding to a privileged group
    privileged: bool (default True)
        Boolean prescribing whether to
        condition this metric on the `privileged_groups`, if `True`, or
        the `unprivileged_groups`, if `False`.

    Returns
    -------
    list:
        List of privileged values of the protected attribyte attr 
        if privileged is True else unprivileged values

    Raises
    ------
    ValueError:
        attr must be in privileged_group
    """
    if attr not in privileged_group:
        raise ValueError('attr must be in privileged_group')

    val = privileged_group[attr]
    if type(val) == list:
        if privileged:
            return val

        def fn(x): return x in val
    else:
        fn = val

    cond = df[attr].apply(fn) == privileged
    return df[cond][attr].unique().astype(str).tolist()


[docs]def format_priv_text(values, max_char):
    """Formats privileged (or unprivileged) values text
    so that it can be shown.

    Parameters
    ----------
    values: list
        List of privileged or unprivileged values
    max_char: int
        Maximum characters allow in the returned string

    Returns
    -------
    str:
        Formated string for given values

    Raises
    ------
    TypeError
        values must be a list
    """
    if type(values) != list:
        raise TypeError('values must be a list')

    priv_text = ''

    for val in values:
        if (len(val) + len(priv_text) > max_char) & (priv_text != ''):
            priv_text = priv_text[:-2] + ' and others  '
            break
        priv_text += val+', '

    return priv_text[:-2]


[docs]def plot_attr_title(ax, attr, df, privileged_group):
    """Plots the protected attribute titles with :

    - The attribute name (e.g. Gender)
    - Priviliged and unprivileged values
    - Number of privileged and unprivileged values

    Parameters
    ----------
    ax: plt.axes.Axes
        axe where to add the plot
    attr: str
        Protected attribute which is a key of the privileged_group
        dictionnary
    df: pd.DataFrame
        Dataframe to extract privilieged group from.
    privileged_group: dict
        Dictionnary with protected attribute as key (e.g. age or gender)
        and a list of favorable value (like ['Male']) or a function
        returning a boolean corresponding to a privileged group

    Raises
    ------
    ValueError:
        attr must be in df columns
    ValueError:
        attr must be in privileged_group keys
    """
    if attr not in df.columns:
        raise ValueError('attr must be in df columns')
    if attr not in privileged_group:
        raise ValueError('attr must be in privileged_group keys')

    plt.text(0, 1.4, 'Protected Attribute: %s' %
             attr, fontsize=22, weight="bold")
    priv_df = fairness.create_privilieged_df(df, privileged_group)[attr]

    priv_values = get_protected_attr_values(attr, df, privileged_group)
    unpriv_values = get_protected_attr_values(
        attr, df, privileged_group, privileged=False)

    priv_text = format_priv_text(priv_values, max_char=30)
    unpriv_text = format_priv_text(unpriv_values, max_char=30)

    n_priv = (priv_df == 1).sum()
    n_unpriv = (priv_df == 0).sum()
    n = len(priv_df)

    plt.text(0, 0.8, 'Privileged group values  : %s' %
             (priv_text), fontsize=13)
    plt.text(0, 0.2, 'Unrivileged group values: %s' %
             (unpriv_text), fontsize=13)

    plt.text(1, 0.8, r'# of privileged values : $\bf{%i}$ ($\bf{%.2f}$%%)' % (n_priv, n_priv*100/n),
             fontsize=13, horizontalalignment='right')
    plt.text(1, 0.2, r'# of unprivileged values : $\bf{%i}$ ($\bf{%.2f}$%%)' % (n_unpriv, n_unpriv*100/n),
             fontsize=13, horizontalalignment='right')

    plt.axis('off')
    ax.axhline(0, color='#000', linewidth=5)


[docs]def plot_bias_one_attr(ax, metric, score):
    """Plots bias metric score bar with the indication
    if it's considered not fair or fair.

    Parameters
    ----------
    ax: plt.axes.Axes
        axe where to add the plot
    metric: str
        The name of the metric
    score: float:
        Score value of the metric        
    """
    goal, threshold = fairness.fairness_metrics_goal_threshold(metric)
    is_fair = fairness.is_metric_fair(score, metric)

    ax.set_ylim((goal-1, goal+1))
    ax.set_xlim((0, 1))
    ax.axhline(goal, color='#000', linewidth=2)
    ax.axhline(goal-threshold, color='#000', linewidth=1, linestyle='--')
    ax.axhline(goal+threshold, color='#000', linewidth=1, linestyle='--')

    ax.bar(0.5, score, color='#2c3e50', width=0.25, zorder=2)

    y = goal-threshold if is_fair else goal-1 if score < goal else goal+threshold
    h = 2*threshold if is_fair else 1-threshold
    bg_color = '#2ecc71' if is_fair else '#e74c3c'
    text = 'fair' if is_fair else 'not fair'
    color = '#27ae60' if is_fair else '#c0392b'

    ax.text(0.01, goal+threshold+0.04, text, fontsize=13,
            color=color, weight="bold")

    rect = Rectangle((0, y), 1, h, facecolor=bg_color,
                     alpha=0.5, zorder=1)
    ax.add_patch(rect)

    ax.get_xaxis().set_ticks([])
    ax.set_title(metric)


[docs]def plot_fairness_text(ax, score, metric):
    """Plots bias metric explanation text.

    The text is retrieved by the fairness_metrics_text()
    function.

    Parameters
    ----------
    ax: plt.axes.Axes
        axe where to add the plot
    metric: str
        The name of the metric
    score: float:
        Score value of the metric  
    """
    text = fairness.fairness_metrics_text(score, metric)

    text = "\n".join(textwrap.wrap(text, width=17))

    ax.text(-0.5, 1, text, ha='left', fontsize=12,
            va='top', wrap=True)

    for sp in ['top', 'right', 'left', 'bottom']:
        ax.spines[sp].set_visible(False)
    ax.set_yticks([])
    ax.set_xticks([])


[docs]def plot_bias(y_true, y_pred, df, privileged_group, pos_label=1,
              regr_split=None, with_text=True, **kwargs):
    """Plots the fairness metrics for protected attributes
    refered in the privileged_group argument.

    It uses the 4 fairness function :

    - statistical_parity_difference
    - disparate_impact
    - equal_opportunity_difference
    - average_odds_difference

    You can also use it for a regression problem. You can set a value
    in the regr_split argument so it converts it to a binary classification problem.
    To use the mean use 'mean'.
    If the favorable label is more than the split value 
    set pos_label argument to 1 else to 0.

    Example
    =======
    
    Using this function for a binary classifier:

    >>> from transparentai.datasets import load_adult
    >>> from sklearn.ensemble import RandomForestClassifier

    >>> data = load_adult()
    >>> X, Y = data.drop(columns='income'), data['income'].replace({'>50K':1, '<=50K':0})
    >>> X = X.select_dtypes('number')
    >>> clf = RandomForestClassifier().fit(X,Y)
    >>> y_pred = clf.predict(X)

    >>> privileged_group = { 'gender':['Male'] }

    >>> y_pred = clf.predict(X)plot_bias(Y, y_pred, data, privileged_group, with_text=True)


    Parameters
    ----------
    y_true: array like
        True labels
    y_pred: array like
        Predicted labels
    df: pd.DataFrame
        Dataframe to extract privilieged group from.
    privileged_group: dict
        Dictionnary with protected attribute as key (e.g. age or gender)
        and a list of favorable value (like ['Male']) or a function
        returning a boolean corresponding to a privileged group
    pos_label: number
        The label of the positive class.
    regr_split: 'mean' or number (default None)
        If it's a regression problem then you can convert result to a
        binary classification using 'mean' or a choosen number.
        both y_true and y_pred become 0 and 1 : 0 if it's equal or less
        than the split value (the average if 'mean') and 1 if more.
        If the favorable label is more than the split value set pos_label=1
        else pos_label=0
    with_text: bool (default True)
        Whether it displays the explanation text for 
        fairness metrics.
    """
    metrics = ['statistical_parity_difference',
               'disparate_impact',
               'equal_opportunity_difference',
               'average_odds_difference']

    scores = fairness.compute_fairness_metrics(y_true, y_pred, df, privileged_group,
                                               metrics, pos_label, regr_split='mean')

    n_attr = len(scores)

    if not with_text:
        widths = [1]*4
        heights = [1, 5, 2]*n_attr
    else:
        widths = [3, 1]*2
        heights = [1, 5, 5, 2]*n_attr

    fig = plt.figure(figsize=(15, 7*n_attr + (int(with_text)*8)))

    gs = fig.add_gridspec(len(heights), 4, wspace=0.3,
                          width_ratios=widths,
                          height_ratios=heights)

    row = 0
    for attr, bias_scores in scores.items():
        ax = fig.add_subplot(gs[row, :])
        plot_attr_title(ax, attr, df, privileged_group)

        axes = [fig.add_subplot(gs[row+1+j, i])
                for j in range(int(with_text)+1)
                for i in range(4)]

        for i, (metric, score) in enumerate(bias_scores.items()):
            ax = axes[i] if not with_text else axes[i*2]
            plot_bias_one_attr(ax, metric, score)

            if not with_text:
                continue

            ax = axes[i*2+1]
            plot_fairness_text(ax, score, metric)

        # Separator line
        ax = fig.add_subplot(gs[row+2, :])
        plt.axis('off')

        row += 3+int(with_text)

    # plt.show()
    return plots.plot_or_figure(fig, **kwargs)