Saturday, February 2, 2013

#matplotlib: Comparative histogram recipe

When comparing the distributions of two related data sets, it often makes sense to present the data on a comparative histogram, i.e. two histograms, one for each data set, contrasted against each other. 

Comparative histograms can be used to demonstrate a change in performance when enough samples are available (e.g. the latency of calls to a server), or to compare two statistical populations, as in an age pyramid. 
Here is the code:

import numpy as np
import matplotlib.pyplot as plt

def _split_kwargs(kwargs):
    """Split keyword arguments into per-histogram and shared groups.

    A name ending in '1' or '2' is stored, with that suffix removed, in the
    group for the first or second histogram respectively; any other name is
    stored unchanged in the shared group.

    Returns the tuple ``(kwargs1, kwargs2, kwcommon)``.
    """
    kwargs1, kwargs2, kwcommon = {}, {}, {}
    for name, value in kwargs.items():
        if name.endswith('1'):
            kwargs1[name[:-1]] = value
        elif name.endswith('2'):
            kwargs2[name[:-1]] = value
        else:
            kwcommon[name] = value
    return kwargs1, kwargs2, kwcommon


def comphist(x1, x2, orientation='vertical', **kwargs):
    """Draw a comparative histogram.

    The two histograms are drawn in adjacent subplots of a new figure. They
    share the data axis, and the second histogram is mirrored so that the two
    sets of bars grow away from the common edge.

    Parameters
    ----------
    x1, x2
        The two data sets; each is passed to ``Axes.hist``.
    orientation : {'vertical', 'horizontal'}
        'vertical' stacks the subplots (x1 on top, x2 below) with a shared
        x axis; 'horizontal' puts them side by side (x2 left, x1 right) with
        a shared y axis.
    **kwargs
        Keyword arguments forwarded to ``Axes.hist``. A name ending in '1'
        (e.g. ``label1``) applies only to the x1 histogram, a name ending in
        '2' only to the x2 histogram, both with the suffix removed. Any other
        name applies to both; a suffixed value overrides a shared one.

    Returns
    -------
    The newly created figure.

    Raises
    ------
    ValueError
        If ``orientation`` is neither 'vertical' nor 'horizontal'.
    """
    # Fail before creating a figure so a bad argument leaves nothing behind.
    if orientation not in ('vertical', 'horizontal'):
        raise ValueError(
            "orientation must be 'vertical' or 'horizontal', got %r"
            % (orientation,))

    kwargs1, kwargs2, kwcommon = _split_kwargs(kwargs)
    # Shared options form the base; per-histogram options take precedence.
    hist_kwargs1 = dict(kwcommon, **kwargs1)
    hist_kwargs2 = dict(kwcommon, **kwargs2)

    fig = plt.figure()

    # Have both histograms share one axis.
    if orientation == 'vertical':
        ax1 = fig.add_subplot(211)
        ax2 = fig.add_subplot(212, sharex=ax1)
        # The shared x axis is labelled on the lower subplot only.
        plt.setp(ax1.get_xticklabels(), visible=False)
        legend_loc = (1, 4)  # ax1: upper right, ax2: lower right
    else:
        ax1 = fig.add_subplot(122)
        ax2 = fig.add_subplot(121, sharey=ax1)
        # The shared y axis is labelled on the left subplot only.
        plt.setp(ax1.get_yticklabels(), visible=False)
        legend_loc = (1, 2)  # ax1: upper right, ax2: upper left

    ax1.hist(x1, orientation=orientation, **hist_kwargs1)
    ax2.hist(x2, orientation=orientation, **hist_kwargs2)

    # Mirror the second histogram by inverting its count axis (the axis that
    # is NOT shared), so its bars point away from the first histogram.
    if orientation == 'vertical':
        ax2.invert_yaxis()
    else:
        ax2.invert_xaxis()

    # Only draw a legend where a label was supplied; an unlabeled legend
    # would be empty and makes matplotlib emit a warning.
    if hist_kwargs1.get('label') is not None:
        ax1.legend(loc=legend_loc[0])
    if hist_kwargs2.get('label') is not None:
        ax2.legend(loc=legend_loc[1])

    # Tighten up the layout so the two subplots touch.
    fig.subplots_adjust(wspace=0.0, hspace=0.0)
    return fig

if __name__ == "__main__":
    # Demo: compare two independent samples of 1000 standard-normal values.
    sample_before = np.random.randn(1000)
    sample_after = np.random.randn(1000)
    comphist(
        sample_before,
        sample_after,
        label1='before',
        label2='after',
        color2='green',
        bins=30,
        rwidth=1,
    )

No comments:

Post a Comment