Bachelors Degrees By Gender#

Download this notebook from GitHub (right-click to download).


URL: http://matplotlib.org/examples/showcase/bachelors_degrees_by_gender.html

Most examples work equivalently across multiple plotting backends; this example is also available for:

import holoviews as hv
from holoviews import opts
# Load the bokeh and matplotlib plotting extensions for HoloViews
hv.extension('bokeh', 'matplotlib')

Define data#

import pandas as pd

# Read the CSV at this URL with pandas (requires network access).
# The melt step further down relies on a 'Year' column being present; the
# remaining columns are presumably one per degree field -- TODO confirm.
gender_degree_data = pd.read_csv("https://datasets.holoviz.org/bachelor_women/v1/percent_bachelors_degrees_women_usa.csv")

# Plot title; the trailing newline is part of the string
title = (
    "Percentage of Bachelor's degrees conferred to women "
    "in the U.S.A. by major (1970-2011)\n"
)

# Hex colors used for the curves and their labels, in cycling order
color_sequence = [
    '#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78',
    '#2ca02c', '#98df8a', '#d62728', '#ff9896',
    '#9467bd', '#c5b0d5', '#8c564b', '#c49c94',
    '#e377c2', '#f7b6d2', '#7f7f7f', '#c7c7c7',
    '#bcbd22', '#dbdb8d', '#17becf', '#9edae5',
]

# Vertical offsets applied to individual degree labels, keyed by degree
# name; degrees not listed here get no offset
y_offsets = {
    'Foreign Languages': 0.5,
    'English': -0.5,
    'Communications and Journalism': 0.75,
    'Art and Performance': -0.25,
    'Agriculture': 1.25,
    'Social Sciences and History': 0.25,
    'Business': -0.75,
    'Math and Statistics': 0.75,
    'Architecture': -0.75,
    'Computer Science': 0.75,
    'Engineering': -0.25,
}

# Load the data into a dataframe and use melt to unpivot the per-degree
# columns into long form: one row per (Year, Degree) with the value stored
# in 'conferred'
df = pd.DataFrame(gender_degree_data).melt(
    id_vars='Year', var_name='Degree', value_name='conferred')
# Turn the column-style names into display names: underscores become
# spaces and each word is title-cased
df['Degree'] = df['Degree'].map(lambda name: name.replace('_', ' ').title())

# Define a formatter that works for both bokeh and matplotlib
def percent_format(x):
    """Format a numeric value as a whole-number percentage string.

    Args:
        x: The value to format, e.g. ``42.4``.

    Returns:
        The value with zero decimal places followed by a percent sign,
        e.g. ``'42%'``.

    Raises:
        TypeError, ValueError: If ``x`` cannot be formatted by either the
            float-style or the integer-style path.
    """
    try:
        return '{:0.0f}%'.format(x)
    except (TypeError, ValueError):
        # Fallback for values that do not accept a float format spec.
        # A literal percent sign must be doubled in %-formatting; the
        # previous '%d%' raised "incomplete format" on every call.
        return '%d%%' % x

# Value dimension for the plotted quantity, with its range fixed to 0-90
vdim = hv.Dimension('conferred', range=(0, 90))

# Wrap the long-form dataframe in a Dataset, build one Curve over Year per
# Degree, overlay them, and set the Year range to 1970-2030
ds = hv.Dataset(df, vdims=vdim)
curves = (
    ds.to(hv.Curve, 'Year', groupby='Degree')
    .overlay()
    .redim(Year={'range': (1970, 2030)})
)

# Latest year present in the dataset; the text annotations are built from
# the rows of this year
max_year = ds['Year'].max()


def offset(row):
    """Shift a row's 'conferred' value by the offset configured for its degree.

    Looks up ``row.Degree`` in ``y_offsets`` (0 when the degree has no
    entry), adds that amount to ``row['conferred']`` in place, and returns
    the same row.
    """
    shift = y_offsets.get(row.Degree, 0)
    row['conferred'] = row['conferred'] + shift
    return row
# Keep only the rows for the final year and apply the per-degree offset to
# each of them
label_df = df.loc[df.Year == max_year].apply(offset, axis=1)
# Text labels positioned at (Year, conferred) and carrying the Degree name
labels = hv.Labels(label_df, kdims=['Year', 'conferred'], vdims='Degree')

Display#

# Combine the curves with their text labels and attach the plot title
overlay = (curves * labels).relabel(title)

# Grid line styling passed to the overlay options below
gridstyle = dict(
    grid_line_dash=[6, 4],
    grid_line_width=3,
    grid_bounds=(0, 100),
)

# Apply per-element options; as the final expression of the cell, the
# result is what gets displayed
overlay.opts(
    opts.Curve(
        show_frame=False,
        labelled=[],
        tools=['hover'],
        height=900,
        width=900,
        show_legend=False,
        xticks=[1970, 1980, 1990, 2000, 2010],
        color=hv.Cycle(values=color_sequence),
        line_width=2,
        show_grid=True,
        yformatter='%d%%',
    ),
    opts.Labels(
        text_color='Degree',
        cmap=color_sequence,
        text_align='left',
    ),
    opts.NdOverlay(
        batched=False,
        gridstyle=gridstyle,
    ),
)
This web page was generated from a Jupyter notebook and not all interactivity will work on this website. Right click to download and run locally for full Python-backed interactivity.

Download this notebook from GitHub (right-click to download).