Bokeh Basics I

4 minute read

Bokeh Basics - I

Bokeh is a Python library that enables users to create beautiful, dynamic, and interactive visualizations.

To learn more about Bokeh, visit its website.

Want to create visualizations? Let’s get started!

# import required libraries

from bokeh.io import output_notebook, show, reset_output, output_file

import bokeh
from bokeh.plotting import figure

import numpy as np
import pandas as pd

# import library for toy datasets
from vega_datasets import data as vds

Load Data

We need data to plot! Bokeh provides example datasets we can use.

# the iris module lives in Bokeh's `sampledata` package
from bokeh.sampledata import iris
# load iris dataset (exposed as the module attribute `flowers`)
df_iris = iris.flowers
# display the first five rows of df_iris
df_iris.head()
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa
# To display graphs/output inline, call `output_notebook()` once when using JupyterLab;
# when using Colab, call it in every cell that returns a graph.
output_notebook()

Creating Plots

To create plots we must follow this workflow:

  • Create a figure – the canvas that will hold the plot.
  • Add a glyph (the plot itself) to the figure. We have several options: line, bar, scatter.
  • Show the plot.

Bokeh’s Data Structure

Bokeh uses the ColumnDataSource as its main data structure. The ColumnDataSource is a table-like data structure that maps string column names to sequences of data (columns). Most of the time the ColumnDataSource is created automatically, but it can also be created explicitly by passing a pandas DataFrame to the class initializer:

# illustrative only: `df` stands for an existing pandas DataFrame
data = ColumnDataSource(df)

# Create a ColumnDataSource explicitly from a dict that maps
# string column names to equal-length sequences of values.

from bokeh.models import ColumnDataSource

df = ColumnDataSource({'A': [1, 2, 3, 4, 5],
                       'B': [5, 4, 3, 2, 1],
                       'C': [1, 3, 5, 1, 2]})
# `.data` exposes the underlying column-name -> values mapping
df.data

{'A': [1, 2, 3, 4, 5], 'B': [5, 4, 3, 2, 1], 'C': [1, 3, 5, 1, 2]}

Create a Line Plot

We can create some random data to pass as our x and y values.

# Line plot: ten random y-values drawn against x = 0..9.

from bokeh.models import HoverTool

# toy data: x is 0..9, y is ten random samples
x_ax = np.arange(10)
y_ax = np.random.rand(10)

# build the figure, then draw a single line glyph on it
line_plot = figure(
    title='Line Plot',
    x_axis_label='X',
    y_axis_label='Y',
    plot_width=600,
    plot_height=425,
)
line_plot.line(x=x_ax, y=y_ax, line_width=2, legend_label='line')

# add hover tool
line_plot.add_tools(HoverTool())

# write the rendered figure to an HTML file, then display it
output_file('/line_chart.html')

show(line_plot)
Bokeh Plot

Creating a multi-variable line plot

# Multi-variable line plot: three random series drawn on one figure.
output_notebook()

# create some random data (one shared x-axis, three y-series)
x_multi = np.arange(10)
y1_multi = np.random.rand(10)
y2_multi = np.random.rand(10)
y3_multi = np.random.rand(10)

# create an instance of the plot, with the toolbar placed below it
multi_var_plot = figure(toolbar_location='below', plot_width=600, plot_height=400)

# one line glyph per series, each with its own colour and legend entry
for y_values, line_colour, label in (
    (y1_multi, 'yellow', 'y1'),
    (y2_multi, 'blue', 'y2'),
    (y3_multi, 'red', 'y3'),
):
    multi_var_plot.line(x_multi, y_values, color=line_colour, line_width=4, legend_label=label)

multi_var_plot.add_tools(HoverTool())

output_file('/multiline_chart.html')

show(multi_var_plot)
Bokeh Plot

Creating Bar Charts

# Bar chart: five categories with random heights, ordered tallest-first.

# create random data
x_ax = ['cat1', 'cat2', 'cat3', 'cat4', 'cat5']
y_ax = np.random.rand(5) * 10

# order the category names by their value, largest first
height_by_cat = dict(zip(x_ax, y_ax))
sorted_cat = sorted(x_ax, key=height_by_cat.get, reverse=True)

# create instance of the bar chart; the sorted names are passed as the x-range
bar_chart = figure(
    x_range=sorted_cat,
    title='Bar Chart',
    x_axis_label='X',
    y_axis_label='Y',
    plot_height=300,
)
# use vbar for vertical and hbar for horizontal bars
bar_chart.vbar(x=x_ax, top=y_ax, width=0.4, color='blue')
# start the y-axis at zero
bar_chart.y_range.start = 0
bar_chart.add_tools(HoverTool())

output_file('/bar_chart.html')
show(bar_chart)
Bokeh Plot

Stacked Bar Chart

# Stacked Bar Chart: columns 'x1' and 'x2' stacked horizontally at each 'y'.
df_stacked = pd.DataFrame({'y': [1, 2, 3, 4, 5],
                           'x1': [1, 2, 4, 3, 4],
                           'x2': [1, 4, 2, 2, 3]})

# wrap the DataFrame once so that every stacked layer reads from the same source
df_CDS_tacked = ColumnDataSource(df_stacked)

stacked_bar_chart = figure(plot_width=600, plot_height=300, title='Stacked Bar Chart')
# one colour per stacked column, in the same order as the column list
stacked_bar_chart.hbar_stack(['x1', 'x2'],
                             y='y',
                             height=0.8,
                             color=('green', 'lightgreen'),
                             source=df_CDS_tacked)
stacked_bar_chart.add_tools(HoverTool())
output_file('/stacked_bar_chart.html')
show(stacked_bar_chart)
Bokeh Plot

Creating a Bar Chart Grouping Data


from bokeh.transform import dodge

# create some sample data: one row per category, one column per year
categories = ['category1', 'category2', 'category3']

df_grouped = pd.DataFrame({
    'categories': categories,
    '2018': [2, 1, 4],
    '2019': [5, 3, 3],
    '2020': [3, 2, 4],
})

# create instance of a figure
bar_grouped = figure(x_range=categories, y_range=(0, 10), plot_height=350)

# define position of bars on chart: one offset per year around each category
dodge1, dodge2, dodge3 = (
    dodge('categories', offset, range=bar_grouped.x_range)
    for offset in (-0.25, 0.0, 0.25)
)

# draw one set of bars per year, each with its own colour and legend entry
for position, year, bar_colour in zip(
    (dodge1, dodge2, dodge3),
    ('2018', '2019', '2020'),
    ('blue', 'green', 'red'),
):
    bar_grouped.vbar(x=position, top=year, width=0.2, source=df_grouped,
                     color=bar_colour, legend_label=year)

# configure legend
bar_grouped.legend.location = 'top_left'
bar_grouped.legend.orientation = 'horizontal'

bar_grouped.add_tools(HoverTool())
output_file('/grouped_bar_chart.html')
show(bar_grouped)
Bokeh Plot

Stacked Area Chart

# create dummy data for the chart: a shared 'x' column and two series to stack
df_area_stacked = pd.DataFrame(
    {
        'x': [1, 2, 3, 4, 5],
        'y1': [1, 3, 1, 4, 5],
        'y2': [1, 2, 3, 4, 2],
    }
)

stacked_area_chart = figure(plot_height=300, plot_width=600)

# stack 'y1' and 'y2' as filled areas; colours follow the column order
stacked_area_chart.varea_stack(
    ['y1', 'y2'],
    x='x',
    color=('coral', 'cadetblue'),
    source=df_area_stacked,
)
show(stacked_area_chart)
Bokeh Plot

Scatter Plots

Load the cars dataset from vega_datasets.

# load the cars dataset from vega_datasets (imported as `vds`)
df_cars = vds.cars()
# display the first five rows
df_cars.head()
Name Miles_per_Gallon Cylinders Displacement Horsepower Weight_in_lbs Acceleration Year Origin
0 chevrolet chevelle malibu 18.0 8 307.0 130.0 3504 12.0 1970-01-01 USA
1 buick skylark 320 15.0 8 350.0 165.0 3693 11.5 1970-01-01 USA
2 plymouth satellite 18.0 8 318.0 150.0 3436 11.0 1970-01-01 USA
3 amc rebel sst 16.0 8 304.0 150.0 3433 12.0 1970-01-01 USA
4 ford torino 17.0 8 302.0 140.0 3449 10.5 1970-01-01 USA
Bokeh Plot

Scatter Plot for Categorical Data

We can use the famous iris dataset to plot categorical data. This dataset contains attributes about three different flower species: setosa, versicolor, virginica.

# load the iris dataset from vega_datasets; this rebinds `df_iris`,
# and the column names here are camelCase (e.g. `petalLength`)
df_iris = vds.iris()
# display the first five rows
df_iris.head()
sepalLength sepalWidth petalLength petalWidth species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa
from bokeh.transform import factor_cmap, factor_mark

# the categories to distinguish, and the marker used for each (same order)
species = ['setosa', 'versicolor', 'virginica']
markers = ['hex', 'cross', 'triangle']

sub_scatter = figure(plot_width=600,
                     plot_height=400,
                     title='Iris Scatter Plot',
                     x_axis_label='Petal Length',
                     y_axis_label='Petal Width')

# Colour and marker are both mapped from the 'species' column.
# `legend_group` builds one legend entry per distinct species value;
# `legend_label='species'` would instead create a single entry literally
# labelled "species".
sub_scatter.scatter(x='petalLength',
                    y='petalWidth',
                    source=df_iris,
                    legend_group='species',
                    fill_alpha=0.5,
                    size=15,
                    color=factor_cmap(field_name='species', palette='Dark2_4', factors=species),
                    marker=factor_mark('species', markers, species))

sub_scatter.legend.location = "top_left"
output_file('/sub_scatter.html')
show(sub_scatter)
Bokeh Plot