Bokeh Basics I

4 minute read

Bokeh Basics - I

Bokeh is a Python library that enables users to create beautiful, dynamic, and interactive visualizations.

To learn more about Bokeh, visit its website.

Want to create visualizations? Let’s get started!

# import required libraries

from bokeh.io import output_notebook, show, reset_output, output_file

import bokeh
from bokeh.plotting import figure

import numpy as np
import pandas as pd

# import library for toy datasets
from vega_datasets import data as vds

Load Data

We need data to plot! Bokeh provides example datasets we can use.

# Bokeh ships the iris measurements in its sampledata package
from bokeh.sampledata import iris
# load iris dataset; NOTE: df_iris is rebound further down to the
# vega_datasets version, whose columns use camelCase names
df_iris = iris.flowers
# display first five rows in the df
df_iris.head()

	sepal_length	sepal_width	petal_length	petal_width	species
0	5.1	3.5	1.4	0.2	setosa
1	4.9	3.0	1.4	0.2	setosa
2	4.7	3.2	1.3	0.2	setosa
3	4.6	3.1	1.5	0.2	setosa
4	5.0	3.6	1.4	0.2	setosa

# To display the graphs/output inline we need to run `output_notebook()`:
#   - JupyterLab: once per notebook is enough
#   - Colab: in every cell that would return a graph
output_notebook()

Creating Plots

To create plots we must follow this workflow:

Create a figure.
Create a glyph/plot. We have several options: line, bar, scatter.
Show the plot.

Bokeh’s Data Structure

Bokeh uses the ColumnDataSource as its main data structure. The ColumnDataSource is a table-like data structure that maps string column names to sequences of data (columns). The ColumnDataSource is created automatically most of the time, but it can also be created explicitly by passing a pandas DataFrame to the class initializer:

data = ColumnDataSource(df)

# Create a ColumnDataSource explicitly from a dict that maps each column
# name to a list of values (all lists have the same length).

from bokeh.models import ColumnDataSource

# NOTE: despite its name, `df` holds a ColumnDataSource here, not a DataFrame.
df = ColumnDataSource({'A': [1, 2, 3, 4, 5],
                       'B': [5, 4, 3, 2, 1],
                       'C': [1, 3, 5, 1, 2]})
# `.data` exposes the column-name -> values mapping held by the source
df.data

{'A': [1, 2, 3, 4, 5], 'B': [5, 4, 3, 2, 1], 'C': [1, 3, 5, 1, 2]}

Create a Line Plot

We can create some random data to pass as our x and y values.

# Line plot of random values, with a hover tool attached

from bokeh.models import HoverTool

# toy data: ten x positions and one random value in [0, 1) for each
x_ax = np.arange(10)
y_ax = np.random.rand(10)

# NOTE(review): plot_width/plot_height were replaced by width/height in
# Bokeh 3.0 -- rename them when targeting a newer release.
line_plot = figure(
    title='Line Plot',
    x_axis_label='X',
    y_axis_label='Y',
    plot_width=600,
    plot_height=425,
)
line_plot.line(x=x_ax, y=y_ax, line_width=2, legend_label='line')

# add hover tool
line_plot.add_tools(HoverTool())

# NOTE(review): the leading '/' targets the filesystem root -- confirm intended.
output_file('/line_chart.html')

show(line_plot)

Bokeh Plot

Creating a multi-variable line plot

output_notebook()

# random data: one shared x axis and three independent series
x_multi = np.arange(10)
y1_multi = np.random.rand(10)
y2_multi = np.random.rand(10)
y3_multi = np.random.rand(10)

# create the figure, then draw one line per (values, colour, label) triple
# NOTE(review): plot_width/plot_height were replaced by width/height in
# Bokeh 3.0 -- rename them when targeting a newer release.
multi_var_plot = figure(toolbar_location='below', plot_width=600, plot_height=400)
line_specs = (
    (y1_multi, 'yellow', 'y1'),
    (y2_multi, 'blue', 'y2'),
    (y3_multi, 'red', 'y3'),
)
for series_values, series_color, series_label in line_specs:
    multi_var_plot.line(x_multi, series_values, color=series_color,
                        line_width=4, legend_label=series_label)
multi_var_plot.add_tools(HoverTool())

# NOTE(review): the leading '/' targets the filesystem root -- confirm intended.
output_file('/multiline_chart.html')

show(multi_var_plot)

Bokeh Plot

Creating Bar Charts

# create random data: five category labels and one random height per label

x_ax = ['cat1', 'cat2', 'cat3', 'cat4', 'cat5']
y_ax = np.random.rand(5) * 10

# sort data: order the categories from tallest to shortest bar. Pairing each
# label with its value avoids a list.index() lookup per element, which is
# quadratic and would pick the wrong value if a label were ever repeated.

sorted_cat = [cat for cat, _ in sorted(zip(x_ax, y_ax), key=lambda pair: pair[1], reverse=True)]

# Create instance of the bar chart; x_range receives the sorted labels so the
# bars are laid out in that order
# NOTE(review): plot_height was replaced by height in Bokeh 3.0 -- rename it
# when targeting a newer release.

bar_chart = figure(x_range=sorted_cat, title='Bar Chart', x_axis_label='X', y_axis_label='Y', plot_height=300)
# use vbar for vertical and hbar for horizontal
bar_chart.vbar(x_ax, top=y_ax, color='blue', width=0.4)
# start the value axis at zero
bar_chart.y_range.start = 0
bar_chart.add_tools(HoverTool())
# NOTE(review): the leading '/' targets the filesystem root -- confirm intended.
output_file('/bar_chart.html')
show(bar_chart)

Bokeh Plot

Stacked Bar Chart

# Stacked Bar Chart
# toy data: 'y' is the bar position, 'x1' and 'x2' are the stacked lengths
df_stacked = pd.DataFrame({'y': [1, 2, 3, 4, 5],
                           'x1': [1, 2, 4, 3, 4],
                           'x2': [1, 4, 2, 2, 3]})

# wrap the frame in a ColumnDataSource once and hand that to the glyph call,
# so the source that is built is the one actually plotted
df_CDS_stacked = ColumnDataSource(df_stacked)

# NOTE(review): plot_width/plot_height were replaced by width/height in
# Bokeh 3.0 -- rename them when targeting a newer release.
stacked_bar_chart = figure(plot_width=600, plot_height=300, title='Stacked Bar Chart')
# the listed columns are stacked left to right, one colour per column
stacked_bar_chart.hbar_stack(['x1', 'x2'],
                             y='y',
                             height=0.8,
                             color=('green', 'lightgreen'),
                             source=df_CDS_stacked
                             )
stacked_bar_chart.add_tools(HoverTool())
# NOTE(review): the leading '/' targets the filesystem root -- confirm intended.
output_file('/stacked_bar_chart.html')
show(stacked_bar_chart)

Bokeh Plot

Creating a Bar Chart Grouping Data

from bokeh.transform import dodge

# toy data: one row per category, one column per year
categories = ['category1', 'category2', 'category3']
df_grouped = pd.DataFrame({
    'categories': categories,
    '2018': [2, 1, 4],
    '2019': [5, 3, 3],
    '2020': [3, 2, 4],
})

# figure with a categorical x axis and a fixed 0-10 value axis
# NOTE(review): plot_height was replaced by height in Bokeh 3.0 -- rename it
# when targeting a newer release.
bar_grouped = figure(x_range=categories, y_range=(0, 10), plot_height=350)

# offset each year's bars sideways within its category slot
dodge1 = dodge('categories', -0.25, range=bar_grouped.x_range)
dodge2 = dodge('categories', 0.0, range=bar_grouped.x_range)
dodge3 = dodge('categories', 0.25, range=bar_grouped.x_range)

# one vbar call per year: (column / legend text, x offset, colour)
year_styles = (
    ('2018', dodge1, 'blue'),
    ('2019', dodge2, 'green'),
    ('2020', dodge3, 'red'),
)
for year, x_offset, bar_color in year_styles:
    bar_grouped.vbar(x=x_offset, top=year, width=0.2, source=df_grouped,
                     color=bar_color, legend_label=year)

# legend sits in the top-left corner with its entries laid out in a row
bar_grouped.legend.location = 'top_left'
bar_grouped.legend.orientation = 'horizontal'

bar_grouped.add_tools(HoverTool())
# NOTE(review): the leading '/' targets the filesystem root -- confirm intended.
output_file('/grouped_bar_chart.html')
show(bar_grouped)

Bokeh Plot

Stacked Area Chart

# dummy data: a shared 'x' column plus the two series to stack
df_area_stacked = pd.DataFrame({
    'x': [1, 2, 3, 4, 5],
    'y1': [1, 3, 1, 4, 5],
    'y2': [1, 2, 3, 4, 2],
})

# NOTE(review): plot_width/plot_height were replaced by width/height in
# Bokeh 3.0 -- rename them when targeting a newer release.
stacked_area_chart = figure(plot_width=600, plot_height=300)

# stack the listed columns in order, one colour per column
area_columns = ['y1', 'y2']
area_colors = ('coral', 'cadetblue')
stacked_area_chart.varea_stack(
    area_columns,
    x='x',
    color=area_colors,
    source=df_area_stacked,
)
show(stacked_area_chart)

Bokeh Plot

Scatter Plots

Load the car dataset from vega

# load the cars toy dataset through vega_datasets (imported as `vds`)
# NOTE(review): no plotting code for this dataset appears in this section
df_cars = vds.cars()
# preview the first five rows
df_cars.head()

	Name	Miles_per_Gallon	Cylinders	Displacement	Horsepower	Weight_in_lbs	Acceleration	Year	Origin
0	chevrolet chevelle malibu	18.0	8	307.0	130.0	3504	12.0	1970-01-01	USA
1	buick skylark 320	15.0	8	350.0	165.0	3693	11.5	1970-01-01	USA
2	plymouth satellite	18.0	8	318.0	150.0	3436	11.0	1970-01-01	USA
3	amc rebel sst	16.0	8	304.0	150.0	3433	12.0	1970-01-01	USA
4	ford torino	17.0	8	302.0	140.0	3449	10.5	1970-01-01	USA

Bokeh Plot

Scatter Plot for Categorical Data

We can use the famous iris dataset to plot categorical data. This dataset contains attributes about three different flower species: setosa, versicolor, virginica.

# load iris df from vega; this rebinds df_iris, and the vega version names
# its columns in camelCase (e.g. petalLength), which the scatter plot uses
df_iris = vds.iris()
# preview the first five rows
df_iris.head()

	sepalLength	sepalWidth	petalLength	petalWidth	species
0	5.1	3.5	1.4	0.2	setosa
1	4.9	3.0	1.4	0.2	setosa
2	4.7	3.2	1.3	0.2	setosa
3	4.6	3.1	1.5	0.2	setosa
4	5.0	3.6	1.4	0.2	setosa

from bokeh.transform import factor_cmap, factor_mark

# the three species in the dataset and the marker shape drawn for each
# (paired by position)
species = ['setosa', 'versicolor', 'virginica']
markers = ['hex', 'cross', 'triangle']

# NOTE(review): plot_width/plot_height were replaced by width/height in
# Bokeh 3.0 -- rename them when targeting a newer release.
sub_scatter = figure(plot_width=600,
                     plot_height=400,
                     title='Iris Scatter Plot',
                     x_axis_label='Petal Length',
                     y_axis_label='Petal Width')

# legend_group builds one legend entry per distinct value of the 'species'
# column; legend_label would instead add a single entry with the literal
# text 'species'.
# Colour and marker are both derived from the 'species' column.
sub_scatter.scatter(x='petalLength',
                    y='petalWidth',
                    source=df_iris,
                    legend_group='species',
                    fill_alpha=0.5,
                    size=15,
                    color=factor_cmap(field_name='species', palette='Dark2_4', factors=species),
                    marker=factor_mark('species', markers, species))

sub_scatter.legend.location = "top_left"
# NOTE(review): the leading '/' targets the filesystem root -- confirm intended.
output_file('/sub_scatter.html')
show(sub_scatter)

Bokeh Plot

Share on

Twitter Facebook Google+ LinkedIn

Gabe Maldonado

Bokeh Basics I

Bokeh Basics - I

Load Data

Creating Plots

Bokeh’s Data Structure

Create a Line Plot

Creating a multi-variable line plot

Creating Bar Charts

Stacked Bar Chart

Creating a Bar Chart Grouping Data

Stacked Area Chart

Scatter Plots

Scatter Plot for Categorical Data

Share on

You may also enjoy

Deployed Web Apps

Mapping Geodata with Folium

Working with Timeseries Data II

Working with Timeseries Data