from collections import OrderedDict
from bokeh.models import ColumnDataSource, HoverTool, Range1d, FactorRange
import bokeh.plotting as bk
import numpy as np
from math import pi
import pandas as pd
import datetime as dt
# Configure Bokeh so that later bk.show() calls render inline in the notebook.
bk.output_notebook()
Firstly, do some data processing on the Seattle file to prepare the content for the two charts.
# Load the raw incident records.  low_memory=False makes pandas parse the
# file in one pass, which avoids mixed-type column inference without having
# to declare dtypes explicitly.
data = pd.read_csv("data/seattle_incidents_summer_2014.csv",
                   low_memory=False)  # compromise in the lack of time

# Boolean row masks, one per summer month.
jun_filter = data["Month"] == 6
jul_filter = data["Month"] == 7
aug_filter = data["Month"] == 8

# prepare for the 1st plot:
# a histogram showing the monthly totals
# also histograms at a weekly and a daily level
total_jun = int(jun_filter.sum())
total_jul = int(jul_filter.sum())
total_aug = int(aug_filter.sum())

start_tstamp_strings = data["Occurred Date or Date Range Start"]
start_tstamps = [dt.datetime.strptime(ts_str, '%m/%d/%Y %I:%M:%S %p')
                 for ts_str in start_tstamp_strings]

# Day numbers are counted from 2014-05-31, so 1 June 2014 is day 1.
min_date = dt.datetime(2014, 5, 31)
start_days = [(tstamp - min_date).days for tstamp in start_tstamps]

# Incidents per day number.  value_counts() orders by count, not by day;
# consumers must pair the index and the values positionally.
daily_counts = pd.Series(start_days).value_counts()

# Incident sums per week.  (day + 5) // 7 maps day 1 to week 0 and days 2..8
# to week 1, and so on.  Floor division is required here: with true division
# (Python 3 "/") the keys become fractional and every day forms its own group.
weekly_counts = daily_counts.groupby((daily_counts.index + 5) // 7).sum()
# Drop the partial week 0 so that the week numbering starts at 1.
weekly_counts = weekly_counts[weekly_counts.index >= 1]
def date_from_daynr(daynrs):
    """Convert day numbers into datetimes.

    Each day number is an integer offset in days from the module-level
    ``min_date``.  Returns a list of midnight ``datetime`` objects, one per
    element of ``daynrs``, in the same order.
    """
    base_ordinal = min_date.toordinal()
    return [dt.datetime.fromordinal(base_ordinal + offset)
            for offset in daynrs]
def date_from_weeknr(weeknrs):
    """Convert week numbers into the datetime on which each week starts.

    Week number 0 maps to two days after the module-level ``min_date`` and
    every further week adds seven days.  Returns a list of midnight
    ``datetime`` objects, one per element of ``weeknrs``, in the same order.
    """
    first_week_ordinal = min_date.toordinal() + 2
    return [dt.datetime.fromordinal(first_week_ordinal + weeknr * 7)
            for weeknr in weeknrs]
# prepare for the 2nd plot
# NOTE(review): `grouped` is not used in the visible code; it is kept in case
# other cells rely on it.
grouped = data.groupby(data["Offense Type"])

# Average incidents per month for each offense type (three summer months).
mean_counts = data["Offense Type"].value_counts() / 3
# at this point the codes (index) are ordered by average monthly incidents
# join further details to this column without changing the order
counts_jun = data["Offense Type"][jun_filter].value_counts()
counts_jul = data["Offense Type"][jul_filter].value_counts()
counts_aug = data["Offense Type"][aug_filter].value_counts()

# pd.concat() no longer accepts join_axes (removed in pandas 1.0), so the
# outer-joined frame is reindexed to restore the mean_counts ordering.
# Passing keys= names the columns no matter what the input Series are called.
joint = pd.concat(
    [mean_counts, counts_jun, counts_jul, counts_aug],
    axis=1,
    keys=["c_mean", "c_jun", "c_jul", "c_aug"],
).reindex(mean_counts.index)

# Ranks 1..len(joint), rank 1 being the most frequent offense type.
n = len(joint) + 1
ranks = range(1, n)
types = joint.index
# construct the 1st plot
# "Summer 2014 crime breakdown in Seattle over time"
# Three layers of bars share one datetime axis: monthly, weekly and daily
# totals.  Each layer has its own data source so the hover tool can show the
# matching period label ("when") and incident count ("incidents").
month_source = ColumnDataSource(data=dict(
    when=["June", "July", "August"],
    incidents=[total_jun, total_jul, total_aug],
))

weekly_date_strs = ["Week {}".format(x)
                    for x in range(1, len(weekly_counts) + 1)]
weekly_source = ColumnDataSource(data=dict(when=weekly_date_strs,
                                           incidents=weekly_counts.values))

# Calendar dates of the daily bars, in the same order as daily_counts.
daily_dates = date_from_daynr(daily_counts.index)
daily_date_strs = [dd.strftime("%m/%d") for dd in daily_dates]
daily_source = ColumnDataSource(data=dict(when=daily_date_strs,
                                          incidents=daily_counts.values))

TOOLS = "save,hover"
p1 = bk.figure(width=900, height=600, tools=TOOLS, x_axis_type="datetime")
p1.xaxis.axis_label = "Date"
p1.yaxis.axis_label = "Incidents"

# monthly bars
p1.quad(top=[total_jun, total_jul, total_aug], bottom=0,
        left=date_from_daynr([1, 31, 62]), right=date_from_daynr([31, 62, 93]),
        fill_color=["#553311", "#A0A022", "#DDDD22"],
        color="#000000", fill_alpha=0.5, source=month_source,
        )

# weekly bars
p1.quad(top=weekly_counts, bottom=0,
        left=date_from_weeknr(range(0, 13)),
        right=date_from_weeknr(range(1, 14)),
        color="#000000", fill_alpha=0.5, fill_color="#992299",
        source=weekly_source
        )

# daily bars; Sundays (weekday() == 6) get a lighter fill colour
daily_weekdays = [pd.Timestamp(x).weekday() for x in daily_dates]
Sunday_index = [pos for pos, weekday in enumerate(daily_weekdays)
                if weekday == 6]
daily_fill_colors = ["#77BBFF" if weekday == 6 else "#3377CC"
                     for weekday in daily_weekdays]
p1.quad(top=daily_counts.values, bottom=0,
        left=daily_dates,
        right=date_from_daynr(daily_counts.index + 1),
        fill_color=daily_fill_colors, fill_alpha=0.7, color="#111111",
        source=daily_source)

# Tooltip fields refer to the "when" / "incidents" columns of the sources.
hover = p1.select(dict(type=HoverTool))
hover.tooltips = OrderedDict([
    ("Period", "@when"),
    ("Incidents", "@incidents")
])
# construct the 2nd plot
# ("Summer 2014 crime breakdown in Seattle by offense & month")
# Axis labels of the form "<offense type> - #<rank>", rank 1 first.
item_ranks = ["{} - #{}".format(offense, rank)
              for rank, offense in enumerate(types, 1)]
# Reverse the order, then drop the first 100 entries of the reversed lists
# (i.e. the 100 lowest-ranked offense types).
item_ranks = item_ranks[::-1][100:]
rev_ranks = list(ranks)[::-1][100:]

TOOLS = "pan,wheel_zoom,reset,save,crosshair"
p2 = bk.figure(tools=TOOLS, plot_height=700, plot_width=900,
               y_range=item_ranks, x_range=(0, 3000))
p2.grid.grid_line_color = None
def plot_incidents(index, means, counts, legend_text, color):
    """Draw one month's horizontal bars onto the module-level figure ``p2``.

    For every position in the module-level ``rev_ranks`` three quads are
    drawn on a bar of height 0.25:

    * from 0 to ``min(count, mean)`` in ``color`` (legend ``legend_text``),
    * from ``min(count, mean)`` to ``mean`` in green ("Below usual"),
    * from ``mean`` to ``max(count, mean)`` in red ("Above usual").

    index       -- bar slot within a rank; shifts the bar by 0.25 - index / 4
    means       -- sequence of average monthly counts
    counts      -- sequence of the month's counts, paired with ``means``
    legend_text -- legend label of the base bar
    color       -- fill colour of the base bar
    """
    shift = 0.25 - float(index) / 4
    bar_tops = [rank + shift for rank in rev_ranks]
    bar_bottoms = [top - 0.25 for top in bar_tops]

    # (count, mean) pairs; the argument order is kept for min() / max().
    pairs = list(zip(counts, means))
    upper = [max(count, mean) for count, mean in pairs]
    lower = [min(count, mean) for count, mean in pairs]

    # the part exceeding the average
    p2.quad(left=means, right=upper,
            top=bar_tops, bottom=bar_bottoms,
            color="#EE2222", alpha=0.7, legend="Above usual")
    # the part missing to reach the average
    p2.quad(left=lower, right=means,
            top=bar_tops, bottom=bar_bottoms,
            color="#22EE22", alpha=0.7, legend="Below usual")
    # the base bar in the month's colour
    p2.quad(left=0, right=lower,
            top=bar_tops, bottom=bar_bottoms,
            color=color, alpha=0.9,
            legend=legend_text)
# One bar slot per month: (column of `joint`, legend label, bar colour).
monthly_layers = [
    ("c_jun", "June incidents", "#553311"),
    ("c_jul", "July incidents", "#A0A022"),
    ("c_aug", "August incidents", "#DDDD22"),
]
for slot, (column, label, bar_color) in enumerate(monthly_layers):
    plot_incidents(slot, joint.c_mean, joint[column], label, bar_color)
Apparently, August was the most peaceful month. Sundays are highlighted in light blue. The incident counts seem to be below the average on these days. August was also the warmest month - maybe the heat made it less appealing to go 'working'...
(Reference: https://weatherspark.com/history/29735/2014/Seattle-Washington-United-States)
# Display the 1st plot (monthly, weekly and daily incident totals over time).
bk.show(p1)
A small number of crime types account for the majority of the overall incidents. The top performers are shown below.
Vehicle, car and other thefts seem to be the most typical source of crime.
Automobile thefts decrease, bicycle thefts increase.
The three months do not seem to really significantly differ in the crime statistics breakdown, except for these two.
The change in weather referred to in the previous point may explain the increase in the bicycle thefts - probably more people choose to ride to work or go on holidays when the weather is nice - which is likely to mean less frequent use of cars. The more bicycles leave the garage - the more to steal. Should they replace cars as the favourite means of commute, the reduction in the auto thefts can also be explained, but unveiling the proper mechanisms requires more investigation. It is still a good idea to watch out for our bikes when the weather is tempting!
# Display the 2nd plot (per-offense monthly counts against the average).
bk.show(p2)