https://data.sfgov.org/Public-Safety/Police-Department-Calls-for-Service/hz9m-tj6z/data
This tutorial uses a subset of this data
The Calls for Service were filtered as follows: CONTAINS homeless, 915, 919, 920: Downloaded 157,237 records 3/31/16 to 11/30/2019. This is 5.1% of all calls in the broader database. File renamed to: SF_311_Jan29.xlsx
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.4
## ✓ tidyr 1.0.0 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
Reload Data
SF <- rio::import("https://github.com/profrobwells/HomelessSP2020/blob/master/Data/SF_311_Jan29.xlsx?raw=true", which = "SF Police_Department_Calls_for_")
SF <- janitor::clean_names(SF)
#Process dates
SF$call_date2 <- ymd(SF$call_date)
SF$year <- year(SF$call_date2)
Days <- SF %>%
count(call_date2) %>%
group_by(call_date2) %>%
arrange(desc(n))
Types <- SF %>% count(original_crime_type_name) %>%
group_by(original_crime_type_name) %>%
arrange(desc(n))
Years <- SF %>%
count(year) %>%
group_by(year) %>%
arrange(desc(year))
Action <- SF %>%
count(disposition) %>%
arrange(desc(n))
nrow(SF) [1] 157237 > ncol(SF) [1] 14
Process dates, check file types
str(SF)
## 'data.frame': 157237 obs. of 16 variables:
## $ crime_id : num 1.9e+08 1.9e+08 1.9e+08 1.9e+08 1.9e+08 ...
## $ original_crime_type_name: chr "919" "Homeless Complaint" "Homeless Complaint" "915" ...
## $ report_date : POSIXct, format: "2019-01-04" "2019-01-20" ...
## $ call_date : POSIXct, format: "2019-01-04" "2019-01-20" ...
## $ offense_date : POSIXct, format: "2019-01-04" "2019-01-20" ...
## $ call_time : POSIXct, format: "1899-12-31 06:58:00" "1899-12-31 06:19:00" ...
## $ call_date_time : POSIXct, format: "2019-01-04 06:58:00" "2019-01-20 06:19:00" ...
## $ disposition : chr "HAN" "HAN" "ADV" "HAN" ...
## $ address : chr "400 Block Of Jones St" "8th And Market" "Mission St/24th St" "Alabama St/23rd St" ...
## $ city : chr "San Francisco" NA "San Francisco" "San Francisco" ...
## $ state : chr "CA" "CA" "CA" "CA" ...
## $ agency_id : num 1 1 1 1 1 1 1 1 1 1 ...
## $ address_type : chr "Premise Address" "Geo-Override" "Intersection" "Intersection" ...
## $ common_location : chr NA NA NA NA ...
## $ call_date2 : Date, format: "2019-01-04" "2019-01-20" ...
## $ year : num 2019 2019 2019 2019 2019 ...
Examine how we have created a new date and year column and how they are formatted differently than the rest We can now perform date and year calculations Create Days Table
summary(Days)
## call_date2 n
## Min. :2016-03-31 Min. : 10.0
## 1st Qu.:2017-02-28 1st Qu.: 86.0
## Median :2018-01-29 Median :119.0
## Mean :2018-01-29 Mean :117.3
## 3rd Qu.:2018-12-30 3rd Qu.:148.0
## Max. :2019-11-30 Max. :232.0
Between March 31, 2016 and Nov. 30, 2019, San Francisco residents placed an average 117 calls to police complaining about homeless people.
Days %>%
filter(n == 232)
## # A tibble: 1 x 2
## # Groups: call_date2 [1]
## call_date2 n
## <date> <int>
## 1 2019-08-15 232
Days %>%
filter(n == 10)
## # A tibble: 1 x 2
## # Groups: call_date2 [1]
## call_date2 n
## <date> <int>
## 1 2016-03-31 10
Examine the types of events
Types <- SF %>% count(original_crime_type_name) %>%
group_by(original_crime_type_name) %>%
arrange(desc(n))
Types <- SF %>% count(original_crime_type_name) %>%
group_by(original_crime_type_name) %>%
arrange(desc(n))
Create separate table with just the top five counties’ crime rate: dplyr has a “top_n” function that i find handy
Types <- SF %>%
count(original_crime_type_name) %>%
top_n(5, n) %>%
arrange(desc(n))
Export a table into a spreadsheet (csv is a comma separated file)
write.csv(Days,"Days.csv")
Build a table totalling the number of complaints by year
Years <- SF %>%
count(year) %>%
group_by(year) %>%
arrange(desc(year))
Look at the Radio Codes spreadsheet under dispositions
Total by disposition
Action <- SF %>%
count(disposition) %>%
arrange(desc(n))
Ceate a table with serious infractions described in disposition
Example: Here’s a table filtering the dispositions column to show “no disposition” or “gone on arrival”
Nothing <- SF %>%
filter(disposition == "ND" | disposition == "GOA")
Arrest, Cited, Criminal Activation, SF Fire Dept Medical Staff engaged
Busted <- SF %>%
filter(disposition == "ARR" | disposition == "CIT" | disposition == "CRM" | disposition == "SFD") %>%
count(disposition) %>%
arrange(desc(n))
ggplot(Years, aes(x = year, y = n)) +
geom_bar(stat = "identity") +
#coord_flip() + #this makes it a horizontal bar chart instead of vertical
labs(title = "Homeless Calls Per Year, San Francisco",
subtitle = "SF PD Service Call Data, 3/2016-11/2019",
caption = "Graphic by Wells",
y="Number of Calls",
x="Year")
A chart using a dplyr filtering language
Years %>%
filter(year >= 2017) %>%
ggplot(aes(x = year, y = n)) +
geom_bar(stat = "identity") +
#coord_flip() + #this makes it a horizontal bar chart instead of vertical
labs(title = "Homeless Calls After 2017, San Francisco",
subtitle = "SF PD Service Call Data, 2017-2019",
caption = "Graphic by Wells",
y="Number of Calls",
x="Year")
A more complex filter
SF %>%
filter(!is.na(common_location)) %>%
count(common_location) %>%
top_n(10, n) %>%
ggplot(aes(x = common_location, y = n)) +
geom_bar(stat = "identity") +
coord_flip() + #this makes it a horizontal bar chart instead of vertical
labs(title = "Popular Spots for Homeless, San Francisco",
subtitle = "SF PD Service Call Data, 2016-2019",
caption = "Graphic by Wells",
y="Number of Calls",
x="Places")
Filter for at least 100 actions. Add color, export image to Blackboard.
Action %>%
filter(n > 100) %>%
ggplot(aes(x = reorder(disposition, n), y = n, fill=n)) +
geom_bar(stat = "identity", show.legend = FALSE) +
coord_flip() + #this makes it a horizontal bar chart instead of vertical
labs(title = "Action on Homeless Calls, San Francisco",
subtitle = "SF PD Service Call Data, 3/2016-11/2019",
caption = "Graphic by Wells",
y="Number of Calls",
x="Action")
Making our charts less ugly
The disposition column is in cop-speak. We need to clean it up
Step #1: Duplicate the column you want to mess with
SF$disposition1 <- SF$disposition
Rename specific strings. Example:
str_replace_all(test.vector, pattern=fixed(‘-’), replacement=fixed(‘:’) )
Details on string manipulation:
https://dereksonderegger.github.io/570L/13-string-manipulation.html
We can do this to replace ABA with “Abated”
SF$disposition1 <- str_replace_all(SF$disposition1, pattern=fixed('ABA'), replacement=fixed('Abated') )
#Again with ADM
SF$disposition1 <- str_replace_all(SF$disposition1, pattern=fixed('ADM'), replacement=fixed('Admonished') )
We can do that 19 times. OR….
Look at this example using a lookup table to replace all the values
https://stackoverflow.com/questions/50615116/renaming-character-variables-in-a-column-in-data-frame-r
Build a table to translate the Cop Speak to English:
dispo_lkup <- c(ABA="Abated", ADM="Admonish", ADV="Advised", ARR="Arrest", CAN="Cancel", CSA="CPSA",
CIT="Cited", CRM="Criminal", GOA="Gone", HAN="Handled", NCR="No_Criminal", ND="No_Dispo",
NOM="No_Merit", PAS="PlaceSecure", REP="Report", SFD="Medical", UTL="Unfound", VAS="Vehicle_Secure", '22'="Cancel")
#22="Cancel" was handled differently because it is a numeric value: '22'="Cancel"
#This scans "disposition", finds ABA and replaces with Abated, finds ARR, replaces with Arrest, etc
SF$disposition1 <- as.character(dispo_lkup[SF$disposition])
Rerun Action with disposition1
Action <- SF %>%
count(disposition1) %>%
arrange(desc(n))
Compare our renamed variables to the original disposition
Action <- SF %>%
count(disposition1, disposition) %>%
arrange(desc(n))
We have codes not listed on the sheet
NA Not recorded 4339
Get rid of the space
SF$disposition <- gsub("Not recorded", "Not_Recorded", SF$disposition)
Add to the list
dispo_lkup <- c(ABA="Abated", ADM="Admonish", ADV="Advised", ARR="Arrest", CAN="Cancel", CSA="CPSA",
CIT="Cited", CRM="Criminal", GOA="Gone", HAN="Handled", NCR="No_Criminal", ND="No_Dispo",
NOM="No_Merit", PAS="PlaceSecure", REP="Report", SFD="Medical", UTL="Unfound",
VAS="Vehicle_Secure", '22'="Cancel", Not_Recorded="NotRecorded")
Rerun
Action <- SF %>%
count(disposition1) %>%
arrange(desc(n))
Chart Dispositions
Action %>%
filter(n > 100) %>%
ggplot(aes(x = reorder(disposition1, n), y = n, fill=n)) +
geom_bar(stat = "identity", show.legend = FALSE) +
coord_flip() + #this makes it a horizontal bar chart instead of vertical
labs(title = "Action on Homeless Calls, San Francisco",
subtitle = "SF PD Service Call Data, 3/2016-11/2019",
caption = "Graphic by Wells",
y="Number of Calls",
x="Action")
- Parse out police codes from narrative: original_crime_type_name Look at the Types table: some columns have one code, some have two. 919 2879 915 Sleeper 290
Some are separated by a slash 915/919 161
We need to unpack that - Cleaning Sequence
#convert all text to lowercase
SF$crime1 <- tolower(SF$original_crime_type_name)
#Replace / with a space
SF$crime1 <- gsub("/", " ", SF$crime1)
#Replace '
SF$crime1 <- gsub("'", "", SF$crime1)
#fix space in homeless complaint
SF$crime1 <- gsub("homeless complaint", "homeless_complaint", SF$crime1)
#split data into two columns
SF <- separate(data = SF, col = crime1, into = c("crime2", "crime3", "crime4"), sep = " ", extra = "merge", fill = "right")
Look at the categories now
Types2 <- SF %>% count(crime2) %>%
group_by(crime2) %>%
arrange(desc(n))
clean <- c(homeless_complaint="homeless_complaint", '915'="homeless_call", '919'="sit_lying", '920'="aggress_solicit", '915s'="homeless_call", '915x'="homeless_call", drugs="drugs", '601'="trespasser",
poss="poss", aggressive="aggressive", '811'="intoxicated")
SF$crime2 <- as.character(clean[SF$crime2])
Look at the categories now
Types2 <- SF %>% count(crime2) %>%
group_by(crime2) %>%
arrange(desc(n))
Basic chart but with a messed up x axis
Types2 %>%
ggplot(aes(x = crime2, y = n, fill=n)) +
geom_bar(stat = "identity") +
coord_flip() + #this makes it a horizontal bar chart instead of vertical
labs(title = "Top 10 Homeless Complaints, San Francisco",
subtitle = "SF PD Service Call Data, 3/2016-11/2019",
caption = "Graphic by Wells",
y="Number of Calls",
x="Complaint")
Chart with a fixed x axis scale; No values filtered out; Labels added to bars
Types2 %>%
filter(!is.na(crime2)) %>%
#filter(crime2!=" ") %>% - a crude alternative to previous line!
ggplot(aes(x = reorder(crime2, n), y = n, fill=n)) + #reorder sorts the bars
geom_bar(stat = "identity", show.legend = FALSE) +
geom_text(aes(label = n), hjust = -.1, size = 3) +
scale_y_continuous(limits=c(0, 175000)) + #fixes scientific notation
coord_flip() + #this makes it a horizontal bar chart instead of vertical
labs(title = "Top 10 Homeless Complaints, San Francisco",
subtitle = "SF PD Service Call Data, 3/2016-11/2019",
caption = "Graphic by Wells",
y="Number of Calls",
x="Complaint")
mutate - Create new column(s) in the data, or change existing column(s).
mutate() adds new variables and preserves existing
Example: mtcars <- as.data.frame(mtcars) View(mtcars)
mtcars2 <- mtcars %>% as_tibble() %>% mutate( cyl2 = cyl * 2, cyl4 = cyl2 * 2 )
Process dates using lubidate
SF <- SF %>%
mutate(yearmo = format(call_date, "%Y-%m"))
Chart the number of calls by year and month
SF %>%
count(yearmo) %>%
group_by(yearmo) %>%
ggplot(aes(x = yearmo, y = n, fill=n)) +
geom_bar(stat = "identity") +
theme(axis.text.x = element_text(angle=90)) +
#Changes angle of x axis labels
#coord_flip() + #this makes it a horizontal bar chart instead of vertical
labs(title = "Homeless Calls After 2017, San Francisco",
subtitle = "SF PD Service Call Data by Month 2017-2019",
caption = "Graphic by Wells",
y="Number of Calls",
x="Year")
Percentage change per month
PCT_CHG_CALLS <- SF %>%
select(original_crime_type_name, disposition, address, call_date2, yearmo) %>%
count(yearmo) %>%
mutate(difference = (n-lag(n))) %>%
mutate(pct_change = (difference/abs(lag(n)))*100)
grep and grepl: see ??grep
http://www.endmemo.com/program/R/grepl.php
Cleaning Sequence
#convert all text to lowercase
SF$crime1 <- tolower(SF$original_crime_type_name)
x915 <- SF %>%
filter(grepl ("915", original_crime_type_name)) %>%
mutate(cleaned = "homeless_complaint")
x919 <- SF %>%
filter(grepl ("919", original_crime_type_name)) %>%
mutate(cleaned = "sitting_lying")
xsleep <- SF %>%
filter(grepl ("sleep", original_crime_type_name)) %>%
mutate(cleaned = "sleep")
xaggr <- SF %>%
filter(grepl ("aggr", original_crime_type_name)) %>%
mutate(cleaned = "aggressive")
xdrug <- SF %>%
filter(grepl ("drug", original_crime_type_name)) %>%
mutate(cleaned = "drug")
xhomeless <- SF %>%
filter(grepl ("homeless_complaint", crime2)) %>%
mutate(cleaned = "homeless_complaint")
#Moe, Brooke's Work:
xnoise <- SF %>%
filter(grepl ("415", original_crime_type_name)) %>%
mutate(cleaned = "noise")
xposs <- SF %>%
filter(grepl ("poss", original_crime_type_name)) %>%
mutate(cleaned = "possession")
xtrespasser <- SF %>%
filter(grepl ("601", original_crime_type_name)) %>%
mutate(cleaned = "trespasser")
xsolicit <- SF %>%
filter(grepl ("920", original_crime_type_name)) %>%
mutate(cleaned = "solicit")
xinterview <- SF %>%
filter(grepl ("909", original_crime_type_name)) %>%
mutate(cleaned = "interview")
xtent <- SF %>%
filter(grepl ("tent", crime1)) %>%
mutate(cleaned="tent")
xdog <- SF %>%
filter(grepl ("dog", crime1)) %>%
mutate(cleaned="dog")
xchopshop <- SF %>%
filter(grepl ("chop shop", crime1)) %>%
mutate(cleaned="chopshop")
xpanhandling <- SF %>%
filter(grepl ("panhandling", crime1)) %>%
mutate(cleaned="panhandling")
xmusic <- SF %>%
filter(grepl ("music", crime1)) %>%
mutate(cleaned="music")
Create new dataframe using rbind
new_total <- rbind(xhomeless, x915, x919, xaggr, xdrug, xsleep, xnoise, xposs, xtrespasser, xsolicit, xinterview, xtent, xdog,
xchopshop, xpanhandling, xmusic)
Count it up!
Total_Calls_Master <- new_total %>%
count(cleaned) %>%
arrange(desc(n))
#rename columns
colnames(Total_Calls_Master)[1:2] <- c("Complaints", "Number")
#export
write_csv(Total_Calls_Master, "Total_Calls_Master.csv")
Make into html table
#install.packages("kableExtra")
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
#This makes html tables called "kables"
Total_Calls_Master %>%
kable() %>%
kable_styling("striped")
Complaints | Number |
---|---|
homeless_complaint | 153895 |
sitting_lying | 3282 |
solicit | 262 |
trespasser | 100 |
sleep | 77 |
interview | 75 |
drug | 52 |
noise | 27 |
aggressive | 18 |
dog | 12 |
tent | 9 |
music | 8 |
chopshop | 7 |
panhandling | 5 |
possession | 5 |
Export from Viewer as .png
SF <- SF %>%
mutate(weekday = wday(call_date, label=TRUE, abbr=FALSE))
Build a summary table with the days of the week with the greatest number of calls. Create a graphic. Then build a table to see if the complaints vary by day
Below from Matthew Moore, Katy Seiter, Wells edited
SF <- SF %>%
mutate(weekday = wday(call_date, label=TRUE, abbr=FALSE))
Weekday_Count <- SF %>%
select(weekday, crime_id) %>%
count(weekday) %>%
arrange(desc(n))
Graphic of calls by weekdays
Weekday_Count %>%
ggplot(aes(x = weekday, y = n, fill=n)) +
geom_bar(stat = "identity", show.legend = FALSE) +
theme(axis.text.x = element_text(angle=90)) +
#Changes angle of x axis labels
#coord_flip() + #this makes it a horizontal bar chart instead of vertical
labs(title = "Homeless Calls By Weekday in San Francisco",
subtitle = "SF PD Service Call Data 2017-2019",
caption = "Graphic by Moore and Seiter",
y="Number of Calls",
x="Weekday")
Create a Bubble graphic
ggplot(data = Weekday_Count) +
geom_point(mapping = aes(x = weekday, y = n, size = n, color = n), show.legend = FALSE) +
theme(axis.text.x = element_text(angle=90)) +
labs(title = "Homeless By Weekday in San Francisco",
subtitle = "SF PD Service Call Data 2017-2019: Source: SFPD",
caption = "Graphic by Moore and Seiter",
y="Number of Calls",
x="Weekday")
Improved bubble chart
ggplot(Weekday_Count, aes(x = weekday, y = n)) +
xlab("Weekday") +
ylab("Number of Calls") +
theme_minimal(base_size = 12, base_family = "Georgia") +
geom_point(aes(size = n, color = n), alpha = 0.7, show.legend = FALSE) +
scale_size_area(guide = FALSE, max_size = 15) +
labs(title = "Homeless By Weekday in San Francisco",
subtitle = "SF PD Service Call Data 2017-2019: Source: SFPD",
caption = "Graphic by Moore and Seiter")
- Task #3: Calls vs Dispositions
What calls resulted in arrests? What calls resulted in citations?
Action2 <- SF %>%
select(crime_id, original_crime_type_name, disposition)
We need to pair the crime type and disposition and then count them
From Michael Adkison:
callsarrest <- Action2 %>%
filter(grepl("ARR", disposition)) %>%
mutate(cleaned = "Arrest")
To quickly format into percents, load formattable
#install.packages("formattable")
library(formattable)
callsarrest2 <- callsarrest %>%
arrange(original_crime_type_name, disposition) %>%
count(original_crime_type_name) %>%
#mutate(PctTotal = (n/441)) %>%
arrange(desc(n))
colnames(callsarrest2)[1:2] <- c("Complaints", "Arrests")
Build a table to translate the Cop Speak to English:
clean <- c('Homeless Complaint'="homeless_complaint", homeless_complaint="homeless_complaint", '915'="homeless_complaint",
'919'="Sit_lying", '920'="Aggress_solicit", '915s'="homeless_complaint", '915x'="homeless_complaint",
drugs="drugs", '601'="trespasser", poss="poss", aggressive="aggressive", '811'="intoxicated",
'Drugs / 915'="Drugs", 'Drugs/915'="Drugs")
This scans “disposition”, finds ABA and replaces with Abated, finds ARR, replaces with Arrest, etc callsarrest2\(Complaints <- as.character(clean[callsarrest2\)Complaints])
callsarrest3 <- callsarrest2 %>%
select(Complaints, Arrests) %>%
group_by(Complaints) %>%
summarise(total = sum(Arrests)) %>%
mutate(PctTotal = (total/441)) %>%
arrange(desc(total))
colnames(callsarrest3)[2] <- "Arrests"
callsarrest3$PctTotal <- percent(callsarrest3$PctTotal)
#This makes kables
callsarrest3 %>%
kable() %>%
kable_styling("striped")
Complaints | Arrests | PctTotal |
---|---|---|
Homeless Complaint | 412 | 93.42% |
915 | 19 | 4.31% |
919 | 4 | 0.91% |
920 | 2 | 0.45% |
601 / 915 | 1 | 0.23% |
915 / 909 | 1 | 0.23% |
Drugs / 915 | 1 | 0.23% |
Drugs/915 | 1 | 0.23% |
SF %>%
select(weekday, crime_id, disposition) %>%
filter(grepl("ARR", disposition)) %>%
count(weekday)
## # A tibble: 7 x 2
## weekday n
## <ord> <int>
## 1 Sunday 47
## 2 Monday 63
## 3 Tuesday 79
## 4 Wednesday 77
## 5 Thursday 74
## 6 Friday 56
## 7 Saturday 45
Make bubble chart
SF %>%
select(weekday, crime_id, disposition) %>%
filter(grepl("ARR", disposition)) %>%
count(weekday) %>%
ggplot(aes(x = weekday, y = n)) +
xlab("Weekday") +
ylab("Arrests") +
theme_minimal(base_size = 12, base_family = "Georgia") +
geom_point(aes(size = n, color = n), alpha = 0.7, show.legend = FALSE) +
scale_size_area(guide = FALSE, max_size = 15) +
labs(title = "Homeless Arrests By Weekday in San Francisco",
subtitle = "SF PD Service Call Data 2017-2019: Source: SFPD",
caption = "Graphic by Wells")
SF %>%
filter(grepl("ARR", disposition)) %>%
count(yearmo) %>%
group_by(yearmo) %>%
ggplot(aes(x = yearmo, y = n, fill=n)) +
geom_bar(stat = "identity", show.legend = FALSE) +
geom_smooth(method = lm, se=FALSE, color = "red") +
theme(axis.text.x = element_text(angle=90)) +
#Changes angle of x axis labels
#coord_flip() + #this makes it a horizontal bar chart instead of vertical
labs(title = "Arrest Trends on Homeless Calls in San Francisco",
subtitle = "Arrests Based on SF PD Service Call Data by Month 2017-2019",
caption = "Graphic by Wells",
y="Number of Calls",
x="Year")
#format to hours
SF$hour <- hour(SF$call_date_time)
SF %>%
count(hour) %>%
group_by(hour) %>%
ggplot(aes(x = hour, y = n, fill=n)) +
geom_bar(stat = "identity", show.legend = FALSE) +
theme(axis.text.x = element_text(angle=90)) +
#Changes angle of x axis labels
#coord_flip() + #this makes it a horizontal bar chart instead of vertical
labs(title = "Hours of Homeless Calls, San Francisco",
subtitle = "SF PD Service Call Data by Month 2017-2019",
caption = "Graphic by Wells",
y="Number of Calls",
x="Hour")
- Question: Examine some of the charting options on this tutorial and adapt them to this data using any chart you want # https://paldhous.github.io/wcsj/2017/