
# Portland Crime Data -----------------------------------------------------

## This code takes in the full Portland crime incidence data set, "Portland_Open_Data_Sheet_data", and copies its OpenDataLat/OpenDataLon columns into numeric latitude (lat) and longitude (lon) columns. It then subsets the data into two data sets, one containing the location and time of homicide incidents and the other containing the location and time of motor vehicle thefts, the two crimes we want to consider within this thesis. Each subset is restricted to points inside the Portland city boundary and saved both as a plain data frame and as an sf object.
# Note: here we load the Portland data as originally accessed, so the code that reads in the data will differ depending on the user's download and file name. Additionally, because the data have been updated since they were originally accessed, column names and types may differ and will need to be taken into account in the code below.

# Author: Nadeen Khaleel


# Setwd and Load Libraries ------------------------------------------------

library("rstudioapi")
# Either setwd() to the source file location, or run the following.
# The RStudio API is only usable inside a running RStudio session, and the
# active document only has a path once it has been saved, so both are checked
# before changing directory; otherwise the working directory is left as-is and
# must already be the source file location for the relative paths below.
if (rstudioapi::isAvailable()) {
  script_path <- rstudioapi::getActiveDocumentContext()$path
  if (nzchar(script_path)) {
    setwd(dirname(script_path))
  } else {
    message("Active document has no saved path; working directory unchanged.")
  }
} else {
  message("RStudio not available; working directory unchanged.")
}

library(readr)
library(ggplot2)
library(ggmap)
library(dplyr)
library(stringr)
library(revgeo)
library(sp)
library(sf)
library(lwgeom)

# Load in Crime Data ------------------------------------------------------

# Read the raw Portland incident export.
PPD <- read_csv("Portland_Open_Data_Sheet_data.csv")

# Column names may differ between downloads of the data. Fail loudly here
# rather than silently carrying an empty data set through the rest of the
# script when the coordinate columns are not found.
coord_cols <- c("OpenDataLat", "OpenDataLon")
missing_cols <- setdiff(coord_cols, names(PPD))
if (length(missing_cols) > 0) {
  stop(
    "Expected coordinate column(s) not found in the crime data: ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Copy the coordinates into the `lat`/`lon` columns used below.
PPD$lat <- PPD$OpenDataLat
PPD$lon <- PPD$OpenDataLon

# Remove data without location information (either coordinate missing)
PPD <- PPD[!is.na(PPD$lat) & !is.na(PPD$lon), ]


# Data Manipulation -------------------------------------------------------

# Split the incidents by offence type. Each pattern is matched against the
# distinct values of `Offense Category`; every row whose category is one of
# the matched labels is then kept for that crime.
offense_levels <- unique(PPD$`Offense Category`)
hom_sub <- grep("Homicide Offenses", offense_levels, value = TRUE)
gta_sub <- grep("Motor Vehicle Theft", offense_levels, value = TRUE)

is_hom <- PPD$`Offense Category` %in% hom_sub
is_gta <- PPD$`Offense Category` %in% gta_sub

p_hom <- PPD[is_hom, ]
p_gta <- PPD[is_gta, ]

# Derive the time variables for each subset from `Occur Date`:
#   DT_OCC     - occurrence date as a Date
#   DT_OCC_NUM - the same date as a number (days since 1970-01-01)
#   MY         - year-month label, "YYYY-MM"
#   Y          - year label, "YYYY"
# NOTE(review): the format string assumes day/month/year text; confirm this
# matches the downloaded file, as other layouts would not parse as intended.

# Homicides
p_hom$DT_OCC <- as.Date(p_hom$`Occur Date`, format = "%d/%m/%Y")
p_hom$DT_OCC_NUM <- as.numeric(p_hom$DT_OCC)
p_hom$MY <- format(p_hom$DT_OCC, "%Y-%m")
p_hom$Y <- format(p_hom$DT_OCC, "%Y")

# Motor vehicle thefts
p_gta$DT_OCC <- as.Date(p_gta$`Occur Date`, format = "%d/%m/%Y")
p_gta$DT_OCC_NUM <- as.numeric(p_gta$DT_OCC)
p_gta$MY <- format(p_gta$DT_OCC, "%Y-%m")
p_gta$Y <- format(p_gta$DT_OCC, "%Y")

# Remove data without location information that weren't caught before:
# a latitude or longitude of exactly 0 is treated as a missing location.
# (Rows with NA coordinates were already dropped when the data were loaded.)
hom_has_coords <- p_hom$lat != 0 & p_hom$lon != 0
p_hom <- p_hom[hom_has_coords, ]

gta_has_coords <- p_gta$lat != 0 & p_gta$lon != 0
p_gta <- p_gta[gta_has_coords, ]


# Retain points only within the city
# Read the city boundaries shapefile and keep the feature(s) for Portland.
or_boundary <- st_read("../SHAPEFILES/BOUNDARIES/City_Boundaries_Portland/City_Boundaries.shp")

# `%in%` rather than `==` so that features with a missing CITYNAME are simply
# excluded instead of yielding NA row indices.
p_boundary <- or_boundary[or_boundary$CITYNAME %in% "Portland", ]
if (nrow(p_boundary) == 0) {
  stop("No feature with CITYNAME == \"Portland\" found in the boundary shapefile.",
       call. = FALSE)
}

# Project the boundary to EPSG:32610 (WGS 84 / UTM zone 10N).
# p_boundary.proj <- lwgeom::st_transform_proj(p_boundary,"+init=epsg:32610") # this was the original run version, however now the inclusion of +init is no longer in use
p_boundary.proj <- lwgeom::st_transform_proj(p_boundary, "epsg:32610")

# Convert the crime records to sf point objects, using `lon`/`lat` as the
# coordinates and the CRS of the boundary layer.
# NOTE(review): this assumes the boundary shapefile is stored in the same
# geographic (lon/lat) CRS as the crime coordinates - confirm, since a
# projected boundary CRS would mislabel the points.
p_hom_sf <- st_as_sf(p_hom, coords = c("lon", "lat"), crs = st_crs(p_boundary))
p_gta_sf <- st_as_sf(p_gta, coords = c("lon", "lat"), crs = st_crs(p_boundary))

# Project the points to the same CRS as the projected boundary.
# p_hom_sf.proj <- lwgeom::st_transform_proj(p_hom_sf,"+init=epsg:32610")
# p_gta_sf.proj <- lwgeom::st_transform_proj(p_gta_sf,"+init=epsg:32610")
# (the +init= form above was the original run version and is no longer in use)
p_hom_sf.proj <- lwgeom::st_transform_proj(p_hom_sf, "epsg:32610")
p_gta_sf.proj <- lwgeom::st_transform_proj(p_gta_sf, "epsg:32610")

# For each boundary feature, the indices of the points it covers.
int_points_hom <- st_covers(p_boundary.proj, p_hom_sf.proj)
int_points_gta <- st_covers(p_boundary.proj, p_gta_sf.proj)

# Keep every point covered by any boundary feature. With a single boundary
# polygon this is the same as taking the first list element, but it stays
# correct if the boundary is stored as several features. `as.integer()` keeps
# the result a valid (empty) index when nothing is covered.
keep.hom <- sort(unique(as.integer(unlist(int_points_hom))))
keep.gta <- sort(unique(as.integer(unlist(int_points_gta))))

p_hom <- p_hom[keep.hom, ]
p_gta <- p_gta[keep.gta, ]

# Rebuild the (unprojected) sf objects from the filtered records.
p_hom_sf <- st_as_sf(p_hom, coords = c("lon", "lat"), crs = st_crs(p_boundary))
p_gta_sf <- st_as_sf(p_gta, coords = c("lon", "lat"), crs = st_crs(p_boundary))



# Save the Data Sets ------------------------------------------------------

# Write the filtered data frames and their sf counterparts to .rds files in
# the working directory, pairing each object with its output file name.
invisible(Map(
  saveRDS,
  list(p_hom, p_gta, p_hom_sf, p_gta_sf),
  c("p_hom.rds", "p_gta.rds", "p_hom_sf.rds", "p_gta_sf.rds")
))


# SessionInfo() -----------------------------------------------------------

# Report the R version and the attached/loaded package versions for this run,
# for reproducibility.
sessionInfo()

