
# Los Angeles Crime Data --------------------------------------------------

# This code takes in the full LAPD crime incident data set, "Crime_Data_from_2010_to_Present", and manipulates it so that the text Location column is turned into numeric longitude and latitude columns. It also subsets and saves two data sets, one containing location and time information for incidents of homicide and the other containing the location and time of cases of vehicular theft, the two crimes we want to consider within this thesis.
# Note: here we load the Los Angeles data as originally accessed, so the code to read in the necessary data will differ depending on the user's download and file name. Additionally, due to updates to the data since the original access, there may be variations in column names and types, which will need to be taken into account in the code below.

# Author: Nadeen Khaleel

# Setwd and Load Libraries ------------------------------------------------

library("rstudioapi")
# Set the working directory to this script's folder so that the relative paths used below resolve.
# The active-document lookup only works inside RStudio and only when the active document has been saved;
# in any other case (e.g. Rscript, or the console being the active pane) setwd() to the source file
# location manually before running the rest of the script.
if (rstudioapi::isAvailable() && nzchar(getActiveDocumentContext()$path)) {
  setwd(dirname(getActiveDocumentContext()$path))
} else {
  message("Could not determine the source file location; assuming the working directory is already set to it: ", getwd())
}

library(readr)
library(ggplot2)
library(ggmap)
library(dplyr)
library(stringr)
library(revgeo)
library(sp)
library(sf)


# Load in Crime Data ------------------------------------------------------

# Read the raw LAPD incident export from the working directory
LAPD <- read_csv(file = "Crime_Data_from_2010_to_Present.csv")

# Keep only the records that carry a Location value
LAPD <- LAPD[which(!is.na(LAPD[["Location"]])), ]


# Data Manipulation -------------------------------------------------------

# Derive the temporal columns from the month/day/year text in `Date Occurred`:
# DT_OCC (Date), DT_OCC_NUM (the Date as a number), MY (year-month label) and Y (year label).
LAPD[["DT_OCC"]] <- as.Date(LAPD[["Date Occurred"]], format = "%m/%d/%Y")
LAPD[["DT_OCC_NUM"]] <- as.numeric(LAPD[["DT_OCC"]])
LAPD[["MY"]] <- format(LAPD[["DT_OCC"]], "%Y-%m")
LAPD[["Y"]] <- format(LAPD[["DT_OCC"]], "%Y")

# Turn the text Location column, formatted as "(lat, lon)", into numeric lat and lon columns, then
# remove the records at (0,0), as we are told that this is used when there is no location information.
Loc <- LAPD$Location
Loc1 <- str_replace(Loc, "\\(", "")
Loc2 <- str_replace(Loc1, "\\)", "")
# str_split_fixed() always returns exactly two pieces per record, so a malformed entry only produces
# NA for that record; reshaping an unlisted str_split() would instead shift every later coordinate.
Loc3 <- str_split_fixed(Loc2, ",", n = 2)
Loc <- matrix(as.numeric(Loc3), ncol = 2)

LAPD$lat <- Loc[, 1]
LAPD$lon <- Loc[, 2]

# Drop the zero-latitude placeholders together with any record whose coordinates failed to parse.
# The explicit NA checks keep the row index free of NA, which would otherwise select rows filled
# with NA and later break the conversion to spatial points.
LAPD <- LAPD[!is.na(LAPD$lat) & !is.na(LAPD$lon) & LAPD$lat != 0, ]

# Collect the distinct crime code descriptions that correspond to homicides and to stolen vehicles,
# then split the data into one subset per crime type.
hom_sub <- unique(grep("HOMICIDE|MANSLAUGHTER|NEGLIGENT", LAPD[["Crime Code Description"]], value = TRUE))
gta_sub <- unique(grep("VEHICLE - STOLEN", LAPD[["Crime Code Description"]], value = TRUE))

la_hom <- LAPD[is.element(LAPD[["Crime Code Description"]], hom_sub), ]
la_gta <- LAPD[is.element(LAPD[["Crime Code Description"]], gta_sub), ]

# Retain points only within the city
la_boundary <- st_read("../SHAPEFILES/BOUNDARIES/City Boundaries for Los Angeles County/geo_export_ded155ce-fcf8-43a1-b781-9267f1e0fcb6.shp")
# %in% rather than == so that a missing abbreviation is simply excluded instead of yielding an NA index
lacity_boundary <- la_boundary[la_boundary$abbr %in% "LAX", ]

# lacity_boundary.proj <- lwgeom::st_transform_proj(lacity_boundary,"+init=epsg:32611") # this was the original run version, however now the inclusion of +init is no longer in use
lacity_boundary.proj <- lwgeom::st_transform_proj(lacity_boundary, "epsg:32611")

# Build point geometries from the lon/lat columns, using the boundary's coordinate reference system
la_hom_sf <- st_as_sf(la_hom, coords = c("lon", "lat"), crs = st_crs(lacity_boundary))
la_gta_sf <- st_as_sf(la_gta, coords = c("lon", "lat"), crs = st_crs(lacity_boundary))
# la_hom_sf.proj <- lwgeom::st_transform_proj(la_hom_sf,"+init=epsg:32611") # as discussed above
# la_gta_sf.proj <- lwgeom::st_transform_proj(la_gta_sf,"+init=epsg:32611") # as discussed above
la_hom_sf.proj <- lwgeom::st_transform_proj(la_hom_sf, "epsg:32611")
la_gta_sf.proj <- lwgeom::st_transform_proj(la_gta_sf, "epsg:32611")

# st_covers() returns one list element per boundary feature, each holding the row indices of the
# points that feature covers.
int_points_hom.proj <- st_covers(lacity_boundary.proj, la_hom_sf.proj)
int_points_gta.proj <- st_covers(lacity_boundary.proj, la_gta_sf.proj)

# Gather the covered points across all boundary features. In the original run only features 18
# (homicides) and 18-19 (vehicle thefts) were non-empty, so this gives the same rows in the same
# order, but it no longer depends on the feature order of a particular shapefile/data download.
# unique() prevents a point covered by two features from being kept twice.
keep.hom.proj <- unique(unlist(int_points_hom.proj))
keep.gta.proj <- unique(unlist(int_points_gta.proj))

la_hom <- la_hom[keep.hom.proj, ]
la_gta <- la_gta[keep.gta.proj, ]

# Rebuild the spatial versions from the filtered tables
la_hom_sf <- st_as_sf(la_hom, coords = c("lon", "lat"), crs = st_crs(lacity_boundary))
la_gta_sf <- st_as_sf(la_gta, coords = c("lon", "lat"), crs = st_crs(lacity_boundary))


# Save the Data Sets ------------------------------------------------------

# Write the filtered homicide and vehicle-theft data to the working directory:
# first the plain tables, then their sf (spatial) counterparts.
invisible(Map(
  saveRDS,
  list(la_hom, la_gta, la_hom_sf, la_gta_sf),
  c("la_hom.rds", "la_gta.rds", "la_hom_sf.rds", "la_gta_sf.rds")
))



# SessionInfo() -----------------------------------------------------------

# Record the R version, platform and attached/loaded package versions used for this run, to aid reproducibility.
sessionInfo()
