
# Fitting GLMs to Census Tract-Level Crime Data ---------------------------

# This R script to estimate the Ripley's K for the different point patterns to compare to the theoretical form of the function. We also fit Poisson and Negative Binomial models to the census tract-level count data for the different crimes and different cities. The remainder of the R script produces summaries or summary plots for the GLM fits.

# Author: Nadeen Khaleel

# Setwd and Load Libraries ------------------------------------------------

library("rstudioapi")
# Either setwd() to the source file location, or run the following:
setwd(dirname(getActiveDocumentContext()$path))

library(rstanarm) #bayesglm
library(sf)
library(bayesplot)
library(fields)
library(spatstat)
library(raster)
library(ggplot2)
library(maptools)
library(gridExtra)



# Los Angeles -------------------------------------------------------------
# Models for the Los Angeles crime data at the census tract level.

# Los Angeles: Set-Up -----------------------------------------------------
# Change the working directory as we want to save the data in separate folders for each city. We then load the census tract data produced in DATA/RAW_DATA/SHAPEFILES/CENSUS_TRACTS and project these to UTM coordinates.

# LA Data
setwd("./LA")

# Load count and point data
load("../../../PROCESSED_DATA/CRIME/COUNT_DATA_CENSUS_TRACTS/LA/LA2015CTCountData_projFinal.rda")
load("../../../PROCESSED_DATA/CRIME/COUNT_DATA_CENSUS_TRACTS/LA/LA2015CTSFCountData_projFinal.rda")
la_hom <- readRDS("../../../PROCESSED_DATA/CRIME/POINT_PATTERN/la_hom_sf.rds")
la_gta <- readRDS("../../../PROCESSED_DATA/CRIME/POINT_PATTERN/la_gta_sf.rds")

# Extract 2015 incidents
la_gta15 <- la_gta[which(la_gta$Y==2015),]
la_hom15 <- la_hom[which(la_hom$Y==2015),]

hom_countdf <- ct_homcount.df
gta_countdf <- ct_gtacount.df

# LA Census Tracts
load("../../../PROCESSED_DATA/SHAPEFILES/CENSUS_TRACTS/LACityCT.rda")

# Project to UTM: the union - which will form a boundary for the city for the point patterns for Ripley's K.
lacity_boundary <- st_union(ct_LA)
lacity_proj <- lwgeom::st_transform_proj(lacity_boundary,"epsg:32611")
lacity_sp <- as(lacity_proj,"Spatial")

geo2ctLA <- function(x){ct_LA$TRACTCE[which(ct_LA$GEOID==x)]}

# Transform the window
bbox <- lacity_sp@bbox
lacity_spshift <- elide(lacity_sp,shift=-c(bbox[1,1],bbox[2,1]))
bbox_shift <- lacity_spshift@bbox
lacity_spscale <- elide(lacity_spshift,scale=max(bbox_shift)/1e4)
W <- as.owin.SpatialPolygons((lacity_spscale))

# Census Tracts: project to UTM for Ripley's K
ct_LA.proj <- lwgeom::st_transform_proj(ct_LA,"epsg:32611")
ct_LA.sp <- as(ct_LA.proj, "Spatial")
ct_LA.spshift <- elide(ct_LA.sp,shift=-c(bbox[1,1],bbox[2,1]))
ct_LA.spscale <- elide(ct_LA.spshift,scale=max(bbox_shift)/1e4)
ct_LA.sfscale <- st_as_sf(ct_LA.spscale)


# Los Angeles: Point Patterns ---------------------------------------------
# Create the necessary point patterns and calculate standardised socio-economic variables.

# Transform the crime point data
bbox <- lacity_sp@bbox
bbox_shift <- lacity_spshift@bbox
la_hom.proj <- lwgeom::st_transform_proj(la_hom15,"epsg:32611")
la_hom.projsp <- as(la_hom.proj,"Spatial")
la_hom_spshift <- elide(la_hom.projsp,bb=lacity_sp@bbox,shift=-c(bbox[1,1],bbox[2,1]))
la_hom_spscale <- elide(la_hom_spshift,bb=lacity_spshift@bbox,scale=max(bbox_shift)/1e4)
bbox <- lacity_sp@bbox
bbox_shift <- lacity_spshift@bbox
la_gta.proj <- lwgeom::st_transform_proj(la_gta15,"epsg:32611")
la_gta.projsp <- as(la_gta.proj,"Spatial")
la_gta_spshift <- elide(la_gta.projsp,bb=lacity_sp@bbox,shift=-c(bbox[1,1],bbox[2,1]))
la_gta_spscale <- elide(la_gta_spshift,bb=lacity_spshift@bbox,scale=max(bbox_shift)/1e4)


# Generate point patterns over the projected window
la_hom.proj.loc <- la_hom_spscale@coords
lahom.ppp <- as.ppp(la_hom.proj.loc,W) # 1 point lying outside window
la_gta.proj.loc <- la_gta_spscale@coords
lagta.ppp <- as.ppp(la_gta.proj.loc,W) # 35 points lying outside window

# Plot rejected points
pdf("ExcludedPointsLA.pdf",h=8,w=10)
par(mfrow=c(1,2))
plot(lahom.ppp,pch=19,cex=0.1,main="Homicide PP - Excluded Points")
# Warning message:
#   In plot.ppp(lahom.ppp, pch = 19, cex = 0.1, main = "Homicide PP - Excluded Points") :
#   1 illegal points also plotted
a <- attributes(lahom.ppp)
points(a$rejects$x,a$rejects$y,col="magenta",pch=19,cex=0.5)
plot(lagta.ppp,pch=19,cex=0.1,main="GTA PP - Excluded Points")
# Warning message:
  # In plot.ppp(lagta.ppp, pch = 19, cex = 0.1, main = "GTA PP - Excluded Points") :
  # 35 illegal points also plotted
a <- attributes(lagta.ppp)
points(a$rejects$x,a$rejects$y,col="magenta",pch=19,cex=0.5)
dev.off()


# Standardise the population and income
hom_countdf$zpop <- (hom_countdf$pop - mean(hom_countdf$pop))/sd(hom_countdf$pop)
hom_countdf$zinc <- (hom_countdf$inc - mean(hom_countdf$inc))/sd(hom_countdf$inc)
gta_countdf$zpop <- (gta_countdf$pop - mean(gta_countdf$pop))/sd(gta_countdf$pop)
gta_countdf$zinc <- (gta_countdf$inc - mean(gta_countdf$inc))/sd(gta_countdf$inc)

# Store the scale and shift information for further use
bbox <- lacity_sp@bbox
bbox_shift <- lacity_spshift@bbox


# Los Angeles: Ripley's K -------------------------------------------------
# Estimate Ripley's K (homogeneous and inhomogeneous) for the Los Angeles crime data and compare to the theoretical form of the function.

# Resolution for the rasters (roughly 200m-by-200m)
nr <- 359
nc <- 236

# Load covariate data for creation of log-intensity function of inhomogensous Poisson process using ppm() estimates for coefficients
LA_ctpop_15 <- readRDS("../../../PROCESSED_DATA/COVARIATES/LA_CTPop_15_proj.rds")
LA_ctinc_15 <- readRDS("../../../PROCESSED_DATA/COVARIATES/LA_CTInc_15_0imp_proj.rds")

# Assign covariates to census tracts
count.cells <- ct_LA.proj
count.cells$pop <- LA_ctpop_15$pop[match(count.cells$TRACTCE,as.character(sapply(1:length(LA_ctpop_15$geoid2),function(i){geo2ctLA(LA_ctpop_15$geoid2[i])})))]
count.cells$inc <- LA_ctinc_15$inc[match(count.cells$TRACTCE,as.character(sapply(1:length(LA_ctinc_15$geoid2),function(i){geo2ctLA(LA_ctinc_15$geoid2[i])})))]
W.proj <- as.owin.SpatialPolygons((lacity_sp))

# We now want to generate the covariates over a fine grid, but not interpolated, just assigning the values to the census tracts the cells lie within.
g <- quadrats(W.proj,nx=nc,ny=nr)
la_cells <- as(g,"SpatialPolygons")
la_cells_centre <- t(sapply(la_cells@polygons, function(x){x@Polygons[[1]]@labpt}))
cells.centre.df <- data.frame(x=la_cells_centre[,1],y=la_cells_centre[,2])
coordinates(cells.centre.df) <- ~ x + y
cells.centre.df <- st_as_sf(cells.centre.df)
st_crs(cells.centre.df) <- st_crs(count.cells)

# Assign the necessary values
cttocell <- st_intersects(cells.centre.df,count.cells)
cells.centre.df$pop[which(lengths(cttocell)!=0)] <- count.cells$pop[unlist(cttocell)]
# NA to 0
cells.centre.df$pop[is.na(cells.centre.df$pop)] <- 0
cells.centre.df$inc[which(lengths(cttocell)!=0)] <- count.cells$inc[unlist(cttocell)]
# NA to 0
cells.centre.df$inc[is.na(cells.centre.df$inc)] <- 0

# Calculate standardised variables and then shift and scale as with the window, so that unit change in x or y direction is related to a real distance shift of 10km and the bottom=left corner of the bounding box of the LA polygon, lies on the origin, (0,0)
cells.centre.df <- as(cells.centre.df,"Spatial")
cells.centre.df$zpop <- (cells.centre.df$pop - mean(cells.centre.df$pop))/sd(cells.centre.df$pop)
cells.centre.df$zinc <- (cells.centre.df$inc - mean(cells.centre.df$inc))/sd(cells.centre.df$inc)
cells.centre.dfshift <- elide(cells.centre.df,bb=bbox,shift=-c(bbox[1,1],bbox[2,1]))
cells.centre.dfscale <- elide(cells.centre.dfshift,bb=bbox_shift,scale=max(bbox_shift)/1e4)

# Create raster which can then be used to produce pixel images
w.r <- raster(lacity_spscale,nrow=nr,ncol=nc)
pop.ras <- rasterize(cells.centre.dfscale,w.r,field=cells.centre.dfscale$pop)
inc.ras <- rasterize(cells.centre.dfscale,w.r,field=cells.centre.dfscale$inc)
zpop.ras <- rasterize(cells.centre.dfscale,w.r,field=cells.centre.dfscale$zpop)
zinc.ras <- rasterize(cells.centre.dfscale,w.r,field=cells.centre.dfscale$zinc)
int.ras <- rasterize(cells.centre.dfscale,w.r,field=rep(1,length(cells.centre.dfscale)))

# Need a base to cover the entire window, including the region outside the window, otherwise there are warnings about the covariates not covering the LA polygon when fitting the ppm() model
b.r <- raster(extent(pop.ras),nrow=pop.ras@nrows,ncol=pop.ras@ncols)
bbox.base <- owin(xrange=c(extent(pop.ras)[1],extent(pop.ras)[2]),yrange=c(extent(pop.ras)[3],extent(pop.ras)[4]))
gc.df <- gridcenters(bbox.base,1e3,1e3)
gc.df <- data.frame(x=gc.df$x,y=gc.df$y)
coordinates(gc.df) <- ~ x + y
b.ras <- rasterize(gc.df,b.r,rep(0,length(gc.df)))

# Generate the pixel images
popb.im <- as.im(merge(pop.ras,b.ras))
incb.im <- as.im(merge(inc.ras,b.ras))
zpopb.im <- as.im(merge(zpop.ras,b.ras))
zincb.im <- as.im(merge(zinc.ras,b.ras))
intb.im <- as.im(merge(int.ras,b.ras))


# Los Angeles: Homicide
pdf("RipleysK_lahom_proj_236359.pdf",h=8,w=15,pointsize = 14)
par(mfrow=c(1,2))
plot(Kest(lahom.ppp,correction="border"))
mod.hom <- ppm(lahom.ppp ~ c1 + c2, covariates=list(c1=popb.im,c2=incb.im))
lambda.im <- exp(intb.im*mod.hom$coef[[1]] + popb.im*mod.hom$coef[[2]] + incb.im*mod.hom$coef[[3]])
plot(Kinhom(lahom.ppp,lambda=lambda.im[W,drop=FALSE],correction="border"))
dev.off()
# with standardised covariates
pdf("RipleysK_lahomz_proj_236359.pdf",h=8,w=15,pointsize = 14)
par(mfrow=c(1,2))
plot(Kest(lahom.ppp,correction="border"))
mod.hom <- ppm(lahom.ppp ~ c1 + c2, covariates=list(c1=zpopb.im,c2=zincb.im))
lambda.im <- exp(intb.im*mod.hom$coef[[1]] + zpopb.im*mod.hom$coef[[2]] + zincb.im*mod.hom$coef[[3]])
plot(Kinhom(lahom.ppp,lambda=lambda.im[W,drop=FALSE],correction="border"))
dev.off()

# Los Angeles: Motor Vehicle Theft
pdf("RipleysK_lagta_proj_236359.pdf",h=8,w=15,pointsize = 14)
par(mfrow=c(1,2))
plot(Kest(lagta.ppp,correction="border"))
mod.gta <- ppm(lagta.ppp ~ c1 + c2, covariates=list(c1=popb.im,c2=incb.im))
# Warning message:
#   In countingweights(id, areas) :
#   some tiles with positive area do not contain any quadrature points: relative error = 1%
lambda.im <- exp(intb.im*mod.gta$coef[[1]] + popb.im*mod.gta$coef[[2]] + incb.im*mod.gta$coef[[3]])
plot(Kinhom(lagta.ppp,lambda=lambda.im[W,drop=FALSE],correction="border"))
# Warning message:
#   Values for 6 points lying slightly outside the pixel image domain were estimated by convolution
dev.off()
# with standardised covariates
pdf("RipleysK_lagtaz_proj_236359.pdf",h=8,w=15,pointsize = 14)
par(mfrow=c(1,2))
plot(Kest(lagta.ppp,correction="border"))
mod.gta <- ppm(lagta.ppp ~ c1 + c2, covariates=list(c1=zpopb.im,c2=zincb.im))
# Warning message:
#   In countingweights(id, areas) :
#   some tiles with positive area do not contain any quadrature points: relative error = 1%
lambda.im <- exp(intb.im*mod.gta$coef[[1]] + zpopb.im*mod.gta$coef[[2]] + zincb.im*mod.gta$coef[[3]])
plot(Kinhom(lagta.ppp,lambda=lambda.im[W,drop=FALSE],correction="border"))
# Warning message:
#   Values for 6 points lying slightly outside the pixel image domain were estimated by convolution
dev.off()


# Los Angeles: Homicide GLM -----------------------------------------------
# Fit Poisson and Negative Binomial models to the homicide crime data at census tract-level for Los Angeles

# Los Angeles: Homicide: Poisson GLM
fit.zcov <- stan_glm(hom ~ 1 + zpop + zinc, data = hom_countdf, family = poisson, offset=log(ctarea/1e8), prior = normal(0,sqrt(1000)), prior_intercept = normal(0,sqrt(1000)),chains=4,iter=25000)
summary(fit.zcov,digits=10)

# Los Angeles: Homicide: Negative Binomial GLM
fit.nbzcov <- stan_glm(hom ~ 1 + zpop + zinc, data = hom_countdf, family = neg_binomial_2, offset=log(ctarea/1e8), prior = normal(0,sqrt(1000)), prior_intercept = normal(0,sqrt(1000)),chains=4,iter=25000)
summary(fit.nbzcov,digits=10)

# Save homicide models
save(fit.zcov,fit.nbzcov,file="LAHommcGLMS_area.rda")

rm(fit.zcov,fit.nbzcov)


# Los Angeles: Motor Vehicle Theft GLM ------------------------------------
# Fit Poisson and Negative Binomial models to the motor vehicle theft crime data at census tract-level for Los Angeles

# Los Angeles: Motor Vehicle Theft: Poisson GLM 
fit.zcov <- stan_glm(gta ~ 1 + zpop + zinc, data = gta_countdf, family = poisson, offset=log(ctarea/1e8), prior = normal(0,sqrt(1000)), prior_intercept = normal(0,sqrt(1000)),chains=4,iter=25000)
summary(fit.zcov,digits=10)

# Los Angeles: Motor Vehicle Theft: Negative Binomial GLM
fit.nbzcov <- stan_glm(gta ~ 1 + zpop + zinc, data = gta_countdf, family = neg_binomial_2, offset=log(ctarea/1e8), prior = normal(0,sqrt(1000)), prior_intercept = normal(0,sqrt(1000)),chains=4,iter=25000)
summary(fit.nbzcov,digits=10)

# Save motor vehicle theft models
save(fit.zcov,fit.nbzcov,file="LAGTAmcGLMS_area.rda")

rm(fit.zcov,fit.nbzcov)


# Los Angeles: Re-set WD --------------------------------------------------

library("rstudioapi")
# Either setwd() to the source file location, or run the following:
setwd(dirname(getActiveDocumentContext()$path))



# New York ----------------------------------------------------------------
# Models for the New York crime data at the census tract level.

# New York: Set-Up --------------------------------------------------------
# Change the working directory as we want to save the data in separate folders for each city. We then load the census tract data produced in DATA/RAW_DATA/SHAPEFILES/CENSUS_TRACTS and project these to UTM coordinates.

# NYC Data
setwd("./NYC")

# Load count and point data
load("../../../PROCESSED_DATA/CRIME/COUNT_DATA_CENSUS_TRACTS/NYC/NY2015CTCountData_projFinal.rda")
load("../../../PROCESSED_DATA/CRIME/COUNT_DATA_CENSUS_TRACTS/NYC/NY2015CTSFCountData_projFinal.rda")
nyc_hom <- readRDS("../../../PROCESSED_DATA/CRIME/POINT_PATTERN/nyc_hom_sf.rds")
nyc_gta <- readRDS("../../../PROCESSED_DATA/CRIME/POINT_PATTERN/nyc_gta_sf.rds")

# Extract 2015 incidents
nyc_gta15 <- nyc_gta[which(nyc_gta$Y==2015),]
nyc_hom15 <- nyc_hom[which(nyc_hom$Y==2015),]

hom_countdf <- ct_homcount.df
gta_countdf <- ct_gtacount.df

# NYC Census Tracts
load("../../../PROCESSED_DATA/SHAPEFILES/CENSUS_TRACTS/NYCityCT.rda")
nycity_boundary <- st_union(ct_NY)
nycity_proj <- lwgeom::st_transform_proj(nycity_boundary,"epsg:32618")
nycity_sp <- as(nycity_proj,"Spatial") #st_transform for crs...

# Transform the window
bbox <- nycity_sp@bbox
nycity_spshift <- elide(nycity_sp,shift=-c(bbox[1,1],bbox[2,1]))
bbox_shift <- nycity_spshift@bbox
nycity_spscale <- elide(nycity_spshift,scale=max(bbox_shift)/1e4)
W <- as.owin.SpatialPolygons((nycity_spscale))

# Census Tracts: project to UTM for Ripley's K
ct_NY.proj <- lwgeom::st_transform_proj(ct_NY,"epsg:32618")
ct_NY.sp <- as(ct_NY.proj, "Spatial")
ct_NY.spshift <- elide(ct_NY.sp,shift=-c(bbox[1,1],bbox[2,1]))
ct_NY.spscale <- elide(ct_NY.spshift,scale=max(bbox_shift)/1e4)
ct_NY.sfscale <- st_as_sf(ct_NY.spscale)



# New York: Point Patterns ------------------------------------------------
# Create the necessary point patterns and calculate standardised socio-economic variables.

# Transform the crime point data
bbox <- nycity_sp@bbox
bbox_shift <- nycity_spshift@bbox
nyc_hom.proj <- lwgeom::st_transform_proj(nyc_hom15,"epsg:32618")
nyc_hom.projsp <- as(nyc_hom.proj,"Spatial")
nyc_hom_spshift <- elide(nyc_hom.projsp,bb=bbox,shift=-c(bbox[1,1],bbox[2,1]))
nyc_hom_spscale <- elide(nyc_hom_spshift,bb=bbox_shift,scale=max(bbox_shift)/1e4)
bbox <- nycity_sp@bbox
bbox_shift <- nycity_spshift@bbox
nyc_gta.proj <- lwgeom::st_transform_proj(nyc_gta15,"epsg:32618")
nyc_gta.projsp <- as(nyc_gta.proj,"Spatial")
nyc_gta_spshift <- elide(nyc_gta.projsp,bb=bbox,shift=-c(bbox[1,1],bbox[2,1]))
nyc_gta_spscale <- elide(nyc_gta_spshift,bb=bbox_shift,scale=max(bbox_shift)/1e4)

# Generate point patterns over the projected window
nyc_hom.proj.loc <- nyc_hom_spscale@coords
nychom.ppp <- as.ppp(nyc_hom.proj.loc,W)
nyc_gta.proj.loc <- nyc_gta_spscale@coords
nycgta.ppp <- as.ppp(nyc_gta.proj.loc,W)


# Standardise the population and income
hom_countdf$zpop <- (hom_countdf$pop - mean(hom_countdf$pop))/sd(hom_countdf$pop)
hom_countdf$zinc <- (hom_countdf$inc - mean(hom_countdf$inc))/sd(hom_countdf$inc)
gta_countdf$zpop <- (gta_countdf$pop - mean(gta_countdf$pop))/sd(gta_countdf$pop)
gta_countdf$zinc <- (gta_countdf$inc - mean(gta_countdf$inc))/sd(gta_countdf$inc)



# New York: Ripley's K ----------------------------------------------------
# Estimate Ripley's K (homogeneous and inhomogeneous) for the New York crime data and compare to the theoretical form of the function.

# Resolution for the rasters (roughly 200m-by-200m)
nr <- 239 # y axis
nc <- 235 # x axis

nyc_boundary <- st_union(ct_NY)
nyc_proj <- lwgeom::st_transform_proj(nyc_boundary,"epsg:32618")
nyc_sp <- as(nyc_proj,"Spatial")
bbox <- nyc_sp@bbox
nyc_spshift <- elide(nyc_sp,shift=-c(bbox[1,1],bbox[2,1]))
bbox_shift <- nyc_spshift@bbox
nyc_spscale <- elide(nyc_spshift,scale=max(bbox_shift)/1e4)
W <- as.owin.SpatialPolygons((nyc_spscale))


# Load covariate data for creation of log-intensity function of inhomogensous Poisson process using ppm() estimates for coefficients
NY_ctpop_15 <- readRDS("../../../PROCESSED_DATA/COVARIATES/NY_CTPop_15_proj.rds")
NY_ctinc_15 <- readRDS("../../../PROCESSED_DATA/COVARIATES/NY_CTInc_15_0imp_proj.rds")

# Project census tracts to UTM
ct_NY.proj <- lwgeom::st_transform_proj(ct_NY,"epsg:32618")

# Assign covariates to census tracts
count.cells <- ct_NY.proj
count.cells$pop <- NY_ctpop_15$pop[match(count.cells$GEOID,NY_ctpop_15$geoid2)]
count.cells$inc <- NY_ctinc_15$inc[match(count.cells$GEOID,NY_ctinc_15$geoid2)]
W.proj <- as.owin.SpatialPolygons((nyc_sp))

# We now want to generate the covariates over a fine grid, but not interpolated, just assigning the values to the census tracts the cells lie within.
g <- quadrats(W.proj,nx=nc,ny=nr)
ny_cells <- as(g,"SpatialPolygons")
ny_cells_centre <- t(sapply(ny_cells@polygons, function(x){x@Polygons[[1]]@labpt}))
cells.centre.df <- data.frame(x=ny_cells_centre[,1],y=ny_cells_centre[,2])
coordinates(cells.centre.df) <- ~ x + y
cells.centre.df <- st_as_sf(cells.centre.df)
st_crs(cells.centre.df) <- st_crs(count.cells)

# Assign necessary values
cttocell <- st_intersects(cells.centre.df,count.cells)
cells.centre.df$pop[which(lengths(cttocell)!=0)] <- count.cells$pop[unlist(cttocell)]
# NA to 0
cells.centre.df$pop[is.na(cells.centre.df$pop)] <- 0
cells.centre.df$inc[which(lengths(cttocell)!=0)] <- count.cells$inc[unlist(cttocell)]
# NA to 0
cells.centre.df$inc[is.na(cells.centre.df$inc)] <- 0

# Calculate standardised variables and then shift and scale as with the window, so that unit change in x or y direction is related to a real distance shift of 10km and the bottom=left corner of the bounding box of the NYC polygon, lies on the origin, (0,0)
cells.centre.df <- as(cells.centre.df,"Spatial")
cells.centre.df$zpop <- (cells.centre.df$pop - mean(cells.centre.df$pop))/sd(cells.centre.df$pop)
cells.centre.df$zinc <- (cells.centre.df$inc - mean(cells.centre.df$inc))/sd(cells.centre.df$inc)
cells.centre.dfshift <- elide(cells.centre.df,bb=bbox,shift=-c(bbox[1,1],bbox[2,1]))
cells.centre.dfscale <- elide(cells.centre.dfshift,bb=bbox_shift,scale=max(bbox_shift)/1e4)

# Create raster which can then be used to produce pixel images
w.r <- raster(nyc_spscale,nrow=nr,ncol=nc)
pop.ras <- rasterize(cells.centre.dfscale,w.r,field=cells.centre.dfscale$pop)
inc.ras <- rasterize(cells.centre.dfscale,w.r,field=cells.centre.dfscale$inc)
zpop.ras <- rasterize(cells.centre.dfscale,w.r,field=cells.centre.dfscale$zpop)
zinc.ras <- rasterize(cells.centre.dfscale,w.r,field=cells.centre.dfscale$zinc)
int.ras <- rasterize(cells.centre.dfscale,w.r,field=rep(1,length(cells.centre.dfscale)))

# Need a base to cover the entire window, including the region outside the window, otherwise there are warnings about the covariates not covering the LA polygon when fitting the ppm() model
b.r <- raster(extent(pop.ras),nrow=pop.ras@nrows,ncol=pop.ras@ncols)
bbox.base <- owin(xrange=c(extent(pop.ras)[1],extent(pop.ras)[2]),yrange=c(extent(pop.ras)[3],extent(pop.ras)[4]))
gc.df <- gridcenters(bbox.base,1e3,1e3)
gc.df <- data.frame(x=gc.df$x,y=gc.df$y)
coordinates(gc.df) <- ~ x + y
b.ras <- rasterize(gc.df,b.r,rep(0,length(gc.df)))

# Generate the pixel images
popb.im <- as.im(merge(pop.ras,b.ras))
incb.im <- as.im(merge(inc.ras,b.ras))
zpopb.im <- as.im(merge(zpop.ras,b.ras))
zincb.im <- as.im(merge(zinc.ras,b.ras))
intb.im <- as.im(merge(int.ras,b.ras))

# New York: Homicide
pdf("RipleysK_nyhom_proj_235239.pdf",h=8,w=15,pointsize = 14)
par(mfrow=c(1,2))
plot(Kest(nychom.ppp,correction="border"))
mod.hom <- ppm(nychom.ppp ~ c1 + c2, covariates=list(c1=popb.im,c2=incb.im))
lambda.im <- exp(intb.im*mod.hom$coef[[1]] + popb.im*mod.hom$coef[[2]] + incb.im*mod.hom$coef[[3]])
plot(Kinhom(nychom.ppp,lambda=lambda.im[W,drop=FALSE],correction="border"))
dev.off()

pdf("RipleysK_nyhomz_proj_235239.pdf",h=8,w=15,pointsize = 14)
par(mfrow=c(1,2))
plot(Kest(nychom.ppp,correction="border"))
mod.hom <- ppm(nychom.ppp ~ c1 + c2, covariates=list(c1=zpopb.im,c2=zincb.im))
lambda.im <- exp(intb.im*mod.hom$coef[[1]] + zpopb.im*mod.hom$coef[[2]] + zincb.im*mod.hom$coef[[3]])
plot(Kinhom(nychom.ppp,lambda=lambda.im[W,drop=FALSE],correction="border"))
dev.off()

# New York: Motor Vehicle Theft
pdf("RipleysK_nygta_proj_235239.pdf",h=8,w=15,pointsize = 14)
par(mfrow=c(1,2))
plot(Kest(nycgta.ppp,correction="border"))
mod.gta <- ppm(nycgta.ppp ~ c1 + c2, covariates=list(c1=popb.im,c2=incb.im))
lambda.im <- exp(intb.im*mod.gta$coef[[1]] + popb.im*mod.gta$coef[[2]] + incb.im*mod.gta$coef[[3]])
plot(Kinhom(nycgta.ppp,lambda=lambda.im[W,drop=FALSE],correction="border"))
dev.off()

pdf("RipleysK_nygtaz_proj_235239.pdf",h=8,w=15,pointsize = 14)
par(mfrow=c(1,2))
plot(Kest(nycgta.ppp,correction="border"))
mod.gta <- ppm(nycgta.ppp ~ c1 + c2, covariates=list(c1=zpopb.im,c2=zincb.im))
lambda.im <- exp(intb.im*mod.gta$coef[[1]] + zpopb.im*mod.gta$coef[[2]] + zincb.im*mod.gta$coef[[3]])
plot(Kinhom(nycgta.ppp,lambda=lambda.im[W,drop=FALSE],correction="border"))
dev.off()


# New York: Homicide GLM --------------------------------------------------
# Fit Poisson and Negative Binomial models to the homicide crime data at census tract-level for New York

# New York: Homicide: Poisson GLM
fit.zcov <- stan_glm(hom ~ 1 + zpop + zinc, data = hom_countdf, family = poisson, offset=log(ctarea/1e8), prior = normal(0,sqrt(1000)), prior_intercept = normal(0,sqrt(1000)),chains=4,iter=25000)
summary(fit.zcov,digits=10)

# New York: Homicide: Negative Binomial GLM
fit.nbzcov <- stan_glm(hom ~ 1 + zpop + zinc, data = hom_countdf, family = neg_binomial_2, offset=log(ctarea/1e8), prior = normal(0,sqrt(1000)), prior_intercept = normal(0,sqrt(1000)),chains=4,iter=25000)
summary(fit.nbzcov,digits=10)

# Save homicide models
save(fit.zcov,fit.nbzcov,file="NYCHommcGLMS_area.rda")

rm(fit.zcov,fit.nbzcov)


# New York: Motor Vehicle Theft GLM ---------------------------------------
# Fit Poisson and Negative Binomial models to the motor vehicle theft crime data at census tract-level for New York

# New York: Motor Vehicle Theft: Poisson GLM 
fit.zcov <- stan_glm(gta ~ 1 + zpop + zinc, data = gta_countdf, family = poisson, offset=log(ctarea/1e8), prior = normal(0,sqrt(1000)), prior_intercept = normal(0,sqrt(1000)),chains=4,iter=25000,control=list(adapt_delta=0.96))
summary(fit.zcov,digits=10)

# New York: Motor Vehicle Theft: Negative Binomial GLM
fit.nbzcov <- stan_glm(gta ~ 1 + zpop + zinc, data = gta_countdf, family = neg_binomial_2, offset=log(ctarea/1e8), prior = normal(0,sqrt(1000)), prior_intercept = normal(0,sqrt(1000)),chains=4,iter=25000)
summary(fit.nbzcov,digits=10)

# Save motor vehicle theft models
save(fit.zcov,fit.nbzcov,file="NYCGTAmcGLMS_area.rda")

rm(fit.zcov,fit.nbzcov)


# New York: Re-set WD -----------------------------------------------------

library("rstudioapi")
# Either setwd() to the source file location, or run the following:
setwd(dirname(getActiveDocumentContext()$path))



# Portland ----------------------------------------------------------------
# Models for the Portland crime data at the census tract level.

# Portland: Set-Up --------------------------------------------------------
# Change the working directory as we want to save the data in separate folders for each city. We then load the census tract data produced in DATA/RAW_DATA/SHAPEFILES/CENSUS_TRACTS and project these to UTM coordinates.

# Portland Data
setwd("./Portland")

# Load count and point data
load("../../../PROCESSED_DATA/CRIME/COUNT_DATA_CENSUS_TRACTS/Portland/P2015CTCountData_projFinal.rda")
load("../../../PROCESSED_DATA/CRIME/COUNT_DATA_CENSUS_TRACTS/Portland/P2015CTSFCountData_projFinal.rda")
p_hom <- readRDS("../../../PROCESSED_DATA/CRIME/POINT_PATTERN/p_hom_sf.rds")
p_gta <- readRDS("../../../PROCESSED_DATA/CRIME/POINT_PATTERN/p_gta_sf.rds")

# Extract 2015 incidents
p_gta15 <- p_gta[which(p_gta$Y==2015),]
p_hom15 <- p_hom[which(p_hom$Y==2015),]

hom_countdf <- ct_homcount.df
gta_countdf <- ct_gtacount.df

# Portland Census Tracts
load("../../../PROCESSED_DATA/SHAPEFILES/CENSUS_TRACTS/PCityCT.rda")
pcity_boundary <- st_union(ct_P)
pcity_proj <- lwgeom::st_transform_proj(pcity_boundary,"epsg:32610")
pcity_sp <- as(pcity_proj,"Spatial")

# Transform the window
bbox <- pcity_sp@bbox
pcity_spshift <- elide(pcity_sp,shift=-c(bbox[1,1],bbox[2,1]))
bbox_shift <- pcity_spshift@bbox
pcity_spscale <- elide(pcity_spshift,scale=max(bbox_shift)/1e4)
W <- as.owin.SpatialPolygons((pcity_spscale))

# Census Tracts: project to UTM for Ripley's K
ct_P.proj <- lwgeom::st_transform_proj(ct_P,"epsg:32610")
ct_P.sp <- as(ct_P.proj, "Spatial")
ct_P.spshift <- elide(ct_P.sp,shift=-c(bbox[1,1],bbox[2,1]))
ct_P.spscale <- elide(ct_P.spshift,scale=max(bbox_shift)/1e4)
ct_P.sfscale <- st_as_sf(ct_P.spscale)


# Portland: Point Patterns ------------------------------------------------
# Create the necessary point patterns and calculate standardised socio-economic variables.

# Transform the crime point data
bbox <- pcity_sp@bbox
bbox_shift <- pcity_spshift@bbox
p_hom.proj <- lwgeom::st_transform_proj(p_hom15,"epsg:32610")
p_hom.projsp <- as(p_hom.proj,"Spatial")
p_hom_spshift <- elide(p_hom.projsp,bb=bbox,shift=-c(bbox[1,1],bbox[2,1]))
p_hom_spscale <- elide(p_hom_spshift,bb=bbox_shift,scale=max(bbox_shift)/1e4)
bbox <- pcity_sp@bbox
bbox_shift <- pcity_spshift@bbox
p_gta.proj <- lwgeom::st_transform_proj(p_gta15,"epsg:32610")
p_gta.projsp <- as(p_gta.proj,"Spatial")
p_gta_spshift <- elide(p_gta.projsp,bb=bbox,shift=-c(bbox[1,1],bbox[2,1]))
p_gta_spscale <- elide(p_gta_spshift,bb=bbox_shift,scale=max(bbox_shift)/1e4)

# Generate point patterns over the projected window
p_hom.proj.loc <- p_hom_spscale@coords
phom.ppp <- as.ppp(p_hom.proj.loc,W)
p_gta.proj.loc <- p_gta_spscale@coords
pgta.ppp <- as.ppp(p_gta.proj.loc,W) # 1 point lying outside window

# Plot rejected points
pdf("ExcludedPointsP.pdf",h=8,w=10)
par(mfrow=c(1,2))
plot(phom.ppp,pch=19,cex=0.1,main="Homicide PP - Excluded Points")
a <- attributes(phom.ppp)
points(a$rejects$x,a$rejects$y,col="magenta",pch=19,cex=0.5)
plot(pgta.ppp,pch=19,cex=0.1,main="GTA PP - Excluded Points")
a <- attributes(pgta.ppp)
points(a$rejects$x,a$rejects$y,col="magenta",pch=19,cex=0.5)
dev.off()


# Standardise the population and income
hom_countdf$zpop <- (hom_countdf$pop - mean(hom_countdf$pop))/sd(hom_countdf$pop)
hom_countdf$zinc <- (hom_countdf$inc - mean(hom_countdf$inc))/sd(hom_countdf$inc)
gta_countdf$zpop <- (gta_countdf$pop - mean(gta_countdf$pop))/sd(gta_countdf$pop)
gta_countdf$zinc <- (gta_countdf$inc - mean(gta_countdf$inc))/sd(gta_countdf$inc)


# Portland: Ripley's K ----------------------------------------------------
# Estimate Ripley's K (homogeneous and inhomogeneous) for the Portland crime data and compare to the theoretical form of the function.

# Resolution for the rasters (roughly 200m-by-200m)
nr <- 129 # y axis cuts
nc <- 190 # x axis cuts

# Load covariate data for creation of log-intensity function of inhomogensous Poisson process using ppm() estimates for coefficients
P_ctpop_15 <- readRDS("../../../PROCESSED_DATA/COVARIATES/P_CTPop_15_proj.rds")
P_ctinc_15 <- readRDS("../../../PROCESSED_DATA/COVARIATES/P_CTInc_15_0imp_proj.rds")

geo2ctP <- function(x){ct_P$TRACTCE[which(ct_P$GEOID==x)]}

# Project census tracts to UTM
ct_P.proj <- lwgeom::st_transform_proj(ct_P,"epsg:32610")

# Assign covariates to census tracts
count.cells <- ct_P.proj
count.cells$pop <- P_ctpop_15$pop[match(count.cells$TRACTCE,as.character(sapply(1:length(P_ctpop_15$geoid2),function(i){geo2ctP(P_ctpop_15$geoid2[i])})))]
count.cells$inc <- P_ctinc_15$inc[match(count.cells$TRACTCE,as.character(sapply(1:length(P_ctinc_15$geoid2),function(i){geo2ctP(P_ctinc_15$geoid2[i])})))]
W.proj <- as.owin.SpatialPolygons((pcity_sp))

# We now want to generate the covariates over a fine grid, but not interpolated, just assigning the values to the census tracts the cells lie within.
g <- quadrats(W.proj,nx=nc,ny=nr)
p_cells <- as(g,"SpatialPolygons")
p_cells_centre <- t(sapply(p_cells@polygons, function(x){x@Polygons[[1]]@labpt}))
cells.centre.df <- data.frame(x=p_cells_centre[,1],y=p_cells_centre[,2])
coordinates(cells.centre.df) <- ~ x + y
cells.centre.df <- st_as_sf(cells.centre.df)
st_crs(cells.centre.df) <- st_crs(count.cells)

# Assign necessary values
cttocell <- st_intersects(cells.centre.df,count.cells)
cells.centre.df$pop[which(lengths(cttocell)!=0)] <- count.cells$pop[unlist(cttocell)]
# NA to 0
cells.centre.df$pop[is.na(cells.centre.df$pop)] <- 0
cells.centre.df$inc[which(lengths(cttocell)!=0)] <- count.cells$inc[unlist(cttocell)]
# NA to 0
cells.centre.df$inc[is.na(cells.centre.df$inc)] <- 0

# Calculate standardised variables and then shift and scale as with the window, so that unit change in x or y direction is related to a real distance shift of 10km and the bottom=left corner of the bounding box of the Portland polygon, lies on the origin, (0,0)
cells.centre.df <- as(cells.centre.df,"Spatial")
cells.centre.df$zpop <- (cells.centre.df$pop - mean(cells.centre.df$pop))/sd(cells.centre.df$pop)
cells.centre.df$zinc <- (cells.centre.df$inc - mean(cells.centre.df$inc))/sd(cells.centre.df$inc)
cells.centre.dfshift <- elide(cells.centre.df,bb=bbox,shift=-c(bbox[1,1],bbox[2,1]))
cells.centre.dfscale <- elide(cells.centre.dfshift,bb=bbox_shift,scale=max(bbox_shift)/1e4)

# Create raster which can then be used to produce pixel images
w.r <- raster(pcity_spscale,nrow=nr,ncol=nc)
pop.ras <- rasterize(cells.centre.dfscale,w.r,field=cells.centre.dfscale$pop)
inc.ras <- rasterize(cells.centre.dfscale,w.r,field=cells.centre.dfscale$inc)
zpop.ras <- rasterize(cells.centre.dfscale,w.r,field=cells.centre.dfscale$zpop)
zinc.ras <- rasterize(cells.centre.dfscale,w.r,field=cells.centre.dfscale$zinc)
int.ras <- rasterize(cells.centre.dfscale,w.r,field=rep(1,length(cells.centre.dfscale)))

# Need a base to cover the entire window, including the region outside the window, otherwise there are warnings about the covariates not covering the LA polygon when fitting the ppm() model
b.r <- raster(extent(pop.ras),nrow=pop.ras@nrows,ncol=pop.ras@ncols)
bbox.base <- owin(xrange=c(extent(pop.ras)[1],extent(pop.ras)[2]),yrange=c(extent(pop.ras)[3],extent(pop.ras)[4]))
gc.df <- gridcenters(bbox.base,1e3,1e3)
gc.df <- data.frame(x=gc.df$x,y=gc.df$y)
coordinates(gc.df) <- ~ x + y
b.ras <- rasterize(gc.df,b.r,rep(0,length(gc.df)))

# Generate the pixel images
popb.im <- as.im(merge(pop.ras,b.ras))
incb.im <- as.im(merge(inc.ras,b.ras))
zpopb.im <- as.im(merge(zpop.ras,b.ras))
zincb.im <- as.im(merge(zinc.ras,b.ras))
intb.im <- as.im(merge(int.ras,b.ras))

# Portland: Motor Vehicle Theft
pdf("RipleysK_pgta_proj_190129.pdf",h=8,w=15,pointsize = 14)
par(mfrow=c(1,2))
plot(Kest(pgta.ppp,correction="border"))
mod.gta <- ppm(pgta.ppp ~ c1 + c2, covariates=list(c1=popb.im,c2=incb.im))
lambda.im <- exp(intb.im*mod.gta$coef[[1]] + popb.im*mod.gta$coef[[2]] + incb.im*mod.gta$coef[[3]])
plot(Kinhom(pgta.ppp,lambda=lambda.im[W,drop=FALSE],correction="border"))
dev.off()
# standardissed covariates
pdf("RipleysK_pgtaz_proj_190129.pdf",h=8,w=15,pointsize = 14)
par(mfrow=c(1,2))
plot(Kest(pgta.ppp,correction="border"))
mod.gta <- ppm(pgta.ppp ~ c1 + c2, covariates=list(c1=zpopb.im,c2=zincb.im))
lambda.im <- exp(intb.im*mod.gta$coef[[1]] + zpopb.im*mod.gta$coef[[2]] + zincb.im*mod.gta$coef[[3]])
plot(Kinhom(pgta.ppp,lambda=lambda.im[W,drop=FALSE],correction="border"))
dev.off()


# Portland: Motor Vehicle Theft GLM ---------------------------------------
# Fit Poisson and Negative Binomial models to the motor vehicle theft crime data at census tract-level for Portland

# Portland: Motor Vehicle Theft: Poisson GLM
fit.zcov <- stan_glm(gta ~ 1 + zpop + zinc, data = gta_countdf, family = poisson, offset=log(ctarea/1e8), prior = normal(0,sqrt(1000)), prior_intercept = normal(0,sqrt(1000)),chains=4,iter=25000)
summary(fit.zcov,digits=10)

# Portland: Motor Vehicle Theft: Negative Binomial GLM
fit.nbzcov <- stan_glm(gta ~ 1 + zpop + zinc, data = gta_countdf, family = neg_binomial_2, offset=log(ctarea/1e8), prior = normal(0,sqrt(1000)), prior_intercept = normal(0,sqrt(1000)),chains=4,iter=25000)
summary(fit.nbzcov,digits=10)

# Save motor vehicle theft models
save(fit.zcov,fit.nbzcov,file="PGTAmcGLMS_area.rda")

rm(fit.zcov,fit.nbzcov)

# Portland: Re-set WD -----------------------------------------------------

library("rstudioapi")
# Either setwd() to the source file location, or run the following:
setwd(dirname(getActiveDocumentContext()$path))



# Plotting Results --------------------------------------------------------
# Want to produce plots to visually inspect the results of the models for the different crimes for each city as well as calculating the credible intervals.

library("rstudioapi")
# Either setwd() to the source file location, or run the following:
setwd(dirname(getActiveDocumentContext()$path))


# Error Plots on Census Tracts --------------------------------------------
# Plot model errors onto the census tracts to visually inspect their spatial behaviour.

# Los Angeles: Error Plots ------------------------------------------------
# Plots of the errors from the models in LA plotted onto the census tracts.

# LA Data
setwd("./LA")
 
# LA Census Tracts
load("../../../PROCESSED_DATA/SHAPEFILES/CENSUS_TRACTS/LACityCT.rda")

# Project to UTM: the union - which will form a boundary for the city
lacity_boundary <- st_union(ct_LA)
lacity_proj <- lwgeom::st_transform_proj(lacity_boundary,"epsg:32611")
lacity_sp <- as(lacity_proj,"Spatial")

# Transform the window
bbox <- lacity_sp@bbox
lacity_spshift <- elide(lacity_sp,shift=-c(bbox[1,1],bbox[2,1]))
bbox_shift <- lacity_spshift@bbox
lacity_spscale <- elide(lacity_spshift,scale=max(bbox_shift)/1e4)
W <- as.owin.SpatialPolygons((lacity_spscale))

# For plotting spatial results, get transformed census tracts - not the window, but want each census tract
ct_LA.proj <- lwgeom::st_transform_proj(ct_LA,"epsg:32611")
ct_LA.sp <- as(ct_LA.proj, "Spatial")
ct_LA.spshift <- elide(ct_LA.sp,shift=-c(bbox[1,1],bbox[2,1]))
ct_LA.spscale <- elide(ct_LA.spshift,scale=max(bbox_shift)/1e4)
ct_LA.sfscale <- st_as_sf(ct_LA.spscale)

# Los Angeles: Error Plots: Homicide
load("LAHommcGLMS_area.rda")

# Poisson
ct_LA.temp <- ct_LA.sfscale
ct_LA.temp$poerr <- fit.zcov$residuals
pdf("LAHomPoissonCTError2_area.pdf",h=8,w=10,pointsize = 14)
par(oma=c(0,0,3,0))
plot(ct_LA.temp["poerr"],main="",cex=20)
title("Residuals from Poisson Model for\n Homicides in LA",cex.main=1.5,outer=T) # par and outer added 04/10/2021
dev.off()

# Negative Binomial
ct_LA.temp$nberr <- fit.nbzcov$residuals
pdf("LAHomNBCTError2_area.pdf",h=8,w=10,pointsize = 14)
par(oma=c(0,0,3,0))
plot(ct_LA.temp["nberr"],main="",cex=20)
title("Residuals from Negative Binomial Model\n for Homicides in LA",cex.main=1.5,outer=T) # par and outer added 04/10/2021
dev.off()


# Los Angeles: Error Plots: Motor Vehicle Theft
load("LAGTAmcGLMS_area.rda")

# Poisson
ct_LA.temp <- ct_LA.sfscale
ct_LA.temp$poerr <- fit.zcov$residuals
pdf("LAGTAPoissonCTError2_area.pdf",h=8,w=10,pointsize = 14)
par(oma=c(0,0,3,0))
plot(ct_LA.temp["poerr"],main="")
title(main="Residuals from Poisson Model for\n Motor Vehicle Theft in LA",outer=T,cex.main=1.5)
dev.off()

# Negative Binomial
ct_LA.temp$nberr <- fit.nbzcov$residuals
pdf("LAGTANBCTError2_area.pdf",h=8,w=10,pointsize = 14)
par(oma=c(0,0,3,0))
plot(ct_LA.temp["nberr"],main="")
title(main="Residuals from Negative Binomial Model for\n Motor Vehicle Theft in LA",outer=T,cex.main=1.5)
dev.off()


library("rstudioapi")
# Either setwd() to the source file location, or run the following:
setwd(dirname(getActiveDocumentContext()$path))


# New York: Error Plots ---------------------------------------------------
# Plots of the errors from the models in NYC plotted onto the census tracts.

# NYC Data
setwd("./NYC")

# NYC Census Tracts
load("../../../PROCESSED_DATA/SHAPEFILES/CENSUS_TRACTS/NYCityCT.rda")

# Project to UTM: the union - which will form a boundary for the city
nycity_boundary <- st_union(ct_NY)
nycity_proj <- lwgeom::st_transform_proj(nycity_boundary,"epsg:32618")
nycity_sp <- as(nycity_proj,"Spatial") #st_transform for crs...

# Transform the window
bbox <- nycity_sp@bbox
nycity_spshift <- elide(nycity_sp,shift=-c(bbox[1,1],bbox[2,1]))
bbox_shift <- nycity_spshift@bbox
nycity_spscale <- elide(nycity_spshift,scale=max(bbox_shift)/1e4)
W <- as.owin.SpatialPolygons((nycity_spscale))

# For plotting spatial results, get transformed census tracts - not the window, but want each census tract
ct_NY.proj <- lwgeom::st_transform_proj(ct_NY,"epsg:32618")
ct_NY.sp <- as(ct_NY.proj, "Spatial")
ct_NY.spshift <- elide(ct_NY.sp,shift=-c(bbox[1,1],bbox[2,1]))
ct_NY.spscale <- elide(ct_NY.spshift,scale=max(bbox_shift)/1e4)
ct_NY.sfscale <- st_as_sf(ct_NY.spscale)


# New York: Error Plots: Homicide
load("NYCHommcGLMS_area.rda")

# Poisson
ct_NY.temp <- ct_NY.sfscale
ct_NY.temp$poerr <- fit.zcov$residuals
pdf("NYCHomPoissonCTError2_area.pdf",h=8,w=10,pointsize = 14)
par(oma=c(0,0,3,0))
plot(ct_NY.temp["poerr"],main="")
title(main="Residuals from Poisson Model for\n Homicides in New York",cex.main=1.5,outer=T) # outer=T added 04/10/2021
dev.off()

# Negative Binomial
ct_NY.temp$nberr <- fit.nbzcov$residuals
pdf("NYCHomNBCTError2_area.pdf",h=8,w=10,pointsize = 14)
par(oma=c(0,0,3,0))
plot(ct_NY.temp["nberr"],main="")
title(main="Residuals from Negative Binomial Model\n for Homicides in New York",cex.main=1.5,outer=T) # outer=T added 04/10/2021
dev.off()

# New York: Error Plots: Motor Vehicle Theft
load("NYCGTAmcGLMS_area.rda")

# Poisson
ct_NY.temp <- ct_NY.sfscale
ct_NY.temp$poerr <- fit.zcov$residuals
pdf("NYCGTAPoissonCTError2_area.pdf",h=8,w=10,pointsize = 14)
par(oma=c(0,0,3,0))
plot(ct_NY.temp["poerr"],main="")
title(main="Residuals from Poisson Model for\n Motor Vehicle Thefts in New York",outer=T,cex.main=1.5)
dev.off()

# Negative Binomial
ct_NY.temp$nberr <- fit.nbzcov$residuals
pdf("NYCGTANBCTError2_area.pdf",h=8,w=10,pointsize = 14)
par(oma=c(0,0,3,0))
plot(ct_NY.temp["nberr"],main="")
title(main="Residuals from Negative Binomial Model for\n Motor Vehicle Thefts in New York",outer=T,cex.main=1.5)
dev.off()

rm(list=ls())

library("rstudioapi")
# Either setwd() to the source file location, or run the following:
setwd(dirname(getActiveDocumentContext()$path))


# Portland: Error Plots ---------------------------------------------------
# Plots of the errors from the models in Portland plotted onto the census tracts.

# Portland Data
setwd("./Portland")

# Portland Census Tract
load("../../../PROCESSED_DATA/SHAPEFILES/CENSUS_TRACTS/PCityCT.rda")

# Project to UTM: the union - which will form a boundary for the city
pcity_boundary <- st_union(ct_P)
pcity_proj <- lwgeom::st_transform_proj(pcity_boundary,"epsg:32610")
pcity_sp <- as(pcity_proj,"Spatial")

# Transform the window
bbox <- pcity_sp@bbox
pcity_spshift <- elide(pcity_sp,shift=-c(bbox[1,1],bbox[2,1]))
bbox_shift <- pcity_spshift@bbox
pcity_spscale <- elide(pcity_spshift,scale=max(bbox_shift)/1e4)
W <- as.owin.SpatialPolygons((pcity_spscale))

# For plotting spatial results, get transformed census tracts - not the window, but want each census tract
ct_P.proj <- lwgeom::st_transform_proj(ct_P,"epsg:32610")
ct_P.sp <- as(ct_P.proj, "Spatial")
ct_P.spshift <- elide(ct_P.sp,shift=-c(bbox[1,1],bbox[2,1]))
ct_P.spscale <- elide(ct_P.spshift,scale=max(bbox_shift)/1e4)
ct_P.sfscale <- st_as_sf(ct_P.spscale)

# Portland: Error Plots: Motor Vehicle Theft
load("PGTAmcGLMS_area.rda")

# Poisson
ct_P.temp <- ct_P.sfscale
ct_P.temp$poerr <- fit.zcov$residuals
pdf("PGTAPoissonCTError2_area.pdf",h=8,w=10,pointsize = 14)
par(oma=c(0,0,3,0))
plot(ct_P.temp["poerr"],main="")
title(main="Residuals from Poisson Model for\n Motor Vehicle Theft in Portland",outer=T,cex.main=1.5)
dev.off()

# Negative Binomial
ct_P.temp$nberr <- fit.nbzcov$residuals
pdf("PGTANBCTError2_area.pdf",h=8,w=10,pointsize = 14)
par(oma=c(0,0,3,0))
plot(ct_P.temp["nberr"],main="")
title(main="Residuals from Negative Binomial Model for\n Motor Vehicle Theft in Portland",outer=T,cex.main=1.5)
dev.off()

library("rstudioapi")
# Either setwd() to the source file location, or run the following:
setwd(dirname(getActiveDocumentContext()$path))


# Credible Interval Plots for All Cities ----------------------------------


library("rstudioapi")
# Either setwd() to the source file location, or run the following:
setwd(dirname(getActiveDocumentContext()$path))

# First create and fill up a data frame with all of the necessary values for each Poisson and Negative Binomial model for each crime and each city, then ggplot can be used to plot the values as we want.

n.c <- 3; n.cr <- 2; n.m <- 2; n.p <- 3
df <- data.frame(city=rep(c("LA","NYC","P"),each=(n.cr*n.m*n.p+n.cr)),crime=rep(rep(c("Hom","GTA"),each=(n.m*n.p+1)),n.c),model=rep(c(rep("Poisson",n.p),rep("NB",n.p+1)),n.c*n.cr),parameter=rep(c(rep(c("b0","b1","b2"),2),"phi_inv"),n.c*n.cr),mean=rep(NA,n.c*n.cr*n.m*n.p+n.c*2),c.l=rep(NA,n.c*n.cr*n.m*n.p+n.c*2),c.u=rep(NA,n.c*n.cr*n.m*n.p+n.c*2))

# LA
ind.c <- 1 # City=LA
ind.cr <- 1 # Crime=Hom
load("LA/LAHommcGLMS_area.rda")
ind.m <- 1 # Model=Poisson
inds.start <- (ind.c-1)*(n.cr*n.m*n.p+n.cr) + (ind.cr-1)*(n.m*n.p+1) + (ind.m-1)*n.p + 1
inds.end <- inds.start + 2
df$mean[inds.start:inds.end] <- unname(fit.zcov$stan_summary[1:3,1])
c.b <- rstanarm::posterior_interval(fit.zcov,0.95) # 95
df$c.l[inds.start:inds.end] <- unname(c.b[,1])
df$c.u[inds.start:inds.end] <- unname(c.b[,2])
ind.m <- 2 # MOdel=NB
inds.start <- inds.end + 1
inds.end <- inds.start + 3
df$mean[inds.start:inds.end] <- unname(fit.nbzcov$stan_summary[1:4,1])
c.b <- rstanarm::posterior_interval(fit.nbzcov,0.95) # 95
df$c.l[inds.start:inds.end] <- unname(c.b[,1])
df$c.u[inds.start:inds.end] <- unname(c.b[,2])

ind.cr <- 2
load("LA/LAGTAmcGLMS_area.rda")
ind.m <- 1 # Model=Poisson
inds.start <- (ind.c-1)*(n.cr*n.m*n.p+n.cr) + (ind.cr-1)*(n.m*n.p+1) + (ind.m-1)*n.p + 1
inds.end <- inds.start + 2
df$mean[inds.start:inds.end] <- unname(fit.zcov$stan_summary[1:3,1])
c.b <- rstanarm::posterior_interval(fit.zcov,0.95) # 95
df$c.l[inds.start:inds.end] <- unname(c.b[,1])
df$c.u[inds.start:inds.end] <- unname(c.b[,2])
ind.m <- 2 # MOdel=NB
inds.start <- inds.end + 1
inds.end <- inds.start + 3
df$mean[inds.start:inds.end] <- unname(fit.nbzcov$stan_summary[1:4,1])
c.b <- rstanarm::posterior_interval(fit.nbzcov,0.95) # 95
df$c.l[inds.start:inds.end] <- unname(c.b[,1])
df$c.u[inds.start:inds.end] <- unname(c.b[,2])
rm(fit.zcov)
rm(fit.nbzcov)

# NYC
ind.c <- 2 # City=NYC
ind.cr <- 1 # Crime=Hom
load("NYC/NYCHommcGLMS_area.rda")
ind.m <- 1 # Model=Poisson
inds.start <- (ind.c-1)*(n.cr*n.m*n.p+n.cr) + (ind.cr-1)*(n.m*n.p+1) + (ind.m-1)*n.p + 1
inds.end <- inds.start + 2
df$mean[inds.start:inds.end] <- unname(fit.zcov$stan_summary[1:3,1])
c.b <- rstanarm::posterior_interval(fit.zcov,0.95) # 95
df$c.l[inds.start:inds.end] <- unname(c.b[,1])
df$c.u[inds.start:inds.end] <- unname(c.b[,2])
ind.m <- 2 # MOdel=NB
inds.start <- inds.end + 1
inds.end <- inds.start + 3
df$mean[inds.start:inds.end] <- unname(fit.nbzcov$stan_summary[1:4,1])
c.b <- rstanarm::posterior_interval(fit.nbzcov,0.95) # 95
df$c.l[inds.start:inds.end] <- unname(c.b[,1])
df$c.u[inds.start:inds.end] <- unname(c.b[,2])

ind.cr <- 2
load("NYC/NYCGTAmcGLMS_area.rda")
ind.m <- 1 # Model=Poisson
inds.start <- (ind.c-1)*(n.cr*n.m*n.p+n.cr) + (ind.cr-1)*(n.m*n.p+1) + (ind.m-1)*n.p + 1
inds.end <- inds.start + 2
df$mean[inds.start:inds.end] <- unname(fit.zcov$stan_summary[1:3,1])
c.b <- rstanarm::posterior_interval(fit.zcov,0.95) # 95
df$c.l[inds.start:inds.end] <- unname(c.b[,1])
df$c.u[inds.start:inds.end] <- unname(c.b[,2])
ind.m <- 2 # MOdel=NB
inds.start <- inds.end + 1
inds.end <- inds.start + 3
df$mean[inds.start:inds.end] <- unname(fit.nbzcov$stan_summary[1:4,1])
c.b <- rstanarm::posterior_interval(fit.nbzcov,0.95) # 95
df$c.l[inds.start:inds.end] <- unname(c.b[,1])
df$c.u[inds.start:inds.end] <- unname(c.b[,2])
rm(fit.zcov)
rm(fit.nbzcov)

# Portland
ind.c <- 3 # City=Portland
ind.cr <- 1 # Crime=Hom
ind.m <- 1 # Model=Poisson
inds.start <- (ind.c-1)*(n.cr*n.m*n.p+n.cr) + (ind.cr-1)*(n.m*n.p+1) + (ind.m-1)*n.p + 1
inds.end <- inds.start + 2
df$mean[inds.start:inds.end] <- NA
df$c.l[inds.start:inds.end] <- NA
df$c.u[inds.start:inds.end] <- NA
ind.m <- 2 # MOdel=NB - NO NEGATIVE BINOMIAL MODEL FIT FOR PORTLAND HOMICIDE
inds.start <- inds.end + 1
inds.end <- inds.start + 3
df$mean[inds.start:inds.end] <- NA
c.b <- rstanarm::posterior_interval(fit.nbzcov,0.95) # 95
df$c.l[inds.start:inds.end] <- NA
df$c.u[inds.start:inds.end] <- NA

ind.cr <- 2
load("Portland/PGTAmcGLMS_area.rda")
ind.m <- 1 # Model=Poisson
inds.start <- (ind.c-1)*(n.cr*n.m*n.p+n.cr) + (ind.cr-1)*(n.m*n.p+1) + (ind.m-1)*n.p + 1
inds.end <- inds.start + 2
df$mean[inds.start:inds.end] <- unname(fit.zcov$stan_summary[1:3,1])
c.b <- rstanarm::posterior_interval(fit.zcov,0.95) # 95
df$c.l[inds.start:inds.end] <- unname(c.b[,1])
df$c.u[inds.start:inds.end] <- unname(c.b[,2])
ind.m <- 2 # MOdel=NB
inds.start <- inds.end + 1
inds.end <- inds.start + 3
df$mean[inds.start:inds.end] <- unname(fit.nbzcov$stan_summary[1:4,1])
c.b <- rstanarm::posterior_interval(fit.nbzcov,0.95) # 95
df$c.l[inds.start:inds.end] <- unname(c.b[,1])
df$c.u[inds.start:inds.end] <- unname(c.b[,2])
rm(fit.zcov)
rm(fit.nbzcov)

# Save
saveRDS(df,file="InitialModelsMCDF_area3Cities.rds")


df <- readRDS("InitialModelsMCDF_area3Cities.rds")
param_lab <- c(b0="Intercept",b1="Population",b2="Average Income",phi_inv="Inverse Dispersion")
crime_lab <- c(Hom="Homicide",GTA="Motor Vehicle Theft")
p <- ggplot(df,aes(x=mean,y=city,colour=model)) + geom_point(data=df,size=2.5,aes(shape=model)) + geom_errorbarh(data=df,height=0.25,aes(xmin=c.l,xmax=c.u)) + facet_grid(crime~parameter,scales="free_x",labeller=labeller(parameter=param_lab,crime=crime_lab)) + theme_gray() + theme(strip.text = element_text(size = 20),axis.title = element_text(size=20),axis.text = element_text(size=18),legend.title=element_text(size=20),legend.text = element_text(size=18)) + labs(colour="Model",shape="Model") + xlab("Mean") + ylab("City")
ggsave(plot=p,filename="InitialModelsMCPlot_area3Cities.pdf",width=14,height=8)



# Posterior Credible Interval Calculations --------------------------------
# Want to plot all of the credible intervals for all models and cities onto one plot.

library("rstudioapi")
# Either setwd() to the source file location, or run the following:
setwd(dirname(getActiveDocumentContext()$path))

# Los Angeles: Credible Intervals -----------------------------------------
# Calculate the posterior credible intervals for LA models

# Los Angeles: Credible Intervals: Homicide
load("LA/LAHommcGLMS_area.rda")
posterior_interval(fit.zcov,prob=0.95)
posterior_interval(fit.nbzcov,prob=0.95)
rm(list=ls())

# Los Angeles: Credible Intervals: Motor Vehicle Theft
load("LA/LAGTAmcGLMS_area.rda")
posterior_interval(fit.zcov,prob=0.95)
posterior_interval(fit.nbzcov,prob=0.95)

rm(list=ls())


# New York: Credible Intervals --------------------------------------------
# Calculate the posterior credible intervals for NYC models

# New York: Credible Intervals: Homicide
load("NYC/NYCHommcGLMS_area.rda")
posterior_interval(fit.zcov,prob=0.95)
posterior_interval(fit.nbzcov,prob=0.95)
rm(list=ls())

# New York: Credible Intervals: Motor Vehicle Theft
load("NYC/NYCGTAmcGLMS_area.rda")
posterior_interval(fit.zcov,prob=0.95)
posterior_interval(fit.nbzcov,prob=0.95)
rm(list=ls())


# Portland: Credible Intervals --------------------------------------------
# Calculate the posterior credible intervals for Portland models

# Portland: Credible Intervals: Motor Vehicle Theft
load("Portland/PGTAmcGLMS_area.rda")
posterior_interval(fit.zcov,prob=0.95)
posterior_interval(fit.nbzcov,prob=0.95)
rm(list=ls())



# Model Posterior Plots ---------------------------------------------------
# In this section we produce some summary plots for the models for the crime data at the census tract level for LA, NYC and Portland.

library("rstudioapi")
# Either setwd() to the source file location, or run the following:
setwd(dirname(getActiveDocumentContext()$path))

# Set the theme for the plots, to ensure that the size of the text is legible
bayesplot_theme_set()
bayesplot_theme_update(strip.text = element_text(size = 40),legend.text = element_text(size=30),legend.title = element_text(size=40),axis.title = element_text(size=40),axis.text = element_text(size=30),axis.text.x = element_text(angle=45,vjust=0.5))


# Los Angeles: Model Plots ------------------------------------------------
# Load up necessary data in order to plot the results of the models for the LA crime data (Homicide and Motor Vehicle Theft)

# LA Data
setwd("./LA")

#  Load count and point data (latter is not needed here!)
load("../../../PROCESSED_DATA/CRIME/COUNT_DATA_CENSUS_TRACTS/LA/LA2015CTCountData_projFinal.rda")
load("../../../PROCESSED_DATA/CRIME/COUNT_DATA_CENSUS_TRACTS/LA/LA2015CTSFCountData_projFinal.rda")
la_hom <- readRDS("../../../PROCESSED_DATA/CRIME/POINT_PATTERN/la_hom_sf.rds") # not really needed
la_gta <- readRDS("../../../PROCESSED_DATA/CRIME/POINT_PATTERN/la_gta_sf.rds") # not really needed

# Extract 2015 data
la_gta15 <- la_gta[which(la_gta$Y==2015),] # not really needed
la_hom15 <- la_hom[which(la_hom$Y==2015),] # not really needed

hom_countdf <- ct_homcount.df
gta_countdf <- ct_gtacount.df


# Los Angeles: Model Plots: Homicide --------------------------------------
# Plots for the model results for the LA Homicide GLMs

load("LAHommcGLMS_area.rda")
set.seed(125)

# Los Angeles: Model Plots: Homicide: Poisson
posterior <- as.array(fit.zcov)
plot_ltrace <- mcmc_trace(posterior)
plot_ldens <- mcmc_dens(posterior)
prop_zero <- function(y) mean(y == 0)
plot_lzero <- pp_check(fit.zcov, plotfun = "stat", stat="prop_zero",binwidth=0.005)
plot_lmeansd <- pp_check(fit.zcov, plotfun = "stat_2d", stat = c("mean", "sd"))
plot_lhist <- pp_check(fit.zcov,plotfun = "hist",nreps = 5)
y.rep <- posterior_predict(fit.zcov)
y <- hom_countdf$hom
 
pdf("ctlahom_zcov_tracemc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_ltrace # trace plot
dev.off()

pdf("ctlahom_zcov_densmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_ldens # density of posterior samples
dev.off()

pdf("ctlahom_zcov_zeromc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_lzero # proportion of predicted zeros compared to true proportion of zeros
dev.off()

pdf("ctlahom_zcov_meansdmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_lmeansd # plot of predicted mean vs predicted sd
dev.off()

pdf("ctlahom_zcov_histmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_lhist # histogram plots of predicted y
dev.off()

pdf("ctlahom_zcov_allmc_proj3_area.pdf",h=15,w=20,pointsize=20)
grid.arrange(plot_ltrace,plot_ldens,plot_lzero) # trace, density and prop. zero
dev.off()

pdf("ctlahom_zcov_aveprederrmc_proj3_area.pdf",h=15,w=20,pointsize=20)
pp_check(fit.zcov, plotfun = "scatter_avg") # average predicted y v y
dev.off()

pdf("ctlahom_zcov_aveprederr2mc_proj3_area.pdf",h=15,w=20,pointsize=20)
ppc_error_scatter_avg(y,y.rep) # average predictive error v y
dev.off()

pdf("ctlahom_zcov_aveprederr2zerointmc_proj3_area.pdf",h=15,w=20,pointsize=20)
ppc_error_scatter_avg(y,y.rep) + geom_vline(aes(xintercept=0),col="red") # average predictive error v y + vertical line at x=0
dev.off()

pdf("ctlahom_zcov_aveyvymc_proj3_area.pdf",h=15,w=20,pointsize=20)
pp_check(fit.zcov, plotfun = "scatter_avg") # double up by accident
dev.off()


pdf("ctlahom_zcov_aveyvyaveprederrmc_proj3_area.pdf",h=15,w=20,pointsize=20)
grid.arrange(pp_check(fit.zcov, plotfun = "scatter_avg"),ppc_error_scatter_avg(y,y.rep)) # average predicted y v y + average predictive error v y
dev.off()

# Los Angeles: Model PLots: Homicide: Negative Binomial Model
posteriornb <- as.array(fit.nbzcov)
plot_tracenb <- mcmc_trace(posteriornb)
prop_zero <- function(y) mean(y == 0)
plot_zeronb <- pp_check(fit.nbzcov, plotfun = "stat", stat="prop_zero",binwidth=0.005)
plot_meansdnb <- pp_check(fit.nbzcov, plotfun = "stat_2d", stat = c("mean", "sd"))
plot_histnb <- pp_check(fit.nbzcov,plotfun = "hist",nreps = 5)
plot_densnb <- mcmc_dens(posteriornb)
y.repnb <- posterior_predict(fit.nbzcov)
y <- hom_countdf$hom

pdf("ctlahom_nbzcov_tracemc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_tracenb # trace plot
dev.off()

pdf("ctlahom_nbzcov_zeromc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_zeronb # proportion of predicted zeros v true proportion of zeros
dev.off()

pdf("ctlahom_nbzcov_meansdmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_meansdnb # predicted means v predicted sd
dev.off()

pdf("ctlahom_nbzcov_histmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_histnb # histogram of predicted y
dev.off()

pdf("ctlahom_nbzcov_allmc_proj3_area.pdf",h=15,w=20,pointsize=20)
grid.arrange(plot_tracenb,plot_densnb,plot_zeronb) # trace, density and prop. zeros
dev.off()

pdf("ctlahom_nbzcov_aveprederr2mc_proj3_area.pdf",h=15,w=20,pointsize=20)
ppc_error_scatter_avg(y,y.repnb) # average predictive error v y
dev.off()

pdf("ctlahom_nbzcov_aveprederr2zerointmc_proj3_area.pdf",h=15,w=20,pointsize=20)
ppc_error_scatter_avg(y,y.repnb) + geom_vline(aes(xintercept=0),col="red") # average predictive error v y + vertical line at x=0
dev.off()

pdf("ctlahom_nbzcov_aveyvymc_proj3_area.pdf",h=15,w=20,pointsize=20)
pp_check(fit.nbzcov, plotfun = "scatter_avg") # average predicted y v y
dev.off()

pdf("ctlahom_nbzcov_aveyvyaveprederrmc_proj3_area.pdf",h=15,w=20,pointsize=20)
grid.arrange(pp_check(fit.nbzcov, plotfun = "scatter_avg"),ppc_error_scatter_avg(y,y.repnb)) # average predicted y v y + average predictive error v y
dev.off()

rm(fit.zcov)
rm(fit.nbzcov)
rm(posterior,plot_ltrace,plot_ldens,prop_zero,plot_lzero,plot_lmeansd,plot_lhist,y.rep,y)
rm(posteriornb,plot_tracenb,plot_densnb,plot_zeronb,plot_meansdnb,plot_histnb,y.repnb)


# Los Angeles: Model Plots: Motor Vehicle Theft ---------------------------
# Plots for the model results for the LA Motor Vehicle Theft GLMs

load("LAGTAmcGLMS_area.rda")
set.seed(250)

# Los Angeles: Model Plots: Motor Vehicle Theft: Poisson 
posterior <- as.array(fit.zcov)
plot_ltrace <- mcmc_trace(posterior)
plot_ldens <- mcmc_dens(posterior)
prop_zero <- function(y) mean(y == 0)
plot_lzero <- pp_check(fit.zcov, plotfun = "stat", stat="prop_zero",binwidth=0.005)
plot_lmeansd <- pp_check(fit.zcov, plotfun = "stat_2d", stat = c("mean", "sd"))
plot_lhist <- pp_check(fit.zcov,plotfun = "hist",nreps = 5)
y.rep <- posterior_predict(fit.zcov)
y <- gta_countdf$gta


pdf("ctlagta_zcov_tracemc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_ltrace # trace
dev.off()

pdf("ctlagta_zcov_densmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_ldens # density of samples
dev.off()


pdf("ctlagta_zcov_zeromc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_lzero # proportion of predicted zeros vs true proportion of zeros
dev.off()

pdf("ctlagta_zcov_meansdmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_lmeansd # predicted means v predicted sd
dev.off()

pdf("ctlagta_zcov_histmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_lhist # histogram of predicted y
dev.off()

pdf("ctlagta_zcov_aveprederr2mc_proj3_area.pdf",h=15,w=20,pointsize=20)
ppc_error_scatter_avg(y,y.rep) # average predictive error vy
dev.off()

pdf("ctlagta_zcov_aveprederr2zerointmc_proj3_area.pdf",h=15,w=20,pointsize=20)
ppc_error_scatter_avg(y,y.rep) + geom_vline(aes(xintercept=0),col="red") # average predictive error v y + vertical line at x=0
dev.off()

pdf("ctlagta_zcov_aveyvymc_proj3_area.pdf",h=15,w=20,pointsize=20)
pp_check(fit.zcov, plotfun = "scatter_avg") # average predicted y v y
dev.off()


pdf("ctlagta_zcov_aveyvyaveprederrmc_proj3_area.pdf",h=15,w=20,pointsize=20)
grid.arrange(pp_check(fit.zcov, plotfun = "scatter_avg"),ppc_error_scatter_avg(y,y.rep)) # average predicted y v y + average predictive error v y
dev.off()

pdf("ctlagta_zcov_allmc_proj3_area.pdf",h=15,w=20,pointsize=20)
grid.arrange(plot_ltrace,plot_ldens,plot_lzero) # trace, density and prop. zeros
dev.off()

# Los Angeles: Model Plots: Motor Vehicle Theft: Negative Binomial
posteriornb <- as.array(fit.nbzcov)
plot_tracenb <- mcmc_trace(posteriornb)
prop_zero <- function(y) mean(y == 0)
plot_zeronb <- pp_check(fit.nbzcov, plotfun = "stat", stat="prop_zero",binwidth=0.005)
plot_meansdnb <- pp_check(fit.nbzcov, plotfun = "stat_2d", stat = c("mean", "sd"))
plot_histnb <- pp_check(fit.nbzcov,plotfun = "hist",nreps = 5)
plot_densnb <- mcmc_dens(posteriornb)
y.repnb <- posterior_predict(fit.nbzcov)
y <- gta_countdf$gta


pdf("ctlagta_nbzcov_tracemc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_tracenb # trace
dev.off()

pdf("ctlagta_nbzcov_zeromc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_zeronb # proportion of predicted zeros v true proportion of zeros
dev.off()

pdf("ctlagta_nbzcov_meansdmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_meansdnb # predicted means v predicted sd
dev.off()

pdf("ctlagta_nbzcov_histmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_histnb # histogram of predicted y
dev.off()

pdf("ctlagta_nbzcov_aveprederr2mc_proj3_area.pdf",h=15,w=20,pointsize=20)
ppc_error_scatter_avg(y,y.repnb) # average predictive error v y
dev.off()

pdf("ctlagta_nbzcov_aveprederr2zerointmc_proj3_area.pdf",h=15,w=20,pointsize=20)
ppc_error_scatter_avg(y,y.repnb) + geom_vline(aes(xintercept=0),col="red") # average predictive error v y + vertical line at x=0
dev.off()

pdf("ctlagta_nbzcov_aveyvymc_proj3_area.pdf",h=15,w=20,pointsize=20)
pp_check(fit.nbzcov, plotfun = "scatter_avg") # average predicted y v y
dev.off()


pdf("ctlagta_nbzcov_aveyvyaveprederrmc_proj3_area.pdf",h=15,w=20,pointsize=20)
grid.arrange(pp_check(fit.nbzcov, plotfun = "scatter_avg"),ppc_error_scatter_avg(y,y.repnb)) # average predicted y v y + average predictive error v y
dev.off()

pdf("ctlagta_nbzcov_allmc_proj3_area.pdf",h=15,w=20,pointsize=20)
grid.arrange(plot_tracenb,plot_densnb,plot_zeronb) # trace, density and prop. zeros
dev.off()

rm(fit.zcov)
rm(fit.nbzcov)
rm(posterior,plot_ltrace,plot_ldens,prop_zero,plot_lzero,plot_lmeansd,plot_lhist,y.rep,y)
rm(posteriornb,plot_tracenb,plot_densnb,plot_zeronb,plot_meansdnb,plot_histnb,y.repnb)
rm(hom_countdf)
rm(gta_countdf)


library("rstudioapi")
# Either setwd() to the source file location, or run the following:
setwd(dirname(getActiveDocumentContext()$path))


# New York: Model Plots ---------------------------------------------------
# Load up necessary data in order to plot the results of the models for the NYC crime data (Homicide and Motor Vehicle Theft)

# NYC Data
setwd("./NYC")

#  Load count and point data (latter is not needed here!)
load("../../../PROCESSED_DATA/CRIME/COUNT_DATA_CENSUS_TRACTS/NYC/NY2015CTCountData_projFinal.rda")
load("../../../PROCESSED_DATA/CRIME/COUNT_DATA_CENSUS_TRACTS/NYC/NY2015CTSFCountData_projFinal.rda")
nyc_hom <- readRDS("../../../PROCESSED_DATA/CRIME/POINT_PATTERN/nyc_hom_sf.rds") # not really needed
nyc_gta <- readRDS("../../../PROCESSED_DATA/CRIME/POINT_PATTERN/nyc_gta_sf.rds") # not really needed

nyc_gta15 <- nyc_gta[which(nyc_gta$Y==2015),] # not really needed
nyc_hom15 <- nyc_hom[which(nyc_hom$Y==2015),] # not really needed

hom_countdf <- ct_homcount.df
gta_countdf <- ct_gtacount.df


# New York: Model Plots: Homicide -----------------------------------------
# Plots for the model results for the NYC Homicide GLMs

load("NYCHommcGLMS_area.rda")
set.seed(500)

# New York: Model Plots: Homicide: Poissom
posterior <- as.array(fit.zcov)
plot_ltrace <- mcmc_trace(posterior)
plot_ldens <- mcmc_dens(posterior)
prop_zero <- function(y) mean(y == 0)
plot_lzero <- pp_check(fit.zcov, plotfun = "stat", stat="prop_zero",binwidth=0.005)
plot_lmeansd <- pp_check(fit.zcov, plotfun = "stat_2d", stat = c("mean", "sd"))
plot_lhist <- pp_check(fit.zcov,plotfun = "hist",nreps = 5)
y.rep <- posterior_predict(fit.zcov)
y <- hom_countdf$hom

pdf("ctnyhom_zcov_tracemc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_ltrace # trace
dev.off()

pdf("ctnyhom_zcov_densmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_ldens # density of samples
dev.off()

pdf("ctnyhom_zcov_zeromc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_lzero # proportion of predicted zeros v true proportion of zeros
dev.off()

pdf("ctnyhom_zcov_meansdmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_lmeansd # predicted mean v predicted sd
dev.off()

pdf("ctnyhom_zcov_histmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_lhist # histogram of predicted y
dev.off()

pdf("ctnyhom_zcov_allmc_proj3_area.pdf",h=15,w=20,pointsize=20)
grid.arrange(plot_ltrace,plot_ldens,plot_lzero) # trace, density and prop. zeros
dev.off()

pdf("ctnyhom_zcov_aveprederrmc_proj3_area.pdf",h=15,w=20,pointsize=20)
pp_check(fit.zcov, plotfun = "scatter_avg") # average predicted y v y
dev.off()

pdf("ctnyhom_zcov_aveprederr2mc_proj3_area.pdf",h=15,w=20,pointsize=20)
ppc_error_scatter_avg(y,y.rep) # average predictive error v y
dev.off()

pdf("ctnyhom_zcov_aveprederr2zerointmc_proj3_area.pdf",h=15,w=20,pointsize=20)
ppc_error_scatter_avg(y,y.rep) + geom_vline(aes(xintercept=0),col="red") # average predictive error v y + vertical line at x=0
dev.off()

pdf("ctnyhom_zcov_aveyvymc_proj3_area.pdf",h=15,w=20,pointsize=20)
pp_check(fit.zcov, plotfun = "scatter_avg") # predicted y v y
dev.off()

pdf("ctnyhom_zcov_aveyvyaveprederrmc_proj3_area.pdf",h=15,w=20,pointsize=20)
grid.arrange(pp_check(fit.zcov, plotfun = "scatter_avg"),ppc_error_scatter_avg(y,y.rep)) # predicted y v y + average predictive error v y
dev.off()

# New York: Model Plots: Homicide: Negative Binomial
posteriornb <- as.array(fit.nbzcov)
plot_tracenb <- mcmc_trace(posteriornb)
prop_zero <- function(y) mean(y == 0)
plot_zeronb <- pp_check(fit.nbzcov, plotfun = "stat", stat="prop_zero",binwidth=0.005)
plot_meansdnb <- pp_check(fit.nbzcov, plotfun = "stat_2d", stat = c("mean", "sd"))
plot_histnb <- pp_check(fit.nbzcov,plotfun = "hist",nreps = 5)
plot_densnb <- mcmc_dens(posteriornb)
y.repnb <- posterior_predict(fit.nbzcov)
y <- hom_countdf$hom

pdf("ctnyhom_nbzcov_tracemc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_tracenb # trace
dev.off()

pdf("ctnyhom_nbzcov_zeromc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_zeronb # proportion of predicted zeros v true proportion of zeros
dev.off()

pdf("ctnyhom_nbzcov_meansdmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_meansdnb # predicted mean v predicted sd
dev.off()

pdf("ctnyhom_nbzcov_histmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_histnb # histogram of predicted y
dev.off()

pdf("ctnyhom_nbzcov_allmc_proj3_area.pdf",h=15,w=20,pointsize=20)
grid.arrange(plot_tracenb,plot_densnb,plot_zeronb) # trace, density and prop. zeros
dev.off()

pdf("ctnyhom_nbzcov_aveprederr2mc_proj3_area.pdf",h=15,w=20,pointsize=20)
ppc_error_scatter_avg(y,y.repnb) # average predictive error v y
dev.off()

pdf("ctnyhom_nbzcov_aveprederr2zerointmc_proj3_area.pdf",h=15,w=20,pointsize=20)
ppc_error_scatter_avg(y,y.repnb) + geom_vline(aes(xintercept=0),col="red") # average predictive error v y + vertical line at x=0
dev.off()

pdf("ctnyhom_nbzcov_aveyvymc_proj3_area.pdf",h=15,w=20,pointsize=20)
pp_check(fit.nbzcov, plotfun = "scatter_avg") # average predicted y v y
dev.off()


pdf("ctnyhom_nbzcov_aveyvyaveprederrmc_proj3_area.pdf",h=15,w=20,pointsize=20)
grid.arrange(pp_check(fit.nbzcov, plotfun = "scatter_avg"),ppc_error_scatter_avg(y,y.repnb)) # average predicted y v y + average predictive error v y
dev.off()

rm(fit.zcov)
rm(fit.nbzcov)
rm(posterior,plot_ltrace,plot_ldens,prop_zero,plot_lzero,plot_lmeansd,plot_lhist,y.rep,y)
rm(posteriornb,plot_tracenb,plot_densnb,plot_zeronb,plot_meansdnb,plot_histnb,y.repnb)



# New York: Model Plots: Motor Vehicle Theft ------------------------------
# Plots for the model results for the NYC Motor Vehicle Theft GLMs

load("NYCGTAmcGLMS_area.rda")
set.seed(750)

# New York: Model Plots: Motor Vehicle Theft: Poisson
posterior <- as.array(fit.zcov)
plot_ltrace <- mcmc_trace(posterior)
plot_ldens <- mcmc_dens(posterior)
prop_zero <- function(y) mean(y == 0)
plot_lzero <- pp_check(fit.zcov, plotfun = "stat", stat="prop_zero",binwidth=0.005)
plot_lmeansd <- pp_check(fit.zcov, plotfun = "stat_2d", stat = c("mean", "sd"))
plot_lhist <- pp_check(fit.zcov,plotfun = "hist",nreps = 5)
y.rep <- posterior_predict(fit.zcov)
y <- gta_countdf$gta

pdf("ctnygta_zcov_tracemc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_ltrace # trace
dev.off()

pdf("ctnygta_zcov_densmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_ldens # density of samples
dev.off()

pdf("ctnygta_zcov_zeromc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_lzero # proportion of predicted zeros v true proportion of zeros
dev.off()

pdf("ctnygta_zcov_meansdmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_lmeansd # predicted means v predicted sd
dev.off()

pdf("ctnygta_zcov_histmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_lhist # histogram of predicted samples
dev.off()

pdf("ctnygta_zcov_allmc_proj3_area.pdf",h=15,w=20,pointsize=20)
grid.arrange(plot_ltrace,plot_ldens,plot_lzero) # trace, density and prop. zeros
dev.off()

pdf("ctnygta_zcov_aveprederr2mc_proj3_area.pdf",h=15,w=20,pointsize=20)
ppc_error_scatter_avg(y,y.rep) # average predictive error v y
dev.off()

pdf("ctnygta_zcov_aveprederr2zerointmc_proj3_area.pdf",h=15,w=20,pointsize=20)
ppc_error_scatter_avg(y,y.rep) + geom_vline(aes(xintercept=0),col="red") # average predictive error v y + vertical line at x=0
dev.off()

pdf("ctnygta_zcov_aveyvymc_proj3_area.pdf",h=15,w=20,pointsize=20)
pp_check(fit.zcov, plotfun = "scatter_avg") # average predicted y v y
dev.off()

pdf("ctnygta_zcov_aveyvyaveprederrmc_proj3_area.pdf",h=15,w=20,pointsize=20)
grid.arrange(pp_check(fit.zcov, plotfun = "scatter_avg"),ppc_error_scatter_avg(y,y.rep)) # average predicted y v y + average predictive error v y
dev.off()


# New York: Model Plots: Motor Vehicle Theft: Negative Binomial
posteriornb <- as.array(fit.nbzcov)
plot_tracenb <- mcmc_trace(posteriornb)
prop_zero <- function(y) mean(y == 0)
plot_zeronb <- pp_check(fit.nbzcov, plotfun = "stat", stat="prop_zero",binwidth=0.005)
plot_meansdnb <- pp_check(fit.nbzcov, plotfun = "stat_2d", stat = c("mean", "sd"))
plot_histnb <- pp_check(fit.nbzcov,plotfun = "hist",nreps = 5)
plot_densnb <- mcmc_dens(posteriornb)
y.repnb <- posterior_predict(fit.nbzcov)
y <- gta_countdf$gta


pdf("ctnygta_nbzcov_tracemc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_tracenb # trace
dev.off()

pdf("ctnygta_nbzcov_zeromc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_zeronb # proprotion of predicted zeros v true proportion of zeros
dev.off()

pdf("ctnygta_nbzcov_meansdmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_meansdnb # predicted means v predicted sd
dev.off()

pdf("ctnygta_nbzcov_histmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_histnb # histogram of predicted y
dev.off()

pdf("ctnygta_nbzcov_allmc_proj3_area.pdf",h=15,w=20,pointsize=20)
grid.arrange(plot_tracenb,plot_densnb,plot_zeronb) # trace, density and prop. zeros
dev.off()

pdf("ctnygta_nbzcov_aveprederr2mc_proj3_area.pdf",h=15,w=20,pointsize=20)
ppc_error_scatter_avg(y,y.repnb) # average predictive y v y
dev.off()

pdf("ctnygta_nbzcov_aveprederr2zerointmc_proj3_area.pdf",h=15,w=20,pointsize=20)
ppc_error_scatter_avg(y,y.repnb) + geom_vline(aes(xintercept=0),col="red") # average predictive y v y + vertical line at x=0
dev.off()

pdf("ctnygta_nbzcov_aveyvymc_proj3_area.pdf",h=15,w=20,pointsize=20)
pp_check(fit.nbzcov, plotfun = "scatter_avg") # average predicted y v y
dev.off()


pdf("ctnygta_nbzcov_aveyvyaveprederrmc_proj3_area.pdf",h=15,w=20,pointsize=20)
grid.arrange(pp_check(fit.nbzcov, plotfun = "scatter_avg"),ppc_error_scatter_avg(y,y.repnb))
# Error: cannot allocate vector of size 825.5 Mb
dev.off()

rm(fit.zcov)
rm(fit.nbzcov)
rm(posterior,plot_ltrace,plot_ldens,prop_zero,plot_lzero,plot_lmeansd,plot_lhist,y.rep,y)
rm(posteriornb,plot_tracenb,plot_densnb,plot_zeronb,plot_meansdnb,plot_histnb,y.repnb)
rm(hom_countdf)
rm(gta_countdf)


library("rstudioapi")
# Either setwd() to the source file location, or run the following:
setwd(dirname(getActiveDocumentContext()$path))



# Portland: Model Plots ---------------------------------------------------
# Load up the necessary data to plot the results from modelling the Portland Motor Vehicle Theft data.

# Portland Data
setwd("./Portland")

#  Load count and point data (latter is not needed here!)
load("../../../PROCESSED_DATA/CRIME/COUNT_DATA_CENSUS_TRACTS/Portland/P2015CTCountData_projFinal.rda")
load("../../../PROCESSED_DATA/CRIME/COUNT_DATA_CENSUS_TRACTS/Portland/P2015CTSFCountData_projFinal.rda")

hom_countdf <- ct_homcount.df
gta_countdf <- ct_gtacount.df


# Portland: Model Plots: Motor Vehicle Thefts -----------------------------
# Plots for the model results for the Portland Motor Vehicle Theft GLMs

load("PGTAmcGLMS_area.rda")
set.seed(250)

# Portland: Model Plots: Motor Vehicle Theft: Poisson
posterior <- as.array(fit.zcov)
plot_ltrace <- mcmc_trace(posterior)
plot_ldens <- mcmc_dens(posterior)
prop_zero <- function(y) mean(y == 0)
plot_lzero <- pp_check(fit.zcov, plotfun = "stat", stat="prop_zero",binwidth=0.005)
plot_lmeansd <- pp_check(fit.zcov, plotfun = "stat_2d", stat = c("mean", "sd"))
plot_lhist <- pp_check(fit.zcov,plotfun = "hist",nreps = 5)
y.rep <- posterior_predict(fit.zcov)
y <- gta_countdf$gta


pdf("ctpgta_zcov_tracemc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_ltrace # trace
dev.off()

pdf("ctpgta_zcov_densmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_ldens # density of samples
dev.off()


pdf("ctpgta_zcov_zeromc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_lzero # proportion of predicted zeros with true number of zeros
dev.off()

pdf("ctpgta_zcov_meansdmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_lmeansd # predicted means v predicted sd
dev.off()

pdf("ctpgta_zcov_histmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_lhist # histogram of predicted y
dev.off()

pdf("ctpgta_zcov_aveprederr2mc_proj3_area.pdf",h=15,w=20,pointsize=20)
ppc_error_scatter_avg(y,y.rep) # average predictive error v y
dev.off()

pdf("ctpgta_zcov_aveprederr2zerointmc_proj3_area.pdf",h=15,w=20,pointsize=20)
ppc_error_scatter_avg(y,y.rep) + geom_vline(aes(xintercept=0),col="red") # average predictive error v y + vertical line at x=0
dev.off()


pdf("ctpgta_zcov_aveyvymc_proj3_area.pdf",h=15,w=20,pointsize=20)
pp_check(fit.zcov, plotfun = "scatter_avg") # average predicted y v y
dev.off()


pdf("ctpgta_zcov_aveyvyaveprederrmc_proj3_area.pdf",h=15,w=20,pointsize=20)
grid.arrange(pp_check(fit.zcov, plotfun = "scatter_avg"),ppc_error_scatter_avg(y,y.rep)) # average predicted y v y +  # average predictive error v y + vertical line at x=0
dev.off()

pdf("ctpgta_zcov_allmc_proj3_area.pdf",h=15,w=20,pointsize=20)
grid.arrange(plot_ltrace,plot_ldens,plot_lzero) # trace, density and prop. zero
dev.off()

# Portland: Model Plots: Motor Vehicle Theft: Negative Binomial
posteriornb <- as.array(fit.nbzcov)
plot_tracenb <- mcmc_trace(posteriornb)
prop_zero <- function(y) mean(y == 0)
plot_zeronb <- pp_check(fit.nbzcov, plotfun = "stat", stat="prop_zero",binwidth=0.005)
plot_meansdnb <- pp_check(fit.nbzcov, plotfun = "stat_2d", stat = c("mean", "sd"))
plot_histnb <- pp_check(fit.nbzcov,plotfun = "hist",nreps = 5)
plot_densnb <- mcmc_dens(posteriornb)
y.repnb <- posterior_predict(fit.nbzcov)
y <- gta_countdf$gta


pdf("ctpgta_nbzcov_tracemc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_tracenb # trace
dev.off()

pdf("ctpgta_nbzcov_zeromc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_zeronb # proportion of predicted zeros v true number of zeros
dev.off()

pdf("ctpgta_nbzcov_meansdmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_meansdnb # predicted mean vs predicted sd
dev.off()

pdf("ctpgta_nbzcov_histmc_proj3_area.pdf",h=15,w=20,pointsize=20)
plot_histnb # histogram of predicted y
dev.off()

pdf("ctpgta_nbzcov_aveprederr2mc_proj3_area.pdf",h=15,w=20,pointsize=20)
ppc_error_scatter_avg(y,y.repnb) # average predictive error v y
dev.off()

pdf("ctpgta_nbzcov_aveprederr2zerointmc_proj3_area.pdf",h=15,w=20,pointsize=20)
ppc_error_scatter_avg(y,y.repnb) + geom_vline(aes(xintercept=0),col="red") # average predictive error v y + vertical line at x=0
dev.off()


pdf("ctpgta_nbzcov_aveyvymc_proj3_area.pdf",h=15,w=20,pointsize=20)
pp_check(fit.nbzcov, plotfun = "scatter_avg") # average predicted y v y
dev.off()


pdf("ctpgta_nbzcov_aveyvyaveprederrmc_proj3_area.pdf",h=15,w=20,pointsize=20)
grid.arrange(pp_check(fit.nbzcov, plotfun = "scatter_avg"),ppc_error_scatter_avg(y,y.repnb)) # average predicted  y v y + average predictive error v y
dev.off()

pdf("ctpgta_nbzcov_allmc_proj3_area.pdf",h=15,w=20,pointsize=20)
grid.arrange(plot_tracenb,plot_densnb,plot_zeronb) # trace, density and prop. zeros
dev.off()


rm(fit.zcov)
rm(fit.nbzcov)
rm(posterior,plot_ltrace,plot_ldens,prop_zero,plot_lzero,plot_lmeansd,plot_lhist,y.rep,y)
rm(posteriornb,plot_tracenb,plot_densnb,plot_zeronb,plot_meansdnb,plot_histnb,y.repnb)


# SessionInfo() -----------------------------------------------------------

sessionInfo()