
# Area Histogram Plots ----------------------------------------------------

# This R script plots histograms of the census tract areas and of the grid cell areas, in order to assess how the grid areas compare with the census tract areas for Los Angeles and New York.

# Author: Nadeen Khaleel


# Setwd and Load Libraries ------------------------------------------------

# Attach the packages used below: rstudioapi to locate this script on disk and
# ggplot2 to build and save the histograms.
library(rstudioapi)
library(ggplot2)

# Work from the folder that contains this script. The call below asks RStudio
# for the path of the active document; alternatively, setwd() to the source
# file location by hand.
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))


# Los Angeles -------------------------------------------------------------

setwd("./LA")


# Los Angeles: Load Data --------------------------------------------------

# Load an .rda file into the calling environment (exactly as a bare load()
# would) and stop if the file does not provide the object that is about to be
# copied. The same object names are reused by every file, so without this check
# a file lacking the expected object would silently leave the data from the
# previous load in place.
#
# path:     path of the .rda file to load.
# expected: name of the object the file must contain.
# Returns (invisibly) the names of all objects restored by load().
load.checked <- function(path, expected) {
  loaded <- load(path, envir = parent.frame())
  if (!(expected %in% loaded)) {
    stop("'", path, "' does not contain an object named '", expected, "'",
         call. = FALSE)
  }
  invisible(loaded)
}

# Census tract level data; the file provides `ct_gtacount.df`.
load.checked(
  "../../PROCESSED_DATA/CRIME/COUNT_DATA_CENSUS_TRACTS/LA/LA2015CTCountData_projFinal.rda",
  "ct_gtacount.df"
)
ct.df <- ct_gtacount.df

# Gridded data, coarsest to finest. Every file provides `gta_countdf`, so each
# one is copied to its own name before the next load overwrites it.
load.checked(
  "../../PROCESSED_DATA/CRIME/COUNT_DATA_FINAL/LA/LA2015CT1015CountData_projFinal.rda",
  "gta_countdf"
)
grid1015.df <- gta_countdf

load.checked(
  "../../PROCESSED_DATA/CRIME/COUNT_DATA_FINAL/LA/LA2015CT2436CountData_projFinal.rda",
  "gta_countdf"
)
grid2436.df <- gta_countdf

load.checked(
  "../../PROCESSED_DATA/CRIME/COUNT_DATA_FINAL/LA/LA2015CT4872CountData_projFinal.rda",
  "gta_countdf"
)
grid4872.df <- gta_countdf

load.checked(
  "../../PROCESSED_DATA/CRIME/COUNT_DATA_FINAL/LA/LA2015CT95144CountData_projFinal.rda",
  "gta_countdf"
)
grid95144.df <- gta_countdf


# Los Angeles: Plots ------------------------------------------------------

# Maximum cell area of each Los Angeles grid, drawn as reference lines on the
# census tract histograms. The numeric values are cell side lengths in units of
# 10 km (5, 2, 1 and 0.5 km), so squaring and scaling by 1e8 gives m^2.
# The finest grid is labelled "95x144" so that it agrees with the 95 x 144 grid
# data used by this script (LA2015CT95144..., grid95144.df).
max.areas.grid.df <- data.frame(
  "Grid" = c("10x15", "24x36", "48x72", "95x144"),
  "CellArea" = c(0.5, 0.2, 0.1, 0.05)^2 * 1e8
)
max.areas.grid.df$Grid <- as.factor(max.areas.grid.df$Grid)

# Everything the two histograms have in common: 100-bin histogram of the census
# tract areas, one coloured vertical line per grid, axis label and text sizes.
# The labels are plotmath expressions so that m^2 is typeset with a
# superscript; `paste` inside expression() is plotmath's juxtaposition
# operator, not base::paste(), so it must not be swapped for paste0().
ct.hist.base <- ggplot(data = ct.df) +
  geom_histogram(aes(x = as.numeric(ctarea)), bins = 100) +
  geom_vline(
    data = max.areas.grid.df,
    aes(xintercept = CellArea, col = Grid),
    lwd = 1.5
  ) +
  xlab(expression(paste("Area (",m^2,")",sep=""))) +
  theme(
    plot.title = element_text(size = 35, hjust = 0.5),
    axis.title = element_text(size = 25),
    axis.text = element_text(size = 25),
    legend.title = element_text(size = 25),
    legend.text = element_text(size = 25)
  )

# Full range of census tract areas.
ct.full.p <- ct.hist.base +
  ggtitle(expression(atop(paste("Area for Census Tracts (",m^2,") with Maximum Grid ",sep=""),"Cell Areas Denoted")))

# Zoomed version: xlim() restricts the x scale to [0, 1e7], so tracts and
# reference lines beyond that (including the largest cell area, 2.5e7) are
# dropped and the 100 bins are spread over this window only.
ct.zoom.p <- ct.hist.base +
  xlim(c(0,1e7)) +
  ggtitle(expression(atop(paste("Area for Census Tracts (",m^2,") with Maximum Grid ",sep=""),"Cell Areas Denoted (Zoomed in)")))

ggsave(filename="LACTAreaHistograms.pdf",plot=ct.full.p,height=15,width=15)

ggsave(filename="LACTAreaHistogramsZoomed.pdf",plot=ct.zoom.p,height=15,width=15)

# Following two plots are to re-label the legend
# Same reference areas as for the first pair of plots, but with legend entries
# that name each grid by number and cell side length.
max.areas.grid.df <- data.frame(
  "Grid" = c("Grid 1 (5km)", "Grid 2 (2km)", "Grid 3 (1km)", "Grid 4 (0.5km)"),
  "CellArea" = c(0.5, 0.2, 0.1, 0.05)^2 * 1e8
)
max.areas.grid.df$Grid <- as.factor(max.areas.grid.df$Grid)

# Layers shared by the full-range and zoomed plots: 100-bin histogram of the
# census tract areas, one coloured vertical line per grid, axis label and text
# sizes. `paste` inside expression() is plotmath's juxtaposition operator (used
# to typeset m^2), not base::paste().
ct.hist.base <- ggplot(data = ct.df) +
  geom_histogram(aes(x = as.numeric(ctarea)), bins = 100) +
  geom_vline(
    data = max.areas.grid.df,
    aes(xintercept = CellArea, col = Grid),
    lwd = 1.5
  ) +
  xlab(expression(paste("Area (",m^2,")",sep=""))) +
  theme(
    plot.title = element_text(size = 35, hjust = 0.5),
    axis.title = element_text(size = 25),
    axis.text = element_text(size = 25),
    legend.title = element_text(size = 25),
    legend.text = element_text(size = 25)
  )

# Full range of census tract areas.
ct.full.p <- ct.hist.base +
  ggtitle(expression(atop(paste("Area for Census Tracts (",m^2,") with Maximum Grid ",sep=""),"Cell Areas Denoted")))

# Zoomed version, with the x scale limited to [0, 1e7].
ct.zoom.p <- ct.hist.base +
  xlim(c(0,1e7)) +
  ggtitle(expression(atop(paste("Area for Census Tracts (",m^2,") with Maximum Grid ",sep=""),"Cell Areas Denoted (Zoomed in)")))

ggsave(filename="LACTAreaHistogramsRelabelLegend.pdf",plot=ct.full.p,height=15,width=15)

ggsave(filename="LACTAreaHistogramsRelabelLegendZoomed.pdf",plot=ct.zoom.p,height=15,width=15)


# Collect the census tract areas and the cell areas of every grid, keyed by the
# label each set carries in the `Level` column of the combined data frame.
area.sets <- list(
  CT = ct.df$ctarea,
  Grid1015 = grid1015.df$area,
  Grid2436 = grid2436.df$area,
  Grid4872 = grid4872.df$area,
  Grid95144 = grid95144.df$area
)

# `$` yields NULL for an absent column, which would simply vanish when the sets
# are stacked, so fail loudly instead of producing an incomplete comparison.
absent <- vapply(area.sets, is.null, logical(1))
if (any(absent)) {
  stop("No area column found for: ",
       paste(names(area.sets)[absent], collapse = ", "), call. = FALSE)
}

# Stack all sets into one long data frame in a single step (one row per census
# tract or grid cell). as.numeric() strips any class the area vectors carry so
# that `Area` is a plain numeric column.
comp.area.df <- data.frame(
  Level = rep(names(area.sets), times = lengths(area.sets)),
  Area = unlist(lapply(area.sets, as.numeric), use.names = FALSE),
  stringsAsFactors = FALSE
)

# `alpha` is 1 for census tract rows and NA for grid rows.
comp.area.df$alpha <- ifelse(comp.area.df$Level == "CT", 1, NA)

comp.area.df$Level <- as.factor(comp.area.df$Level)

# Split the combined areas into census tract rows and grid cell rows.
is.ct <- comp.area.df$Level == "CT"
ct.comp <- comp.area.df[is.ct, ]
sub.comp <- comp.area.df[!is.ct, ]

# Overlay: census tract areas as an opaque black histogram, with one
# semi-transparent histogram per grid drawn on top, coloured by grid. Using
# position = "identity" keeps the histograms overlapping rather than stacked.
p <- ggplot(ct.comp) +
  geom_histogram(
    aes(x = Area),
    fill = "black", alpha = 1, position = "identity", bins = 100
  ) +
  geom_histogram(
    data = sub.comp,
    aes(x = Area, fill = Level),
    alpha = 0.75, position = "identity", bins = 100
  ) +
  theme(
    plot.title = element_text(size = 35, hjust = 0.5),
    axis.title = element_text(size = 25),
    axis.text = element_text(size = 25),
    legend.title = element_text(size = 25),
    legend.text = element_text(size = 25)
  )
p

ggsave(filename = "LACTGridAreaHistogramOverlays.pdf",plot = p,height=15,width=15)


# Los Angeles: Re-set WD --------------------------------------------------

# Return to the folder that contains this script. The call below asks RStudio
# for the path of the active document; alternatively, setwd() to the source
# file location by hand.
library(rstudioapi)
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))



# New York ----------------------------------------------------------------

setwd("./NYC")


# New York: Load Data -----------------------------------------------------

# Load an .rda file into the calling environment (exactly as a bare load()
# would) and stop if the file does not provide the object that is about to be
# copied. The same object names are reused by every file (and by the Los
# Angeles section), so without this check a file lacking the expected object
# would silently leave previously loaded data in place.
#
# path:     path of the .rda file to load.
# expected: name of the object the file must contain.
# Returns (invisibly) the names of all objects restored by load().
load.checked <- function(path, expected) {
  loaded <- load(path, envir = parent.frame())
  if (!(expected %in% loaded)) {
    stop("'", path, "' does not contain an object named '", expected, "'",
         call. = FALSE)
  }
  invisible(loaded)
}

# Census tract level data; the file provides `ct_gtacount.df`.
load.checked(
  "../../PROCESSED_DATA/CRIME/COUNT_DATA_CENSUS_TRACTS/NYC/NY2015CTCountData_projFinal.rda",
  "ct_gtacount.df"
)
ct.df <- ct_gtacount.df

# Gridded data, coarsest to finest. Every file provides `gta_countdf`, so each
# one is copied to its own name before the next load overwrites it.
load.checked(
  "../../PROCESSED_DATA/CRIME/COUNT_DATA_FINAL/NYC/NY2015CT1010CountData_projFinal.rda",
  "gta_countdf"
)
grid1010.df <- gta_countdf

load.checked(
  "../../PROCESSED_DATA/CRIME/COUNT_DATA_FINAL/NYC/NY2015CT2424CountData_projFinal.rda",
  "gta_countdf"
)
grid2424.df <- gta_countdf

load.checked(
  "../../PROCESSED_DATA/CRIME/COUNT_DATA_FINAL/NYC/NY2015CT4748CountData_projFinal.rda",
  "gta_countdf"
)
grid4748.df <- gta_countdf

load.checked(
  "../../PROCESSED_DATA/CRIME/COUNT_DATA_FINAL/NYC/NY2015CT9496CountData_projFinal.rda",
  "gta_countdf"
)
grid9496.df <- gta_countdf


# New York: Plots ---------------------------------------------------------

# Maximum cell area of each New York grid, drawn as reference lines on the
# census tract histograms. The numeric values are cell side lengths in units of
# 10 km, so squaring and scaling by 1e8 gives m^2.
max.areas.grid.df <- data.frame(
  "Grid" = c("10x10", "24x24", "47x48", "94x96"),
  "CellArea" = c(0.5, 0.2, 0.1, 0.05)^2 * 1e8
)
max.areas.grid.df$Grid <- as.factor(max.areas.grid.df$Grid)

# Layers shared by the full-range and zoomed plots: 100-bin histogram of the
# census tract areas, one coloured vertical line per grid, axis label and text
# sizes. `paste` inside expression() is plotmath's juxtaposition operator (used
# to typeset m^2), not base::paste().
ct.hist.base <- ggplot(data = ct.df) +
  geom_histogram(aes(x = as.numeric(ctarea)), bins = 100) +
  geom_vline(
    data = max.areas.grid.df,
    aes(xintercept = CellArea, col = Grid),
    lwd = 1.5
  ) +
  xlab(expression(paste("Area (",m^2,")",sep=""))) +
  theme(
    plot.title = element_text(size = 35, hjust = 0.5),
    axis.title = element_text(size = 25),
    axis.text = element_text(size = 25),
    legend.title = element_text(size = 25),
    legend.text = element_text(size = 25)
  )

# Full range of census tract areas.
ct.full.p <- ct.hist.base +
  ggtitle(expression(atop(paste("Area for Census Tracts (",m^2,") with Maximum Grid ",sep=""),"Cell Areas Denoted")))

# Zoomed version, with the x scale limited to [0, 1e7].
ct.zoom.p <- ct.hist.base +
  xlim(c(0,1e7)) +
  ggtitle(expression(atop(paste("Area for Census Tracts (",m^2,") with Maximum Grid ",sep=""),"Cell Areas Denoted (Zoomed in)")))

ggsave(filename="NYCTAreaHistograms.pdf",plot=ct.full.p,height=15,width=15)

ggsave(filename="NYCTAreaHistogramsZoomed.pdf",plot=ct.zoom.p,height=15,width=15)


# Collect the census tract areas and the cell areas of every grid, keyed by the
# label each set carries in the `Level` column of the combined data frame.
area.sets <- list(
  CT = ct.df$ctarea,
  Grid1010 = grid1010.df$area,
  Grid2424 = grid2424.df$area,
  Grid4748 = grid4748.df$area,
  Grid9496 = grid9496.df$area
)

# `$` yields NULL for an absent column, which would simply vanish when the sets
# are stacked, so fail loudly instead of producing an incomplete comparison.
absent <- vapply(area.sets, is.null, logical(1))
if (any(absent)) {
  stop("No area column found for: ",
       paste(names(area.sets)[absent], collapse = ", "), call. = FALSE)
}

# Stack all sets into one long data frame in a single step (one row per census
# tract or grid cell). as.numeric() strips any class the area vectors carry so
# that `Area` is a plain numeric column.
comp.area.df <- data.frame(
  Level = rep(names(area.sets), times = lengths(area.sets)),
  Area = unlist(lapply(area.sets, as.numeric), use.names = FALSE),
  stringsAsFactors = FALSE
)

# `alpha` is 1 for census tract rows and NA for grid rows.
comp.area.df$alpha <- ifelse(comp.area.df$Level == "CT", 1, NA)

comp.area.df$Level <- as.factor(comp.area.df$Level)

# Split the combined areas into census tract rows and grid cell rows.
is.ct <- comp.area.df$Level == "CT"
ct.comp <- comp.area.df[is.ct, ]
sub.comp <- comp.area.df[!is.ct, ]

# Overlay: census tract areas as an opaque black histogram, with one
# semi-transparent histogram per grid drawn on top, coloured by grid. Using
# position = "identity" keeps the histograms overlapping rather than stacked.
p <- ggplot(ct.comp) +
  geom_histogram(
    aes(x = Area),
    fill = "black", alpha = 1, position = "identity", bins = 100
  ) +
  geom_histogram(
    data = sub.comp,
    aes(x = Area, fill = Level),
    alpha = 0.75, position = "identity", bins = 100
  ) +
  theme(
    plot.title = element_text(size = 35, hjust = 0.5),
    axis.title = element_text(size = 25),
    axis.text = element_text(size = 25),
    legend.title = element_text(size = 25),
    legend.text = element_text(size = 25)
  )
p

ggsave(filename = "NYCTGridAreaHistogramOverlays.pdf",plot = p,height=15,width=15)


# New York: Re-set WD -----------------------------------------------------

# Return to the folder that contains this script. The call below asks RStudio
# for the path of the active document; alternatively, setwd() to the source
# file location by hand.
library(rstudioapi)
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))



# sessionInfo() -----------------------------------------------------------

# Print the details of the R session used for this run (R version, platform
# and attached/loaded packages).
sessionInfo()
