# Shifted and Scaled Gridded Crime Count Data -----------------------------

# This R script takes the outputs from the CountDataGen_final.R code, where the count data frames are on the projected UTM coordinates, and shifts and scales them (as well as the city boundaries) so that a unit increment in the x or y direction is equivalent to a distance of 10km rather than 1m and the bottom-left corner of the bounding box lies on the origin (0,0). We also produce the necessary meshes for the different resolutions by using these shifted and scaled coordinates and the resolution of the grid in order to determine the maximum mesh edge.

# Author: Nadeen Khaleel

# Setwd and Load Libraries ------------------------------------------------

library("rstudioapi")
# Either setwd() to the source file location, or run the following:
setwd(dirname(getActiveDocumentContext()$path))

library(sp)
library(spatstat)
library(sf)
library(dplyr)
library(readr)
library(maptools)
library(mvtnorm)
library(raster)
library(stringr)
library(rgeos)
library(INLA)


# Functions ---------------------------------------------------------------
# This section contains the function to generate the scaled and shifted count data over the grids as well as producing the meshes for the INLA-SPDE method.

# Grid Centres for City Polygons from pre-created Quadrats
# The grid centres produced for the quadrats from the CountDataGen_final.R may not necessarily lie within the polygon itself, due to the irregular shapes of the city polygons leading to irregular shapes for the intersection between the grid cells and polygons. The following function takes the quadrats generated for a particular grid resolution and if any of the grid centres of these lie outside the city boundary it shifts them towards a location that lies in the intersection between the grid and city polygon.
# Arguments:
#   quad: a spatstat tessellation (as produced by quadrats()) whose tiles are the intersections of the grid cells with the city polygon.
# Returns a list with:
#   ind.orig:     indices of the tiles whose centroid does not intersect any tile polygon,
#   ind.final:    indices of the tiles whose centre still lies outside after the corrections below,
#   coords.orig:  SpatialPoints of the uncorrected tile centroids,
#   coords.final: SpatialPoints of the corrected tile centres (same order as the tiles).
# The correction is done in two stages: (1) move an outside centre to the centroid of the closest sub-polygon of its tile; (2) if that is still outside, lay a 10x10 grid over that sub-polygon and move to the interior grid centre nearest to the original centroid.
polypoint <- function(quad){
  g.sp <- as(quad,"SpatialPolygons") # we need this to be sp
  g.sf <- st_as_sf(g.sp)
  
  # Centroid of every tile as a 2 x n matrix (first row x, second row y)
  ord <- vapply(seq_len(quad$n),function(ii){unlist(centroid.owin(quad$tiles[[ii]]))},numeric(2))
  coords <- data.frame(x=ord[1,],y=ord[2,])
  coordinates(coords) <- ~ x + y
  coords.orig <- coords
  
  # Indices of the points that do not intersect any of the tile polygons
  outside <- function(pts){
    which(lengths(st_intersects(st_as_sf(pts),g.sf))==0)
  }
  
  # Sub-polygon of tile idx that is closest (by label point) to the tile's original centroid, returned as a stand-alone SpatialPolygons object. If the tile is a single polygon, that polygon is used directly.
  closest.subpoly <- function(idx){
    parts <- g.sp@polygons[[idx]]@Polygons
    if (length(parts)==1){
      m <- 1
    } else {
      d.vec <- vapply(parts,function(part){
        labpt <- data.frame(x=part@labpt[1],y=part@labpt[2])
        coordinates(labpt) <- ~ x + y
        gDistance(coords[idx],labpt)
      },numeric(1))
      m <- which.min(d.vec)
    }
    SpatialPolygons(list(Polygons(list(Polygon(parts[[m]]@coords)),1)))
  }
  
  # Lay a 10x10 grid over the closest sub-polygon of tile idx and return the (x,y) of the grid centre that lies inside the sub-polygon and is nearest to the tile's original centroid. Returns NULL if no grid centre lies inside.
  nearest.interior.point <- function(idx){
    g.sub <- quadrats(closest.subpoly(idx),10,10)
    ord.sub <- vapply(seq_len(g.sub$n),function(ii){unlist(centroid.owin(g.sub$tiles[[ii]]))},numeric(2))
    coords.sub <- data.frame(x=ord.sub[1,],y=ord.sub[2,])
    coordinates(coords.sub) <- ~ x + y
    # Keep only the grid centres that fall in exactly one sub-tile
    hits <- st_intersects(st_as_sf(coords.sub),st_as_sf(as(g.sub,"SpatialPolygons")))
    # drop=FALSE: a single surviving candidate must remain a 2 x 1 matrix
    ord.sub.keep <- ord.sub[,lengths(hits)==1,drop=FALSE]
    if (ncol(ord.sub.keep)==0){
      return(NULL)
    }
    coords.sub.keep <- data.frame(x=ord.sub.keep[1,],y=ord.sub.keep[2,])
    coordinates(coords.sub.keep) <- ~ x + y
    # Row 1 is the point on the first geometry (the centroid), row 2 the nearest candidate
    coords.nearest <- coordinates(gNearestPoints(coords[idx],coords.sub.keep))
    unname(coords.nearest[2,])
  }
  
  # Copy of pts with the rows idx replaced by the rows of the two-column matrix xy
  relocate <- function(pts,idx,xy){
    moved <- data.frame(pts)
    moved[idx,] <- unname(xy)
    coordinates(moved) <- ~ x + y
    moved
  }
  
  ind.out.orig <- outside(coords)
  
  # Nothing to correct: every centroid already lies in a tile
  if (length(ind.out.orig)==0){
    return(list("ind.orig"=ind.out.orig,"ind.final"=ind.out.orig,"coords.orig"=coords.orig,"coords.final"=coords))
  }
  
  # Stage 1: if the intersection is several smaller polygons, shift towards the centre of the closest sub-polygon.
  xy.poly <- matrix(NA_real_,nrow=length(ind.out.orig),ncol=2)
  for (k in seq_along(ind.out.orig)){
    xy.poly[k,] <- as.numeric(coordinates(gCentroid(closest.subpoly(ind.out.orig[k]))))
  }
  coords.final <- relocate(coords,ind.out.orig,xy.poly)
  ind.out.final <- outside(coords.final)
  
  # Stage 2: rather than using points on the boundary, create a grid over the closest sub-polygon and use the closest grid centre that also lies within the window.
  if (length(ind.out.final)>0){
    ind.out2 <- ind.out.final
    # Default to the stage 1 location, so a tile without any interior candidate is left where it is (and is reported in ind.final)
    xy.grid <- coordinates(coords.final)[ind.out2,,drop=FALSE]
    for (k in seq_along(ind.out2)){
      nearest <- nearest.interior.point(ind.out2[k])
      if (is.null(nearest)){
        warning("polypoint: no interior grid point found for tile ",ind.out2[k],"; centre left at the sub-polygon centroid",call.=FALSE)
      } else {
        xy.grid[k,] <- nearest
      }
    }
    coords.final <- relocate(coords.final,ind.out2,xy.grid)
    ind.out.final <- outside(coords.final)
  }
  
  return(list("ind.orig"=ind.out.orig,"ind.final"=ind.out.final,"coords.orig"=coords.orig,"coords.final"=coords.final))
}

# Generate the final coordinates of the grid centres from the quadrats() produced
# Arguments:
#   quad:      the quadrats (tessellation) for one grid resolution, passed on to polypoint().
#   save.name: path of the .rda file to write.
# Prints the indices of any grid cells whose centre is still outside the polygons after correction, and saves the corrected centres to save.name as an object called `coord.final`.
coord.gen <- function(quad,save.name){
  coord.alt <- polypoint(quad)
  # polypoint() names this element "ind.final"; "ind.out.final" does not exist and would always print NULL
  print(coord.alt$ind.final)
  coord.final <- coord.alt$coords.final
  
  # save() stores the object under its variable name, so callers that load() this file get `coord.final`
  save(coord.final,file=save.name)
}


# From the scaled and shifted data and the grid resolution we generate meshes with the necessary maximum mesh edge according to the matching grid resolutions.
# Arguments:
#   window:      the (shifted and scaled) city window; its xrange/yrange give the extent of the grid.
#   data:        data frame with columns x and y holding the grid-cell coordinates.
#   grid_cellsx: number of grid cells in the x direction.
#   grid_cellsy: number of grid cells in the y direction.
#   save.name:   path of the .rda file to write; the mesh is stored as an object called `mesh`.
mesh.gen <- function(window,data,grid_cellsx,grid_cellsy,save.name){
  # Width and height of one grid cell, in the units of the window
  cellsize <- c(diff(window$xrange)/grid_cellsx,diff(window$yrange)/grid_cellsy)
  print(cellsize)
  
  # Grid-cell coordinates as sp points, used as the initial mesh locations
  loc <- data[,c("x","y")]
  coordinates(loc) <- ~ x + y
  
  # The larger cell dimension sets the maximum mesh edge: that length inside the boundary and 0.5 more in the outer extension
  longest.side <- max(cellsize)
  
  mesh <- inla.mesh.2d(loc=loc,
                       boundary=as(window,"SpatialPolygons"), # For the meshes
                       max.edge=c(longest.side, longest.side+0.5),
                       min.angle=c(30, 21),
                       max.n=c(48000, 16000), ## Safeguard against large meshes.
                       max.n.strict=c(128000, 128000), ## Don't build a huge mesh!
                       cutoff=0.0075, ## Filter away adjacent points.
                       offset=c(0.01, 1)) ## Offset for extra boundaries, if needed.
  
  # The saved object takes the variable's name, so this must stay `mesh`
  save(mesh,file=save.name)
}


# Los Angeles -------------------------------------------------------------
# The below code takes the generated output from CountDataGen_final.R to produce count data for each grid resolution that is shifted and scaled where a unit increment in the x or y direction is equivalent to a distance of 10km rather than 1m and the bottom-left corner of the bounding box lies on the origin (0,0). We also produce the necessary meshes for the different resolutions.

# Los Angeles: Set-Up -----------------------------------------------------
# Change the working directory as we want to save the data in separate folders for each city. We then load the census tract data produced in DATA/RAW_DATA/SHAPEFILES/CENSUS_TRACTS and project these to UTM coordinates.

# LA Data
# All relative paths in the Los Angeles sections resolve against the LA folder from here on.
setwd("./LA")

# LA Census Tracts
# NOTE(review): this file presumably provides `ct_LA`, which is used below - confirm against the .rda.
load("../../../SHAPEFILES/CENSUS_TRACTS/LACityCT.rda")

# Project to UTM: both the census tracts and their union - which will form a boundary for the city.
# EPSG:32611 is UTM zone 11N. The boundary is then converted to sp and to a spatstat window.
ct_LA.proj <- lwgeom::st_transform_proj(ct_LA,"epsg:32611")
lacity_boundary <- st_union(ct_LA)
lacity_boundary.proj <- lwgeom::st_transform_proj(lacity_boundary,"epsg:32611")
lacity_geom.proj <- as(lacity_boundary.proj,"Spatial")
lacity_win.proj <- as.owin.SpatialPolygons((lacity_geom.proj))


# Los Angeles: Window Transformation --------------------------------------
# We want to shift and scale the window for the city. We keep the bounding boxes in order to perform the same shifts and scaling for the coordinates.

# Transform the window
# Shift by minus the bounding-box minimum in x and y, so the bottom-left corner of the bounding box moves to (0,0).
bbox <- lacity_geom.proj@bbox
lacity_spshift <- elide(lacity_geom.proj,shift=-c(bbox[1,1],bbox[2,1]))
# NOTE(review): per maptools::elide a numeric `scale` sets the size of the largest output dimension, so max(bbox_shift)/1e4 is presumably what gives 1 unit = 10km as described in the file header - confirm.
bbox_shift <- lacity_spshift@bbox
lacity_spscale <- elide(lacity_spshift,scale=max(bbox_shift)/1e4)
W <- as.owin.SpatialPolygons((lacity_spscale))

# Save the scaled window together with the projected window and geometry, then read them straight back.
LA_Window <- "LAWindowProjScale.rda"
save(W,lacity_win.proj,lacity_geom.proj,file=LA_Window)
load(LA_Window)

# Same expressions as above; bbox and bbox_shift are used later to transform the grid coordinates.
# NOTE(review): the recomputation is presumably here so the script can be resumed from the load() call - confirm.
bbox <- lacity_geom.proj@bbox
lacity_spshift <- elide(lacity_geom.proj,shift=-c(bbox[1,1],bbox[2,1]))
bbox_shift <- lacity_spshift@bbox
lacity_spscale <- elide(lacity_spshift,scale=max(bbox_shift)/1e4)


# Los Angeles: Generate Transformed Data and Mesh -------------------------
# This takes the gridded count data on the projected scale and shifts and scales the count data so that the coordinates for the count data lie within the transformed city window above. These coordinates are also used to generate the necessary mesh for each grid resolution.

# Grid dimensions for each resolution: the number of cells needed in x and y so that a cell is approximately 0.2km, 0.5km, 1km, 2km and 5km wide (the projected window is measured in metres).
x.range <- diff(lacity_win.proj$xrange)
y.range <- diff(lacity_win.proj$yrange)

grid_cellsxvec <- ceiling(x.range/(1e3*c(0.2,0.5,1,2,5)))
grid_cellsyvec <- ceiling(y.range/(1e3*c(0.2,0.5,1,2,5)))


# Work from the coarsest grid (5km) down to the finest (0.2km).
for (i in 5:1){
  dim.ind <- i
  
  print(dim.ind)
  
  grid_cellsx <- grid_cellsxvec[dim.ind]
  grid_cellsy <- grid_cellsyvec[dim.ind]
  
  print(grid_cellsx)
  print(grid_cellsy)
  
  # Load pre-created quadrats for the different resolutions (provides quad.la)
  load(paste0("LAQuad",grid_cellsx,grid_cellsy,"_projFinal.rda"))
  
  # Generate indices for re-ordering the grid cells, this code results in the ordering such that we go down the y axis before moving across the x axis.
  # The row and column numbers are read from the tile names, two numbers per tile.
  g <- quad.la
  m <- matrix(as.numeric(unlist(str_extract_all(names(g$tiles),"\\d*\\d"))),ncol=2,byrow=TRUE)
  ord.df.la <- data.frame(ind=seq_len(g$n),row=m[,1],col=m[,2]) # col matches the raster definition of col=x, and row=y 
  ord.df.la <- ord.df.la[order(ord.df.la$col),]
  # Save the ordering data frame
  ord.df.file <- paste0("LAOrdDF",grid_cellsx,grid_cellsy,"_projFinalScale.rda")
  save(ord.df.la,file=ord.df.file)
  
  # Generate the coordinates for the count data frame, these are ordered by the original grid cell ordering - although note that they are on the projected scale
  # coord.gen() writes `coord.final` to coord.file, which is loaded straight back.
  coord.file <- paste0("LACoord",grid_cellsx,grid_cellsy,"_projFinalScale.rda")
  coord.gen(quad=quad.la,save.name=coord.file)
  load(coord.file)
  coord.proj <- coordinates(coord.final) # corrected grid centres on the projected scale, original tile order
  
  # Load the original count data frame for this grid resolution, we now want to assign the projected and corrected (so that they lie within the window) coordinates so that the data will now all lie in the projected city polygon.
  load(paste0("LA2015CT",grid_cellsx,grid_cellsy,"CountData_projFinal.rda"))
  
  # Keep the original coordinates as x.lab/y.lab and add the corrected projected ones as x.proj/y.proj
  hom_countdf$x.lab <- hom_countdf$x
  hom_countdf$y.lab <- hom_countdf$y
  
  hom_countdf$x.proj <- coord.proj[,1]
  hom_countdf$y.proj <- coord.proj[,2]
  
  # Re-order the rows by the data frame created above
  hom_countdf <- hom_countdf[ord.df.la$ind,]
  
  gta_countdf$x.lab <- gta_countdf$x
  gta_countdf$y.lab <- gta_countdf$y
  
  gta_countdf$x.proj <- coord.proj[,1]
  gta_countdf$y.proj <- coord.proj[,2]
  
  # Re-order the rows by the data frame created above
  gta_countdf <- gta_countdf[ord.df.la$ind,]
  
  # Now shift and scale the coordinates for the projected data so that they will lie within the shifted and scaled polygons for the city.
  # The bounding boxes of the city geometry are passed as `bb` so the points get exactly the same transformation as the window.
  coord.df <- data.frame(x=coord.proj[ord.df.la$ind,1],y=coord.proj[ord.df.la$ind,2])
  coordinates(coord.df) <- ~ x + y
  coord.df.sf <- st_as_sf(coord.df)
  coord.df.sf.proj <- st_set_crs(coord.df.sf,"epsg:32611")
  coord.df.proj <- as(coord.df.sf.proj,"Spatial")
  coord.dfshift <- elide(coord.df.proj,bb=bbox,shift=-c(bbox[1,1],bbox[2,1]))
  coord.dfscale <- elide(coord.dfshift,bb=bbox_shift,scale=max(bbox_shift)/1e4)
  coord.scale <- coordinates(coord.dfscale)
  
  hom_countdf$x <- coord.scale[,1]
  hom_countdf$y <- coord.scale[,2]
  
  gta_countdf$x <- coord.scale[,1]
  gta_countdf$y <- coord.scale[,2]
  
  # Generate sf data frames.
  spdf_homcount <- SpatialPointsDataFrame(coords=hom_countdf[,c("x","y")],data=hom_countdf)
  sf_homcount <- st_as_sf(spdf_homcount)
  
  spdf_gtacount <- SpatialPointsDataFrame(coords=gta_countdf[,c("x","y")],data=gta_countdf)
  sf_gtacount <- st_as_sf(spdf_gtacount)
  
  df.save <- paste0("LA2015CT",grid_cellsx,grid_cellsy,"CountData_projFinalScale.rda")
  spat.save.sf <- paste0("LA2015CT",grid_cellsx,grid_cellsy,"SFCountData_projFinalScale.rda")
  
  # Save
  save(hom_countdf,gta_countdf,file=df.save)
  save(sf_homcount,sf_gtacount,file=spat.save.sf)
  
  # Use either homicide or MVT data frames (coordinates will be the same so either works) to generate the necessary mesh.
  mesh.file <- paste0("LAMesh",grid_cellsx,grid_cellsy,"_projFinalScale.rda")
  mesh.gen(window=W,data=hom_countdf,grid_cellsx=grid_cellsx,grid_cellsy=grid_cellsy,save.name=mesh.file)
  
}

# Clear the Los Angeles objects but keep the helper functions: a plain rm(list=ls()) would also delete polypoint(), coord.gen() and mesh.gen(), which the following city sections still call.
rm(list=setdiff(ls(),c("polypoint","coord.gen","mesh.gen")))


# Los Angeles: Checking Created Outputs -----------------------------------
# Some code to double check that everything carried over okay between the projected outputs from CountDataGen_final.R and the shifted and scaled outputs.
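# This code is commented out, as it is not necessary for generating the required outputs, but it can be useful as a sanity check. Note that it relies on grid_cellsxvec and grid_cellsyvec, so it must be run before the workspace is cleared.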

# for (i in 5:1){
#   dim.ind <- i
#   grid_cellsx <- grid_cellsxvec[dim.ind]
#   grid_cellsy <- grid_cellsyvec[dim.ind]
#   load(paste0("LA2015CT",grid_cellsx,grid_cellsy,"SFCountData_projFinal.rda"))
#   sf_homcount.old <- sf_homcount
#   sf_gtacount.old <- sf_gtacount
# 
#   spat.save.sf <- paste0("LA2015CT",grid_cellsx,grid_cellsy,"SFCountData_projFinalScale.rda")
#   load(spat.save.sf)
# 
#   print(nrow(sf_homcount.old))
#   print(nrow(sf_homcount))
#   print(nrow(sf_gtacount.old))
#   print(nrow(sf_gtacount))
# 
#   print(sum(sf_homcount.old$hom))
#   print(sum(sf_homcount$hom))
#   print(sum(sf_gtacount.old$gta))
#   print(sum(sf_gtacount$gta))
# }
# 
# 
# for (i in 5:1){
#   print(i)
#   dim.ind <- i
# 
#   grid_cellsx <- grid_cellsxvec[dim.ind]
#   grid_cellsy <- grid_cellsyvec[dim.ind]
# 
#   print(grid_cellsx)
#   print(grid_cellsy)
# 
#   load(paste0("LA2015CT",grid_cellsx,grid_cellsy,"SFCountData_projFinal.rda"))
#   sf_homcount.old <- sf_homcount
#   sf_gtacount.old <- sf_gtacount
# 
#   spat.save.sf <- paste0("LA2015CT",grid_cellsx,grid_cellsy,"SFCountData_projFinalScale.rda")
#   load(spat.save.sf)
# 
# 
#   pdf(paste0("CompareProjScaleHom",grid_cellsx,grid_cellsy,".pdf"),h=20,w=20,pointsize=20)
#   plot(sf_homcount.old["hom"],axes=T,pch=19)
#   plot(sf_homcount["hom"],axes=T,pch=19)
# 
#   par(mfrow=c(1,2))
#   plot(sf_homcount.old["hom"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   plot(sf_homcount["hom"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   par(mfrow=c(1,1))
# 
# 
#   plot(sf_homcount.old["pop"],axes=T,pch=19)
#   plot(sf_homcount["pop"],axes=T,pch=19)
# 
#   par(mfrow=c(1,2))
#   plot(sf_homcount.old["pop"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   plot(sf_homcount["pop"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   par(mfrow=c(1,1))
# 
# 
#   plot(sf_homcount.old["inc"],axes=T,pch=19)
#   plot(sf_homcount["inc"],axes=T,pch=19)
# 
#   par(mfrow=c(1,2))
#   plot(sf_homcount.old["inc"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   plot(sf_homcount["inc"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   par(mfrow=c(1,1))
# 
#   dev.off()
# 
# 
#   pdf(paste0("CompareProjScaleGTA",grid_cellsx,grid_cellsy,".pdf"),h=20,w=20,pointsize=20)
#   plot(sf_gtacount.old["gta"],axes=T,pch=19)
#   plot(sf_gtacount["gta"],axes=T,pch=19)
# 
#   par(mfrow=c(1,2))
#   plot(sf_gtacount.old["gta"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   plot(sf_gtacount["gta"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   par(mfrow=c(1,1))
# 
# 
#   plot(sf_gtacount.old["pop"],axes=T,pch=19)
#   plot(sf_gtacount["pop"],axes=T,pch=19)
# 
#   par(mfrow=c(1,2))
#   plot(sf_gtacount.old["pop"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   plot(sf_gtacount["pop"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   par(mfrow=c(1,1))
# 
# 
#   plot(sf_gtacount.old["inc"],axes=T,pch=19)
#   plot(sf_gtacount["inc"],axes=T,pch=19)
# 
#   par(mfrow=c(1,2))
#   plot(sf_gtacount.old["inc"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   plot(sf_gtacount["inc"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   par(mfrow=c(1,1))
# 
#   dev.off()
# }
# 
# rm(list=ls())


# Los Angeles: Re-set WD --------------------------------------------------

# Return to the folder containing this script so that the next city's relative paths resolve correctly.
# rstudioapi is already attached at the top of the script; attaching it again is harmless.
# NOTE(review): getActiveDocumentContext() presumably needs an RStudio session - confirm before running with Rscript.
library("rstudioapi")
# Either setwd() to the source file location, or run the following:
setwd(dirname(getActiveDocumentContext()$path))



# New York City -----------------------------------------------------------
# The below code takes the generated output from CountDataGen_final.R to produce count data for each grid resolution that is shifted and scaled where a unit increment in the x or y direction is equivalent to a distance of 10km rather than 1m and the bottom-left corner of the bounding box lies on the origin (0,0). We also produce the necessary meshes for the different resolutions.

# New York: Set-Up --------------------------------------------------------
# Change the working directory as we want to save the data in separate folders for each city. We then load the census tract data produced in DATA/RAW_DATA/SHAPEFILES/CENSUS_TRACTS and project these to UTM coordinates.

# NYC Data
# All relative paths in the New York sections resolve against the NYC folder from here on.
setwd("./NYC")

# NYC Census Tracts
# NOTE(review): this file presumably provides `ct_NY`, which is used below - confirm against the .rda.
load("../../../SHAPEFILES/CENSUS_TRACTS/NYCityCT.rda")

# Project to UTM: both the census tracts and their union - which will form a boundary for the city.
# EPSG:32618 is UTM zone 18N. The boundary is then converted to sp and to a spatstat window.
ct_NY.proj <- lwgeom::st_transform_proj(ct_NY,"epsg:32618")
nycity_boundary <- st_union(ct_NY)
nycity_boundary.proj <- lwgeom::st_transform_proj(nycity_boundary,"epsg:32618")
nycity_geom.proj <- as(nycity_boundary.proj,"Spatial")
nycity_win.proj <- as.owin.SpatialPolygons((nycity_geom.proj))


# New York: Window Transformation -----------------------------------------
# We want to shift and scale the window for the city. We keep the bounding boxes in order to perform the same shifts and scaling for the coordinates.

# Transform the window
# Shift by minus the bounding-box minimum in x and y, so the bottom-left corner of the bounding box moves to (0,0).
bbox <- nycity_geom.proj@bbox
nycity_spshift <- elide(nycity_geom.proj,shift=-c(bbox[1,1],bbox[2,1]))
# NOTE(review): per maptools::elide a numeric `scale` sets the size of the largest output dimension, so max(bbox_shift)/1e4 is presumably what gives 1 unit = 10km as described in the file header - confirm.
bbox_shift <- nycity_spshift@bbox
nycity_spscale <- elide(nycity_spshift,scale=max(bbox_shift)/1e4)
W <- as.owin.SpatialPolygons((nycity_spscale))

# Save the scaled window together with the projected window and geometry, then read them straight back.
NY_Window <- "NYWindowProjScale.rda"
save(W,nycity_win.proj,nycity_geom.proj,file=NY_Window)
load(NY_Window)

# Same expressions as above; bbox and bbox_shift are used later to transform the grid coordinates.
# NOTE(review): the recomputation is presumably here so the script can be resumed from the load() call - confirm.
bbox <- nycity_geom.proj@bbox
nycity_spshift <- elide(nycity_geom.proj,shift=-c(bbox[1,1],bbox[2,1]))
bbox_shift <- nycity_spshift@bbox
nycity_spscale <- elide(nycity_spshift,scale=max(bbox_shift)/1e4)


# New York: Generate Transformed Data and Mesh ----------------------------
# This takes the gridded count data on the projected scale and shifts and scales the count data so that the coordinates for the count data lie within the transformed city window above. These coordinates are also used to generate the necessary mesh for each grid resolution.

# Grid dimensions for each resolution: the number of cells needed in x and y so that a cell is approximately 0.2km, 0.5km, 1km, 2km and 5km wide (the projected window is measured in metres).
x.range <- diff(nycity_win.proj$xrange)
y.range <- diff(nycity_win.proj$yrange)

grid_cellsxvec <- ceiling(x.range/(1e3*c(0.2,0.5,1,2,5)))
grid_cellsyvec <- ceiling(y.range/(1e3*c(0.2,0.5,1,2,5)))


# Work from the coarsest grid (5km) down to the finest (0.2km).
for (i in 5:1){
  dim.ind <- i
  
  print(dim.ind)
  
  grid_cellsx <- grid_cellsxvec[dim.ind]
  grid_cellsy <- grid_cellsyvec[dim.ind]
  
  print(grid_cellsx)
  print(grid_cellsy)
  
  # Load pre-created quadrats for the different resolutions (provides quad.ny)
  load(paste0("NYQuad",grid_cellsx,grid_cellsy,"_projFinal.rda"))
  
  # Generate indices for re-ordering the grid cells, this code results in the ordering such that we go down the y axis before moving across the x axis.
  # The row and column numbers are read from the tile names, two numbers per tile.
  g <- quad.ny
  m <- matrix(as.numeric(unlist(str_extract_all(names(g$tiles),"\\d*\\d"))),ncol=2,byrow=TRUE)
  ord.df.ny <- data.frame(ind=seq_len(g$n),row=m[,1],col=m[,2]) # col matches the raster definition of col=x, and row=y
  ord.df.ny <- ord.df.ny[order(ord.df.ny$col),]
  # Save the ordering data frame
  ord.df.file <- paste0("NYOrdDF",grid_cellsx,grid_cellsy,"_projFinalScale.rda")
  save(ord.df.ny,file=ord.df.file)
  
  # Generate the coordinates for the count data frame, these are ordered by the original grid cell ordering - although note that they are on the projected scale
  # coord.gen() writes `coord.final` to coord.file, which is loaded straight back.
  coord.file <- paste0("NYCoord",grid_cellsx,grid_cellsy,"_projFinalScale.rda")
  coord.gen(quad=quad.ny,save.name=coord.file)
  load(coord.file)
  coord.proj <- coordinates(coord.final) # corrected grid centres on the projected scale, original tile order
  
  # Load the original count data frame for this grid resolution, we now want to assign the projected and corrected (so that they lie within the window) coordinates so that the data will now all lie in the projected city polygon.
  load(paste0("NY2015CT",grid_cellsx,grid_cellsy,"CountData_projFinal.rda"))
  
  # Keep the original coordinates as x.lab/y.lab and add the corrected projected ones as x.proj/y.proj
  hom_countdf$x.lab <- hom_countdf$x
  hom_countdf$y.lab <- hom_countdf$y
  
  hom_countdf$x.proj <- coord.proj[,1]
  hom_countdf$y.proj <- coord.proj[,2]
  
  # Re-order the rows by the data frame created above
  hom_countdf <- hom_countdf[ord.df.ny$ind,]
  
  gta_countdf$x.lab <- gta_countdf$x
  gta_countdf$y.lab <- gta_countdf$y
  
  gta_countdf$x.proj <- coord.proj[,1]
  gta_countdf$y.proj <- coord.proj[,2]
  
  # Re-order the rows by the data frame created above
  gta_countdf <- gta_countdf[ord.df.ny$ind,]
  
  
  # Now shift and scale the coordinates for the projected data so that they will lie within the shifted and scaled polygons for the city.
  # The bounding boxes of the city geometry are passed as `bb` so the points get exactly the same transformation as the window.
  coord.df <- data.frame(x=coord.proj[ord.df.ny$ind,1],y=coord.proj[ord.df.ny$ind,2])
  coordinates(coord.df) <- ~ x + y
  coord.df.sf <- st_as_sf(coord.df)
  coord.df.sf.proj <- st_set_crs(coord.df.sf,"epsg:32618")
  coord.df.proj <- as(coord.df.sf.proj,"Spatial")
  coord.dfshift <- elide(coord.df.proj,bb=bbox,shift=-c(bbox[1,1],bbox[2,1]))
  coord.dfscale <- elide(coord.dfshift,bb=bbox_shift,scale=max(bbox_shift)/1e4)
  coord.scale <- coordinates(coord.dfscale)
  
  hom_countdf$x <- coord.scale[,1]
  hom_countdf$y <- coord.scale[,2]
  
  gta_countdf$x <- coord.scale[,1]
  gta_countdf$y <- coord.scale[,2]
  
  # Generate sf data frames.
  spdf_homcount <- SpatialPointsDataFrame(coords=hom_countdf[,c("x","y")],data=hom_countdf)
  sf_homcount <- st_as_sf(spdf_homcount)
  
  spdf_gtacount <- SpatialPointsDataFrame(coords=gta_countdf[,c("x","y")],data=gta_countdf)
  sf_gtacount <- st_as_sf(spdf_gtacount)
  
  df.save <- paste0("NY2015CT",grid_cellsx,grid_cellsy,"CountData_projFinalScale.rda")
  spat.save.sf <- paste0("NY2015CT",grid_cellsx,grid_cellsy,"SFCountData_projFinalScale.rda")
  
  # Save
  save(hom_countdf,gta_countdf,file=df.save)
  save(sf_homcount,sf_gtacount,file=spat.save.sf)
  
  # Use either homicide or MVT data frames (coordinates will be the same so either works) to generate the necessary mesh.
  mesh.file <- paste0("NYMesh",grid_cellsx,grid_cellsy,"_projFinalScale.rda")
  mesh.gen(window=W,data=hom_countdf,grid_cellsx=grid_cellsx,grid_cellsy=grid_cellsy,save.name=mesh.file)
  
}
# Clear the New York objects but keep the helper functions: a plain rm(list=ls()) would also delete polypoint(), coord.gen() and mesh.gen(), which the following city section still calls.
rm(list=setdiff(ls(),c("polypoint","coord.gen","mesh.gen")))


# New York: Checking Created Outputs --------------------------------------
# Some code to double check that everything carried over okay between the projected outputs from CountDataGen_final.R and the shifted and scaled outputs.
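# This code is commented out, as it is not necessary for generating the required outputs, but it can be useful as a sanity check. Note that it relies on grid_cellsxvec and grid_cellsyvec, so it must be run before the workspace is cleared.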

# for (i in 5:1){
#   dim.ind <- i
#   grid_cellsx <- grid_cellsxvec[dim.ind]
#   grid_cellsy <- grid_cellsyvec[dim.ind]
#   load(paste0("NY2015CT",grid_cellsx,grid_cellsy,"SFCountData_projFinal.rda"))
#   sf_homcount.old <- sf_homcount
#   sf_gtacount.old <- sf_gtacount
# 
#   spat.save.sf <- paste0("NY2015CT",grid_cellsx,grid_cellsy,"SFCountData_projFinalScale.rda")
#   load(spat.save.sf)
# 
#   print(nrow(sf_homcount.old))
#   print(nrow(sf_homcount))
#   print(nrow(sf_gtacount.old))
#   print(nrow(sf_gtacount))
# 
#   print(sum(sf_homcount.old$hom))
#   print(sum(sf_homcount$hom))
#   print(sum(sf_gtacount.old$gta))
#   print(sum(sf_gtacount$gta))
# }
# 
# for (i in 5:1){
#   print(i)
#   dim.ind <- i
# 
#   grid_cellsx <- grid_cellsxvec[dim.ind]
#   grid_cellsy <- grid_cellsyvec[dim.ind]
# 
#   print(grid_cellsx)
#   print(grid_cellsy)
# 
#   load(paste0("NY2015CT",grid_cellsx,grid_cellsy,"SFCountData_projFinal.rda"))
#   sf_homcount.old <- sf_homcount
#   sf_gtacount.old <- sf_gtacount
# 
#   spat.save.sf <- paste0("NY2015CT",grid_cellsx,grid_cellsy,"SFCountData_projFinalScale.rda")
#   load(spat.save.sf)
# 
# 
#   pdf(paste0("CompareNYProjScaleHom",grid_cellsx,grid_cellsy,".pdf"),h=20,w=20,pointsize=20)
#   plot(sf_homcount.old["hom"],axes=T,pch=19)
#   plot(sf_homcount["hom"],axes=T,pch=19)
# 
#   par(mfrow=c(1,2))
#   plot(sf_homcount.old["hom"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   plot(sf_homcount["hom"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   par(mfrow=c(1,1))
# 
# 
#   plot(sf_homcount.old["pop"],axes=T,pch=19)
#   plot(sf_homcount["pop"],axes=T,pch=19)
# 
#   par(mfrow=c(1,2))
#   plot(sf_homcount.old["pop"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   plot(sf_homcount["pop"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   par(mfrow=c(1,1))
# 
# 
#   plot(sf_homcount.old["inc"],axes=T,pch=19)
#   plot(sf_homcount["inc"],axes=T,pch=19)
# 
#   par(mfrow=c(1,2))
#   plot(sf_homcount.old["inc"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   plot(sf_homcount["inc"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   par(mfrow=c(1,1))
# 
#   dev.off()
# 
# 
#   pdf(paste0("CompareNYProjScaleGTA",grid_cellsx,grid_cellsy,".pdf"),h=20,w=20,pointsize=20)
#   plot(sf_gtacount.old["gta"],axes=T,pch=19)
#   plot(sf_gtacount["gta"],axes=T,pch=19)
# 
#   par(mfrow=c(1,2))
#   plot(sf_gtacount.old["gta"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   plot(sf_gtacount["gta"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   par(mfrow=c(1,1))
# 
# 
#   plot(sf_gtacount.old["pop"],axes=T,pch=19)
#   plot(sf_gtacount["pop"],axes=T,pch=19)
# 
#   par(mfrow=c(1,2))
#   plot(sf_gtacount.old["pop"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   plot(sf_gtacount["pop"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   par(mfrow=c(1,1))
# 
# 
#   plot(sf_gtacount.old["inc"],axes=T,pch=19)
#   plot(sf_gtacount["inc"],axes=T,pch=19)
# 
#   par(mfrow=c(1,2))
#   plot(sf_gtacount.old["inc"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   plot(sf_gtacount["inc"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   par(mfrow=c(1,1))
# 
#   dev.off()
# }
# 
# rm(list=ls())


# New York: Re-set WD -----------------------------------------------------

# Return to the folder containing this script so that the next city's relative paths resolve correctly.
# rstudioapi is already attached at the top of the script; attaching it again is harmless.
# NOTE(review): getActiveDocumentContext() presumably needs an RStudio session - confirm before running with Rscript.
library("rstudioapi")
# Either setwd() to the source file location, or run the following:
setwd(dirname(getActiveDocumentContext()$path))



# Portland ----------------------------------------------------------------
# The below code takes the generated output from CountDataGen_final.R to produce count data for each grid resolution that is shifted and scaled where a unit increment in the x or y direction is equivalent to a distance of 10km rather than 1m and the bottom-left corner of the bounding box lies on the origin (0,0). We also produce the necessary meshes for the different resolutions.

# Portland: Set-Up --------------------------------------------------------
# Change the working directory as we want to save the data in separate folders for each city. We then load the census tract data produced in DATA/RAW_DATA/SHAPEFILES/CENSUS_TRACTS and project these to UTM coordinates.

# Portland Data
setwd("./Portland")

# Portland Census Tracts
load("../../../SHAPEFILES/CENSUS_TRACTS/PCityCT.rda")

# Project to UTM: both the census tracts and their union - which will form a boundary for the city.
ct_P.proj <- lwgeom::st_transform_proj(ct_P,"epsg:32610")
pcity_boundary <- st_union(ct_P)
pcity_boundary.proj <- lwgeom::st_transform_proj(pcity_boundary,"epsg:32610")
pcity_geom.proj <- as(pcity_boundary.proj,"Spatial")
pcity_win.proj <- as.owin.SpatialPolygons((pcity_geom.proj))


# Portland: Window Transformation -----------------------------------------
# We want to shift and scale the window for the city. We keep the bounding boxes in order to perform the same shifts and scaling for the coordinates.

# Transform the window
bbox <- pcity_geom.proj@bbox
pcity_spshift <- elide(pcity_geom.proj,shift=-c(bbox[1,1],bbox[2,1]))
bbox_shift <- pcity_spshift@bbox
pcity_spscale <- elide(pcity_spshift,scale=max(bbox_shift)/1e4)
W <- as.owin.SpatialPolygons((pcity_spscale))

P_Window <- "PWindowProjScale.rda"
save(W,pcity_win.proj,pcity_geom.proj,file=P_Window)
load(P_Window)

bbox <- pcity_geom.proj@bbox
pcity_spshift <- elide(pcity_geom.proj,shift=-c(bbox[1,1],bbox[2,1]))
bbox_shift <- pcity_spshift@bbox
pcity_spscale <- elide(pcity_spshift,scale=max(bbox_shift)/1e4)


# Portland: Generate Transformed Data and Mesh ----------------------------
# This takes the gridded count data on the projected scale and shifts and scales the count data so that the coordinates for the count data lie within the transformed city window above. These coordinates are also used to generate the necessary mesh for each grid resolution.

# Number of grid cells required along each axis for cells of approximately 0.2km, 0.5km, 1km, 2km and 5km side length.
# Extent of the projected city window in the x and y directions.
x.range <- diff(pcity_win.proj$xrange)
y.range <- diff(pcity_win.proj$yrange)

# Cell side lengths on the projected scale; position k in the vectors below corresponds to the k-th side length.
cell_side <- 1e3 * c(0.2, 0.5, 1, 2, 5)
grid_cellsxvec <- ceiling(x.range / cell_side)
grid_cellsyvec <- ceiling(y.range / cell_side)


# Loop over the five grid resolutions, from the last entry of grid_cellsxvec/grid_cellsyvec (largest cells) down to the first (smallest cells).
# For each resolution: load the quadrats and count data, attach the projected coordinates, re-order the rows, shift and scale the coordinates, save the results and generate the mesh.
for (i in 5:1){
  dim.ind <- i
  
  print(dim.ind)
  
  grid_cellsx <- grid_cellsxvec[dim.ind]
  grid_cellsy <- grid_cellsyvec[dim.ind]
  
  print(grid_cellsx)
  print(grid_cellsy)
  
  # Load pre-created quadrats for the different resolutions (the file is expected to provide quad.p)
  load(paste0("PQuad",grid_cellsx,grid_cellsy,"_projFinal.rda"))
  
  # Generate indices for re-ordering the grid cells, this code results in the ordering such that we go down the y axis before moving across the x axis.
  g <- quad.p
  # The two numbers in each tile name are read as (row, col).
  m <- matrix(as.numeric(unlist(str_extract_all(names(g$tiles),"\\d*\\d"))),ncol=2,byrow=TRUE)
  # col corresponds to x and row to y (the raster convention), which matches the plots visually.
  # seq_len() is used rather than 1:g$n so that an empty tessellation gives an empty index instead of c(1,0).
  ord.df.p <- data.frame(ind=seq_len(g$n),row=m[,1],col=m[,2])
  ord.df.p <- ord.df.p[order(ord.df.p$col),]
  # Save the ordering data frame
  ord.df.file <- paste0("POrdDF",grid_cellsx,grid_cellsy,"_projFinalScale.rda")
  save(ord.df.p,file=ord.df.file)
  
  # Generate the coordinates for the count data frame, these are ordered by the original grid cell ordering - although note that they are on the projected scale
  # coord.gen() is called with the output file name, and that file is then loaded; it is expected to provide coord.final.
  coord.file <- paste0("PCoord",grid_cellsx,grid_cellsy,"_projFinalScale.rda")
  coord.gen(quad=quad.p,coord.file)
  load(coord.file)
  
  # Extract the coordinate matrix once; it is re-used for both crime types and for the shift/scale step.
  coord.mat <- coordinates(coord.final)
  
  # Load the original count data frames for this grid resolution (expected to provide hom_countdf and gta_countdf); we now want to assign the projected and corrected (so that they lie within the window) coordinates so that the data will all lie in the projected city polygon.
  load(paste0("P2015CT",grid_cellsx,grid_cellsy,"CountData_projFinal.rda"))
  
  # The coordinates, both count data frames and the tessellation must describe the same cells in the same order; otherwise the assignments and re-ordering below would silently recycle values or introduce NA rows.
  stopifnot(nrow(coord.mat)==g$n,nrow(hom_countdf)==g$n,nrow(gta_countdf)==g$n)
  
  # Keep the original x and y columns under new names, as x and y are overwritten with the shifted and scaled coordinates below.
  hom_countdf$x.lab <- hom_countdf$x
  hom_countdf$y.lab <- hom_countdf$y
  
  hom_countdf$x.proj <- coord.mat[,1]
  hom_countdf$y.proj <- coord.mat[,2]
  
  # Re-order the rows by the data frame created above
  hom_countdf <- hom_countdf[ord.df.p$ind,]
  
  gta_countdf$x.lab <- gta_countdf$x
  gta_countdf$y.lab <- gta_countdf$y
  
  gta_countdf$x.proj <- coord.mat[,1]
  gta_countdf$y.proj <- coord.mat[,2]
  
  # Re-order the rows by the data frame created above
  gta_countdf <- gta_countdf[ord.df.p$ind,]
  
  # Now shift and scale the coordinates for the projected data so that they will lie within the shifted and scaled polygons for the city.
  # The bounding boxes of the city polygon (bbox, bbox_shift) are passed to elide() so that the points receive the same shift and scale as the window.
  coord.df <- data.frame(x=coord.mat[ord.df.p$ind,1],y=coord.mat[ord.df.p$ind,2])
  coordinates(coord.df) <- ~ x + y
  coord.df.sf <- st_as_sf(coord.df)
  coord.df.sf.proj <- st_set_crs(coord.df.sf,"epsg:32610")
  coord.df.proj <- as(coord.df.sf.proj,"Spatial")
  coord.dfshift <- elide(coord.df.proj,bb=bbox,shift=-c(bbox[1,1],bbox[2,1]))
  coord.dfscale <- elide(coord.dfshift,bb=bbox_shift,scale=max(bbox_shift)/1e4)
  
  # Overwrite x and y with the shifted and scaled coordinates (rows are already in the new ordering).
  coord.scaled <- coordinates(coord.dfscale)
  
  hom_countdf$x <- coord.scaled[,1]
  hom_countdf$y <- coord.scaled[,2]
  
  gta_countdf$x <- coord.scaled[,1]
  gta_countdf$y <- coord.scaled[,2]
  
  # Generate sf data frames.
  spdf_homcount <- SpatialPointsDataFrame(coords=hom_countdf[,c("x","y")],data=hom_countdf)
  sf_homcount <- st_as_sf(spdf_homcount)
  
  spdf_gtacount <- SpatialPointsDataFrame(coords=gta_countdf[,c("x","y")],data=gta_countdf)
  sf_gtacount <- st_as_sf(spdf_gtacount)
  
  df.save <- paste0("P2015CT",grid_cellsx,grid_cellsy,"CountData_projFinalScale.rda")
  spat.save.sf <- paste0("P2015CT",grid_cellsx,grid_cellsy,"SFCountData_projFinalScale.rda")
  
  # Save
  save(hom_countdf,gta_countdf,file=df.save)
  save(sf_homcount,sf_gtacount,file=spat.save.sf)
  
  # Use either homicide or MVT data frames (coordinates will be the same so either works) to generate the necessary mesh.
  mesh.file <- paste0("PMesh",grid_cellsx,grid_cellsy,"_projFinalScale.rda")
  mesh.gen(window=W,data=hom_countdf,grid_cellsx=grid_cellsx,grid_cellsy=grid_cellsy,mesh.file)
  
}

# Clear the workspace now that all Portland outputs have been written to file.
rm(list=ls())


# Portland:  Checking Created Outputs -------------------------------------
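# Some code to double-check that everything carried over correctly between the projected outputs from CountDataGen_final.R and the shifted and scaled outputs.
# This code is commented out, as it is not necessary for generating the required outputs, but it can be interesting to consider. Note that it uses grid_cellsxvec and grid_cellsyvec, which are removed by the rm(list=ls()) above, so these must be re-created before running it.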

# for (i in 5:1){
#   dim.ind <- i
#   grid_cellsx <- grid_cellsxvec[dim.ind]
#   grid_cellsy <- grid_cellsyvec[dim.ind]
#   load(paste0("P2015CT",grid_cellsx,grid_cellsy,"SFCountData_projFinal.rda"))
#   sf_homcount.old <- sf_homcount
#   sf_gtacount.old <- sf_gtacount
#   
#   spat.save.sf <- paste0("P2015CT",grid_cellsx,grid_cellsy,"SFCountData_projFinalScale.rda")
#   load(spat.save.sf)
#   
#   print(nrow(sf_homcount.old))
#   print(nrow(sf_homcount))
#   print(nrow(sf_gtacount.old))
#   print(nrow(sf_gtacount))
#   
#   print(sum(sf_homcount.old$hom))
#   print(sum(sf_homcount$hom))
#   print(sum(sf_gtacount.old$gta))
#   print(sum(sf_gtacount$gta))
# }
# 
# for (i in 5:1){
#   print(i)
#   dim.ind <- i
#   
#   grid_cellsx <- grid_cellsxvec[dim.ind]
#   grid_cellsy <- grid_cellsyvec[dim.ind]
#   
#   print(grid_cellsx)
#   print(grid_cellsy)
#   
#   load(paste0("P2015CT",grid_cellsx,grid_cellsy,"SFCountData_projFinal.rda"))
#   sf_homcount.old <- sf_homcount
#   sf_gtacount.old <- sf_gtacount
#   
#   spat.save.sf <- paste0("P2015CT",grid_cellsx,grid_cellsy,"SFCountData_projFinalScale.rda")
#   load(spat.save.sf)
#   
#   
#   pdf(paste0("ComparePProjScaleHom",grid_cellsx,grid_cellsy,".pdf"),h=20,w=20,pointsize=20)
#   plot(sf_homcount.old["hom"],axes=T,pch=19)
#   plot(sf_homcount["hom"],axes=T,pch=19)
#   
#   par(mfrow=c(1,2))
#   plot(sf_homcount.old["hom"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   plot(sf_homcount["hom"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   par(mfrow=c(1,1))
#   
#   
#   plot(sf_homcount.old["pop"],axes=T,pch=19)
#   plot(sf_homcount["pop"],axes=T,pch=19)
#   
#   par(mfrow=c(1,2))
#   plot(sf_homcount.old["pop"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   plot(sf_homcount["pop"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   par(mfrow=c(1,1))
#   
#   
#   plot(sf_homcount.old["inc"],axes=T,pch=19)
#   plot(sf_homcount["inc"],axes=T,pch=19)
#   
#   par(mfrow=c(1,2))
#   plot(sf_homcount.old["inc"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   plot(sf_homcount["inc"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   par(mfrow=c(1,1))
#   
#   dev.off()
#   
#   
#   pdf(paste0("ComparePProjScaleGTA",grid_cellsx,grid_cellsy,".pdf"),h=20,w=20,pointsize=20)
#   plot(sf_gtacount.old["gta"],axes=T,pch=19)
#   plot(sf_gtacount["gta"],axes=T,pch=19)
#   
#   par(mfrow=c(1,2))
#   plot(sf_gtacount.old["gta"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   plot(sf_gtacount["gta"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   par(mfrow=c(1,1))
#   
#   
#   plot(sf_gtacount.old["pop"],axes=T,pch=19)
#   plot(sf_gtacount["pop"],axes=T,pch=19)
#   
#   par(mfrow=c(1,2))
#   plot(sf_gtacount.old["pop"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   plot(sf_gtacount["pop"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   par(mfrow=c(1,1))
#   
#   
#   plot(sf_gtacount.old["inc"],axes=T,pch=19)
#   plot(sf_gtacount["inc"],axes=T,pch=19)
#   
#   par(mfrow=c(1,2))
#   plot(sf_gtacount.old["inc"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   plot(sf_gtacount["inc"],axes=T,pch=19, key.pos = NULL, reset = FALSE)
#   par(mfrow=c(1,1))
#   
#   dev.off()
# }
# 
# rm(list=ls())


# Portland: Re-set WD -----------------------------------------------------

# Return to the folder containing this script, undoing the earlier move into the Portland sub-folder.
library(rstudioapi)
# Either setwd() to the source file location, or run the following:
script_path <- getActiveDocumentContext()$path
setwd(dirname(script_path))



# sessionInfo() -----------------------------------------------------------

# Record the R version and the attached/loaded packages used for this run.
sessionInfo()



