
# Removing Error Processors SBC LA Polygon --------------------------------

# Removes the recorded errors from processes whose error arose because of an
# error in another process running on the same node. Originally, a space error
# produced errors in both processors on the same node; those errors were
# removed in IrregSBCErroReSetProcs_final.R so that the runs could continue and
# we could test which of the processors had caused the space error. This R
# script handles the processors whose re-runs produced another space error
# after they had already surpassed the original stopping point: these clearly
# were not the source of the error, so we remove the errors from them.

# Author: Nadeen Khaleel


# Make the folder holding this script the working directory: either call
# setwd() on that folder yourself, or run the two lines below (they ask the
# rstudioapi package for the path of the document currently being edited).
library("rstudioapi")
setwd(dirname(getActiveDocumentContext()[["path"]]))


# Removing Errors ---------------------------------------------------------

# Identify the processors for which we want to remove the errors.
procs <- c(8, 10, 12, 28, 39)

# Work out where a process would resume, from its saved outputs.
#
# Arguments:
#   gm       list whose element (grid - 1) * N.m + mesh carries a `ranks.mf`
#            matrix for that grid/mesh combination.
#   run.out  nested list, run.out[[grid]][[mesh]], each entry carrying
#            `mess.ls$error`.
#   N.g,N.m  number of grids, and number of meshes per grid.
# Value:
#   list(p.length, grid, mesh). `p.length` is one more than the number of rows
#   accounted for (last column of ranks.mf filled in, or an error recorded) at
#   the last grid/mesh; `grid` and `mesh` are the indices to restart from.
restart.position <- function(gm, run.out, N.g = 4, N.m = 4) {
  # Rows whose final ranks.mf column is filled in, for every element of gm.
  count.ranks <- function(g) {
    vapply(seq_along(g), function(i) {
      sum(!is.na(g[[i]]$ranks.mf[, ncol(g[[i]]$ranks.mf)]))
    }, integer(1))
  }
  # Recorded (non-NA) errors for every mesh of a single grid.
  count.errors <- function(g) {
    vapply(seq_along(g), function(i) sum(!is.na(g[[i]]$mess.ls$error)),
           integer(1))
  }

  # done[mesh, grid]: matrix() fills down the columns, so each column is one
  # grid and row i of that column is mesh i (same layout as the error counts).
  done <- matrix(count.ranks(gm), nrow = N.m) +
    vapply(seq_along(run.out), function(i) count.errors(run.out[[i]]),
           integer(N.m))

  step <- diff(done) # change between consecutive meshes within each grid
  # NOTE(review): this tests the SUM of the differences, so differences that
  # cancel out would go unnoticed -- kept as in the original logic.
  if (sum(step) != 0) {
    w <- unname(which(step != 0, arr.ind = TRUE))
    grid <- w[2]
    mesh <- w[1] + 1
  } else if (any(diff(t(done)) != 0)) {
    # All meshes agree within each grid, so the change is between two grids:
    # restart at the first mesh-column hit, in the grid after the change.
    w <- unname(which(diff(t(done)) != 0, arr.ind = TRUE))
    grid <- w[1, 1] + 1
    mesh <- w[1, 2]
  } else {
    grid <- 1
    mesh <- 1
  }

  list(p.length = done[N.m, N.g] + 1, grid = grid, mesh = mesh)
}

# Remove the most recent recorded error for process `k`.
#
# Loads GridMeshIrregPolLGCPSBCSS<k>_REMERROR.rda, sets the error entry just
# before the restart position to NA, prints diagnostics to
# RemovingErrorForNode<node>Proc<k>.txt and saves the result as
# GridMeshIrregPolLGCPSBCSS<k>.rda. Nothing is saved when the restart position
# is grid 1 / mesh 1 (a "STOP" message is printed instead).
remove.proc.error <- function(k, N.g = 4, N.m = 4) {
  this.node <- k %/% 2 + as.numeric(k %% 2 != 0)

  sink(paste0("RemovingErrorForNode", this.node, "Proc", k, ".txt"))
  # Close the diversion however we leave, so a failure below cannot keep
  # swallowing console output.
  on.exit(sink(), add = TRUE)

  # Only run.out is altered: it is the output in which the ERROR is noted; the
  # other objects are written back unchanged.
  saved <- new.env()
  load(paste0("GridMeshIrregPolLGCPSBCSS", k, "_REMERROR.rda"), envir = saved)

  save.file <- paste0("GridMeshIrregPolLGCPSBCSS", k, ".rda")
  print(save.file)

  before <- restart.position(saved$gm, saved$run.out, N.g, N.m)
  p.length <- before$p.length

  # Find the location of the error: the grid/mesh just before the restart point.
  if (before$mesh == 1 && before$grid == 1) {
    print("STOP: RESTART AT THE PREVIOUS P.LENGTH TO ENSURE DATA IS RE-SIMULATED.")
    return(invisible(NULL))
  }
  p.rem <- p.length
  if (before$mesh == 1) {
    grid.rem <- before$grid - 1
    mesh.rem <- N.m
  } else {
    grid.rem <- before$grid
    mesh.rem <- before$mesh - 1
  }

  cell <- saved$run.out[[grid.rem]][[mesh.rem]]
  iind <- (grid.rem - 1) * N.m + mesh.rem # matching element of gm

  print(cell$mess.ls$error[p.rem])

  print("Sum FFT not NA")
  print(sum(!is.na(cell$mess.ls$FFT)))
  print("est.df for p.rem not NA")
  print(sum(!is.na(cell$est.df[p.rem, ])))
  print("gm NA")
  print(sum(!is.na(saved$gm[[iind]]$ranks.param[p.rem, ])))
  print(sum(!is.na(saved$gm[[iind]]$ranks.mf[p.rem, ])))

  print("FFT NAs")
  print(sum(is.na(cell$mess.ls$FFT)))
  print("est.df for p.rem NA")
  print(sum(is.na(cell$est.df[p.rem, ])))

  # Replaces ERROR
  saved$run.out[[grid.rem]][[mesh.rem]]$mess.ls$error[p.rem] <- NA

  after <- restart.position(saved$gm, saved$run.out, N.g, N.m)

  print("Old Settings)")
  print(p.length)
  print(before$grid)
  print(before$mesh)

  print("New Settings")
  print(after$p.length)
  print(after$grid)
  print(after$mesh)

  save(list = c("run.out", "gm", "true.theta", "data.err.tracker", "seed.vec"),
       file = save.file, envir = saved)
  invisible(NULL)
}

for (k in procs) {
  remove.proc.error(k)
}

# Leave the workspace empty for the next section, as the original loop did.
rm(list = ls())


# Checks Between Old and New Data -----------------------------------------
# Print checks between new and old (pre- and post-error removal) processes to double check that no mistakes were made.

procs <- c(8, 10, 12, 28, 39)

# Work out where a process would resume, from its saved outputs.
#
# Arguments:
#   gm       list whose element (grid - 1) * N.m + mesh carries a `ranks.mf`
#            matrix for that grid/mesh combination.
#   run.out  nested list, run.out[[grid]][[mesh]], each entry carrying
#            `mess.ls$error`.
#   N.g,N.m  number of grids, and number of meshes per grid.
# Value:
#   list(p.length, grid, mesh). `p.length` is one more than the number of rows
#   accounted for (last column of ranks.mf filled in, or an error recorded) at
#   the last grid/mesh; `grid` and `mesh` are the indices to restart from.
restart.position <- function(gm, run.out, N.g = 4, N.m = 4) {
  count.ranks <- function(g) {
    vapply(seq_along(g), function(i) {
      sum(!is.na(g[[i]]$ranks.mf[, ncol(g[[i]]$ranks.mf)]))
    }, integer(1))
  }
  count.errors <- function(g) {
    vapply(seq_along(g), function(i) sum(!is.na(g[[i]]$mess.ls$error)),
           integer(1))
  }

  # done[mesh, grid]: matrix() fills down the columns, so each column is one
  # grid and row i of that column is mesh i (same layout as the error counts).
  done <- matrix(count.ranks(gm), nrow = N.m) +
    vapply(seq_along(run.out), function(i) count.errors(run.out[[i]]),
           integer(N.m))

  step <- diff(done)
  if (sum(step) != 0) {
    w <- unname(which(step != 0, arr.ind = TRUE))
    grid <- w[2]
    mesh <- w[1] + 1
  } else if (any(diff(t(done)) != 0)) {
    w <- unname(which(diff(t(done)) != 0, arr.ind = TRUE))
    grid <- w[1, 1] + 1
    mesh <- w[1, 2]
  } else {
    grid <- 1
    mesh <- 1
  }

  list(p.length = done[N.m, N.g] + 1, grid = grid, mesh = mesh)
}

# Print six figures comparing `new` against `old`: the sum (NAs dropped) and
# the max-abs of their difference over `rows`, then the NA and non-NA counts of
# the complete objects (old first, new second). `by.row` selects row indexing
# (`x[rows, ]`) rather than plain vector indexing (`x[rows]`).
report.diff <- function(new, old, rows, by.row) {
  delta <- if (by.row) new[rows, ] - old[rows, ] else new[rows] - old[rows]
  print(sum(delta, na.rm = TRUE))
  print(max(abs(delta))) # no na.rm here: prints NA if any compared entry is NA

  print(sum(is.na(old)))
  print(sum(is.na(new)))

  print(sum(!is.na(old)))
  print(sum(!is.na(new)))
}

# Print every old-vs-new check for process `k`, comparing
# GridMeshIrregPolLGCPSBCSS<k>_REMERROR.rda (before the error removal) with
# GridMeshIrregPolLGCPSBCSS<k>.rda (after it).
check.proc <- function(k, N.g = 4, N.m = 4) {
  print(paste0("Process ", k))

  # Separate environments keep the two sets of identically named objects apart.
  old <- new.env()
  load(paste0("GridMeshIrregPolLGCPSBCSS", k, "_REMERROR.rda"), envir = old)
  cur <- new.env()
  load(paste0("GridMeshIrregPolLGCPSBCSS", k, ".rda"), envir = cur)

  run.old <- old$run.out
  run.new <- cur$run.out
  gm.old <- old$gm
  gm.new <- cur$gm

  # Restart position according to the NEW (post-removal) file.
  pos <- restart.position(gm.new, run.new, N.g, N.m)
  p.rows <- seq_len(pos$p.length)

  # Rows to compare for grid jj / mesh ll.
  # NOTE(review): meshes >= pos$mesh in grids before pos$grid get the shorter
  # range too, although they look complete -- condition kept as originally
  # written; confirm whether that is intended.
  rows.for <- function(jj, ll) {
    if (jj <= pos$grid && ll < pos$mesh) {
      seq_len(pos$p.length)
    } else {
      seq_len(pos$p.length - 1)
    }
  }

  print("Names gm")
  print(names(gm.old))
  print(names(gm.new))

  print(sum(names(gm.old) == names(gm.new)))
  print(sum(names(gm.old) != names(gm.new)))

  for (i in seq_len(N.g)) {
    for (j in seq_len(N.m)) {
      print("Length Message")
      lm <- length(run.new[[i]][[j]]$mess.ls$message)
      print(lm)
      print("Null Messages Counts")
      # seq_len() + vapply() so an empty message list counts as 0 NULLs.
      print(sum(vapply(seq_len(lm), function(l) {
        is.null(run.old[[i]][[j]]$mess.ls$message[[l]])
      }, logical(1))))
      print(sum(vapply(seq_len(lm), function(l) {
        is.null(run.new[[i]][[j]]$mess.ls$message[[l]])
      }, logical(1))))
    }
  }

  for (i in seq_len(N.g)) {
    for (j in seq_len(N.m)) {
      mess.old <- run.old[[i]][[j]]$mess.ls
      mess.new <- run.new[[i]][[j]]$mess.ls
      print("Length FFT")
      print(length(mess.new$FFT))
      print("Length Error")
      print(length(mess.new$error))
      print("FFT Count")
      print(sum(mess.old$FFT, na.rm = TRUE))
      print(sum(mess.new$FFT, na.rm = TRUE))
      print("Warnings Present")
      print(sum(!is.na(mess.old$warning)))
      print(sum(!is.na(mess.new$warning)))
      print("Errors Present")
      print(sum(!is.na(mess.old$error)))
      print(sum(!is.na(mess.new$error)))
    }
  }

  # CHECKS ####

  print("Compare true.theta and seed.vec")
  theta.diff <- cur$true.theta[p.rows, ] - old$true.theta[p.rows, ]
  print(sum(theta.diff))
  print(max(abs(theta.diff)))
  seed.diff <- cur$seed.vec[p.rows] - old$seed.vec[p.rows]
  print(sum(seed.diff))
  print(max(abs(seed.diff)))

  print("data.err.tracker comparison")
  print(cur$data.err.tracker)
  print(old$data.err.tracker)

  print("Sum and Max-Abs Different and sum is.na in est.df")
  for (jj in seq_len(N.g)) {
    for (ll in seq_len(N.m)) {
      report.diff(run.new[[jj]][[ll]]$est.df, run.old[[jj]][[ll]]$est.df,
                  rows.for(jj, ll), by.row = TRUE)
    }
  }

  print("Sum and Max-Abs difference in FFT")
  for (jj in seq_len(N.g)) {
    for (ll in seq_len(N.m)) {
      report.diff(run.new[[jj]][[ll]]$mess.ls$FFT,
                  run.old[[jj]][[ll]]$mess.ls$FFT,
                  rows.for(jj, ll), by.row = FALSE)
    }
  }

  print("Sum and Max-Abs difference in ranks.param")
  for (jj in seq_len(N.g)) {
    for (ll in seq_len(N.m)) {
      ind <- (jj - 1) * N.m + ll
      report.diff(gm.new[[ind]]$ranks.param, gm.old[[ind]]$ranks.param,
                  rows.for(jj, ll), by.row = TRUE)
    }
  }

  print("Sum and Max-Abs difference in ranks.mf")
  for (jj in seq_len(N.g)) {
    for (ll in seq_len(N.m)) {
      ind <- (jj - 1) * N.m + ll
      report.diff(gm.new[[ind]]$ranks.mf, gm.old[[ind]]$ranks.mf,
                  rows.for(jj, ll), by.row = TRUE)
    }
  }

  for (i in seq_len(N.g * N.m)) {
    print(paste0("Dimension gm.old ", i))
    print(dim(gm.old[[i]]$ranks.param))
    print(dim(gm.old[[i]]$ranks.mf))
    print(paste0("Dimension gm ", i))
    print(dim(gm.new[[i]]$ranks.param))
    print(dim(gm.new[[i]]$ranks.mf))
  }

  invisible(NULL)
}

sink("DoubleCheckingProcErrorRem.txt")
# `finally` closes the diversion even when a check fails part-way through.
tryCatch(
  for (k in procs) {
    check.proc(k)
  },
  finally = sink()
)

rm(list = ls())


# Checking Old and New Starting Positions ---------------------------------
# Checking the old and new starting positions to double check the error has been removed.

# PREVIOUS

# sink("CheckingStartingPositionsOldandNew070621.txt") # previous, multiple error removals.
procs <- c(8, 10, 12, 28, 39)

N.g <- 4
N.m <- 4

# Work out where a process would resume, from its saved outputs.
#
# Arguments:
#   gm       list whose element (grid - 1) * N.m + mesh carries a `ranks.mf`
#            matrix for that grid/mesh combination.
#   run.out  nested list, run.out[[grid]][[mesh]], each entry carrying
#            `mess.ls$error`.
#   N.g,N.m  number of grids, and number of meshes per grid.
# Value:
#   list(p.length, grid, mesh). `p.length` is one more than the number of rows
#   accounted for (last column of ranks.mf filled in, or an error recorded) at
#   the last grid/mesh; `grid` and `mesh` are the indices to restart from.
restart.position <- function(gm, run.out, N.g = 4, N.m = 4) {
  count.ranks <- function(g) {
    vapply(seq_along(g), function(i) {
      sum(!is.na(g[[i]]$ranks.mf[, ncol(g[[i]]$ranks.mf)]))
    }, integer(1))
  }
  count.errors <- function(g) {
    vapply(seq_along(g), function(i) sum(!is.na(g[[i]]$mess.ls$error)),
           integer(1))
  }

  # done[mesh, grid]: matrix() fills down the columns, so each column is one
  # grid and row i of that column is mesh i (same layout as the error counts).
  done <- matrix(count.ranks(gm), nrow = N.m) +
    vapply(seq_along(run.out), function(i) count.errors(run.out[[i]]),
           integer(N.m))

  step <- diff(done)
  if (sum(step) != 0) {
    w <- unname(which(step != 0, arr.ind = TRUE))
    grid <- w[2]
    mesh <- w[1] + 1
  } else if (any(diff(t(done)) != 0)) {
    w <- unname(which(diff(t(done)) != 0, arr.ind = TRUE))
    grid <- w[1, 1] + 1
    mesh <- w[1, 2]
  } else {
    grid <- 1
    mesh <- 1
  }

  list(p.length = done[N.m, N.g] + 1, grid = grid, mesh = mesh)
}

# Load GridMeshIrregPolLGCPSBCSS<k><suffix> for every process in `procs`,
# print its restart position and return a 3 x length(procs) matrix with rows
# p.length, grid start and mesh start (one column per process, in the order
# of `procs`).
collect.restarts <- function(procs, suffix, N.g, N.m) {
  restarts <- matrix(NA, nrow = 3, ncol = length(procs))
  for (idx in seq_along(procs)) {
    k <- procs[idx]
    saved <- new.env()
    load(paste0("GridMeshIrregPolLGCPSBCSS", k, suffix), envir = saved)
    pos <- restart.position(saved$gm, saved$run.out, N.g, N.m)
    restarts[, idx] <- c(pos$p.length, pos$grid, pos$mesh)

    print(paste0("Run ", k))
    print(paste0("p.length = ", pos$p.length))
    print(paste0("Grid start = ", pos$grid, " and Mesh start = ", pos$mesh))
  }
  restarts
}

sink("CheckingStartingPositionsOldandNewProc090621.txt")
# `finally` closes the diversion even if one of the files fails to load.
tryCatch({
  # Files that still contain the error.
  print("Error Versions")
  restart.mat.orig <- collect.restarts(procs, "_REMERROR.rda", N.g, N.m)

  ###############################################
  # NEW: files written after the error removal.
  print("Reset Versions)")
  restart.mat <- collect.restarts(procs, ".rda", N.g, N.m)

  # Explicit print(): the tables then reach the sink file however the script
  # is run (auto-printing does not happen under source() without echo).
  print("Old and New Restart")
  print(rbind(restart.mat.orig, restart.mat))
  print("Old and New p.length")
  print(rbind(restart.mat.orig[1, ], restart.mat[1, ]))
}, finally = sink())

rm(list = ls())
