
# SBC Plots for Example ---------------------------------------------------

# This R script will produce example histograms for SBC, simulating priors and samples for the same or slightly varying distributions, changing the mean or variance to illustrate how the histogram provides visual divergences from uniformity for each of these scenarios.
# We will additionally include illustrations of modelling the rank frequencies with GLMs as described in Chapter 3 of my thesis to be able to capture the necessary divergences from uniformity.

# Author: Nadeen Khaleel

# Setwd and Load Libraries ------------------------------------------------

# Either setwd() to the source file location, or run the following:
library("rstudioapi")
setwd(dirname(getActiveDocumentContext()$path))

# Functions ---------------------------------------------------------------

# Simulation-Sampling Function
rank.vals <- function(N,L,mean,sd){
  vals <- rnorm(N,0,1)
  samples <- sapply(1:N,function(i,mean,sd){rnorm(L,mean,sd)},mean,sd)
  ranks <- sapply(1:N,function(i,vals,samples){sum(samples[,i]<vals[i])},vals,samples)
}

# Frequency for Ranks
freq.func <- function(x){
  df <- as.data.frame(table(x))
  names(df) <- c("ranks","freq")
  return(df)
}

# Model Fitting Function
models_sbc <- function(freq.ranks,test="Chisq"){
  if (test=="Chisq"){
    ranks.df <- data.frame(Freq=freq.ranks$freq,ranks=(as.numeric(as.character(freq.ranks$ranks))))
    fit.const <- glm(Freq~1,data = ranks.df, family = "poisson")
    fit.ranks <- glm(Freq~ranks,data = ranks.df, family = "poisson")
    fit.ranks2 <- glm(Freq~ranks+I(ranks^2),data = ranks.df, family = "poisson")
    
    a1 <- anova(fit.const,fit.ranks,test = "Chisq")
    a2 <- anova(fit.const,fit.ranks2,test = "Chisq")
    
    
    if (a1$`Pr(>Chi)`[2]>0.01){ # prefer const over rank
      if (a2$`Pr(>Chi)`[2]>0.01){ # prefer const over rank^2
        u.divergence <- "FALSE"
        mod <- fit.const
      } else { # prefer rank^2 over const - therefore as rank < const < rank^2
        u.divergence <- "TRUE"
        mod <- fit.ranks2
      }
    } else { # prefer rank over const
      if (a2$`Pr(>Chi)`[2]>0.01){ # prefer const over rank^2 - therefore rank^2 < const < rank
        u.divergence <- "TRUE"
        mod <- fit.ranks
      } else {
        a3 <- anova(fit.ranks,fit.ranks2,test = "Chisq")
        if (a3$`Pr(>Chi)`[2]<0.01){
          u.divergence <- "TRUE"
          mod <- fit.ranks2
        } else {
          u.divergence <- "TRUE"
          mod <- fit.ranks
        }
      }
    }
  } else {
    # OR - use step() function and check dim?
    full.fit <- glm(Freq~ranks+I(ranks^2),data = ranks.df, family = "poisson")
    mod <- step(full.fit)
    if (length(final.fit$coefficients)==1){
      u.divergence=="FALSE"
    } else {
      u.divergence=="TRUE"
    }
  }
  return(list("div"=u.divergence,"fit.mod"=mod))
}

# Example Scenarios -------------------------------------------------------

set.seed(625)

N <- 2560; L <- 127
# N <- 1e4; L <- 1e2

# Same distribution
same.dist <- rank.vals(N,L,0,1)

# Smaller Mean
smallermean.dist <- rank.vals(N,L,-2,1)

# Larger Mean
largermean.dist <- rank.vals(N,L,2,1)

# Smaller Variance
smallervar.dist <- rank.vals(N,L,0,sqrt(0.5))

# Larger Variance
largervar.dist <- rank.vals(N,L,0,sqrt(2))



# Model Fits --------------------------------------------------------------

# Same Distribution
same.freq <- freq.func(same.dist)
same.mod <- models_sbc(same.freq)

# Smaller Mean
smallermean.freq <- freq.func(smallermean.dist)
smallermean.mod <- models_sbc(smallermean.freq)

# Larger Mean
largermean.freq <- freq.func(largermean.dist)
largermean.mod <- models_sbc(largermean.freq)

# Smaller Variance
smallervar.freq <- freq.func(smallervar.dist)
smallervar.mod <- models_sbc(smallervar.freq)

# Larger Variance
largervar.freq <- freq.func(largervar.dist)
largervar.mod <- models_sbc(largervar.freq)

# Plots -------------------------------------------------------------------

d1 <- qbinom(0.005,N,1/(L+1))
d2 <- qbinom(0.995,N,1/(L+1))
m <- N/(L+1)
xcoord <- c(-0.5,L+0.5,L+0.5,-0.5)
ycoord <- c(d1,d1,d2,d2)


pdf("SameDistSBC.pdf",h=8,w=10,pointsize = 14)
par(mar=opar$mar+c(0,1,0,0))
hist(same.dist,breaks=seq(-0.5,L+0.5),main="Same Distribution",xlab = "Rank Statistic",cex.main=2,cex.lab=2,cex.axis=1.5)
polygon(xcoord,ycoord,col=rgb(0.5,0,0.5,0.25),border = NA)
segments(-0.5,m,L+0.5,m,col="blue")
dev.off()

pdf("SmallerMeanDistSBC.pdf",h=8,w=10,pointsize = 14)
par(mar=opar$mar+c(0,1,0,0))
hist(smallermean.dist,breaks=seq(-0.5,L+0.5),main="Smaller Mean Sampling Distribution",xlab = "Rank Statistic",cex.main=2,cex.lab=2,cex.axis=1.5)
polygon(xcoord,ycoord,col=rgb(0.5,0,0.5,0.25),border = NA)
segments(-0.5,m,L+0.5,m,col="blue")
dev.off()

pdf("LargerMeanDistSBC.pdf",h=8,w=10,pointsize = 14)
par(mar=opar$mar+c(0,1,0,0))
hist(largermean.dist,breaks=seq(-0.5,L+0.5),main="Larger Mean Sampling Distribution",xlab = "Rank Statistic",cex.main=2,cex.lab=2,cex.axis=1.5)
polygon(xcoord,ycoord,col=rgb(0.5,0,0.5,0.25),border = NA)
segments(-0.5,m,L+0.5,m,col="blue")
dev.off()

pdf("SmallerVarDistSBC.pdf",h=8,w=10,pointsize = 14)
par(mar=opar$mar+c(0,1,0,0))
hist(smallervar.dist,breaks=seq(-0.5,L+0.5),main="Smaller Variance Sampling Distribution",xlab = "Rank Statistic",cex.main=2,cex.lab=2,cex.axis=1.5)
polygon(xcoord,ycoord,col=rgb(0.5,0,0.5,0.25),border = NA)
segments(-0.5,m,L+0.5,m,col="blue")
dev.off()

pdf("LargerVarDistSBC.pdf",h=8,w=10,pointsize = 14)
par(mar=opar$mar+c(0,1,0,0))
hist(largervar.dist,breaks=seq(-0.5,L+0.5),main="Larger Variance Sampling Distribution",xlab = "Rank Statistic",cex.main=2,cex.lab=2,cex.axis=1.5)
polygon(xcoord,ycoord,col=rgb(0.5,0,0.5,0.25),border = NA)
segments(-0.5,m,L+0.5,m,col="blue")
dev.off()


# Model Fits
pdf("SameDistModelSBC.pdf",h=8,w=10,pointsize = 14)
par(oma=opar$oma+c(2.5,0,0,0),mar=opar$mar+c(0,1,0,0))
hist(same.dist,breaks=seq(-0.5,L+0.5),main="Same Distribution \nwith Model Fit",xlab = "Rank Statistic",cex.main=2,cex.lab=2,cex.axis=1.5)
polygon(xcoord,ycoord,col=rgb(0.5,0,0.5,0.25),border = NA)
segments(-0.5,m,L+0.5,m,col="blue")
points(as.numeric(as.character(same.freq$ranks)),same.mod$fit.mod$fitted.values,type="l",lwd=2.5,pch=19,col="red",cex=0.5)
mtext(text=paste0("Int=",same.mod$fit.mod$coefficients[1]),side=1,line=5,cex=1.5) 
mtext(text=paste0("rank=",same.mod$fit.mod$coefficients[2],"; rank^2=",same.mod$fit.mod$coefficients[3]),side=1,line=6,cex=1.5) 
dev.off()

pdf("SmallerMeanDistModelSBC.pdf",h=8,w=10,pointsize = 14)
par(oma=opar$oma+c(2.5,0,0,0),mar=opar$mar+c(0,1,0,0))
hist(smallermean.dist,breaks=seq(-0.5,L+0.5),main="Smaller Mean Sampling Distribution \nwith Model Fit",xlab = "Rank Statistic",cex.main=2,cex.lab=2,cex.axis=1.5)
polygon(xcoord,ycoord,col=rgb(0.5,0,0.5,0.25),border = NA)
segments(-0.5,m,L+0.5,m,col="blue")
points(as.numeric(as.character(smallermean.freq$ranks)),smallermean.mod$fit.mod$fitted.values,type="l",lwd=2.5,pch=19,col="red",cex=0.5)
mtext(text=paste0("Int=",smallermean.mod$fit.mod$coefficients[1]),side=1,line=5,cex=1.5) 
mtext(text=paste0("rank=",smallermean.mod$fit.mod$coefficients[2],"; rank^2=",smallermean.mod$fit.mod$coefficients[3]),side=1,line=6,cex=1.5) 
dev.off()

pdf("LargerMeanDistModelSBC.pdf",h=8,w=10,pointsize = 14)
par(oma=opar$oma+c(2.5,0,0,0),mar=opar$mar+c(0,1,0,0))
hist(largermean.dist,breaks=seq(-0.5,L+0.5),main="Larger Mean Sampling Distribution \nwith Model Fit",xlab = "Rank Statistic",cex.main=2,cex.lab=2,cex.axis=1.5)
polygon(xcoord,ycoord,col=rgb(0.5,0,0.5,0.25),border = NA)
segments(-0.5,m,L+0.5,m,col="blue")
points(as.numeric(as.character(largermean.freq$ranks)),largermean.mod$fit.mod$fitted.values,type="l",lwd=2.5,pch=19,col="red",cex=0.5)
mtext(text=paste0("Int=",largermean.mod$fit.mod$coefficients[1]),side=1,line=5,cex=1.5) 
mtext(text=paste0("rank=",largermean.mod$fit.mod$coefficients[2],"; rank^2=",largermean.mod$fit.mod$coefficients[3]),side=1,line=6,cex=1.5) 
dev.off()

pdf("SmallerVarDistModelSBC.pdf",h=8,w=10,pointsize = 14)
par(oma=opar$oma+c(2.5,0,0,0),mar=opar$mar+c(0,1,0,0))
hist(smallervar.dist,breaks=seq(-0.5,L+0.5),main="Smaller Variance Sampling Distribution \nwith Model Fit",xlab = "Rank Statistic",cex.main=2,cex.lab=2,cex.axis=1.5)
polygon(xcoord,ycoord,col=rgb(0.5,0,0.5,0.25),border = NA)
segments(-0.5,m,L+0.5,m,col="blue")
points(as.numeric(as.character(smallervar.freq$ranks)),smallervar.mod$fit.mod$fitted.values,type="l",lwd=2.5,pch=19,col="red",cex=0.5)
mtext(text=paste0("Int=",smallervar.mod$fit.mod$coefficients[1]),side=1,line=5,cex=1.5) 
mtext(text=paste0("rank=",smallervar.mod$fit.mod$coefficients[2],"; rank^2=",smallervar.mod$fit.mod$coefficients[3]),side=1,line=6,cex=1.5) 
dev.off()

pdf("LargerVarDistModelSBC.pdf",h=8,w=10,pointsize = 14)
par(oma=opar$oma+c(2.5,0,0,0),mar=opar$mar+c(0,1,0,0))
hist(largervar.dist,breaks=seq(-0.5,L+0.5),main="Larger Variance Sampling Distribution \nwith Model Fit",xlab = "Rank Statistic",cex.main=2,cex.lab=2,cex.axis=1.5)
polygon(xcoord,ycoord,col=rgb(0.5,0,0.5,0.25),border = NA)
segments(-0.5,m,L+0.5,m,col="blue")
points(as.numeric(as.character(largervar.freq$ranks)),largervar.mod$fit.mod$fitted.values,type="l",lwd=2.5,pch=19,col="red",cex=0.5)
mtext(text=paste0("Int=",largervar.mod$fit.mod$coefficients[1]),side=1,line=5,cex=1.5) 
mtext(text=paste0("rank=",largervar.mod$fit.mod$coefficients[2],"; rank^2=",largervar.mod$fit.mod$coefficients[3]),side=1,line=6,cex=1.5) 
dev.off()


# "Worst" Divergence ------------------------------------------------------

lmean <- unname(largermean.mod$fit.mod$coefficients)
svar <- unname(smallervar.mod$fit.mod$coefficients)
lvar <- unname(largervar.mod$fit.mod$coefficients)
df <- data.frame(beta1=rep(NA,4),beta2=rep(NA,4))

df[1,] <- smean[2:3]
df[2,] <- lmean[2:3]
df[3,] <- svar[2:3]
df[4,] <- lvar[2:3]
df$name <- c("smean","lmean","svar","lvar")
df
#         beta1         beta2  name
# 1 -0.11427960  0.0010484565 smean
# 2 -0.15496420  0.0010824424 lmean
# 3 -0.04062400  0.0003126249  svar
# 4  0.04563316 -0.0003585642  lvar
# > df[ord(beta2,beta2)]


df[order(-abs(df$beta2),-abs(df$beta1)),]
#         beta1         beta2  name
# 2 -0.15496420  0.0010824424 lmean
# 1 -0.11427960  0.0010484565 smean
# 4  0.04563316 -0.0003585642  lvar
# 3 -0.04062400  0.0003126249  svar

# So order of divergences from worst to best:
#  larger mean, smaller mean, larger variance, smaller variance



# Plots with Distribution in Title ----------------------------------------

d1 <- qbinom(0.005,N,1/(L+1))
d2 <- qbinom(0.995,N,1/(L+1))
m <- N/(L+1)
xcoord <- c(-0.5,L+0.5,L+0.5,-0.5)
ycoord <- c(d1,d1,d2,d2)

opar <- par()

pdf("N01N01SBC.pdf",h=8,w=10,pointsize = 14)
par(mar=opar$mar+c(0,1,0,0))
hist(same.dist,breaks=seq(-0.5,L+0.5),main="Simulate from N(0,1) \nSample from N(0,1)",xlab = "Rank Statistic",cex.main=2,cex.lab=2,cex.axis=1.5)
polygon(xcoord,ycoord,col=rgb(0.5,0,0.5,0.25),border = NA)
segments(-0.5,m,L+0.5,m,col="blue")
dev.off()

pdf("N01N-21SBC.pdf",h=8,w=10,pointsize = 14)
par(mar=opar$mar+c(0,1,0,0))
hist(smallermean.dist,breaks=seq(-0.5,L+0.5),main="Simulate from N(0,1) \nSample from N(-2,1)",xlab = "Rank Statistic",cex.main=2,cex.lab=2,cex.axis=1.5)
polygon(xcoord,ycoord,col=rgb(0.5,0,0.5,0.25),border = NA)
segments(-0.5,m,L+0.5,m,col="blue")
dev.off()

pdf("N01N21SBC.pdf",h=8,w=10,pointsize = 14)
par(mar=opar$mar+c(0,1,0,0))
hist(largermean.dist,breaks=seq(-0.5,L+0.5),main="Simulate from N(0,1) \nSample from N(2,1)",xlab = "Rank Statistic",cex.main=2,cex.lab=2,cex.axis=1.5)
polygon(xcoord,ycoord,col=rgb(0.5,0,0.5,0.25),border = NA)
segments(-0.5,m,L+0.5,m,col="blue")
dev.off()

pdf("N01N00_5SBC.pdf",h=8,w=10,pointsize = 14)
par(mar=opar$mar+c(0,1,0,0))
hist(smallervar.dist,breaks=seq(-0.5,L+0.5),main="Simulate from N(0,1) \nSample from N(0,0.5)",xlab = "Rank Statistic",cex.main=2,cex.lab=2,cex.axis=1.5)
polygon(xcoord,ycoord,col=rgb(0.5,0,0.5,0.25),border = NA)
segments(-0.5,m,L+0.5,m,col="blue")
dev.off()

pdf("N01N02SBC.pdf",h=8,w=10,pointsize = 14)
par(mar=opar$mar+c(0,1,0,0))
hist(largervar.dist,breaks=seq(-0.5,L+0.5),main="Simulate from N(0,1) \nSample from N(0,2)",xlab = "Rank Statistic",cex.main=2,cex.lab=2,cex.axis=1.5)
polygon(xcoord,ycoord,col=rgb(0.5,0,0.5,0.25),border = NA)
segments(-0.5,m,L+0.5,m,col="blue")
dev.off()


# sessionInfo() -----------------------------------------------------------

sessionInfo()