## Do participant testimony videos make people more interested in taking part in a clinical trial? A randomised Study Within a Trial (SWAT)
## Analysis Script


# Install add-on packages ----
# Only packages that are not already available are installed, so re-running
# the script does not download and reinstall every dependency each time.
required_packages <- c(
  "writexl", "crosstable", "ggplot2", "readxl", "tidyverse", "forestplot"
)
is_available <- vapply(
  required_packages, requireNamespace, logical(1),
  quietly = TRUE
)
missing_packages <- required_packages[!is_available]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

#Load add-on packages 
library(writexl) 
library(crosstable) 
library(ggplot2) 
library(readxl) 
library(tidyverse) 
library(forestplot)

 
# Import the archived SWAT workbook; the resulting table is referred to as
# "SWAT_Database" throughout the rest of the script.
raw_data_path <- "Data_Raw/SWAT Database_Archive.xlsx"
SWAT_Database <- read_excel(raw_data_path)

# Add variable labels ----
# Labels are stored in each column's "label" attribute using base R, so no
# additional labelling package has to be attached for this step.
variable_labels <- c(
  ID = "Participant ID",
  Condition_2 = "Allocated SWAT condition - 2 way comparison",
  Condition_3 = "Allocated SWAT condition - 3 way comparison",
  Interested = "Did participant express interest in host trial?",
  Screened = "Was participant screened for eligibility for host trial?",
  Eligible = "Was participant assessed as eligible to take part in host trial?",
  Randomised = "Was participant randomised into host trial?"
)

# Fail early with a readable message if an expected column is absent.
missing_columns <- setdiff(names(variable_labels), names(SWAT_Database))
if (length(missing_columns) > 0) {
  stop(
    "Column(s) missing from SWAT_Database: ",
    paste(missing_columns, collapse = ", "),
    call. = FALSE
  )
}

for (column in names(variable_labels)) {
  attr(SWAT_Database[[column]], "label") <- variable_labels[[column]]
}


# Descriptive statistics ----
# Cross-tabulate the four recruitment outcomes against the allocated
# condition: first the 2-way grouping, then the 3-way grouping.
# n = Inf is forwarded to print() (presumably so no rows are truncated).

# By Condition_2
descriptives_2way <- crosstable(
  SWAT_Database,
  c(Interested, Screened, Eligible, Randomised),
  by = Condition_2
)
print(descriptives_2way, n = Inf)

# By Condition_3
descriptives_3way <- crosstable(
  SWAT_Database,
  c(Interested, Screened, Eligible, Randomised),
  by = Condition_3
)
print(descriptives_3way, n = Inf)

# Odds ratios and 95% confidence intervals ----
#
# Each outcome is modelled separately with a logistic regression (logit link)
# on the allocation factor. The first factor level is the reference category,
# so the level order set before each batch of models decides which comparisons
# the coefficients represent.

outcome_vars <- c("Interested", "Screened", "Eligible", "Randomised")

# Convert an allocation column to a factor with the given level order (the
# first level becomes the reference category). Values that are not listed in
# `levels` would silently become NA and be dropped from the models, so a
# warning is raised when that happens.
set_condition_levels <- function(x, levels) {
  releveled <- factor(x, levels = levels)
  newly_missing <- is.na(releveled) & !is.na(x)
  if (any(newly_missing)) {
    warning(
      sum(newly_missing), " value(s) not matching the expected levels (",
      paste(levels, collapse = ", "), ") were set to NA",
      call. = FALSE
    )
  }
  releveled
}

# Fit `outcome ~ condition` as a binomial GLM and return a matrix with one row
# per coefficient and the columns OR, 2.5 % and 97.5 %.
# The "(Intercept)" row holds the exponentiated intercept, i.e. the odds in the
# reference category rather than an odds ratio.
odds_ratio_table <- function(data, outcome, condition) {
  fit <- glm(
    reformulate(condition, response = outcome),
    data = data,
    family = binomial()
  )
  # 95% CI on the odds-ratio scale (profile likelihood by default; may take a
  # moment due to profiling).
  ci <- exp(confint(fit))
  cbind(OR = exp(coef(fit)), `2.5 %` = ci[, 1], `97.5 %` = ci[, 2])
}

# Run odds_ratio_table() for every outcome, print each table rounded to three
# decimals, and invisibly return the unrounded tables as a list named by
# outcome. Printing is explicit so that results also appear under source().
report_odds_ratios <- function(data, condition, outcomes) {
  tables <- lapply(outcomes, function(outcome) {
    or_table <- odds_ratio_table(data, outcome, condition)
    cat("\n", outcome, " ~ ", condition, "\n", sep = "")
    print(round(or_table, 3))
    or_table
  })
  names(tables) <- outcomes
  invisible(tables)
}

## 2-way comparison (Video vs. No Video), 'No Video' as reference
SWAT_Database$Condition_2 <- set_condition_levels(
  SWAT_Database$Condition_2,
  c("No Video", "Video")
)
or_2way <- report_odds_ratios(SWAT_Database, "Condition_2", outcome_vars)

## 3-way comparison (MH Video vs. Health Video vs. No Video),
## 'No Video' as reference
SWAT_Database$Condition_3 <- set_condition_levels(
  SWAT_Database$Condition_3,
  c("No Video", "Health Video", "MH Video")
)
or_3way_vs_no_video <- report_odds_ratios(
  SWAT_Database, "Condition_3", outcome_vars
)

## To obtain the Health Video vs. MH Video comparison, rerun the 3-way models
## with 'MH Video' as the reference category
SWAT_Database$Condition_3 <- set_condition_levels(
  SWAT_Database$Condition_3,
  c("MH Video", "Health Video", "No Video")
)
or_3way_vs_mh_video <- report_odds_ratios(
  SWAT_Database, "Condition_3", outcome_vars
)

### Forest plots of odds ratios ----
#
# NOTE(review): the odds ratios and confidence limits below are typed in as
# literals rather than taken from the fitted models; re-check them against the
# regression output whenever the data change.

# Assemble the inputs for forestplot(): one header row, then for every outcome
# a heading row followed by one row per comparison.
#   outcomes    - outcome headings, in display order
#   comparisons - row labels repeated under every outcome
#   est, lower, upper - odds ratios and 95% limits, ordered outcome by outcome
#                       (all comparisons of the first outcome, then the
#                       second, ...)
# Returns a list with labeltext, mean, lower, upper and is.summary. The
# "OR (95% CI)" text column is generated from the numbers, so each value is
# entered only once.
build_forest_inputs <- function(outcomes, comparisons, est, lower, upper) {
  n_out <- length(outcomes)
  n_cmp <- length(comparisons)
  stopifnot(
    length(est) == n_out * n_cmp,
    length(lower) == length(est),
    length(upper) == length(est)
  )

  # Put one heading value in front of each outcome's run of n_cmp values:
  # each matrix column is one outcome, and flattening reads column by column.
  with_headings <- function(heading, values) {
    as.vector(rbind(rep_len(heading, n_out), matrix(values, nrow = n_cmp)))
  }

  or_text <- sprintf("%.2f (%.2f–%.2f)", est, lower, upper)

  list(
    labeltext = unname(cbind(
      c("", with_headings(outcomes, rep(comparisons, times = n_out))),
      c("OR (95% CI)", with_headings("", or_text))
    )),
    # NA on the header and heading rows
    mean = c(NA, with_headings(NA_real_, est)),
    lower = c(NA, with_headings(NA_real_, lower)),
    upper = c(NA, with_headings(NA_real_, upper)),
    # TRUE on the header and heading rows
    is.summary = c(TRUE, with_headings(TRUE, rep(FALSE, length(est))))
  )
}

# Draw one forest plot on the current graphics device, on a log odds-ratio
# axis, with xlim set to the range of the tick marks. The returned object is
# printed explicitly so the plot is also rendered when the script is run via
# source() or from inside a function.
# NOTE(review): confirm `xlim` is honoured by the installed forestplot version.
draw_forest <- function(inputs, xticks) {
  fp <- forestplot(
    labeltext = inputs$labeltext,
    mean = inputs$mean,
    lower = inputs$lower,
    upper = inputs$upper,
    is.summary = inputs$is.summary,
    xlog = TRUE,
    xlab = "Odds Ratio (log scale)",
    xticks = xticks,
    xlim = range(xticks),
    col = fpColors(
      box = "black",
      line = "black",
      summary = "black"
    )
  )
  print(fp)
  invisible(fp)
}

# Export a forest plot as PNG. on.exit() closes the device even if drawing
# fails, so no graphics device is left open.
save_forest_png <- function(file, inputs, xticks) {
  png(file, width = 2000, height = 1500, res = 300)
  on.exit(dev.off(), add = TRUE)
  draw_forest(inputs, xticks)
  invisible(file)
}

forest_outcomes <- c("Interested", "Screened", "Eligible", "Randomised")

## Forest Plot for Model 1 (2-way comparison, Video vs. No Video).
## All outcomes shown on the same forest plot.
model1_inputs <- build_forest_inputs(
  outcomes = forest_outcomes,
  comparisons = "  Video vs. No Video",
  est = c(1.45, 1.46, 1.69, 1.71),
  lower = c(1.02, 1.02, 1.15, 1.14),
  upper = c(2.09, 2.06, 2.50, 2.59)
)
model1_ticks <- c(0.75, 1.0, 1.5, 2, 3)

draw_forest(model1_inputs, model1_ticks)
save_forest_png("Model1_ForestPlot.png", model1_inputs, model1_ticks)

## Forest Plot for Model 2 (3-way comparison, Health Video vs. MH Video vs.
## No Video). All outcomes shown on the same forest plot.
model2_inputs <- build_forest_inputs(
  outcomes = forest_outcomes,
  comparisons = c(
    " Generic Video vs. No Video",
    " Tailored Video vs. No Video",
    " Generic Video vs. Tailored Video"
  ),
  est = c(
    2.94, 1.17, 2.52, # Interested
    2.83, 1.19, 2.39, # Screened
    3.08, 1.37, 2.26, # Eligible
    3.08, 1.38, 2.24  # Randomised
  ),
  lower = c(
    1.64, 0.79, 1.38,
    1.58, 0.80, 1.32,
    1.73, 0.90, 1.26,
    1.71, 0.88, 1.24
  ),
  upper = c(
    5.46, 1.72, 4.72,
    5.20, 1.75, 4.44,
    5.52, 2.08, 4.07,
    5.57, 2.15, 4.05
  )
)
# NOTE(review): several upper limits (5.20-5.57) lie above the largest tick
# (5); check that the interval ends are not cut off in the rendered plot.
model2_ticks <- c(0.75, 1.0, 1.5, 3, 5)

draw_forest(model2_inputs, model2_ticks)
save_forest_png("3WayContrast_ForestPlot.png", model2_inputs, model2_ticks)