#load librarys
```{r}
library(tidyverse) #for general data cleaning
library(ggplot2) # for general plots and regression plots
library(mediation) #for running mediation analysis
library(mice) #for running multiple imputation
library(lme4) #for running mixed effects  models
library(lmerTest) #testing significance of mixed models
library(rempsyc)
library(consort) #for creating rough consort diagrams
library(grid) #for creating print ready consort diagrams
library(patchwork) #for creating print ready consort diagrams
library(DiagrammeR) #for creating print ready consort diagrams
library(DiagrammeRsvg) #for creating print ready consort diagrams
library(lmtest)
library(car)
library(naniar)
library(psych)
library(pwr)
library(rcompanion)
library(lsr)
library(broom)
library(emmeans)
library(correlation)
library(rstatix)
library(ggpubr) #extensions for plots
library(modelr)
library(Hmisc)
library(report)
library(ggeffects) #for producing linear regression plots
library(expss) # produces regression model tables
library(jtools)
expss_output_rnotebook() #setting output for expss tables
expss_digits(digits = 2) #setting output for expss tables

# Package citations
c("tidyverse", "ggplot2", "mediation", "mice", "lme4","lmerTest", "rempsyc", "consort", "grid", "patchwork", "DiagrammeR", "DiagrammeRsvg", "lmtest", "nlme",   "naniar", "psych", "pwr", "rcompanion", "lsr", "broom", "emmeans", "correlation", "rstatix", "ggpubr", "modelr", "Hmisc", "report", "ggeffects", "expss", "jtools") %>%
  map(citation) %>%
  print(style = "text")
```
#CLEANING CODE - ALREADY RUN

# Cleaning Questionnaire Data
```{r}
pre_qualtrics <- read.csv("Pre_Qualtrics.csv")
pre_questionpro <- read.csv("Pre_Questionpro.csv")
pre_questionpro <- pre_questionpro %>% filter(Weight != 0)
# identifying ppts who were sent the wrong link for follow-up
wronglink_responses <- pre_questionpro %>%
  group_by(X2...Participant.ID) %>%
  filter(row_number() == 2) %>%
  ungroup()
pre_questionpro <- anti_join(pre_questionpro, wronglink_responses)
post_qualtrics <- read.csv("Post_Qualtrics.csv")
post_questionpro <- read.csv("Post_Questionpro.csv")
follow_qualtrics <- read.csv("Follow_Qualtrics.csv")
follow_questionpro <- read.csv("Follow_Questionpro.csv")
CISR_qualtrics <- read.csv("CIS-R_Qualtrics.csv")
CISR_questionpro <- read.csv("CIS-R_Questionpro.csv")
Groups <- read_csv("ID_Groups.csv")
Groups$ID <- as.character(Groups$ID)
Groups$Group <- as.character(Groups$Group)
Groups$Group[Groups$Group == "CBT"] <- "Moodfit"

# Function to clean column names by removing leading digits and non-alphabetic characters
clean_column_names <- function(column_names) {
  return(gsub("^X\\d+\\.*", "", column_names))
}

# Clean the column names
colnames(pre_questionpro) <- clean_column_names(colnames(pre_questionpro))
colnames(post_questionpro) <- clean_column_names(colnames(post_questionpro))
colnames(follow_questionpro) <- clean_column_names(colnames(follow_questionpro))


# Generate new column names for pre, post and follow up questionpro surveys to align with qualtrics names


# Function to generate new column names with sequential numbering
generate_new_names <- function(column_names) {
  # Extracting the measure name (before "...")
  base_names <- gsub("\\.\\.\\..*$", "", column_names)
  
  # Creating a data frame to store the base names and their new names
  df <- data.frame(base_names = base_names, original_names = column_names, stringsAsFactors = FALSE)
  
  # Add a column with sequential numbers for each unique base name
  df <- df %>%
    group_by(base_names) %>%
    mutate(new_name = paste0(base_names, "_", row_number())) %>%
    ungroup()
  
  new_names <- setNames(df$new_name, df$original_names)
  return(new_names)
}
new_names <- generate_new_names(colnames(pre_questionpro))
post_new_names <- generate_new_names(colnames(post_questionpro))
follow_new_names <- generate_new_names(colnames(follow_questionpro))

colnames(pre_questionpro) <- new_names[colnames(pre_questionpro)]
colnames(post_questionpro) <- post_new_names[colnames(post_questionpro)]
colnames(follow_questionpro) <- follow_new_names[colnames(follow_questionpro)]

pre_questionpro <- pre_questionpro %>% rename(Participant.ID = Participant.ID_1,
                                              WellbeingAgenda = WellbeingAgenda._1,
                                              Stress = Stress_1,
                                              Presenteeism = Presenteeism_1,
                                              Leaveism = Leaveism_1,
                                              Absent = Absent_1)

pre_qualtrics <- pre_qualtrics %>% rename( WellbeingAgenda = WellbeingAgenda.)

post_questionpro <- post_questionpro %>% rename(Participant.ID = Participant.ID_1,
                                              Stress_Post = Stress_Post_1,
                                              Presenteeism_Post = Presenteeism_Post_1,
                                              Leaveism_Post = Leaveism_Post_1,
                                              Absent_Post = Absent_Post_1,
                                              TrainingGroup = TrainingGroup._1,
                                              AmountUsed = AmountUsed._1,
                                              NumberUsed = Q25_1)

post_qualtrics <- post_qualtrics %>% rename(TrainingGroup = TrainingGroup.,
                                            AmountUsed = AmountUsed.)


follow_questionpro <- follow_questionpro %>% rename(Participant.ID = Participant.ID_1,
                                              FollowStress = FollowStress_1,
                                              FollowPresenteeism = FollowPresenteeism_1,
                                              FollowLeaveism = FollowLeaveism_1,
                                              FollowAbsent = FollowAbsent_1)
#Deleting Questionpro unneeded columns
pre_questionpro <- pre_questionpro[, -c(1:21)]
pre_questionpro <- pre_questionpro[, -c(2:15)]
pre_questionpro <- pre_questionpro[, -c(96:101)]
pre_questionpro <- pre_questionpro[, -c(103:104)]
pre_questionpro <- pre_questionpro %>% select(-"Stress.expansion_1", -"Present.expansion_1", -"Leave.expansion_1", -"Q9.Check_1")

post_questionpro <- post_questionpro[, -c(1:19)]
post_questionpro <- post_questionpro %>% select(-"Stress.expansionPost_1", -"Present.expansionPos_1", -"Leave.expansionPost_1", -"Absent.expansionPost_1")
post_questionpro <- post_questionpro[, -c(101:106)]

follow_questionpro <- follow_questionpro[, -c(1:20)]
follow_questionpro <- follow_questionpro %>% select(-"FollStress.expansion_1", -"FolPresent.expansion_1", -"FollwLeave.expansion_1")
follow_questionpro <- follow_questionpro[, -c(98:99)]


#Deleting Qualtrics unneeded columns
pre_qualtrics <- pre_qualtrics[, -c(1:10)]
pre_qualtrics <- pre_qualtrics[, -c(109:114)]
pre_qualtrics <- pre_qualtrics[, -c(116:121)]
pre_qualtrics <- pre_qualtrics %>% select(-"Stress.expansion", -"Present.expansion", -"Leave.expansion")
pre_qualtrics <- pre_qualtrics[, -c(2:15)]

post_qualtrics <- post_qualtrics[, -c(1:10)]
post_qualtrics <- post_qualtrics[, -c(104:113)]
post_qualtrics <- post_qualtrics %>% select(-"Stress.expansionPost", -"Present.expansionPos", -"Leave.expansionPost", -"Absent.expansionPost")

follow_qualtrics <- follow_qualtrics[, -c(1:10)]
follow_qualtrics <- follow_qualtrics[, -c(101:106)]
follow_qualtrics <- follow_qualtrics %>% select(-"FollStress.expansion", -"FolPresent.expansion", -"FollwLeave.expansion")

pre_questionpro$Participant.ID <- as.character(pre_questionpro$Participant.ID)
pre_qualtrics$Participant.ID <- as.character(pre_qualtrics$Participant.ID)
post_questionpro$Participant.ID <- as.character(post_questionpro$Participant.ID)
post_qualtrics$Participant.ID <- as.character(post_qualtrics$Participant.ID)
follow_questionpro$Participant.ID <- as.character(follow_questionpro$Participant.ID)
follow_qualtrics$Participant.ID <- as.character(follow_qualtrics$Participant.ID)

#Reducing Qualtrics PHQ and GAD scores by one due to correct for Qualtrics and Questionpro not starting scores at 0
ZeroColumns <- c("PHQ.9_1", "PHQ.9_2", "PHQ.9_3", "PHQ.9_4", "PHQ.9_5", "PHQ.9_6", "PHQ.9_7", "PHQ.9_8", "PHQ.9_9", "GAD.7_1", "GAD.7_2", "GAD.7_3", "GAD.7_4", "GAD.7_5", "GAD.7_6", "GAD.7_7", "Absent", "Stress", "Presenteeism", "Leaveism")
PostZeroColumns <- c("PHQ9_Post_1", "PHQ9_Post_2", "PHQ9_Post_3", "PHQ9_Post_4", "PHQ9_Post_5", "PHQ9_Post_6", "PHQ9_Post_7", "PHQ9_Post_8", "PHQ9_Post_9", "GAD7_Post_1", "GAD7_Post_2", "GAD7_Post_3", "GAD7_Post_4", "GAD7_Post_5", "GAD7_Post_6", "GAD7_Post_7", "Absent_Post", "Stress_Post", "Presenteeism_Post", "Leaveism_Post")
FollowZeroColumns <- c("FollowPHQ_1", "FollowPHQ_2", "FollowPHQ_3", "FollowPHQ_4", "FollowPHQ_5", "FollowPHQ_6", "FollowPHQ_7", "FollowPHQ_8", "FollowPHQ_9", "Follow_GAD_1", "Follow_GAD_2", "Follow_GAD_3", "Follow_GAD_4", "Follow_GAD_5", "Follow_GAD_6", "Follow_GAD_7", "FollowAbsent", "FollowStress", "FollowPresenteeism", "FollowLeaveism")

pre_qualtrics[ZeroColumns] <- pre_qualtrics[ZeroColumns] - 1
pre_questionpro[ZeroColumns] <- pre_questionpro[ZeroColumns] - 1
post_qualtrics[PostZeroColumns] <- post_qualtrics[PostZeroColumns] - 1
post_questionpro[PostZeroColumns] <- post_questionpro[PostZeroColumns] - 1
follow_qualtrics[FollowZeroColumns] <- follow_qualtrics[FollowZeroColumns] - 1
follow_questionpro[FollowZeroColumns] <- follow_questionpro[FollowZeroColumns] - 1


CISR_qualtrics$Participant.ID <- as.character(CISR_qualtrics$Participant.ID)
CISR_questionpro$Participant.ID <- as.character(CISR_questionpro$Participant.ID)

demo_qualtrics <- CISR_qualtrics %>% select(Participant.ID, Age, Gender, Ethnicity, MaritalStatus, Employment, JobRole)
demo_questionpro <- CISR_questionpro %>% select(Participant.ID, Age, Gender, Ethnicity, MaritalStatus, Employment, JobRole)


CISR <- full_join(CISR_qualtrics, CISR_questionpro)

demo_qualtrics$Participant.ID <- as.character(demo_qualtrics$Participant.ID)
demographics <- full_join(demo_qualtrics, demo_questionpro)
demographics$Participant.ID <- as.character(demographics$Participant.ID)

baseline_data_q <- full_join(pre_questionpro, pre_qualtrics)
baseline_demo <- full_join(baseline_data_q, demographics)
baseline_demo <- baseline_demo %>% rename(ID = Participant.ID)
baselineqs <- full_join(baseline_demo, Groups)
postqs <- full_join(post_qualtrics, post_questionpro)
postqs <- postqs %>% rename(ID = Participant.ID)
followqs <- full_join(follow_qualtrics, follow_questionpro)
followqs <- followqs %>% rename(ID = Participant.ID)

```

#Creating working data for pre, post and follow-up
```{r}

# Creating sums of questionnaires
baselineqs$Pre_PHQ <- rowSums(baselineqs[c("PHQ.9_1", "PHQ.9_2", "PHQ.9_3", "PHQ.9_4", "PHQ.9_5", "PHQ.9_6", "PHQ.9_7", "PHQ.9_8", "PHQ.9_9")])
baselineqs$Pre_GAD <- rowSums(baselineqs[c("GAD.7_1", "GAD.7_2", "GAD.7_3", "GAD.7_4", "GAD.7_5", "GAD.7_6", "GAD.7_7")])
baselineqs$Pre_SWEMBS <- rowSums(baselineqs[c("SWEMWBS_1", "SWEMWBS_2", "SWEMWBS_3", "SWEMWBS_4", "SWEMWBS_5", "SWEMWBS_6", "SWEMWBS_7")])
baselineqs$Pre_UWES <- rowSums(baselineqs[c("UWES.9_1", "UWES.9_2", "UWES.9_3", "UWES.9_4", "UWES.9_5", "UWES.9_6", "UWES.9_7", "UWES.9_8", "UWES.9_9")])
baselineqs$Pre_EF <- rowSums(baselineqs[,83:93])
baselineqs$Pre_EWWS <- rowSums(baselineqs[,18:25])
baselineqs$Pre_SRLE <- rowSums(baselineqs[,33:73])


postqs$Post_PHQ <- rowSums(postqs[c("PHQ9_Post_1", "PHQ9_Post_2", "PHQ9_Post_3", "PHQ9_Post_4", "PHQ9_Post_5", "PHQ9_Post_6", "PHQ9_Post_7", "PHQ9_Post_8", "PHQ9_Post_9")])
postqs$Post_GAD <- rowSums(postqs[c("GAD7_Post_1", "GAD7_Post_2", "GAD7_Post_3", "GAD7_Post_4", "GAD7_Post_5", "GAD7_Post_6", "GAD7_Post_7")])
postqs$Post_SWEMBS <- rowSums(postqs[c("SWEMWBS_Post_1", "SWEMWBS_Post_2", "SWEMWBS_Post_3", "SWEMWBS_Post_4", "SWEMWBS_Post_5", "SWEMWBS_Post_6", "SWEMWBS_Post_7")])
postqs$Post_UWES <- rowSums(postqs[c("UWES9_Post_1", "UWES9_Post_2", "UWES9_Post_3", "UWES9_Post_4", "UWES9_Post_5", "UWES9_Post_6", "UWES9_Post_7", "UWES9_Post_8", "UWES9_Post_9")])
postqs$Post_EF <- rowSums(postqs[,83:93])
postqs$Post_EWWS <- rowSums(postqs[,18:25])
postqs$Post_SRLE <- rowSums(postqs[,33:73])


followqs$Follow_PHQ <- rowSums(followqs[c("FollowPHQ_1", "FollowPHQ_2", "FollowPHQ_3", "FollowPHQ_4", "FollowPHQ_5", "FollowPHQ_6", "FollowPHQ_7", "FollowPHQ_8", "FollowPHQ_9")])
followqs$Follow_GAD <- rowSums(followqs[c("Follow_GAD_1", "Follow_GAD_2", "Follow_GAD_3", "Follow_GAD_4", "Follow_GAD_5", "Follow_GAD_6", "Follow_GAD_7")])
followqs$Follow_SWEMBS <- rowSums(followqs[c("FollowSWEMWBS_1", "FollowSWEMWBS_2", "FollowSWEMWBS_3", "FollowSWEMWBS_4", "FollowSWEMWBS_5", "FollowSWEMWBS_6", "FollowSWEMWBS_7")])
followqs$Follow_UWES <- rowSums(followqs[c("FollowUWES.9_1", "FollowUWES.9_2", "FollowUWES.9_3", "FollowUWES.9_4", "FollowUWES.9_5", "FollowUWES.9_6", "FollowUWES.9_7", "FollowUWES.9_8", "FollowUWES.9_9")])
followqs$Follow_EWWS <- rowSums(followqs[,18:25])
followqs$Follow_SRLE <- rowSums(followqs[,33:73])

#Making scales for alpha values
PrePHQ <- data.frame(baselineqs[,9:17])
PreGAD <- data.frame(baselineqs[,2:8])
PreEWWS <- data.frame(baselineqs[,18:25])
PreSWEMWBS <- data.frame(baselineqs[,26:32])
PreUWES <- data.frame(baselineqs[,74:82])
PreEF <- data.frame(baselineqs[,83:93])
PreSRLE <- data.frame(baselineqs[,33:73])
PostPHQ <- data.frame(postqs[,9:17])
PostGAD <- data.frame(postqs[,2:8])
PostSWEMWBS <- data.frame(postqs[,26:32])
PostUWES <- data.frame(postqs[,74:82])
FollowPHQ <- data.frame(followqs[,9:17])
FollowGAD <- data.frame(followqs[,2:8])
FollowSWEMWBS <- data.frame(followqs[,26:32])
FollowUWES <- data.frame(followqs[,74:82])

baseline_questionnaires <- baselineqs %>% select(ID, Group, Age, Gender, Ethnicity,Employment, MaritalStatus, JobRole, Pre_PHQ, Pre_GAD, Pre_SWEMBS, Pre_UWES, Pre_EWWS, Pre_SRLE, WellbeingAgenda, Stress, Presenteeism, Leaveism, Absent)
eeg_baseline_questionnaires <- baselineqs %>% select(ID, Group, Age, Gender, Ethnicity, MaritalStatus, Pre_PHQ, Pre_GAD, Pre_UWES)

post_questionnaires <- postqs %>% select(ID, Post_PHQ, Post_GAD, Post_SWEMBS, Post_UWES, Post_EWWS, Post_SRLE, Stress_Post, Presenteeism_Post, Leaveism_Post, Absent_Post, AmountUsed, NumberUsed)

follow_questionnaires <- followqs %>% select(ID, Follow_PHQ, Follow_GAD, Follow_SWEMBS, Follow_UWES, Follow_EWWS, Follow_SRLE, FollowStress , FollowPresenteeism, FollowLeaveism, FollowAbsent)

prepostqs <- full_join(baseline_questionnaires, post_questionnaires)
fullqs <- full_join(prepostqs, follow_questionnaires)

#write.csv(fullqs, "full_questionnaires.csv", row.names = FALSE)


alpha(PrePHQ)
alpha(PreGAD)
alpha(PreUWES)
alpha(PreEWWS)
alpha(PreSWEMWBS)
alpha(PreEF)
alpha(PreSRLE)
alpha(PostPHQ)
alpha(PostGAD)
alpha(PostSWEMWBS)
alpha(PostUWES)
alpha(FollowPHQ)
alpha(FollowGAD)
alpha(FollowSWEMWBS)
alpha(FollowUWES)
```

# Cleaning behavioural data
```{r}
## ------ important raw OSPAN file and cleaning to get set size information


raw_ospan <- read.csv("OSPAN Raw_191125.csv")
summary_ospan <- read.csv("OSPAN.csv")

# if join for merged ospan isn't working check date variables - or amend to Date type in excel file
raw_ospan$date <- as.Date(raw_ospan$date)
summary_ospan$startdate <- as.Date(summary_ospan$startdate)

merged_ospan <- left_join(raw_ospan, summary_ospan, by = c("date" = "startdate", "time" = "starttime"))

raw_ospan$subject <- merged_ospan$subjectid
raw_ospan$session <- merged_ospan$sessionid

# Identify unmatched rows where the subjectid from OSPAN_full is NA
unmatched_rows <- merged_ospan %>% filter(is.na(subjectid))

# Print out unmatched participants
if (nrow(unmatched_rows) == 0) {
  cat("All participants are matched between raw_data and OSPAN_full.\n")
} else {
  cat("Unmatched participants found:\n")
  print(unmatched_rows %>% select(subject, date, time))
}

# Additionally, check for participants in OSPAN_full that have no match in raw_data
unmatched_OSPAN <- summary_ospan %>%
  anti_join(raw_ospan, by = c("startdate" = "date", "starttime" = "time"))

if (nrow(unmatched_OSPAN) == 0) {
  cat("All participants in OSPAN_full are matched in raw_data.\n")
} else {
  cat("Participants in OSPAN_full with no match in raw_data:\n")
  print(unmatched_OSPAN)
}

# Save the corrected raw_data
write.csv(raw_ospan, "corrected_raw_ospan.csv", row.names = FALSE)

# ---- editing to remove pilot ppts (NA's) and selecting final scores

ospan_data <- raw_ospan %>% filter(subject != "NA",
                                   blockcode == "TestBoth",
                                   trialcode == "letter_feedback") %>%
                            select(subject, session, currentsetsize, totalcorrectletters, totalrecalledsets, ospan) %>% rename(ID = "subject")
ospan_data$ID <- as.character(ospan_data$ID)

ospan_raw_summary <- ospan_data %>% group_by(ID, session) %>%
  slice_tail(n = 1) %>%
  ungroup()
ospan_raw_summary$currentsetsize <- NULL

ospan_wide <- ospan_raw_summary %>% pivot_wider(names_from = session, values_from = c("ospan", "totalcorrectletters", "totalrecalledsets"))
ospan_wide$ID <- as.character(ospan_wide$ID)

ospan_wide <- ospan_wide %>% rename(Pre_OSPAN = "ospan_1",
                             Post_OSPAN = "ospan_2",
                             Follow_OSPAN = "ospan_3",
                             Pre_Raw = "totalcorrectletters_1",
                             Post_Raw = "totalcorrectletters_2",
                             Follow_Raw = "totalcorrectletters_3")

workingdata <- full_join(ospan_wide,fullqs)


ospan_data <- ospan_data %>%
  arrange(ID, session) %>%  
  group_by(ID, session) %>% 
  mutate(correct_letters_this_block = totalcorrectletters - lag(totalcorrectletters, default = 0)) %>% 
  ungroup()
ospan_data$prop_correct_letters <- ospan_data$correct_letters_this_block/ospan_data$currentsetsize

setsize_data <- full_join(ospan_data,fullqs)

ospan_data_eeg <- ospan_wide %>% select(ID, Pre_OSPAN, Post_OSPAN)
eegmeasures <- full_join(eeg_data_qs, ospan_data_eeg)
write.csv(eegmeasures, "eeg_measures.csv", row.names = FALSE)

workingdata$Group <- as.character(workingdata$Group)
workingdata$Group[workingdata$Group == "CBT"] <- "Moodfit"

data <- workingdata[!is.na(workingdata$Pre_PHQ), ]
data$Group <- factor(data$Group, levels = c("Waitlist", "Neuronation", "Moodfit"))


#Creating change stats
data$PHQChange <- data$Post_PHQ - data$Pre_PHQ
data$GADChange <- data$Post_GAD - data$Pre_GAD
data$OSPANChange <- data$Post_OSPAN - data$Pre_OSPAN
data$PHQFollowChange <- data$Follow_PHQ - data$Pre_PHQ
data$GADFollowChange <- data$Follow_GAD - data$Pre_GAD
data$OSPANFollowChange <- data$Follow_OSPAN - data$Pre_OSPAN
data$UWESChange <- data$Post_UWES - data$Pre_UWES
data$SRLEChange <- data$Post_SRLE - data$Pre_SRLE
data$SWEMWBSChange <- data$Post_SWEMBS - data$Pre_SWEMBS
data$SRLEFollowChange <- data$Follow_SRLE - data$Pre_SRLE
data$SWEMWBSFollowChange <- data$Follow_SWEMBS - data$Pre_SWEMBS

write.csv(data, "working_data.csv", row.names = FALSE)

```

# Consort Diagram
```{r}
consortdata <- workingdata %>% mutate(excl = case_when(is.na(Pre_PHQ) ~ "Ineligible"),
                               lost_post = case_when(is.na(Post_PHQ) ~ "Lost at 4-weeks"),
                               lost_follow = case_when(is.na(Follow_PHQ) ~ "Lost at 12-weeks"),
                               randomisation = ifelse(Pre_PHQ != "NA", "randomisation", 0))

orders = c(ID = "Screened",
          excl = "Excluded",
          Group = "Randomised",
          lost_post = "",
          ID = "Completed 4-week follow-up",
          lost_follow = "",
          ID = "Completed 12-week follow-up")

side_box = c("excl", "lost_post", "lost_follow")

p_cons = consort_plot(consortdata,
             orders = orders,
             side_box = side_box,
             allocation = "Group",
             cex = 0.5,
             text_width = 20)
p_cons


# Now generating print ready diagram based off of information above - adapted from consort package guide page

options(txt_gp = gpar(cex = 0.8))

txt1 <- "Recruited (n=240)"
txt1_side <- "Excluded (n=12):\n\u2022 Inelgible (n=12; 5%)"

# supports pipeline operator
g <- add_box(txt = txt1) |>
  add_side_box(txt = txt1_side) |> 
  add_box(txt = "Randomized (n=228)") |> 
  add_split(txt = c("Waitlist (n=75)", "Neuronation (n=74)", "Moodfit (n=79)")) |> 
  add_side_box(txt = c("Dropped out (n=15):\n\u2022 Did not respond to emails \n(n=15; 20%)",
                       "Dropped out (n=20):\n\u2022 Did not respond to emails \n(n=20; 27%)",
                       "Dropped out (n=25):\n\u2022 Did not respond to emails \n(n=25; 27%)")) |> 
  add_box(txt = c("Post-intervention \n(n=60)", "Post-intervention \n(n=54)", "Post-intervention \n(n=54)")) |> 
  add_side_box(txt = c("Dropped out (n=26):\n\u2022 Did not respond to emails \n(n=26; 43%)",
                       "Dropped out (n=21):\n\u2022 Did not respond to emails \n(n=21; 39%)",
                       "Dropped out (n=26):\n\u2022 Did not respond to emails \n(n=26; 48%)")) |>
  add_box(txt = c("Follow-up (n=34)", "Follow-up (n=33)", "Follow-up (n=28)")) |> 
  add_label_box(txt = c("1" = "Screening",
                        "3" = "Randomized",
                        "4" = "Post-intervention \nmeasurement",
                        "5" = "12-week follow-up \nmeasurement"))
gplot <- plot(g, grViz = TRUE) 
g_svg <- export_svg(gplot) 
rsvg_png(charToRaw(g_svg), file = "flowchart.png", width = 2000, height = 1500)

```

# RUN FROM HERE 
#Creating descriptives
```{r}
#data <- read_csv("working_data.csv") # Load cleaned dataset
gender <- data %>% group_by(Group) %>% count(Gender)
gender
gender <- data %>% select(Group, Gender)
gender[gender == ''] <- NA
gender <- na.omit(gender)
chisq.test(table(gender))

ethnicity <- data %>% group_by(Group) %>% count(Ethnicity)
ethnicity


job <- data %>% group_by(Group) %>% count(JobRole)
job
Mstatus <- data %>% group_by(Group) %>% count(MaritalStatus)
Mstatus

nsessions <- data %>% group_by(Group) %>% count(AmountUsed)
nsessions

#Create a binary variable indicating missing data for Post_PHQ and Follow_PHQ data
data$missing_Post <- ifelse(is.na(data$Post_PHQ), 1, 0)
data$missing_Follow <- ifelse(is.na(data$Follow_PHQ), 1, 0)

# Perform logistic regression for missing based on baseline PHQ, GAD, Group
missingmodel_Post <- glm(missing_Post ~ Pre_PHQ + Pre_GAD + Pre_UWES + Age + Pre_OSPAN + Group + Gender, family = binomial(), data = data)
summ(missingmodel_Post, digits = 3)

missingmodel_Follow <- glm(missing_Follow ~ Pre_PHQ + Pre_GAD + Pre_UWES + Group + Gender, family = binomial(), data = data)
summ(missingmodel_Follow, digits = 3)

```
## Difference tests for baseline characteristics
```{r}
agediff <- aov(Age ~ Group, data = data)
summary(agediff)

PHQdiff <- aov(Pre_PHQ ~ Group, data = data)
summary(PHQdiff)

GADdiff <- aov(Pre_GAD ~ Group, data = data)
summary(GADdiff)

UWESdiff <- aov(Pre_UWES ~ Group, data = data)
summary(UWESdiff)

OSPANdiff <- aov(Pre_OSPAN ~ Group, data = data)
summary(OSPANdiff)
```

## Creating Sample characteristics
```{r}
#Creating different dataset based on if they stuck to protocol or not 
data1 <- data %>% filter(AmountUsed != 2 | is.na(AmountUsed))
data2 <- data %>% filter(Pre_PHQ >= 4)

sample_characteristics <- data2 %>% select(Group, Age, Gender, Ethnicity, Employment, JobRole, MaritalStatus, AmountUsed, Stress, Absent, NumberUsed, Pre_PHQ, Pre_GAD, Pre_UWES, Pre_OSPAN, Pre_SWEMBS, Pre_SRLE, Pre_EWWS, Post_PHQ, Post_GAD, Post_UWES, Post_OSPAN, Post_SWEMBS, Post_SRLE, Post_EWWS, Follow_PHQ, Follow_GAD, Follow_UWES, Follow_OSPAN, Follow_SWEMBS, Follow_SRLE, Follow_EWWS)

baselinetable <- sample_characteristics %>%
  tab_cols(Group) %>%
  tab_cells(Age, Pre_PHQ, Post_PHQ, Follow_PHQ, Pre_GAD, Post_GAD, Follow_GAD, Pre_UWES, Post_UWES, Follow_UWES, Pre_OSPAN, Post_OSPAN, Follow_OSPAN, Pre_SWEMBS, Post_SWEMBS, Follow_SWEMBS, Pre_SRLE, Post_SRLE, Follow_SRLE, Pre_EWWS, Post_EWWS, Follow_EWWS, NumberUsed) %>%
  tab_stat_fun(Mean = w_mean, "Std. dev." = w_sd, "Valid N" = w_n, method = list) %>%
  tab_cells(Gender, Ethnicity, Employment, JobRole, MaritalStatus, AmountUsed, Stress, Absent) %>%
  tab_stat_cases(label = "N", total_row_position = "none") %>%
  tab_stat_cpct(label = "%", total_row_position = "none") %>%
  tab_pivot()%>%
  set_caption("Demographics by Group")

print(baselinetable, preview = "docx")


SampleGender <- data %>% count(Gender)
SampleEthnicity <- data %>% count(Ethnicity)
SampleAge <- data %>% summarise(mean(Age, na.rm = TRUE),
                                sd(Age, na.rm = TRUE))

Stressed <- data %>% count(Stress)
TimeOff <- data %>% count(Absent)

SampleAge
SampleGender
SampleEthnicity
Stressed
TimeOff
```

# Research Question 1

## PHQ analysis
```{r}
Long_PHQ <- data %>%
  dplyr::select(ID, Group, Pre_PHQ, Post_PHQ, Follow_PHQ) %>%
  pivot_longer(cols = c(Pre_PHQ, Post_PHQ, Follow_PHQ), names_to = "Timepoint", values_to = "Score")
Long_PHQ$Timepoint <- factor(Long_PHQ$Timepoint, levels = c("Pre_PHQ", "Post_PHQ", "Follow_PHQ"))
Long_PHQ$Group <- factor(Long_PHQ$Group, levels = c("Waitlist", "Neuronation", "Moodfit"))
Long_PHQ <- na.omit(Long_PHQ)

PHQmodel <- lmer(Score ~ Group * Timepoint + (1|ID), data = Long_PHQ)
PHQsummary <-  summ(PHQmodel)
PHQsummary

PHQmodelbase <- lmer(Score ~ (1|ID), data = Long_PHQ)
lrtest(PHQmodel, PHQmodelbase)
car::Anova(PHQmodel, type = "II")


# Extract fixed effects estimates
fixed_effects <- as.data.frame(PHQsummary$coeftable)
fixed_effects <- rownames_to_column(fixed_effects, "Term")

# Get confidence intervals
conf_intervals <- confint(PHQmodel, level = 0.95)
conf_intervals <- as.data.frame(conf_intervals)
conf_intervals <- rownames_to_column(conf_intervals, "Term")

# Merge fixed effects and confidence intervals
PHQ_LMM_Results <- left_join(fixed_effects, conf_intervals, by = "Term")

# Rename columns for clarity
PHQ_LMM_Results <- PHQ_LMM_Results %>%
  rename(
    `B` = 'Est.',
    `SE` = `S.E.`,
    `t` = `t val.`,
    `p` = `p`,
    `CI_lower` = `2.5 %`,
    `CI_upper` = `97.5 %`
  )

phq_table <- rempsyc::nice_table(PHQ_LMM_Results)
print(phq_table, preview = "docx")

# Plotting line graph
PHQLine <- ggplot(Long_PHQ, aes(x = Timepoint, y = Score, 
                     shape = Group,
                     group = Group,
                     linetype = Group)) +
  stat_summary(fun = "mean", geom = "point", size = 3) +
  stat_summary(fun = "mean", geom = "line") +
  stat_summary(fun.data = "mean_se", geom = "errorbar", width = .2) +
  scale_x_discrete(labels = c("Baseline", "4 weeks", "12 weeks")) +
  scale_y_continuous(name = "PHQ-9 Scores") +
  theme_classic()
PHQLine
ggsave("PHQLine.png", plot = PHQLine)

```
### Supplementary - ANCOVA regression
```{r}
PostPHQdata <- data %>% select(ID, Group, Pre_PHQ, Post_PHQ)
PostPHQdata$Group <- factor(PostPHQdata$Group, levels = c("Waitlist", "Neuronation", "Moodfit"))

PostPHQmodel <- lm(Post_PHQ ~ Group + Pre_PHQ, PostPHQdata)
summ(PostPHQmodel, confint = TRUE, digits = 3)

FollowPHQdata <- data %>% select(ID, Group, Pre_PHQ, Follow_PHQ)
FollowPHQdata$Group <- factor(FollowPHQdata$Group, levels = c("Waitlist", "Neuronation", "Moodfit"))

FollowPHQmodel <- lm(Follow_PHQ ~ Group + Pre_PHQ, FollowPHQdata)
summ(FollowPHQmodel, confint = TRUE, digits = 3)

```


## GAD analysis
```{r}
Long_GAD <- data %>%
  dplyr::select(ID, Group, Pre_GAD, Post_GAD, Follow_GAD) %>%
  pivot_longer(cols = c(Pre_GAD, Post_GAD, Follow_GAD), names_to = "Timepoint", values_to = "Score")
Long_GAD$Timepoint <- factor(Long_GAD$Timepoint, levels = c("Pre_GAD", "Post_GAD", "Follow_GAD"))
Long_GAD$Group <- factor(Long_GAD$Group, levels = c("Waitlist", "Neuronation", "Moodfit"))
Long_GAD <- na.omit(Long_GAD)

GADmodel <- lmer(Score ~ Group * Timepoint + (1|ID), data = Long_GAD)
GADsummary <- summary(GADmodel)
confint(GADmodel, level = 0.95)
GADmodelbase <- lmer(Score ~ (1|ID), data = Long_GAD)
lrtest(GADmodel, GADmodelbase)

car::Anova(GADmodel, type = "II")

# Extract fixed effects estimates
fixed_effects <- as.data.frame(GADsummary$coefficients)
fixed_effects <- rownames_to_column(fixed_effects, "Term")

# Get confidence intervals
conf_intervals <- confint(GADmodel, level = 0.95)
conf_intervals <- as.data.frame(conf_intervals)
conf_intervals <- rownames_to_column(conf_intervals, "Term")

# Merge fixed effects and confidence intervals
GAD_LMM_Results <- left_join(fixed_effects, conf_intervals, by = "Term")

# Rename columns for clarity
GAD_LMM_Results <- GAD_LMM_Results %>%
  rename(
    `B` = Estimate,
    `SE` = `Std. Error`,
    `t` = `t value`,
    `p` = `Pr(>|t|)`,
    `CI_lower` = `2.5 %`,
    `CI_upper` = `97.5 %`
  )

gad_table <- rempsyc::nice_table(GAD_LMM_Results)
print(gad_table, preview = "docx")

GADLine <- ggplot(Long_GAD, aes(x = Timepoint, y = Score, 
                     shape = Group,
                     group = Group,
                     linetype = Group)) +
  stat_summary(fun = "mean", geom = "point", size = 3) +
  stat_summary(fun = "mean", geom = "line") +
  stat_summary(fun.data = "mean_se", geom = "errorbar", width = .2) +
  scale_x_discrete(labels = c("Pre", "Post", "Follow")) +
  scale_y_continuous(name = "GAD-7 Score") +
  theme_classic()
GADLine
ggsave("GADLine.png", plot = GADLine)

```

### Supplementary - ANCOVA Regression
```{r}
GADdata <- data %>% select(ID, Group, Pre_GAD, Post_GAD)
GADdata$Group <- factor(GADdata$Group, levels = c("Waitlist", "Neuronation", "Moodfit"))

PostGADmodel <- lm(Post_GAD ~ Group + Pre_GAD, GADdata)
summ(PostGADmodel, confint = TRUE, digits = 3)

FollowGADdata <- data %>% select(ID, Group, Pre_GAD, Follow_GAD)
FollowGADdata$Group <- factor(FollowGADdata$Group, levels = c("Waitlist", "Neuronation", "Moodfit"))

FollowGADmodel <- lm(Follow_GAD ~ Group + Pre_GAD, FollowGADdata)
summ(FollowGADmodel, confint = TRUE, digits = 3)

```


## UWES analysis
```{r}
Long_UWES <- data %>%
  dplyr::select(ID, Group, Pre_UWES, Post_UWES, Follow_UWES) %>%
  pivot_longer(cols = c(Pre_UWES, Post_UWES, Follow_UWES), names_to = "Timepoint", values_to = "Score")
Long_UWES$Timepoint <- factor(Long_UWES$Timepoint, levels = c("Pre_UWES", "Post_UWES", "Follow_UWES"))
Long_UWES$Group <- factor(Long_UWES$Group, levels = c("Waitlist", "Neuronation", "Moodfit"))
Long_UWES <- na.omit(Long_UWES)

UWESmodel <- lmer(Score ~ Group * Timepoint + (1|ID), data = Long_UWES)
UWESsummary <- summary(UWESmodel)
confint(UWESmodel, level = 0.95)


UWESmodelbase <- lmer(Score ~ (1|ID), data = Long_UWES)
lrtest(UWESmodel, UWESmodelbase)
car::Anova(UWESmodel, type = "II")


# Extract fixed effects estimates
fixed_effects <- as.data.frame(UWESsummary$coefficients)
fixed_effects <- rownames_to_column(fixed_effects, "Term")

# Get confidence intervals
conf_intervals <- confint(UWESmodel, level = 0.95)
conf_intervals <- as.data.frame(conf_intervals)
conf_intervals <- rownames_to_column(conf_intervals, "Term")

# Merge fixed effects and confidence intervals
UWES_LMM_Results <- left_join(fixed_effects, conf_intervals, by = "Term")

# Rename columns for clarity
UWES_LMM_Results <- UWES_LMM_Results %>%
  rename(
    `B` = Estimate,
    `SE` = `Std. Error`,
    `t` = `t value`,
    `p` = `Pr(>|t|)`,
    `CI_lower` = `2.5 %`,
    `CI_upper` = `97.5 %`
  )

uwes_table <- rempsyc::nice_table(UWES_LMM_Results)
print(uwes_table, preview = "docx")

UWESLine <- ggplot(Long_UWES, aes(x = Timepoint, y = Score, 
                     shape = Group,
                     group = Group,
                     linetype = Group)) +
  stat_summary(fun = "mean", geom = "point", size = 3) +
  stat_summary(fun = "mean", geom = "line") +
  stat_summary(fun.data = "mean_se", geom = "errorbar", width = .2) +
  scale_x_discrete(labels = c("Pre", "Post", "Follow")) +
  scale_y_continuous(name = "UWES Score") +
  theme_classic()
UWESLine
ggsave("UWESLine.png", plot = UWESLine)

```

### Supplementary ANCOVA
```{r}
UWESdata <- data %>% select(ID, Group, Pre_UWES, Post_UWES)
UWESdata$Group <- factor(UWESdata$Group, levels = c("Waitlist", "Neuronation", "Moodfit"))

PostUWESmodel <- lm(Post_UWES ~ Group + Pre_UWES, UWESdata)
summ(PostUWESmodel, confint = TRUE, digits = 3)


FollowUWESdata <- data %>% select(ID, Group, Pre_UWES, Follow_UWES)
FollowUWESdata$Group <- factor(FollowUWESdata$Group, levels = c("Waitlist", "Neuronation", "Moodfit"))

FollowUWESmodel <- lm(Follow_UWES ~ Group + Pre_UWES, FollowUWESdata)
summ(FollowUWESmodel, confint = TRUE, digits = 3)


```


## Exploring within group changes
```{r}
#------PHQ score changes in groups-------
Long_PHQ_Control <- Long_PHQ %>% filter(Group == "Waitlist")
Long_PHQ_Neuronation <- Long_PHQ %>% filter(Group == "Neuronation")
Long_PHQ_CBT <- Long_PHQ %>% filter(Group == "Moodfit")

ControlPHQ <- lmer(Score ~ Timepoint + (1|ID), data = Long_PHQ_Control)
summary(ControlPHQ)
confint(ControlPHQ, level = 0.95)

NeuronationPHQ <- lmer(Score ~ Timepoint + (1|ID), data = Long_PHQ_Neuronation)
summary(NeuronationPHQ)
confint(NeuronationPHQ, level = 0.95)

CBTPHQ <- lmer(Score ~ Timepoint + (1|ID), data = Long_PHQ_CBT)
summary(CBTPHQ)
confint(CBTPHQ, level = 0.95)


#------- GAD changes within groups --------
Long_GAD_Control <- Long_GAD %>% filter(Group == "Waitlist")
Long_GAD_Neuronation <- Long_GAD %>% filter(Group == "Neuronation")
Long_GAD_CBT <- Long_GAD %>% filter(Group == "Moodfit")

ControlGAD <- lmer(Score ~ Timepoint + (1|ID), data = Long_GAD_Control)
summary(ControlGAD)
confint(ControlGAD, level = 0.95)

NeuronationGAD <- lmer(Score ~ Timepoint + (1|ID), data = Long_GAD_Neuronation)
summary(NeuronationGAD)
confint(NeuronationGAD, level = 0.95)

CBTGAD <- lmer(Score ~ Timepoint + (1|ID), data = Long_GAD_CBT)
summary(CBTGAD)
confint(CBTGAD, level = 0.95)


#------- UWES changes within groups ---------
Long_UWES_Control <- Long_UWES %>% filter(Group == "Waitlist")
Long_UWES_Neuronation <- Long_UWES %>% filter(Group == "Neuronation")
Long_UWES_CBT <- Long_UWES %>% filter(Group == "Moodfit")

ControlUWES <- lmer(Score ~ Timepoint + (1|ID), data = Long_UWES_Control)
summary(ControlUWES)
confint(ControlUWES, level = 0.95)

NeuronationUWES <- lmer(Score ~ Timepoint + (1|ID), data = Long_UWES_Neuronation)
summary(NeuronationUWES)
confint(NeuronationUWES, level = 0.95)

CBTUWES <- lmer(Score ~ Timepoint + (1|ID), data = Long_UWES_CBT)
summary(CBTUWES)
confint(CBTUWES, level = 0.95)


```
## SRLE analysis
```{r}
Long_SRLE <- data %>%
  dplyr::select(ID, Group, Pre_SRLE, Post_SRLE, Follow_SRLE) %>%
  pivot_longer(cols = c(Pre_SRLE, Post_SRLE, Follow_SRLE), names_to = "Timepoint", values_to = "Score")
Long_SRLE$Timepoint <- factor(Long_SRLE$Timepoint, levels = c("Pre_SRLE", "Post_SRLE", "Follow_SRLE"))
Long_SRLE$Group <- factor(Long_SRLE$Group, levels = c("Waitlist", "Neuronation", "Moodfit"))
Long_SRLE <- na.omit(Long_SRLE)

SRLEmodel <- lmer(Score ~ Group * Timepoint + (1|ID), data = Long_SRLE)
SRLEsummary <- summary(SRLEmodel)
car::Anova(SRLEmodel, type = "II")

# Extract fixed effects estimates
fixed_effects <- as.data.frame(SRLEsummary$coefficients)
fixed_effects <- rownames_to_column(fixed_effects, "Term")

# Get confidence intervals
conf_intervals <- confint(SRLEmodel, level = 0.95) %>% as.data.frame() %>% rownames_to_column(var ="Term")

# Merge fixed effects and confidence intervals
SRLE_Results <- left_join(fixed_effects, conf_intervals, by = "Term")

# Rename columns for clarity
SRLE_Results <- SRLE_Results %>%
  rename(
    `B` = Estimate,
    `SE` = `Std. Error`,
    `t` = `t value`,
    `p` = `Pr(>|t|)`,
    `CI_lower` = `2.5 %`,
    `CI_upper` = `97.5 %`
  )

SRLE_table <- rempsyc::nice_table(SRLE_Results)
print(SRLE_table, preview = "docx")

SRLELine <- ggplot(Long_SRLE, aes(x = Timepoint, y = Score, 
                     shape = Group,
                     group = Group,
                     linetype = Group)) +
  stat_summary(fun = "mean", geom = "point", size = 3) +
  stat_summary(fun = "mean", geom = "line") +
  stat_summary(fun.data = "mean_se", geom = "errorbar", width = .2) +
  scale_x_discrete(labels = c("Pre", "Post", "Follow")) +
  scale_y_continuous(name = "SRLE Score") +
  theme_classic()
SRLELine
ggsave("SRLELine.png", plot = SRLELine)

```

## SWEMWBS analysis
```{r}
Long_SWEMBS <- data %>%
  dplyr::select(ID, Group, Pre_SWEMBS, Post_SWEMBS, Follow_SWEMBS) %>%
  pivot_longer(cols = c(Pre_SWEMBS, Post_SWEMBS, Follow_SWEMBS), names_to = "Timepoint", values_to = "Score")
Long_SWEMBS$Timepoint <- factor(Long_SWEMBS$Timepoint, levels = c("Pre_SWEMBS", "Post_SWEMBS", "Follow_SWEMBS"))
Long_SWEMBS$Group <- factor(Long_SWEMBS$Group, levels = c("Waitlist", "Neuronation", "Moodfit"))
Long_SWEMBS <- na.omit(Long_SWEMBS)

SWEMBSmodel <- lmer(Score ~ Group * Timepoint + (1|ID), data = Long_SWEMBS)
SWEMBSsummary <- summary(SWEMBSmodel)

car::Anova(SWEMBSmodel, type = "II")

# Extract fixed effects estimates
fixed_effects <- as.data.frame(SWEMBSsummary$coefficients)
fixed_effects <- rownames_to_column(fixed_effects, "Term")

# Get confidence intervals
conf_intervals <- confint(SWEMBSmodel, level = 0.95) %>% as.data.frame() %>% rownames_to_column(var ="Term")

# Merge fixed effects and confidence intervals
SWEMBS_Results <- left_join(fixed_effects, conf_intervals, by = "Term")

# Rename columns for clarity
SWEMBS_Results <- SWEMBS_Results %>%
  rename(
    `B` = Estimate,
    `SE` = `Std. Error`,
    `t` = `t value`,
    `p` = `Pr(>|t|)`,
    `CI_lower` = `2.5 %`,
    `CI_upper` = `97.5 %`
  )

SWEMBS_table <- rempsyc::nice_table(SWEMBS_Results)
print(SWEMBS_table,preview = "docx")


SWEMBSLine <- ggplot(Long_SWEMBS, aes(x = Timepoint, y = Score, 
                     shape = Group,
                     group = Group,
                     linetype = Group)) +
  stat_summary(fun = "mean", geom = "point", size = 3) +
  stat_summary(fun = "mean", geom = "line") +
  stat_summary(fun.data = "mean_se", geom = "errorbar", width = .2) +
  scale_x_discrete(labels = c("Pre", "Post", "Follow")) +
  scale_y_continuous(name = "SWEMWBS Score") +
  theme_classic()
SWEMBSLine

```

## EWWS analysis
```{r}
Long_EWWS <- data %>%
  dplyr::select(ID, Group, Pre_EWWS, Post_EWWS, Follow_EWWS) %>%
  pivot_longer(cols = c(Pre_EWWS, Post_EWWS, Follow_EWWS), names_to = "Timepoint", values_to = "Score")
Long_EWWS$Timepoint <- factor(Long_EWWS$Timepoint, levels = c("Pre_EWWS", "Post_EWWS", "Follow_EWWS"))
Long_EWWS$Group <- factor(Long_EWWS$Group, levels = c("Waitlist", "Neuronation", "Moodfit"))
Long_EWWS <- na.omit(Long_EWWS)

EWWSmodel <- lmer(Score ~ Group * Timepoint + (1|ID), data = Long_EWWS)
EWWSsummary <- summary(EWWSmodel)

car::Anova(EWWSmodel, type = "II")


 EWWSmodel
# Extract fixed effects estimates
fixed_effects <- as.data.frame(EWWSsummary$coefficients)
fixed_effects <- rownames_to_column(fixed_effects, "Term")

# Get confidence intervals
conf_intervals <- confint(EWWSmodel, level = 0.95) %>% as.data.frame() %>% rownames_to_column(var ="Term")

# Merge fixed effects and confidence intervals
EWWS_Results <- left_join(fixed_effects, conf_intervals, by = "Term")

# Rename columns for clarity
EWWS_Results <- EWWS_Results %>%
  rename(
    `B` = Estimate,
    `SE` = `Std. Error`,
    `t` = `t value`,
    `p` = `Pr(>|t|)`,
    `CI_lower` = `2.5 %`,
    `CI_upper` = `97.5 %`
  )

EWWS_table <- rempsyc::nice_table(EWWS_Results)
print(EWWS_table,preview = "docx")


EWWSLine <- ggplot(Long_EWWS, aes(x = Timepoint, y = Score, 
                     shape = Group,
                     group = Group,
                     linetype = Group)) +
  stat_summary(fun = "mean", geom = "point", size = 3) +
  stat_summary(fun = "mean", geom = "line") +
  stat_summary(fun.data = "mean_se", geom = "errorbar", width = .2) +
  scale_x_discrete(labels = c("Pre", "Post", "Follow")) +
  scale_y_continuous(name = "Score") +
  theme_classic()
EWWSLine

```

## Creating model results table
```{r}
# Merge fixed effects and confidence intervals
results_int <- full_join(PHQ_LMM_Results, GAD_LMM_Results)
# Merge fixed effects and confidence intervals
LMM_results <- full_join(results_int, UWES_LMM_Results)

main_table <- rempsyc::nice_table(LMM_results)
print(main_table, preview = "docx")
```

## Creating secondary model results table
```{r}
# Merge fixed effects and confidence intervals
results_int2 <- full_join(SRLE_Results, SWEMBS_Results)
# Merge fixed effects and confidence intervals
LMM_results_2 <- full_join(results_int2, EWWS_Results)

main_table_2 <- rempsyc::nice_table(LMM_results_2)
print(main_table_2, preview = "docx")
```

# Research Question 2
## PHQ Mediation analyses
```{r}
PHQ_Med <- data %>% dplyr::select(ID, PHQChange, Group, OSPANChange) 
PHQ_Med1 <- na.omit(PHQ_Med)
PHQ_Med1$Group <- as.factor(PHQ_Med1$Group)

# Step 1
PHQ_Med_Model0 <- lm(PHQChange ~ Group, PHQ_Med1)
summ(PHQ_Med_Model0, confint = TRUE, digits = 3)

# Step 2
PHQ_Med_Model_M <- lm(OSPANChange ~ Group, PHQ_Med1)
PHQ_ModelM_tidy <- tidy(PHQ_Med_Model_M) %>% mutate(Model = "PHQ_ModelM")
PHQ_ModelM_conf <- confint(PHQ_Med_Model_M, level = 0.95) %>%
  as.data.frame() %>% rownames_to_column(var = "term")

# Step 3
PHQ_Med_Model <- lm(PHQChange ~ Group + OSPANChange, PHQ_Med1)
PHQ_Model_tidy <- tidy(PHQ_Med_Model) %>% mutate(Model = "PHQ_Model")
PHQ_Model_conf <- confint(PHQ_Med_Model, level = 0.95) %>%
  as.data.frame() %>% rownames_to_column(var = "term")

# Step 4
PHQ_Med_results <- mediation::mediate(PHQ_Med_Model_M, PHQ_Med_Model, treat = 'Group', mediator = 'OSPANChange', boot = TRUE, sims = 5000) 
PHQ_Med_summary <- summary(PHQ_Med_results) 

PHQ_Mediation_results <- PHQ_ModelM_tidy %>%
  left_join(PHQ_ModelM_conf, by = "term") %>%
  rename(conf.low = `2.5 %`, conf.high = `97.5 %`) %>%
  bind_rows(PHQ_Model_tidy %>%
              left_join(PHQ_Model_conf, by = "term") %>%
              rename(conf.low = `2.5 %`, conf.high = `97.5 %`))

PHQ_Med_summary
PHQ_Mediation_results
```


## GAD Mediation analyses

### Neuronation vs Control
```{r}
GAD_Med <- data %>% dplyr::select(ID, GADChange, Group, OSPANChange) 
GAD_Med1 <- na.omit(GAD_Med)
GAD_Med1$Group <- as.factor(GAD_Med1$Group)

# Step 1
GAD_Med_Model0 <- lm(GADChange ~ Group, GAD_Med1)
summ(GAD_Med_Model0, confint = TRUE, digits = 3)

# Step 2
GAD_Med_Model_M <- lm(OSPANChange ~ Group, GAD_Med1)
GAD_ModelM_tidy <- tidy(GAD_Med_Model_M) %>% mutate(Model = "GAD_ModelM")
GAD_ModelM_conf <- as.data.frame(confint(GAD_Med_Model_M, level = 0.95)) %>%
  as.data.frame() %>% rownames_to_column(var = "term") 

# Step 3
GAD_Med_Model <- lm(GADChange ~ Group + OSPANChange, GAD_Med1)
GAD_Model_tidy <- tidy(GAD_Med_Model) %>% mutate(Model = "GAD_Model")
GAD_Model_conf <- as.data.frame(confint(GAD_Med_Model, level = 0.95)) %>%
  as.data.frame() %>% rownames_to_column(var = "term") 

# Step 4
GAD_Med_results <- mediation::mediate(GAD_Med_Model_M, GAD_Med_Model, treat = 'Group', mediator = 'OSPANChange', boot = TRUE, sims = 5000)
GAD_Med_summary <- summary(GAD_Med_results)
GAD_Med_summary
GAD_Mediation_results <- GAD_ModelM_tidy %>%
  left_join(GAD_ModelM_conf, by = "term") %>%
  rename(conf.low = `2.5 %`, conf.high = `97.5 %`) %>%
  bind_rows(GAD_Model_tidy %>%
              left_join(GAD_Model_conf, by = "term") %>%
              rename(conf.low = `2.5 %`, conf.high = `97.5 %`))

```

### Moodfit vs Control
```{r}
GAD_Med <- data %>% filter(Group != "Neuronation") %>% dplyr::select(ID, GADChange, Group, OSPANChange) 
GAD_Med1 <- na.omit(GAD_Med)
GAD_Med1$Group <- as.factor(GAD_Med1$Group)

# Step 1
GAD_Med_Model0 <- lm(GADChange ~ Group, GAD_Med1)
GAD_Model0_tidy <- tidy(GAD_Med_Model0) %>% mutate(Model = "GAD_Model0")

# Step 2
GAD_Med_Model_M <- lm(OSPANChange ~ Group, GAD_Med1)
GAD_ModelM_tidy <- tidy(GAD_Med_Model_M) %>% mutate(Model = "GAD_ModelM")
GAD_ModelM_conf <- as.data.frame(confint(GAD_Med_Model_M, level = 0.95)) %>%
  as.data.frame() %>% rownames_to_column(var = "term") 

# Step 3
GAD_Med_Model <- lm(GADChange ~ Group + OSPANChange, GAD_Med1)
GAD_Model_tidy <- tidy(GAD_Med_Model) %>% mutate(Model = "GAD_Model")
GAD_Model_conf <- as.data.frame(confint(GAD_Med_Model, level = 0.95)) %>%
  as.data.frame() %>% rownames_to_column(var = "term") 

# Step 4
GAD_Med_results_Moodfit <- mediation::mediate(GAD_Med_Model_M, GAD_Med_Model, treat = 'Group', mediator = 'OSPANChange', boot = TRUE, sims = 5000)
GAD_Med_summary_Moodfit <- summary(GAD_Med_results_Moodfit)
GAD_Med_summary_Moodfit
GAD_Mediation_results_Moodfit <- GAD_ModelM_tidy %>%
  left_join(GAD_ModelM_conf, by = "term") %>%
  rename(conf.low = `2.5 %`, conf.high = `97.5 %`) %>%
  bind_rows(GAD_Model_tidy %>%
              left_join(GAD_Model_conf, by = "term") %>%
              rename(conf.low = `2.5 %`, conf.high = `97.5 %`))

```


## UWES Mediation analyses
```{r}
UWES_Med <- data %>% dplyr::select(ID, UWESChange, Group, OSPANChange) 
UWES_Med1 <- na.omit(UWES_Med)
UWES_Med1$Group <- as.factor(UWES_Med1$Group)

# Step 1
UWES_Med_Model0 <- lm(UWESChange ~ Group, UWES_Med1)
summ(UWES_Med_Model0, confint = TRUE, digits = 3)

# Step 2
UWES_Med_Model_M <- lm(OSPANChange ~ Group, UWES_Med1)
UWES_ModelM_tidy <- tidy(UWES_Med_Model_M) %>% mutate(Model = "UWES_ModelM")
UWES_ModelM_conf <- as.data.frame(confint(UWES_Med_Model_M, level = 0.95)) %>%
  as.data.frame() %>% rownames_to_column(var = "term") 

# Step 3
UWES_Med_Model <- lm(UWESChange ~ Group + OSPANChange, UWES_Med1)
UWES_Model_tidy <- tidy(UWES_Med_Model) %>% mutate(Model = "UWES_Model")
UWES_Model_conf <- as.data.frame(confint(UWES_Med_Model, level = 0.95)) %>%
  as.data.frame() %>% rownames_to_column(var = "term") 

# Step 4
UWES_Med_results <- mediation::mediate(UWES_Med_Model_M, UWES_Med_Model, treat = 'Group', mediator = 'OSPANChange', boot = TRUE, sims = 5000)
UWES_Med_summary <- summary(UWES_Med_results)
UWES_Med_summary
UWES_Mediation_results <- UWES_ModelM_tidy %>%
  left_join(UWES_ModelM_conf, by = "term") %>%
  rename(conf.low = `2.5 %`, conf.high = `97.5 %`) %>%
  bind_rows(UWES_Model_tidy %>%
              left_join(UWES_Model_conf, by = "term") %>%
              rename(conf.low = `2.5 %`, conf.high = `97.5 %`))
```
### Moodfit vs Control
```{r}
UWES_Med <- data %>% filter(Group != "Neuronation") %>% dplyr::select(ID, UWESChange, Group, OSPANChange) 
UWES_Med1 <- na.omit(UWES_Med)
UWES_Med1$Group <- as.factor(UWES_Med1$Group)

# Step 1
UWES_Med_Model0 <- lm(UWESChange ~ Group, UWES_Med1)
UWES_Model0_tidy <- tidy(UWES_Med_Model0) %>% mutate(Model = "UWES_Model0")

# Step 2
UWES_Med_Model_M <- lm(OSPANChange ~ Group, UWES_Med1)
UWES_ModelM_tidy <- tidy(UWES_Med_Model_M) %>% mutate(Model = "UWES_ModelM")
UWES_ModelM_conf <- as.data.frame(confint(UWES_Med_Model_M, level = 0.95)) %>%
  as.data.frame() %>% rownames_to_column(var = "term") 

# Step 3
UWES_Med_Model <- lm(UWESChange ~ Group + OSPANChange, UWES_Med1)
UWES_Model_tidy <- tidy(UWES_Med_Model) %>% mutate(Model = "UWES_Model")
UWES_Model_conf <- as.data.frame(confint(UWES_Med_Model, level = 0.95)) %>%
  as.data.frame() %>% rownames_to_column(var = "term") 

# Step 4
UWES_Med_results_Moodfit <- mediation::mediate(UWES_Med_Model_M, UWES_Med_Model, treat = 'Group', mediator = 'OSPANChange', boot = TRUE, sims = 5000)
UWES_Med_summary_Moodfit <- summary(UWES_Med_results_Moodfit)
UWES_Med_summary_Moodfit
UWES_Mediation_results_Moodfit <- UWES_ModelM_tidy %>%
  left_join(UWES_ModelM_conf, by = "term") %>%
  rename(conf.low = `2.5 %`, conf.high = `97.5 %`) %>%
  bind_rows(UWES_Model_tidy %>%
              left_join(UWES_Model_conf, by = "term") %>%
              rename(conf.low = `2.5 %`, conf.high = `97.5 %`))

```
## Creating mediation results table
```{r}

# Combine all results
mediation_results <- bind_rows(PHQ_Mediation_results, GAD_Mediation_results, UWES_Mediation_results) %>% 
  rename('B' = 'estimate',
         'SE' = 'std.error')
         
med_table <- nice_table(mediation_results)
med_table


# Save the Word document
print(med_table, preview = "docx")
```

### Combined training group mediation
```{r}
Combined_Mediation_Data <- data %>% dplyr::select(ID, PHQChange, Group, OSPANChange) 
Combined_Mediation_Data <- Combined_Mediation_Data %>% 
  mutate(Intervention = ifelse(Group != "Waitlist", 1, 0))
Combined_Mediation_Data1 <- na.omit(Combined_Mediation_Data)

#Mediation model
Combined_Mediation_M <- lm(OSPANChange ~ Intervention, Combined_Mediation_Data1)
Combined_Mediation_M_tidy <- tidy(Combined_Mediation_M) %>% mutate(Model = "Combined_Mediation_M")
Combined_Mediation_conf <- confint(Combined_Mediation_M, level = 0.95) %>%
  as.data.frame() %>% rownames_to_column(var = "term")

# Step 3
Combined_Med_Model <- lm(PHQChange ~ Intervention + OSPANChange, Combined_Mediation_Data1)
Combined_Model_tidy <- tidy(Combined_Med_Model) %>% mutate(Model = "Combined_Model")
Combined_Model_conf <- confint(Combined_Med_Model, level = 0.95) %>%
  as.data.frame() %>% rownames_to_column(var = "term")

# Step 4
Combined_Med_results <- mediation::mediate(Combined_Mediation_M, Combined_Med_Model, treat = 'Intervention', mediator = 'OSPANChange', boot = TRUE, sims = 5000) 
Combined_Med_summary <- summary(Combined_Med_results) 

Combined_Mediation_results <- Combined_Mediation_M_tidy %>%
  left_join(Combined_Mediation_conf, by = "term") %>%
  rename(conf.low = `2.5 %`, conf.high = `97.5 %`) %>%
  bind_rows(Combined_Model_tidy %>%
              left_join(Combined_Model_conf, by = "term") %>%
              rename(conf.low = `2.5 %`, conf.high = `97.5 %`))

Combined_Mediation_results
```

# Exploratory Analyses

## Exploring change in absenteeism, presenteeism and stressful events
```{r}
dataAbsent <- data %>% filter(Absent == 0)
dataStress <- data %>% filter(Stress == 1)
dataLeave <- data %>% filter(Leaveism == 0)
dataPresent <- data %>% filter(Leaveism == 0)


#-----Logistic models
AbsentModel <- glm(Absent_Post ~ Group, data = dataAbsent, family = binomial)
LeaveismModel <- glm(Leaveism_Post ~ Group, data = dataLeave, family = binomial)
StressModel <- glm(Stress_Post ~ Group, data = dataStress, family = binomial) 
PresenteeismModel <- glm(Presenteeism_Post ~ Group, data = dataPresent, family = binomial)


summ(AbsentModel, confint = TRUE, exp = TRUE, digits = 3) # 0 =  Absenteeism, 1 = no absenteeism
summ(LeaveismModel, confint = TRUE, exp = TRUE, digits = 3) # 0 =  Leavism, 1 = no leaveism
summ(StressModel, confint = TRUE, exp = TRUE, digits = 3) # 0 = no stressful event, 1 = stressful event occurred
summ(PresenteeismModel, confint = TRUE, exp = TRUE, digits = 3) # 0 =  Presenteeism, 1 = no presenteeism

```


## Determining relationship between OSPAN and PHQ at different set sizes
``` {r}
setsize_data$Group <- factor(setsize_data$Group, levels = c("Waitlist", "Neuronation", "Moodfit"))

setsize_data <- setsize_data %>% group_by(ID, currentsetsize, session) %>%
  mutate(MeanPropCorrect = mean(prop_correct_letters),
         MeanOSPAN = mean(ospan))
# Proportion of correct letters and PHQ at baseline for each set size
baselinesetsize_data3 <- setsize_data %>% filter(session == 1,
                                                currentsetsize == 3)
baselinesetsize_data4 <- setsize_data %>% filter(session == 1,
                                                currentsetsize == 4)
baselinesetsize_data5 <- setsize_data %>% filter(session == 1,
                                                currentsetsize == 5)
baselinesetsize_data6 <- setsize_data %>% filter(session == 1,
                                                currentsetsize == 6)
baselinesetsize_data7 <- setsize_data %>% filter(session == 1,
                                                currentsetsize == 7)


# Function to extract model summaries
extract_model_summary <- function(model, setsize) {
  summary <- tidy(model, conf.int = TRUE)
  summary$setsize <- setsize
  return(summary)
}

# List to store models
models <- list()

# Fit models for each set size and store them
models$setsize3 <- lm(Pre_PHQ ~ prop_correct_letters, data = baselinesetsize_data3)
models$setsize4 <- lm(Pre_PHQ ~ prop_correct_letters, data = baselinesetsize_data4)
models$setsize5 <- lm(Pre_PHQ ~ prop_correct_letters, data = baselinesetsize_data5)
models$setsize6 <- lm(Pre_PHQ ~ prop_correct_letters, data = baselinesetsize_data6)
models$setsize7 <- lm(Pre_PHQ ~ prop_correct_letters, data = baselinesetsize_data7)
model_summaries <- map2_dfr(models, names(models), extract_model_summary)

# combining into one table, then outputting in word
setsizetable <- nice_table(model_summaries)
print(setsizetable, preview = "docx")

# Combining datasets
datasets <- list(baselinesetsize_data3, baselinesetsize_data4, baselinesetsize_data5, baselinesetsize_data6, baselinesetsize_data7)

colors <- c("red", "green", "blue", "purple", "black")

set_size_plot <- ggplot() +
  xlab("Pre PHQ") +
  ylab("Prop Correct Letters") +
  ggtitle("Regression Lines with Coefficients") +
  xlim(4, 25)  # Manually setting x-axis limits

# Loop over each dataset
for (i in 1:length(datasets)) {
  # Fit the model
  model <- lm(prop_correct_letters ~ Pre_PHQ, data = datasets[[i]])
  slope <- coef(model)[2]
  label <- paste0("Slope: ", round(slope, 3))
  set_size_plot <- set_size_plot + 
    geom_smooth(data = datasets[[i]], aes(x = Pre_PHQ, y = prop_correct_letters), 
                method = "lm", se = FALSE, color = colors[i]) +
    annotate("text", 
             x = 0.8, 
             y = max(datasets[[i]]$prop_correct_letters) - 0.05 * i,
             label = label, 
             color = colors[i])
}
set_size_plot


```

## Determining relationship between OSPAN and GAD at different set sizes
``` {r}
# Function to extract model summaries
extract_model_summary <- function(model, setsize) {
  summary <- tidy(model, conf.int = TRUE)
  summary$setsize <- setsize
  return(summary)
}

# List to store models
models <- list()

# Fit models for each set size and store them
models$setsize3 <- lm(Pre_GAD ~ prop_correct_letters, data = baselinesetsize_data3)
models$setsize4 <- lm(Pre_GAD ~ prop_correct_letters, data = baselinesetsize_data4)
models$setsize5 <- lm(Pre_GAD ~ prop_correct_letters, data = baselinesetsize_data5)
models$setsize6 <- lm(Pre_GAD ~ prop_correct_letters, data = baselinesetsize_data6)
models$setsize7 <- lm(Pre_GAD ~ prop_correct_letters, data = baselinesetsize_data7)
model_summaries <- map2_dfr(models, names(models), extract_model_summary)

# combining into one table, then outputting in word
setsizetable <- nice_table(model_summaries)
print(setsizetable, preview = "docx")

# Combining datasets
datasets <- list(baselinesetsize_data3, baselinesetsize_data4, baselinesetsize_data5, baselinesetsize_data6, baselinesetsize_data7)

colors <- c("red", "green", "blue", "purple", "black")

set_size_plot <- ggplot() +
  xlab("Pre GAD") +
  ylab("Prop Correct Letters") +
  ggtitle("Regression Lines with Coefficients") +
  xlim(4, 25)  # Manually setting x-axis limits

# Loop over each dataset
for (i in 1:length(datasets)) {
  # Fit the model
  model <- lm(prop_correct_letters ~ Pre_GAD, data = datasets[[i]])
  slope <- coef(model)[2]
  label <- paste0("Slope: ", round(slope, 3))
  set_size_plot <- set_size_plot + 
    geom_smooth(data = datasets[[i]], aes(x = Pre_GAD, y = prop_correct_letters), 
                method = "lm", se = FALSE, color = colors[i]) +
    annotate("text", 
             x = 0.8, 
             y = max(datasets[[i]]$prop_correct_letters) - 0.05 * i,
             label = label, 
             color = colors[i])
}
set_size_plot


```

# Looking at minimal clinically important differences (reductions of 20%) by group

## PHQ MCID and caseness exploratory analysis
```{r}
# ---- initial data wrangling to create variables for caseness and clinically meaningful change. If greater than 10/8 at Pre-training, Pre_XXX_Severe = 1, if less than that post and follow up then Post/Follow_XXX_Severe = 1
dataMCID <- data %>% select(ID, Group, Pre_PHQ, Post_PHQ, Follow_PHQ, Pre_GAD, Post_GAD, Follow_GAD) %>%
  mutate(Pre_PHQSevere = ifelse(Pre_PHQ >= 10, 1, 0),
                   Pre_GADSevere = ifelse(Pre_GAD >= 8, 1, 0),
                   Post_PHQSevere = ifelse(Post_PHQ < 10, 1, 0),
                   Post_GADSevere = ifelse(Post_GAD < 8, 1, 0),
                   Follow_PHQSevere = ifelse(Follow_PHQ < 10, 1, 0),
                   Follow_GADSevere = ifelse(Follow_GAD < 8, 1, 0),
                   Post_PHQ_Recover = ifelse(Post_PHQSevere == 1 & Follow_PHQSevere == 1, 1, 0))

dataMCID$Post_PcntPHQChange <- ((dataMCID$Post_PHQ - dataMCID$Pre_PHQ)/dataMCID$Pre_PHQ) * 100
dataMCID$Follow_PcntPHQChange <- ((dataMCID$Follow_PHQ - dataMCID$Pre_PHQ)/dataMCID$Pre_PHQ) * 100
dataMCID$Post_PcntGADChange <- ((dataMCID$Post_GAD - dataMCID$Pre_GAD)/dataMCID$Pre_GAD) * 100
dataMCID$Follow_PcntGADChange <- ((dataMCID$Follow_GAD - dataMCID$Pre_GAD)/dataMCID$Pre_GAD) * 100

# noting if ppts had a change of more than 20%
dataMCID <- dataMCID %>% mutate(Post_PHQMCID = ifelse(Post_PcntPHQChange <= -20, 1, 0),
                                Post_GADMCID = ifelse(Post_PcntGADChange <= -20, 1, 0),
                                Follow_PHQMCID = ifelse(Follow_PcntPHQChange <= -20, 1, 0),
                                Follow_GADMCID = ifelse(Follow_PcntGADChange <= -20, 1, 0))


#------ looking at recovery in those who had PHQ >=10 at baseline -------

## -----At post-training----- positive coefficient means more likely to not meet caseness
dataMCID_case <- dataMCID %>% filter(Pre_PHQSevere == 1)

PHQPostCasemodel <- glm(Post_PHQSevere ~ Group, data = dataMCID_case, 
                       family = binomial)
summ(PHQPostCasemodel, confint = TRUE, exp = TRUE, digits = 3)
PostCase_PHQ_model_effects <- ggpredict(PHQPostCasemodel, terms = "Group")
plot(PostCase_PHQ_model_effects)


## ------ At follow-up----- positive coefficient means more likely to not meet caseness
PHQFollowCasemodel <- glm(Follow_PHQSevere ~ Group, data = dataMCID_case, 
                          family = binomial)
summ(PHQFollowCasemodel, confint = TRUE, exp = TRUE, digits = 3)
FollowCase_PHQ_model_effects <- ggpredict(PHQFollowCasemodel, terms = "Group")
plot(FollowCase_PHQ_model_effects)


#---- checking clinically meaningful (20%) reduction in PHQ ----
PHQPostMCIDmodel <- glm(Post_PHQMCID ~ Group, data = dataMCID_case, 
                        family = binomial)
summ(PHQPostMCIDmodel, confint = TRUE, exp = TRUE, digits = 3)
PostMCID_PHQ_model_effects <- ggpredict(PHQPostMCIDmodel, terms = "Group")
plot(PostMCID_PHQ_model_effects)


PHQFollowMCIDmodel <- glm(Follow_PHQMCID ~ Group, data = dataMCID_case, family = binomial, na.action = na.omit)
summ(PHQFollowMCIDmodel, confint = TRUE, exp = TRUE, digits = 3)
FollowMCID_PHQ_model_effects <- ggpredict(PHQFollowMCIDmodel, terms = "Group")
plot(FollowMCID_PHQ_model_effects)

```

## GAD MCID exploratory analysis
```{r}
dataGADMCID_case <- dataMCID %>% filter(Pre_GADSevere == 1)
GADPostCasemodel <- glm(Post_GADSevere ~ Group, data = dataGADMCID_case, 
                       family = binomial)
summ(GADPostCasemodel, confint = TRUE, exp = TRUE, digits = 3)
PostCase_GAD_model_effects <- ggpredict(GADPostCasemodel, terms = "Group")
plot(PostCase_GAD_model_effects)


## ------ At follow-up-----
GADFollowCasemodel <- glm(Follow_GADSevere ~ Group, data = dataGADMCID_case, 
                       family = binomial)
summ(GADFollowCasemodel, confint = TRUE, exp = TRUE, digits = 3)
FollowCase_GAD_model_effects <- ggpredict(GADFollowCasemodel, terms = "Group")
plot(FollowCase_GAD_model_effects)


#-----Percentage change models for MCID-----
GADPostMCIDmodel <- glm(Post_GADMCID ~ Group, data = dataGADMCID_case, family = binomial)
summ(GADPostMCIDmodel, confint = TRUE, exp = TRUE, digits = 3)
PostMCID_GAD_model_effects <- ggpredict(GADPostMCIDmodel, terms = "Group")
plot(PostMCID_GAD_model_effects)


GADFollowMCIDmodel <- glm(Follow_GADMCID ~ Group, data = dataGADMCID_case, family = binomial)
summ(GADFollowMCIDmodel, confint = TRUE, exp = TRUE, digits = 3)
FollowMCID_GAD_model_effects <- ggpredict(GADFollowMCIDmodel, terms = "Group")
plot(FollowMCID_GAD_model_effects)


```

# Applying BH adjustment - RQ1
```{r}
# p-values for RQ1 (Group x Time interactions for symptoms: Neuronation x post; Moodfit x post; Neuronation x Follow-up; Moodfit x Followup)
p_values_h1a <- c(0.242, 0.103, 0.015, 0.164)  # PHQ
p_values_h1b <- c(0.093, 0.116, 0.004, 0.054)  # GAD
p_values_h1c <- c(0.094, 0.020, 0.064, 0.023)  # UWES
p_values_h1ab <- c(0.242, 0.103, 0.015, 0.164, 0.093, 0.116, 0.004, 0.054)

# Adjust for PHQ
adjusted_p_h1a <- p.adjust(p_values_h1a, method = "BH")
print(adjusted_p_h1a)

# Adjust for GAD
adjusted_p_h1b <- p.adjust(p_values_h1b, method = "BH")
print(adjusted_p_h1b)

# Adjust for UWES
adjusted_p_h1c <- p.adjust(p_values_h1c, method = "BH")
print(adjusted_p_h1c)

# Adjusting for depression and anxiety
adjusted_p_h1ab <- p.adjust(p_values_h1ab, method = "BH")
print(adjusted_p_h1ab)
```

# BH adjustment for sensitivity analyses
```{r}
# p-values for sensitivity tests (Ordering: Neuronation post recovery; Moodfit post recovery; Neuronation Follow-up recovery; Moodfit Follow-up recovery; Neuronation post MCID; Moodfit post MCID Neuronation Follow-up MCID; Moodfit Follow-up MCID)
p_values_phq_sensitivity <- c(0.005, 0.019, 0.021, 0.337, 0.023, 0.029, 0.419, 0.797)  # Depression sensitivity p values
p_values_gad_sensitivity <- c(0.009, 0.005, 0.106, 0.083, 0.222, 0.036, 0.656, 0.212)  # Anxiety sensitivity p values


# Adjust for PHQ
adjusted_p_phq_sensitivity <- p.adjust(p_values_phq_sensitivity, method = "BH")
print(adjusted_p_phq_sensitivity)
# Adjust for GAD
adjusted_p_gad_sensitivity <- p.adjust(p_values_gad_sensitivity, method = "BH")
print(adjusted_p_gad_sensitivity)


```

# BH adjustment for exploratory analyses
```{r}
# p-values for exploratory tests (Ordering: Neuronation x post; Moodfit x post; Neuronation x Follow-up; Moodfit x Follow-up)
p_values_srle <- c(0.032, 0.001, 0.009, 0.045)  # stress p values
p_values_swemwbs <- c(0.001, 0.001, 0.006, 0.388) # wellbeing  p values
p_values_EWWS <- c(0.436, 0.938, 0.149, 0.992)  # EWWS  p values

# Adjust for SRLE
adjusted_p_srle <- p.adjust(p_values_srle, method = "BH")
print(adjusted_p_srle)

# Adjust for SWEMWBS
adjusted_p_swemwbs <- p.adjust(p_values_swemwbs, method = "BH")
print(adjusted_p_swemwbs)

# Adjust for EWWS
adjusted_p_EWWS <- p.adjust(p_values_EWWS, method = "BH")
print(adjusted_p_EWWS)

```

# Post hoc sensitivity power
```{r}
set.seed(123)

# --- Your observed sample sizes per Group x Time (edit if needed) ---
n_base   <- c(WAIT = 75, NEURO = 74, MOOD = 79)   # baseline (use your baseline completes if you prefer)
n_post   <- c(WAIT = 60, NEURO = 54, MOOD = 54)
n_follow <- c(WAIT = 34, NEURO = 33, MOOD = 28)

# Build a long design frame with an id per row (id is wave-specific here;
# this mirrors your analysis that treats rows as occasions with a random intercept by person.
# If you have a stable person id across waves, you can map them; for power, this approach is fine.)
make_design <- function(n_base, n_post, n_follow) {
  mk <- function(n, g, t) data.frame(id = seq_len(n), group = g, time = t, stringsAsFactors = FALSE)
  bind_rows(
    mk(n_base["WAIT"],  "WAIT",  "BASE"),
    mk(n_base["NEURO"], "NEURO", "BASE"),
    mk(n_base["MOOD"],  "MOOD",  "BASE"),
    mk(n_post["WAIT"],  "WAIT",  "POST"),
    mk(n_post["NEURO"], "NEURO", "POST"),
    mk(n_post["MOOD"],  "MOOD",  "POST"),
    mk(n_follow["WAIT"],  "WAIT",  "FOLLOW"),
    mk(n_follow["NEURO"], "NEURO", "FOLLOW"),
    mk(n_follow["MOOD"],  "MOOD",  "FOLLOW")
  ) %>%
    mutate(group = factor(group, levels = c("WAIT","NEURO","MOOD")),
           time  = factor(time,  levels = c("BASE","POST","FOLLOW")))
}

design_df <- make_design(n_base, n_post, n_follow)

# ----------------------------
# 2) Pull variance components from your fitted model
# ----------------------------
# Fit your actual LMM to your real data, e.g.:
# fit_phq9 <- lmer(PHQ9 ~ group * time + (1|participant_id), data = your_data, REML = FALSE)

# Then extract:
# sd_subject <- attr(VarCorr(fit_phq9)[["participant_id"]], "stddev")
# sd_resid   <- sigma(fit_phq9)

# For now, placeholders—REPLACE with values from your model:
sd_subject <- 3.93  # e.g., sqrt(random-intercept variance)
sd_resid   <- 3.59  # e.g., residual SD

# ----------------------------
# 3) Set fixed-effect structure for simulation (interpretably in PHQ-9 points)
# ----------------------------
# Intercept: WAIT at BASE
mu_base_WAIT  <- 10
# Baseline group offsets (only differences matter; can leave equal)
mu_base_NEURO <- 10
mu_base_MOOD  <- 10

# Main effects of time (overall improvement common to all groups, adjust if you like)
beta_time_POST   <- -1.0     # everyone gets ~1 point better at post
beta_time_FOLLOW <- -1.5     # net ~1.5 better by follow-up vs baseline

# ----------------------------
# 4) Data generator
# ----------------------------
simulate_data <- function(b_post_neuro, b_follow_neuro, b_post_mood, b_follow_mood,
                          sd_subject, sd_resid, seed = NULL) {
  if (!is.null(seed)) set.seed(seed)
  df <- design_df

  # Model matrix with reference coding for group*time
  X <- model.matrix(~ group * time, data = df)
  # Column order (check with colnames(X)):
  # (Intercept)
  # groupNEURO, groupMOOD, timePOST, timeFOLLOW,
  # groupNEURO:timePOST, groupMOOD:timePOST, groupNEURO:timeFOLLOW, groupMOOD:timeFOLLOW

  beta <- c(
    mu_base_WAIT,                               # Intercept: WAIT at BASE
    mu_base_NEURO - mu_base_WAIT,               # groupNEURO
    mu_base_MOOD  - mu_base_WAIT,               # groupMOOD
    beta_time_POST,                            
    beta_time_FOLLOW,
    b_post_neuro,                                # NEURO:POST (difference-in-differences)
    b_post_mood,                                 # MOOD:POST
    b_follow_neuro,                              # NEURO:FOLLOW
    b_follow_mood                                # MOOD:FOLLOW
  )

  # Random intercept per *row-id* (wave-specific id). For power this is acceptable.
  # If you prefer a persistent person ID across waves, supply it instead.
  id_levels <- interaction(df$group, df$id, drop = TRUE)   # unique within group
  u <- rnorm(nlevels(id_levels), 0, sd_subject)
  re <- u[as.integer(id_levels)]

  eps <- rnorm(nrow(df), 0, sd_resid)
  y   <- as.numeric(X %*% beta) + re + eps

  df$y <- y
  df
}

# ----------------------------
# 5) Fit full and reduced models, run tests
# ----------------------------
fit_models <- function(dat) {
  # Full: group * time
  m_full <- suppressWarnings(lmer(y ~ group * time + (1|id), data = dat, REML = FALSE))
  # Reduced: drop the interaction (omnibus LR test, df = 4)
  m_red  <- suppressWarnings(lmer(y ~ group + time + (1|id), data = dat, REML = FALSE))
  list(full = m_full, red = m_red)
}

lr_pvalue <- function(m_full, m_red) {
  an <- anova(m_red, m_full)  # full vs reduced
  # second row p-value is the LR test
  an$`Pr(>Chisq)`[2]
}

term_pvalue <- function(m_full, term) {
  cf <- summary(m_full)$coefficients
  if (term %in% rownames(cf)) {
    cf[term, "Pr(>|t|)"]
  } else {
    NA_real_
  }
}

# ----------------------------
# 6) Power functions
# ----------------------------
power_omnibus <- function(b, nsim = 1000, alpha = 0.05) {
  # Here we set the same interaction size 'b' for all 4 contrasts; adjust if needed
  sig <- replicate(nsim, {
    dat <- simulate_data(b, b, b, b, sd_subject, sd_resid)
    mods <- fit_models(dat)
    p <- lr_pvalue(mods$full, mods$red)
    is.finite(p) && p < alpha
  })
  mean(sig)
}

power_for_term <- function(b_post_neuro, b_follow_neuro, b_post_mood, b_follow_mood,
                           term = "groupNEURO:timeFOLLOW", nsim = 1000, alpha = 0.05) {
  sig <- replicate(nsim, {
    dat <- simulate_data(b_post_neuro, b_follow_neuro, b_post_mood, b_follow_mood,
                         sd_subject, sd_resid)
    m   <- suppressWarnings(lmer(y ~ group * time + (1|id), data = dat, REML = FALSE))
    p   <- term_pvalue(m, term)
    is.finite(p) && p < alpha
  })
  mean(sig)
}

# ----------------------------
# 7) Example: find the smallest B (PHQ-9 points) that reaches ~80% power
# ----------------------------
grid <- seq(0.2, 5.0, by = 0.5)

# Omnibus Group x Time (LR test, df=4)
powers_omni <- map_dbl(grid, ~ power_omnibus(.x, nsim = 500, alpha = 0.05))
detectable80_omni <- approx(x = powers_omni, y = grid, xout = 0.80)$y

cat("Omnibus Group x Time power across grid:\n")
print(data.frame(B = grid, Power = round(powers_omni, 3)))
cat(sprintf("\nApprox smallest detectable omnibus interaction (>=80%% power): B ≈ %.2f PHQ-9 points\n",
            detectable80_omni))

# Specific contrast examples:
# NEURO at POST (difference-in-differences relative to waitlist)
powers_neuro_post <- map_dbl(grid, ~ power_for_term(
  b_post_neuro   = 0.0,           # set others as needed
  b_follow_neuro = .x,
  b_post_mood    = 0.0,
  b_follow_mood  = 0.0,
  term = "groupNEURO:timePOST",
  nsim = 500
))
detectable80_neuro_post <- approx(x = powers_neuro_post, y = grid, xout = 0.80)$y

cat("\nNEURO:POST power across grid:\n")
print(data.frame(B = grid, Power = round(powers_neuro_post, 3)))
cat(sprintf("\nApprox smallest detectable NEURO:FOLLOW (>=80%% power): B ≈ %.2f PHQ-9 points\n",
            detectable80_neuro_post))

# NEURO at FOLLOW-UP (difference-in-differences relative to waitlist)
powers_neuro_follow <- map_dbl(grid, ~ power_for_term(
  b_post_neuro   = 0.0,           # set others as needed
  b_follow_neuro = .x,
  b_post_mood    = 0.0,
  b_follow_mood  = 0.0,
  term = "groupNEURO:timeFOLLOW",
  nsim = 500
))
detectable80_neuro_follow <- approx(x = powers_neuro_follow, y = grid, xout = 0.80)$y

cat("\nNEURO:FOLLOW power across grid:\n")
print(data.frame(B = grid, Power = round(powers_neuro_follow, 3)))
cat(sprintf("\nApprox smallest detectable NEURO:FOLLOW (>=80%% power): B ≈ %.2f PHQ-9 points\n",
            detectable80_neuro_follow))

# MOODFIT at POST (as another example)
powers_mood_post <- map_dbl(grid, ~ power_for_term(
  b_post_neuro   = 0.0,
  b_follow_neuro = 0.0,
  b_post_mood    = .x,
  b_follow_mood  = 0.0,
  term = "groupMOOD:timePOST",
  nsim = 500
))
detectable80_mood_post <- approx(x = powers_mood_post, y = grid, xout = 0.80)$y

cat("\nMOOD:POST power across grid:\n")
print(data.frame(B = grid, Power = round(powers_mood_post, 3)))
cat(sprintf("\nApprox smallest detectable MOOD:POST (>=80%% power): B ≈ %.2f PHQ-9 points\n",
            detectable80_mood_post))
```